OmniSciDB  5ade3759e0
Executor Class Reference

#include <Execute.h>


Classes

struct  CompilationResult
 
class  ExecutionDispatch
 
class  FetchCacheAnchor
 
struct  FetchResult
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

typedef std::tuple< std::string, const Analyzer::Expr *, int64_t, const size_t > AggInfo
 

Public Member Functions

 Executor (const int db_id, const size_t block_size_x, const size_t grid_size_x, const std::string &debug_dir, const std::string &debug_file, ::QueryRenderer::QueryRenderManager *render_manager)
 
std::shared_ptr< ResultSet > execute (const Planner::RootPlan *root_plan, const Catalog_Namespace::SessionInfo &session, const bool hoist_literals, const ExecutorDeviceType device_type, const ExecutorOptLevel, const bool allow_multifrag, const bool allow_loop_joins, RenderInfo *render_query_data=nullptr)
 
std::shared_ptr< ResultSet > renderPointsNonInSitu (const std::string &queryStr, const ExecutionResult &results, const Catalog_Namespace::SessionInfo &session, const int render_widget_id, const ::QueryRenderer::JSONLocation *data_loc, RenderInfo *render_query_data)
 
std::shared_ptr< ResultSet > renderPointsInSitu (RenderInfo *render_query_data)
 
std::shared_ptr< ResultSet > renderPolygonsNonInSitu (const std::string &queryStr, const ExecutionResult &results, const Catalog_Namespace::SessionInfo &session, const int render_widget_id, const ::QueryRenderer::JSONLocation *data_loc, RenderInfo *render_query_data, const std::string &poly_table_name)
 
std::shared_ptr< ResultSet > renderLinesNonInSitu (const std::string &queryStr, const ExecutionResult &results, const Catalog_Namespace::SessionInfo &session, const int render_widget_id, const ::QueryRenderer::JSONLocation *data_loc, RenderInfo *render_query_data)
 
std::vector< int32_t > getStringIds (const std::string &col_name, const std::vector< std::string > &col_vals, const ::QueryRenderer::QueryDataLayout *query_data_layout, const ResultSet *results, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner, const bool warn=false) const
 
std::vector< std::string > getStringsFromIds (const std::string &col_name, const std::vector< int32_t > &ids, const ::QueryRenderer::QueryDataLayout *query_data_layout, const ResultSet *results, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner) const
 
StringDictionaryProxy * getStringDictionaryProxy (const int dictId, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
const Catalog_Namespace::Catalog * getCatalog () const
 
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner () const
 
const TemporaryTables * getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const int table_id) const
 
const TableGeneration & getTableGeneration (const int table_id) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow () const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr *> &target_exprs) const
 
void registerActiveModule (void *module, const int device_id) const
 
void unregisterActiveModule (void *module, const int device_id) const
 
void interrupt ()
 
void resetInterrupt ()
 

Static Public Member Functions

static std::shared_ptr< Executor > getExecutor (const int db_id, const std::string &debug_dir="", const std::string &debug_file="", const MapDParameters mapd_parameters=MapDParameters(), ::QueryRenderer::QueryRenderManager *render_manager=nullptr)
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static std::pair< int64_t, int32_t > reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void addCodeToCache (const CodeCacheKey &, std::vector< std::tuple< void *, ExecutionEngineWrapper >>, llvm::Module *, CodeCache &)
 

Static Public Attributes

static const size_t high_scan_limit {32000000}
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCount (const ExecutorDeviceType) const
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenWindowFunctionAggregate (const CompilationOptions &co)
 
llvm::BasicBlock * codegenWindowResetStateControlFlow ()
 
void codegenWindowFunctionStateInit (llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
void codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
 
llvm::Value * codegenAggregateWindowState ()
 
llvm::Value * aggregateWindowStatePtr ()
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
void executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb) __attribute__((hot))
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCB &cb)
 Compiles and dispatches a work unit per fragment, processing the results with the per-fragment callback. Currently used for computing metrics over fragments (metadata). More...
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (ExecutionDispatch &execution_dispatch, const std::vector< Analyzer::Expr *> &target_exprs, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (ExecutionDispatch &execution_dispatch) const
 
std::unordered_map< int, const Analyzer::BinOper * > getInnerTabIdToJoinCond () const
 
void dispatchFragments (const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)> dispatch, const ExecutionDispatch &execution_dispatch, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, QueryFragmentDescriptor &fragment_descriptor, std::unordered_set< int > &available_gpus, int &available_cpus)
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments *> &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper *> &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper *> &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments *> &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &)
 
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments *> &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t *>> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr *> &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t *>> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
 
std::vector< int64_t > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
void executeSimpleInsert (const Planner::RootPlan *root_plan)
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
std::tuple< Executor::CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoopbuildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< JoinHashTableInterface > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value *> &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, ExecutorDeviceType device_type)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinHashTableInterface::HashType preferred_hash_type, ColumnCacheMap &column_cache)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< std::pair< void *, void * > > optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function *> &, const CompilationOptions &)
 
std::vector< std::pair< void *, void * > > optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function *> &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int8_t warpSize () const
 
unsigned gridSize () const
 
unsigned blockSize () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, GroupByAndAggregate::DiamondCodegen &, std::stack< llvm::BasicBlock *> &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *val)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
RelAlgExecutionUnit addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit)
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::vector< std::pair< void *, void * > > getCodeFromCache (const CodeCacheKey &, const CodeCache &)
 
void addCodeToCache (const CodeCacheKey &, const std::vector< std::tuple< void *, GpuCompilationContext *>> &, llvm::Module *, CodeCache &)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 

Static Private Member Functions

static ResultSetPtr resultsUnion (ExecutionDispatch &execution_dispatch)
 
static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

std::unique_ptr< CgenState > cgen_state_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::mutex gpu_active_modules_mutex_
 
uint32_t gpu_active_modules_device_mask_
 
void * gpu_active_modules_ [max_gpu_count]
 
bool interrupted_
 
std::shared_ptr< StringDictionaryProxy > lit_str_dict_proxy_
 
std::mutex str_dict_mutex_
 
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
 
CodeCache cpu_code_cache_
 
CodeCache gpu_code_cache_
 
::QueryRenderer::QueryRenderManager * render_manager_
 
const unsigned block_size_x_
 
const unsigned grid_size_x_
 
const std::string debug_dir_
 
const std::string debug_file_
 
const int db_id_
 
const Catalog_Namespace::Catalog * catalog_
 
const TemporaryTables * temporary_tables_
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
StringDictionaryGenerations string_dictionary_generations_
 
TableGenerations table_generations_
 

Static Private Attributes

static const int max_gpu_count {16}
 
static const size_t baseline_threshold
 
static const size_t code_cache_size {10000}
 
static std::map< std::pair< int, ::QueryRenderer::QueryRenderManager * >, std::shared_ptr< Executor > > executors_
 
static std::mutex execute_mutex_
 
static mapd_shared_mutex executors_cache_mutex_
 
static const int32_t ERR_DIV_BY_ZERO {1}
 
static const int32_t ERR_OUT_OF_GPU_MEM {2}
 
static const int32_t ERR_OUT_OF_SLOTS {3}
 
static const int32_t ERR_UNSUPPORTED_SELF_JOIN {4}
 
static const int32_t ERR_OUT_OF_RENDER_MEM {5}
 
static const int32_t ERR_OUT_OF_CPU_MEM {6}
 
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW {7}
 
static const int32_t ERR_SPECULATIVE_TOP_OOM {8}
 
static const int32_t ERR_OUT_OF_TIME {9}
 
static const int32_t ERR_INTERRUPTED {10}
 
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11}
 
static const int32_t ERR_TOO_MANY_LITERALS {12}
 
static const int32_t ERR_STRING_CONST_IN_RESULTSET {13}
 
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14}
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
class OverlapsJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class JoinHashTable
 
class LeafAggregator
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
template<typename META_TYPE_CLASS >
class AggregateReductionEgress
 

Detailed Description

Definition at line 328 of file Execute.h.

Member Typedef Documentation

◆ AggInfo

typedef std::tuple<std::string, const Analyzer::Expr*, int64_t, const size_t> Executor::AggInfo

Definition at line 358 of file Execute.h.
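
Since AggInfo is a plain std::tuple, it is manipulated with the standard tuple accessors. A minimal, self-contained sketch follows; the semantic labels in the comments (aggregate name, target expression, literal argument, index) are assumptions inferred from the element types, not something documented by Execute.h:

#include <string>
#include <tuple>

namespace Analyzer { class Expr; }  // forward declaration, as in the real header

using AggInfo = std::tuple<std::string, const Analyzer::Expr*, int64_t, const size_t>;

int main() {
  AggInfo agg_info{"agg_count", nullptr, 0, size_t(0)};
  const std::string& agg_name = std::get<0>(agg_info);  // "agg_count"
  const int64_t literal_arg = std::get<2>(agg_info);    // 0
  (void)agg_name;
  (void)literal_arg;
  return 0;
}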

Constructor & Destructor Documentation

◆ Executor()

Executor::Executor ( const int  db_id,
const size_t  block_size_x,
const size_t  grid_size_x,
const std::string &  debug_dir,
const std::string &  debug_file,
::QueryRenderer::QueryRenderManager *  render_manager 
)

Definition at line 101 of file Execute.cpp.

References block_size_x_, catalog_, code_cache_size, cpu_code_cache_, db_id_, debug_dir_, debug_file_, gpu_active_modules_device_mask_, gpu_code_cache_, grid_size_x_, input_table_info_cache_, interrupted_, render_manager_, and temporary_tables_.

Referenced by WatchdogException::WatchdogException().

107  : cgen_state_(new CgenState({}, false))
108  , gpu_active_modules_device_mask_(0x0)
109  , interrupted_(false)
110  , cpu_code_cache_(code_cache_size)
111  , gpu_code_cache_(code_cache_size)
112  , render_manager_(render_manager)
113  , block_size_x_(block_size_x)
114  , grid_size_x_(grid_size_x)
115  , debug_dir_(debug_dir)
116  , debug_file_(debug_file)
117  , db_id_(db_id)
118  , catalog_(nullptr)
119  , temporary_tables_(nullptr)
120  , input_table_info_cache_(this) {}
const std::string debug_dir_
Definition: Execute.h:986
static const size_t code_cache_size
Definition: Execute.h:982
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
uint32_t gpu_active_modules_device_mask_
Definition: Execute.h:966
CodeCache gpu_code_cache_
Definition: Execute.h:976
::QueryRenderer::QueryRenderManager * render_manager_
Definition: Execute.h:978
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
const std::string debug_file_
Definition: Execute.h:987
const unsigned block_size_x_
Definition: Execute.h:984
const unsigned grid_size_x_
Definition: Execute.h:985
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:993
CodeCache cpu_code_cache_
Definition: Execute.h:975
bool interrupted_
Definition: Execute.h:968
const int db_id_
Definition: Execute.h:989
const TemporaryTables * temporary_tables_
Definition: Execute.h:991
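Executor instances are normally obtained through the getExecutor() factory (listed above under Static Public Member Functions) rather than constructed directly, since executors are cached per database and render manager. A minimal usage sketch; the helper name and database id are illustrative only:

#include "Execute.h"

// Hedged sketch: getExecutor's default arguments cover debug_dir, debug_file,
// MapDParameters and the render manager, so only a database id is required.
std::shared_ptr<Executor> acquire_executor(const int db_id) {
  return Executor::getExecutor(db_id);
}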

Member Function Documentation

◆ addCodeToCache() [1/2]

static void Executor::addCodeToCache ( const CodeCacheKey & ,
std::vector< std::tuple< void *, ExecutionEngineWrapper >>  ,
llvm::Module *  ,
CodeCache &  
)
static

Referenced by ResultSetReductionJIT::finalizeReductionCode(), and getCodeFromCache().

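In practice the code cache is used as a lookup-then-insert pair around code generation. A schematic sketch follows; getOrGenerateCode is not a real Executor member, the names key, compiled_entries and module are hypothetical, and the exact payload type differs between the CPU overload (ExecutionEngineWrapper tuples) and the GPU overload (GpuCompilationContext tuples):

// Schematic pattern only: check the cache first, compile on a miss, then insert.
std::vector<std::pair<void*, void*>> Executor::getOrGenerateCode(
    const CodeCacheKey& key,
    std::vector<std::tuple<void*, ExecutionEngineWrapper>> compiled_entries,
    llvm::Module* module) {
  auto cached_code = getCodeFromCache(key, cpu_code_cache_);
  if (!cached_code.empty()) {
    return cached_code;  // reuse previously generated native code
  }
  addCodeToCache(key, std::move(compiled_entries), module, cpu_code_cache_);
  return getCodeFromCache(key, cpu_code_cache_);
}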

◆ addCodeToCache() [2/2]

void Executor::addCodeToCache ( const CodeCacheKey & ,
const std::vector< std::tuple< void *, GpuCompilationContext *>> &  ,
llvm::Module *  ,
CodeCache &  
)
private

◆ addDeletedColumn()

RelAlgExecutionUnit Executor::addDeletedColumn ( const RelAlgExecutionUnit &  ra_exe_unit)
private

Definition at line 2905 of file Execute.cpp.

References catalog_, CHECK, Catalog_Namespace::Catalog::getDeletedColumnIfRowsDeleted(), Catalog_Namespace::Catalog::getMetadataForTable(), and TABLE.

Referenced by executeWorkUnitImpl(), and executeWorkUnitPerFragment().

2905  {
2906  auto ra_exe_unit_with_deleted = ra_exe_unit;
2907  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
2908  if (input_table.getSourceType() != InputSourceType::TABLE) {
2909  continue;
2910  }
2911  const auto td = catalog_->getMetadataForTable(input_table.getTableId());
2912  CHECK(td);
2913  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
2914  if (!deleted_cd) {
2915  continue;
2916  }
2917  CHECK(deleted_cd->columnType.is_boolean());
2918  // check deleted column is not already present
2919  bool found = false;
2920  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
2921  if (input_col.get()->getColId() == deleted_cd->columnId &&
2922  input_col.get()->getScanDesc().getTableId() == deleted_cd->tableId &&
2923  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
2924  found = true;
2925  }
2926  }
2927  if (!found) {
2928  // add deleted column
2929  ra_exe_unit_with_deleted.input_col_descs.emplace_back(new InputColDescriptor(
2930  deleted_cd->columnId, deleted_cd->tableId, input_table.getNestLevel()));
2931  }
2932  }
2933  return ra_exe_unit_with_deleted;
2934 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:2202
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
#define CHECK(condition)
Definition: Logger.h:187

◆ addJoinLoopIterator()

llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value *> &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 430 of file IRCodegen.cpp.

References CodeGenerator::cgen_state_, CHECK, and CgenState::scan_idx_to_hash_pos_.

431  {
432  // Iterators are added for loop-outer joins when the head of the loop is generated,
433  // then once again when the body if generated. Allow this instead of special handling
434  // of call sites.
435  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
436  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
437  return it->second;
438  }
439  CHECK(!prev_iters.empty());
440  llvm::Value* matching_row_index = prev_iters.back();
441  const auto it_ok =
442  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
443  CHECK(it_ok.second);
444  return matching_row_index;
445 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
#define CHECK(condition)
Definition: Logger.h:187

◆ aggregateWindowStatePtr()

llvm::Value * Executor::aggregateWindowStatePtr ( )
private

Definition at line 122 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.

122  {
123  const auto window_func_context =
124  WindowProjectNodeContext::getActiveWindowFunctionContext();
125  const auto window_func = window_func_context->getWindowFunction();
126  const auto arg_ti = get_adjusted_window_type_info(window_func);
127  llvm::Type* aggregate_state_type =
128  arg_ti.get_type() == kFLOAT
129  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
130  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
131  const auto aggregate_state_i64 = cgen_state_->llInt(
132  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
133  return cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_i64,
134  aggregate_state_type);
135 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

◆ align()

static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inlinestaticprivate

Definition at line 936 of file Execute.h.

Referenced by serializeLiterals().

936  {
937  size_t off = off_in;
938  if (off % alignment != 0) {
939  off += (alignment - off % alignment);
940  }
941  return off;
942  }
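A worked example of the rounding performed by align(), as used by serializeLiterals() when packing literal bytes; the values below are chosen for illustration and the helper is copied stand-alone so the snippet compiles on its own:

#include <cassert>
#include <cstddef>

// Stand-alone copy of the alignment helper shown above, for illustration only.
size_t align_example(const size_t off_in, const size_t alignment) {
  size_t off = off_in;
  if (off % alignment != 0) {
    off += (alignment - off % alignment);
  }
  return off;
}

int main() {
  assert(align_example(13, 8) == 16);  // rounds up to the next multiple of 8
  assert(align_example(16, 8) == 16);  // already-aligned offsets are unchanged
  assert(align_example(0, 4) == 0);
  return 0;
}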

◆ blockSize()

unsigned Executor::blockSize ( ) const
private

Definition at line 2839 of file Execute.cpp.

References block_size_x_, catalog_, CHECK, Data_Namespace::DataMgr::getCudaMgr(), and Catalog_Namespace::Catalog::getDataMgr().

Referenced by executePlanWithGroupBy(), and executePlanWithoutGroupBy().

2839  {
2840  CHECK(catalog_);
2841  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
2842  CHECK(cuda_mgr);
2843  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
2844  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
2845 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
const unsigned block_size_x_
Definition: Execute.h:984
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:116
#define CHECK(condition)
Definition: Logger.h:187

◆ buildCurrentLevelHashTable()

std::shared_ptr< JoinHashTableInterface > Executor::buildCurrentLevelHashTable ( const JoinCondition &  current_level_join_conditions,
RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 384 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type_, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, INNER, IS_EQUIVALENCE, PlanState::join_info_, JoinHashTableInterface::OneToOne, CodeGenerator::plan_state_, JoinCondition::quals, and JoinCondition::type.

390  {
391  if (current_level_join_conditions.type != JoinType::INNER &&
392  current_level_join_conditions.quals.size() > 1) {
393  fail_reasons.emplace_back("No equijoin expression found for outer join");
394  return nullptr;
395  }
396  std::shared_ptr<JoinHashTableInterface> current_level_hash_table;
397  for (const auto& join_qual : current_level_join_conditions.quals) {
398  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
399  if (!qual_bin_oper || !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
400  fail_reasons.emplace_back("No equijoin expression found");
401  if (current_level_join_conditions.type == JoinType::INNER) {
402  add_qualifier_to_execution_unit(ra_exe_unit, join_qual);
403  }
404  continue;
405  }
406  JoinHashTableOrError hash_table_or_error;
407  if (!current_level_hash_table) {
408  hash_table_or_error = buildHashTableForQualifier(
409  qual_bin_oper,
410  query_infos,
411  co.device_type_ == ExecutorDeviceType::GPU ? MemoryLevel::GPU_LEVEL
412  : MemoryLevel::CPU_LEVEL,
413  JoinHashTableInterface::HashType::OneToOne,
414  column_cache);
415  current_level_hash_table = hash_table_or_error.hash_table;
416  }
417  if (hash_table_or_error.hash_table) {
418  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
419  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
420  } else {
421  fail_reasons.push_back(hash_table_or_error.fail_reason);
422  if (current_level_join_conditions.type == JoinType::INNER) {
423  add_qualifier_to_execution_unit(ra_exe_unit, qual_bin_oper);
424  }
425  }
426  }
427  return current_level_hash_table;
428 }
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:67
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:959
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
Definition: IRCodegen.cpp:174
ExecutorDeviceType device_type_
std::list< std::shared_ptr< Analyzer::Expr > > quals
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinHashTableInterface::HashType preferred_hash_type, ColumnCacheMap &column_cache)
Definition: Execute.cpp:2765

◆ buildHashTableForQualifier()

Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinHashTableInterface::HashType  preferred_hash_type,
ColumnCacheMap &  column_cache 
)
private

Definition at line 2765 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_GT, coalesce_singleton_equi_join(), deviceCountForMemoryLevel(), g_enable_overlaps_hashjoin, OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), and JoinHashTable::getInstance().

2770  {
2771  std::shared_ptr<JoinHashTableInterface> join_hash_table;
2772  const int device_count = deviceCountForMemoryLevel(memory_level);
2773  CHECK_GT(device_count, 0);
2774  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
2775  return {nullptr, "Overlaps hash join disabled, attempting to fall back to loop join"};
2776  }
2777  try {
2778  if (qual_bin_oper->is_overlaps_oper()) {
2779  join_hash_table = OverlapsJoinHashTable::getInstance(
2780  qual_bin_oper, query_infos, memory_level, device_count, column_cache, this);
2781  } else if (dynamic_cast<const Analyzer::ExpressionTuple*>(
2782  qual_bin_oper->get_left_operand())) {
2783  join_hash_table = BaselineJoinHashTable::getInstance(qual_bin_oper,
2784  query_infos,
2785  memory_level,
2786  preferred_hash_type,
2787  device_count,
2788  column_cache,
2789  this);
2790  } else {
2791  try {
2792  join_hash_table = JoinHashTable::getInstance(qual_bin_oper,
2793  query_infos,
2794  memory_level,
2795  preferred_hash_type,
2796  device_count,
2797  column_cache,
2798  this);
2799  } catch (TooManyHashEntries&) {
2800  const auto join_quals = coalesce_singleton_equi_join(qual_bin_oper);
2801  CHECK_EQ(join_quals.size(), size_t(1));
2802  const auto join_qual =
2803  std::dynamic_pointer_cast<Analyzer::BinOper>(join_quals.front());
2804  join_hash_table = BaselineJoinHashTable::getInstance(join_qual,
2805  query_infos,
2806  memory_level,
2807  preferred_hash_type,
2808  device_count,
2809  column_cache,
2810  this);
2811  }
2812  }
2813  CHECK(join_hash_table);
2814  return {join_hash_table, ""};
2815  } catch (const HashJoinFail& e) {
2816  return {nullptr, e.what()};
2817  }
2818  CHECK(false);
2819  return {nullptr, ""};
2820 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
std::list< std::shared_ptr< Analyzer::Expr > > coalesce_singleton_equi_join(const std::shared_ptr< Analyzer::BinOper > &join_qual)
static std::shared_ptr< OverlapsJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const int device_count, ColumnCacheMap &column_map, Executor *executor)
#define CHECK_GT(x, y)
Definition: Logger.h:199
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:85
static std::shared_ptr< JoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:594
#define CHECK(condition)
Definition: Logger.h:187
static std::shared_ptr< BaselineJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_map, Executor *executor)

◆ buildIsDeletedCb()

std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  level_idx,
const CompilationOptions &  co 
)
private

Definition at line 328 of file IRCodegen.cpp.

References CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), CgenState::context_, RelAlgExecutionUnit::input_descs, CgenState::ir_builder_, CgenState::llBool(), CgenState::llInt(), CgenState::row_func_, TABLE, and CodeGenerator::toBool().

330  {
331  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
332  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
333  if (input_desc.getSourceType() != InputSourceType::TABLE) {
334  return nullptr;
335  }
336  const auto td = catalog_->getMetadataForTable(input_desc.getTableId());
337  CHECK(td);
338  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
339  if (!deleted_cd) {
340  return nullptr;
341  }
342  CHECK(deleted_cd->columnType.is_boolean());
343  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
344  input_desc.getTableId(),
345  deleted_cd->columnId,
346  input_desc.getNestLevel());
347  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
348  llvm::Value* have_more_inner_rows) {
349  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
350  // Avoid fetching the deleted column from a position which is not valid.
351  // An invalid position can be returned by a one to one hash lookup (negative)
352  // or at the end of iteration over a set of matching values.
353  llvm::Value* is_valid_it{nullptr};
354  if (have_more_inner_rows) {
355  is_valid_it = have_more_inner_rows;
356  } else {
357  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
358  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
359  }
360  const auto it_valid_bb = llvm::BasicBlock::Create(
361  cgen_state_->context_, "it_valid", cgen_state_->row_func_);
362  const auto it_not_valid_bb = llvm::BasicBlock::Create(
363  cgen_state_->context_, "it_not_valid", cgen_state_->row_func_);
364  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
365  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
366  cgen_state_->context_, "row_is_deleted", cgen_state_->row_func_);
367  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
368  CodeGenerator code_generator(this);
369  const auto row_is_deleted = code_generator.toBool(
370  code_generator.codegen(deleted_expr.get(), true, co).front());
371  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
372  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
373  const auto row_is_deleted_default = cgen_state_->llBool(false);
374  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
375  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
376  auto row_is_deleted_or_default =
377  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
378  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
379  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
380  return row_is_deleted_or_default;
381  };
382 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:2202
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
const std::vector< InputDescriptor > input_descs
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
#define CHECK_LT(x, y)
Definition: Logger.h:197
#define CHECK(condition)
Definition: Logger.h:187
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value *> &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:430

◆ buildJoinLoops()

std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const ExecutionOptions &  eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache 
)
private

Definition at line 206 of file IRCodegen.cpp.

References CodeGenerator::cgen_state_, CHECK, anonymous_namespace{IRCodegen.cpp}::check_if_loop_join_is_allowed(), CHECK_LT, CodeGenerator::codegen(), get_arg_by_name(), INJECT_TIMER, CgenState::ir_builder_, join(), RelAlgExecutionUnit::join_quals, LEFT, CgenState::llBool(), CgenState::llInt(), JoinHashTableInterface::OneToOne, CgenState::outer_join_match_found_per_level_, CgenState::row_func_, Set, Singleton, CodeGenerator::toBool(), UpperBound, and VLOG.

211  {
213  std::vector<JoinLoop> join_loops;
214  for (size_t level_idx = 0, current_hash_table_idx = 0;
215  level_idx < ra_exe_unit.join_quals.size();
216  ++level_idx) {
217  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
218  std::vector<std::string> fail_reasons;
219  const auto current_level_hash_table =
220  buildCurrentLevelHashTable(current_level_join_conditions,
221  ra_exe_unit,
222  co,
223  query_infos,
224  column_cache,
225  fail_reasons);
226  const auto found_outer_join_matches_cb =
227  [this, level_idx](llvm::Value* found_outer_join_matches) {
228  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
229  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
230  cgen_state_->outer_join_match_found_per_level_[level_idx] =
231  found_outer_join_matches;
232  };
233  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
234  if (current_level_hash_table) {
235  if (current_level_hash_table->getHashType() == JoinHashTable::HashType::OneToOne) {
236  join_loops.emplace_back(
238  current_level_join_conditions.type,
239  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
240  const std::vector<llvm::Value*>& prev_iters) {
241  addJoinLoopIterator(prev_iters, level_idx);
242  JoinLoopDomain domain{{0}};
243  domain.slot_lookup_result =
244  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
245  return domain;
246  },
247  nullptr,
248  current_level_join_conditions.type == JoinType::LEFT
249  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
250  : nullptr,
251  is_deleted_cb);
252  } else {
253  join_loops.emplace_back(
255  current_level_join_conditions.type,
256  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
257  const std::vector<llvm::Value*>& prev_iters) {
258  addJoinLoopIterator(prev_iters, level_idx);
259  JoinLoopDomain domain{{0}};
260  const auto matching_set = current_level_hash_table->codegenMatchingSet(
261  co, current_hash_table_idx);
262  domain.values_buffer = matching_set.elements;
263  domain.element_count = matching_set.count;
264  return domain;
265  },
266  nullptr,
267  current_level_join_conditions.type == JoinType::LEFT
268  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
269  : nullptr,
270  is_deleted_cb);
271  }
272  ++current_hash_table_idx;
273  } else {
274  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
275  ? "No equijoin expression found"
276  : boost::algorithm::join(fail_reasons, " | ");
278  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
279  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
280  // condition.
281  VLOG(1) << "Unable to build hash table, falling back to loop join: "
282  << fail_reasons_str;
283  const auto outer_join_condition_cb =
284  [this, level_idx, &co, &current_level_join_conditions](
285  const std::vector<llvm::Value*>& prev_iters) {
286  // The values generated for the match path don't dominate all uses
287  // since on the non-match path nulls are generated. Reset the cache
288  // once the condition is generated to avoid incorrect reuse.
289  FetchCacheAnchor anchor(cgen_state_.get());
290  addJoinLoopIterator(prev_iters, level_idx + 1);
291  llvm::Value* left_join_cond = cgen_state_->llBool(true);
292  CodeGenerator code_generator(this);
293  for (auto expr : current_level_join_conditions.quals) {
294  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
295  left_join_cond,
296  code_generator.toBool(
297  code_generator.codegen(expr.get(), true, co).front()));
298  }
299  return left_join_cond;
300  };
301  join_loops.emplace_back(
303  current_level_join_conditions.type,
304  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
305  addJoinLoopIterator(prev_iters, level_idx);
306  JoinLoopDomain domain{{0}};
307  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
308  get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan"),
309  cgen_state_->llInt(int32_t(level_idx + 1)));
310  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(rows_per_scan_ptr,
311  "num_rows_per_scan");
312  return domain;
313  },
314  current_level_join_conditions.type == JoinType::LEFT
315  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
316  outer_join_condition_cb)
317  : nullptr,
318  current_level_join_conditions.type == JoinType::LEFT
319  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
320  : nullptr,
321  is_deleted_cb);
322  }
323  }
324  return join_loops;
325 }
std::string join(T const &container, std::string const &delim)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:114
#define INJECT_TIMER(DESC)
Definition: measure.h:91
const JoinQualsPerNestingLevel join_quals
#define CHECK_LT(x, y)
Definition: Logger.h:197
#define CHECK(condition)
Definition: Logger.h:187
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
Definition: IRCodegen.cpp:184
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:206
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:328
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value *> &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:430
#define VLOG(n)
Definition: Logger.h:277
std::shared_ptr< JoinHashTableInterface > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:384

◆ buildSelectedFragsMapping()

void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList &  selected_fragments,
const RelAlgExecutionUnit &  ra_exe_unit 
)
private

Definition at line 1989 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LT, getFragmentCount(), RelAlgExecutionUnit::input_descs, and plan_state_.

Referenced by fetchChunks().

1994  {
1995  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
1996  size_t frag_pos{0};
1997  const auto& input_descs = ra_exe_unit.input_descs;
1998  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
1999  const int table_id = input_descs[scan_idx].getTableId();
2000  CHECK_EQ(selected_fragments[scan_idx].table_id, table_id);
2001  selected_fragments_crossjoin.push_back(
2002  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
2003  for (const auto& col_id : col_global_ids) {
2004  CHECK(col_id);
2005  const auto& input_desc = col_id->getScanDesc();
2006  if (input_desc.getTableId() != table_id ||
2007  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
2008  continue;
2009  }
2010  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
2011  CHECK(it != plan_state_->global_to_local_col_ids_.end());
2012  CHECK_LT(static_cast<size_t>(it->second),
2013  plan_state_->global_to_local_col_ids_.size());
2014  local_col_to_frag_pos[it->second] = frag_pos;
2015  }
2016  ++frag_pos;
2017  }
2018 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
const std::vector< InputDescriptor > input_descs
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:959
#define CHECK_LT(x, y)
Definition: Logger.h:197
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1975
#define CHECK(condition)
Definition: Logger.h:187

◆ castToFP()

llvm::Value * Executor::castToFP ( llvm::Value *  val)
private

Definition at line 2855 of file Execute.cpp.

References cgen_state_, and CHECK.

2855  {
2856  if (!val->getType()->isIntegerTy()) {
2857  return val;
2858  }
2859 
2860  auto val_width = static_cast<llvm::IntegerType*>(val->getType())->getBitWidth();
2861  llvm::Type* dest_ty{nullptr};
2862  switch (val_width) {
2863  case 32:
2864  dest_ty = llvm::Type::getFloatTy(cgen_state_->context_);
2865  break;
2866  case 64:
2867  dest_ty = llvm::Type::getDoubleTy(cgen_state_->context_);
2868  break;
2869  default:
2870  CHECK(false);
2871  }
2872  return cgen_state_->ir_builder_.CreateSIToFP(val, dest_ty);
2873 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
#define CHECK(condition)
Definition: Logger.h:187
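
For reference, the same signed-integer-to-floating-point conversion can be produced with a bare llvm::IRBuilder outside of the Executor. A minimal sketch mirroring what castToFP emits for a 64-bit integer input; this is plain LLVM API, not OmniSciDB code:

#include <llvm/IR/IRBuilder.h>

// Emit a signed int -> double conversion at the builder's current insert point.
llvm::Value* cast_i64_to_double(llvm::IRBuilder<>& builder, llvm::Value* val) {
  return builder.CreateSIToFP(val, llvm::Type::getDoubleTy(builder.getContext()));
}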

◆ castToIntPtrTyIn()

llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 2875 of file Execute.cpp.

References cgen_state_, CHECK, CHECK_LT, and get_int_type().

2875  {
2876  CHECK(val->getType()->isPointerTy());
2877 
2878  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
2879  const auto val_type = val_ptr_type->getElementType();
2880  size_t val_width = 0;
2881  if (val_type->isIntegerTy()) {
2882  val_width = val_type->getIntegerBitWidth();
2883  } else {
2884  if (val_type->isFloatTy()) {
2885  val_width = 32;
2886  } else {
2887  CHECK(val_type->isDoubleTy());
2888  val_width = 64;
2889  }
2890  }
2891  CHECK_LT(size_t(0), val_width);
2892  if (bitWidth == val_width) {
2893  return val;
2894  }
2895  return cgen_state_->ir_builder_.CreateBitCast(
2896  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
2897 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define CHECK_LT(x, y)
Definition: Logger.h:197
#define CHECK(condition)
Definition: Logger.h:187

◆ clearMemory()

void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 155 of file Execute.cpp.

References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, execute_mutex_, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by MapDHandler::clear_cpu_memory(), MapDHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

155  {
156  switch (memory_level) {
157  case Data_Namespace::MemoryLevel::CPU_LEVEL:
158  case Data_Namespace::MemoryLevel::GPU_LEVEL: {
159  std::lock_guard<std::mutex> flush_lock(
160  execute_mutex_); // Don't flush memory while queries are running
161 
162  Catalog_Namespace::SysCatalog::instance().getDataMgr().clearMemory(memory_level);
163  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
164  // The hash table cache uses CPU memory not managed by the buffer manager. In the
165  // future, we should manage these allocations with the buffer manager directly.
166  // For now, assume the user wants to purge the hash table cache when they clear
167  // CPU memory (currently used in ExecuteTest to lower memory pressure)
168  JoinHashTableCacheInvalidator::invalidateCaches();
169  }
170  break;
171  }
172  default: {
173  throw std::runtime_error(
174  "Clearing memory levels other than the CPU level or GPU level is not "
175  "supported.");
176  }
177  }
178 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:173
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:300
static void invalidateCaches()
static SysCatalog & instance()
Definition: SysCatalog.h:240
static std::mutex execute_mutex_
Definition: Execute.h:1001
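A minimal usage sketch (CPU_LEVEL case; the GPU_LEVEL call is analogous); the wrapper function name is illustrative only:

#include "Execute.h"

// Flushes buffer-pool memory at the requested level; for CPU_LEVEL this also
// invalidates the join hash table cache, as noted in the listing above.
void flush_cpu_memory() {
  Executor::clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);
}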

◆ clearMetaInfoCache()

void Executor::clearMetaInfoCache ( )
private

Definition at line 319 of file Execute.cpp.

References agg_col_range_cache_, StringDictionaryGenerations::clear(), TableGenerations::clear(), AggregatedColRange::clear(), InputTableInfoCache::clear(), input_table_info_cache_, string_dictionary_generations_, and table_generations_.

319  {
320  input_table_info_cache_.clear();
321  agg_col_range_cache_.clear();
322  string_dictionary_generations_.clear();
323  table_generations_.clear();
324  }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:994
StringDictionaryGenerations string_dictionary_generations_
Definition: Execute.h:995
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:993
TableGenerations table_generations_
Definition: Execute.h:996

◆ codegenAggregateWindowState()

llvm::Value * Executor::codegenAggregateWindowState ( )
private

Definition at line 318 of file WindowFunctionIR.cpp.

References AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.

318  {
319  const auto pi32_type =
320  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
321  const auto pi64_type =
322  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
323  const auto window_func_context =
324  WindowProjectNodeContext::getActiveWindowFunctionContext();
325  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
326  const auto window_func_ti = get_adjusted_window_type_info(window_func);
327  const auto aggregate_state_type =
328  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
329  auto aggregate_state = aggregateWindowStatePtr();
330  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
331  const auto aggregate_state_count_i64 = cgen_state_->llInt(
332  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
333  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
334  aggregate_state_count_i64, aggregate_state_type);
335  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
336  switch (window_func_ti.get_type()) {
337  case kFLOAT: {
338  return cgen_state_->emitCall(
339  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
340  }
341  case kDOUBLE: {
342  return cgen_state_->emitCall(
343  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
344  }
345  case kDECIMAL: {
346  return cgen_state_->emitCall(
347  "load_avg_decimal",
348  {aggregate_state,
349  aggregate_state_count,
350  double_null_lv,
351  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
352  }
353  default: {
354  return cgen_state_->emitCall(
355  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
356  }
357  }
358  }
359  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
360  return cgen_state_->ir_builder_.CreateLoad(aggregate_state);
361  }
362  switch (window_func_ti.get_type()) {
363  case kFLOAT: {
364  return cgen_state_->emitCall("load_float", {aggregate_state});
365  }
366  case kDOUBLE: {
367  return cgen_state_->emitCall("load_double", {aggregate_state});
368  }
369  default: {
370  return cgen_state_->ir_builder_.CreateLoad(aggregate_state);
371  }
372  }
373 }
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:823
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:1339
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

◆ codegenJoinLoops()

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit &  ra_exe_unit,
GroupByAndAggregate &  group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const ExecutionOptions &  eo 
)
private

Definition at line 447 of file IRCodegen.cpp.

References CodeGenerator::cgen_state_, JoinLoop::codegen(), CgenState::context_, CompilationOptions::device_type_, CgenState::ir_builder_, CgenState::llInt(), CgenState::needs_error_check_, CodeGenerator::posArg(), CgenState::row_func_, and ExecutionOptions::with_dynamic_watchdog.

454  {
455  const auto exit_bb =
456  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->row_func_);
457  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
458  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
459  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
460  CodeGenerator code_generator(this);
461  const auto loops_entry_bb = JoinLoop::codegen(
462  join_loops,
463  [this,
464  query_func,
465  &query_mem_desc,
466  &co,
467  &eo,
468  &group_by_and_aggregate,
469  &join_loops,
470  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
471  addJoinLoopIterator(prev_iters, join_loops.size());
472  auto& builder = cgen_state_->ir_builder_;
473  const auto loop_body_bb = llvm::BasicBlock::Create(
474  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
475  builder.SetInsertPoint(loop_body_bb);
476  const bool can_return_error =
477  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
478  if (can_return_error || cgen_state_->needs_error_check_ ||
479  eo.with_dynamic_watchdog) {
480  createErrorCheckControlFlow(
481  query_func, eo.with_dynamic_watchdog, co.device_type_);
482  }
483  return loop_body_bb;
484  },
485  code_generator.posArg(nullptr),
486  exit_bb,
487  cgen_state_->ir_builder_);
488  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
489  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
490 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value *> &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, llvm::IRBuilder<> &builder)
Definition: JoinLoop.cpp:45
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const bool with_dynamic_watchdog
ExecutorDeviceType device_type_
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, ExecutorDeviceType device_type)
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value *> &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:430
+ Here is the call graph for this function:
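
JoinLoop::codegen takes the loop descriptors plus a callback that emits the innermost loop body, and stitches that body into the nested loop structure it builds for each join level. A rough standalone analogue of that inversion of control, using plain recursion instead of IR emission (codegen_nested_loops and BodyFn are illustrative names, not the OmniSciDB API):

#include <functional>
#include <iostream>
#include <vector>

// Each "join loop" is modelled as just an iteration count here; the real code
// emits LLVM basic blocks instead of running the loops directly.
using BodyFn = std::function<void(const std::vector<int>& iterators)>;

void codegen_nested_loops(const std::vector<int>& loop_counts,
                          const BodyFn& body,
                          std::vector<int>& iterators,
                          size_t level = 0) {
  if (level == loop_counts.size()) {
    body(iterators);  // innermost body, supplied by the caller as a callback
    return;
  }
  for (int i = 0; i < loop_counts[level]; ++i) {
    iterators[level] = i;
    codegen_nested_loops(loop_counts, body, iterators, level + 1);
  }
}

int main() {
  std::vector<int> loop_counts{2, 3};  // two join levels
  std::vector<int> iterators(loop_counts.size());
  codegen_nested_loops(
      loop_counts,
      [](const std::vector<int>& iters) {
        std::cout << "body at (" << iters[0] << ", " << iters[1] << ")\n";
      },
      iterators);
}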

◆ codegenSkipDeletedOuterTableRow()

llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 1798 of file NativeCodegen.cpp.

References CHECK, RelAlgExecutionUnit::input_descs, and TABLE.

1800  {
1801  CHECK(!ra_exe_unit.input_descs.empty());
1802  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
1803  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
1804  return nullptr;
1805  }
1806  const auto td = catalog_->getMetadataForTable(outer_input_desc.getTableId());
1807  CHECK(td);
1808  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
1809  if (!deleted_cd) {
1810  return nullptr;
1811  }
1812  CHECK(deleted_cd->columnType.is_boolean());
1813  const auto deleted_expr =
1814  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
1815  outer_input_desc.getTableId(),
1816  deleted_cd->columnId,
1817  outer_input_desc.getNestLevel());
1818  CodeGenerator code_generator(this);
1819  const auto is_deleted =
1820  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
1821  const auto is_deleted_bb = llvm::BasicBlock::Create(
1822  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
1823  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
1824  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
1825  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
1826  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
1827  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1828  cgen_state_->ir_builder_.SetInsertPoint(bb);
1829  return bb;
1830 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:2202
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
const std::vector< InputDescriptor > input_descs
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
#define CHECK(condition)
Definition: Logger.h:187
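
The generated branch sends deleted rows straight to a block that returns 0, so the code emitted after the returned is_not_deleted block only ever sees live rows. An interpreter-level sketch of that control flow (process_row_sketch is illustrative, not emitted code):

#include <iostream>
#include <vector>

// Rendering of the is_deleted / is_not_deleted blocks: a deleted row exits
// the row function immediately with status 0.
int process_row_sketch(const bool is_deleted) {
  if (is_deleted) {
    return 0;  // is_deleted block: CreateRet(llInt(0))
  }
  // is_not_deleted block: the remaining filter/aggregate code is emitted here.
  std::cout << "row survives the delete filter\n";
  return 0;
}

int main() {
  const std::vector<bool> deleted_col{false, true, false};
  for (const bool d : deleted_col) {
    process_row_sketch(d);
  }
}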

◆ codegenWindowAvgEpilogue()

void Executor::codegenWindowAvgEpilogue ( llvm::Value *  crt_val,
llvm::Value *  window_func_null_val,
llvm::Value *  multiplicity_lv 
)
private

Definition at line 282 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

284  {
285  const auto window_func_context =
286  WindowProjectNodeContext::getActiveWindowFunctionContext();
287  const auto window_func = window_func_context->getWindowFunction();
288  const auto window_func_ti = get_adjusted_window_type_info(window_func);
289  const auto pi32_type =
290  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
291  const auto pi64_type =
292  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
293  const auto aggregate_state_type =
294  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
295  const auto aggregate_state_count_i64 = cgen_state_->llInt(
296  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
297  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
298  aggregate_state_count_i64, aggregate_state_type);
299  std::string agg_count_func_name = "agg_count";
300  switch (window_func_ti.get_type()) {
301  case kFLOAT: {
302  agg_count_func_name += "_float";
303  break;
304  }
305  case kDOUBLE: {
306  agg_count_func_name += "_double";
307  break;
308  }
309  default: {
310  break;
311  }
312  }
313  agg_count_func_name += "_skip_val";
314  cgen_state_->emitCall(agg_count_func_name,
315  {aggregate_state_count, crt_val, window_func_null_val});
316 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)
+ Here is the call graph for this function:
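
The agg_count_*_skip_val call emitted here is assumed to increment the AVG denominator only when the current value differs from the null sentinel; a standalone sketch of that behaviour (the function below is a stand-in, not the runtime implementation):

#include <cstdint>
#include <iostream>

// Illustrative stand-in for the *_skip_val count helper: null values do not
// contribute to the AVG denominator.
void agg_count_double_skip_val_sketch(int64_t* count_state,
                                      const double val,
                                      const double skip_val) {
  if (val != skip_val) {
    ++*count_state;
  }
}

int main() {
  int64_t count = 0;
  const double null_sentinel = -1.0;
  for (double v : {3.0, null_sentinel, 5.0}) {
    agg_count_double_skip_val_sketch(&count, v, null_sentinel);
  }
  std::cout << count << '\n';  // 2: the null row is skipped
}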

◆ codegenWindowFunction()

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 21 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run-benchmark-import::args, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), COUNT, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAST_VALUE, LEAD, LOG, MAX, MIN, NTILE, PERCENT_RANK, CodeGenerator::posArg(), RANK, ROW_NUMBER, and SUM.

22  {
23  CodeGenerator code_generator(this);
24  const auto window_func_context =
25  WindowProjectNodeContext::get()->activateWindowFunctionContext(target_index);
26  const auto window_func = window_func_context->getWindowFunction();
27  switch (window_func->getKind()) {
28  case SqlWindowFunctionKind::ROW_NUMBER:
29  case SqlWindowFunctionKind::RANK:
30  case SqlWindowFunctionKind::DENSE_RANK:
31  case SqlWindowFunctionKind::NTILE: {
32  return cgen_state_->emitCall("row_number_window_func",
33  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
34  window_func_context->output())),
35  code_generator.posArg(nullptr)});
36  }
37  case SqlWindowFunctionKind::PERCENT_RANK:
38  case SqlWindowFunctionKind::CUME_DIST: {
39  return cgen_state_->emitCall("percent_window_func",
40  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
41  window_func_context->output())),
42  code_generator.posArg(nullptr)});
43  }
44  case SqlWindowFunctionKind::LAG:
45  case SqlWindowFunctionKind::LEAD:
46  case SqlWindowFunctionKind::FIRST_VALUE:
47  case SqlWindowFunctionKind::LAST_VALUE: {
48  CHECK(WindowProjectNodeContext::get());
49  const auto& args = window_func->getArgs();
50  CHECK(!args.empty());
51  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
52  CHECK_EQ(arg_lvs.size(), size_t(1));
53  return arg_lvs.front();
54  }
55  case SqlWindowFunctionKind::AVG:
56  case SqlWindowFunctionKind::MIN:
57  case SqlWindowFunctionKind::MAX:
58  case SqlWindowFunctionKind::SUM:
59  case SqlWindowFunctionKind::COUNT: {
60  return codegenWindowFunctionAggregate(co);
61  }
62  default: {
63  LOG(FATAL) << "Invalid window function kind";
64  }
65  }
66  return nullptr;
67 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
#define LOG(tag)
Definition: Logger.h:182
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
const Analyzer::WindowFunction * getWindowFunction() const
static const WindowProjectNodeContext * get()
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
const WindowFunctionContext * activateWindowFunctionContext(const size_t target_index) const
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
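
The switch falls into three groups: the ranking kinds read a per-row buffer the window context has already computed, the navigation kinds (LAG, LEAD, FIRST_VALUE, LAST_VALUE) simply return the codegen'd argument because the context has already permuted the input, and the aggregate kinds defer to codegenWindowFunctionAggregate. A compact sketch of that classification (the enum and helper below are illustrative):

#include <iostream>
#include <string>

enum class WindowKind { ROW_NUMBER, RANK, PERCENT_RANK, LAG, AVG, SUM };

// Rough classification mirroring the switch above.
std::string dispatch_sketch(WindowKind kind) {
  switch (kind) {
    case WindowKind::ROW_NUMBER:
    case WindowKind::RANK:
      return "row_number_window_func";  // integer ranking output buffer
    case WindowKind::PERCENT_RANK:
      return "percent_window_func";     // double-valued ranking output buffer
    case WindowKind::LAG:
      return "<argument value>";        // value already laid out by the context
    case WindowKind::AVG:
    case WindowKind::SUM:
      return "<aggregate codegen path>";
  }
  return "<invalid>";
}

int main() {
  std::cout << dispatch_sketch(WindowKind::RANK) << '\n';
  std::cout << dispatch_sketch(WindowKind::AVG) << '\n';
}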

◆ codegenWindowFunctionAggregate()

llvm::Value * Executor::codegenWindowFunctionAggregate ( const CompilationOptions co)
private

Definition at line 137 of file WindowFunctionIR.cpp.

References AVG, CHECK, WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().

137  {
138  const auto reset_state_false_bb = codegenWindowResetStateControlFlow();
139  auto aggregate_state = aggregateWindowStatePtr();
140  llvm::Value* aggregate_state_count = nullptr;
141  const auto window_func_context =
142  WindowProjectNodeContext::getActiveWindowFunctionContext();
143  const auto window_func = window_func_context->getWindowFunction();
144  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
145  const auto aggregate_state_count_i64 = cgen_state_->llInt(
146  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
147  const auto pi64_type =
148  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
149  aggregate_state_count =
150  cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_count_i64, pi64_type);
151  }
152  codegenWindowFunctionStateInit(aggregate_state);
153  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
154  const auto count_zero = cgen_state_->llInt(int64_t(0));
155  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
156  }
157  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
158  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
159  CHECK(WindowProjectNodeContext::get());
160  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
161 }
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static const WindowProjectNodeContext * get()
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
#define CHECK(condition)
Definition: Logger.h:187
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
static WindowFunctionContext * getActiveWindowFunctionContext()
llvm::BasicBlock * codegenWindowResetStateControlFlow()
+ Here is the call graph for this function:
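
At runtime the emitted code resets the accumulator (and, for AVG, the count) on the first row of each partition and then folds the current row into the state. An interpreter-style sketch of that per-row pattern for a windowed SUM:

#include <cstdint>
#include <iostream>
#include <vector>

// Sketch of the control flow the generated code follows per row: reset the
// accumulator at every partition start, then add the current row's value.
int main() {
  const std::vector<int64_t> vals{1, 2, 3, 10, 20};
  const std::vector<bool> partition_start{true, false, false, true, false};
  int64_t acc = 0;
  for (size_t pos = 0; pos < vals.size(); ++pos) {
    if (partition_start[pos]) {
      acc = 0;  // codegenWindowFunctionStateInit equivalent
    }
    acc += vals[pos];  // the aggregate call emitted for the row
    std::cout << "pos " << pos << " running sum " << acc << '\n';
  }
}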

◆ codegenWindowFunctionAggregateCalls()

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 240 of file WindowFunctionIR.cpp.

References run-benchmark-import::args, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kFLOAT, and SUM.

241  {
242  const auto window_func_context =
243  WindowProjectNodeContext::getActiveWindowFunctionContext();
244  const auto window_func = window_func_context->getWindowFunction();
245  const auto window_func_ti = get_adjusted_window_type_info(window_func);
246  const auto window_func_null_val =
247  window_func_ti.is_fp()
248  ? cgen_state_->inlineFpNull(window_func_ti)
249  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
250  const auto& args = window_func->getArgs();
251  llvm::Value* crt_val;
252  if (args.empty()) {
253  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
254  crt_val = cgen_state_->llInt(int64_t(1));
255  } else {
256  CodeGenerator code_generator(this);
257  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
258  CHECK_EQ(arg_lvs.size(), size_t(1));
259  if (window_func->getKind() == SqlWindowFunctionKind::SUM && !window_func_ti.is_fp()) {
260  crt_val = code_generator.codegenCastBetweenIntTypes(
261  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
262  } else {
263  crt_val = window_func_ti.get_type() == kFLOAT
264  ? arg_lvs.front()
265  : cgen_state_->castToTypeIn(arg_lvs.front(), 64);
266  }
267  }
268  const auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
269  llvm::Value* multiplicity_lv = nullptr;
270  if (args.empty()) {
271  cgen_state_->emitCall(agg_name, {aggregate_state, crt_val});
272  } else {
273  cgen_state_->emitCall(agg_name + "_skip_val",
274  {aggregate_state, crt_val, window_func_null_val});
275  }
276  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
277  codegenWindowAvgEpilogue(crt_val, window_func_null_val, multiplicity_lv);
278  }
279  return codegenAggregateWindowState();
280 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
std::string get_window_agg_name(const SqlWindowFunctionKind kind, const SQLTypeInfo &window_func_ti)
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
llvm::Value * codegenAggregateWindowState()
#define CHECK(condition)
Definition: Logger.h:187
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)
+ Here is the call graph for this function:
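
An argument-less COUNT accumulates the constant 1 through the plain helper, while an aggregate with an argument goes through the _skip_val variant so rows equal to the null sentinel are ignored; a small sketch of the difference:

#include <cstdint>
#include <iostream>
#include <vector>

// COUNT(*) counts every row; COUNT(col) skips rows whose value equals the
// null sentinel, mirroring the plain vs. _skip_val helper choice above.
int main() {
  const int64_t null_sentinel = -9999;
  const std::vector<int64_t> col{7, null_sentinel, 3};
  int64_t count_star = 0;
  int64_t count_col = 0;
  for (const auto v : col) {
    count_star += 1;  // agg_count(state, 1)
    if (v != null_sentinel) {
      ++count_col;    // agg_count_skip_val(state, v, null)
    }
  }
  std::cout << count_star << ' ' << count_col << '\n';  // 3 2
}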

◆ codegenWindowFunctionStateInit()

void Executor::codegenWindowFunctionStateInit ( llvm::Value *  aggregate_state)
private

Definition at line 191 of file WindowFunctionIR.cpp.

References COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

191  {
192  const auto window_func_context =
193  WindowProjectNodeContext::getActiveWindowFunctionContext();
194  const auto window_func = window_func_context->getWindowFunction();
195  const auto window_func_ti = get_adjusted_window_type_info(window_func);
196  const auto window_func_null_val =
197  window_func_ti.is_fp()
198  ? cgen_state_->inlineFpNull(window_func_ti)
199  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
200  llvm::Value* window_func_init_val;
201  if (window_func_context->getWindowFunction()->getKind() ==
202  SqlWindowFunctionKind::COUNT) {
203  switch (window_func_ti.get_type()) {
204  case kFLOAT: {
205  window_func_init_val = cgen_state_->llFp(float(0));
206  break;
207  }
208  case kDOUBLE: {
209  window_func_init_val = cgen_state_->llFp(double(0));
210  break;
211  }
212  default: {
213  window_func_init_val = cgen_state_->llInt(int64_t(0));
214  break;
215  }
216  }
217  } else {
218  window_func_init_val = window_func_null_val;
219  }
220  const auto pi32_type =
221  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
222  switch (window_func_ti.get_type()) {
223  case kDOUBLE: {
224  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
225  break;
226  }
227  case kFLOAT: {
228  aggregate_state =
229  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
230  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
231  break;
232  }
233  default: {
234  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
235  break;
236  }
237  }
238 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)
+ Here is the call graph for this function:
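
COUNT starts from zero, while the other aggregates are assumed to start from the type's null sentinel so that an empty or all-null partition comes back as NULL rather than 0; a minimal sketch of that choice:

#include <cstdint>
#include <iostream>

enum class AggKind { COUNT, SUM };

// Initial accumulator value as picked above: COUNT begins at 0, the remaining
// aggregates begin at the null sentinel (assumption based on the init path).
int64_t init_window_state_sketch(const AggKind kind, const int64_t null_sentinel) {
  return kind == AggKind::COUNT ? 0 : null_sentinel;
}

int main() {
  const int64_t null_sentinel = INT64_MIN;
  std::cout << init_window_state_sketch(AggKind::COUNT, null_sentinel) << '\n';   // 0
  std::cout << (init_window_state_sketch(AggKind::SUM, null_sentinel) ==
                null_sentinel)
            << '\n';                                                              // 1
}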

◆ codegenWindowResetStateControlFlow()

llvm::BasicBlock * Executor::codegenWindowResetStateControlFlow ( )
private

Definition at line 163 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::getActiveWindowFunctionContext(), and CodeGenerator::toBool().

163  {
164  const auto window_func_context =
165  WindowProjectNodeContext::getActiveWindowFunctionContext();
166  const auto bitset = cgen_state_->llInt(
167  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
168  const auto min_val = cgen_state_->llInt(int64_t(0));
169  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
170  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
171  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
172  CodeGenerator code_generator(this);
173  const auto reset_state =
174  code_generator.toBool(cgen_state_->emitCall("bit_is_set",
175  {bitset,
176  code_generator.posArg(nullptr),
177  min_val,
178  max_val,
179  null_val,
180  null_bool_val}));
181  const auto reset_state_true_bb = llvm::BasicBlock::Create(
182  cgen_state_->context_, "reset_state.true", cgen_state_->row_func_);
183  const auto reset_state_false_bb = llvm::BasicBlock::Create(
184  cgen_state_->context_, "reset_state.false", cgen_state_->row_func_);
185  cgen_state_->ir_builder_.CreateCondBr(
186  reset_state, reset_state_true_bb, reset_state_false_bb);
187  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
188  return reset_state_false_bb;
189 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
static WindowFunctionContext * getActiveWindowFunctionContext()
+ Here is the call graph for this function:
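
The bit_is_set call probes the partition-start bitset at the current row position, so the reset_state branch is taken only on the first row of each partition. A standalone sketch of such a bitset probe (the packing below is illustrative; the real helper also takes null sentinels):

#include <cstdint>
#include <iostream>
#include <vector>

// Probe one bit of a packed bitset; the generated code calls a runtime helper
// with the bitset pointer, the row position and null sentinels instead.
bool bit_is_set_sketch(const std::vector<uint8_t>& bitset, const size_t pos) {
  return (bitset[pos >> 3] >> (pos & 7)) & 1;
}

int main() {
  std::vector<uint8_t> partition_start(2, 0);
  partition_start[0] |= 1 << 0;  // row 0 starts a partition
  partition_start[0] |= 1 << 5;  // row 5 starts a partition
  for (size_t pos = 0; pos < 8; ++pos) {
    std::cout << "row " << pos
              << (bit_is_set_sketch(partition_start, pos)
                      ? ": reset window state\n"
                      : ": keep accumulating\n");
  }
}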

◆ collectAllDeviceResults()

ResultSetPtr Executor::collectAllDeviceResults ( ExecutionDispatch execution_dispatch,
const std::vector< Analyzer::Expr *> &  target_exprs,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
)
private

Definition at line 1464 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), catalog_, collectAllDeviceShardedTopResults(), Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, reduceMultiDeviceResults(), reduceSpeculativeTopN(), GroupByAndAggregate::shard_count_for_top_groups(), and use_speculative_top_n().

Referenced by executeWorkUnitImpl().

1469  {
1470  auto& result_per_device = execution_dispatch.getFragmentResults();
1471  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
1472  QueryDescriptionType::NonGroupedAggregate) {
1473  return build_row_for_empty_input(target_exprs, query_mem_desc, device_type);
1474  }
1475  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1476  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
1477  return reduceSpeculativeTopN(
1478  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
1479  }
1480  const auto shard_count =
1481  device_type == ExecutorDeviceType::GPU
1482  ? GroupByAndAggregate::shard_count_for_top_groups(ra_exe_unit, *catalog_)
1483  : 0;
1484 
1485  if (shard_count && !result_per_device.empty()) {
1486  return collectAllDeviceShardedTopResults(execution_dispatch);
1487  }
1488  return reduceMultiDeviceResults(
1489  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
1490 }
ResultSetPtr build_row_for_empty_input(const std::vector< Analyzer::Expr *> &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
Definition: Execute.cpp:1427
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:821
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:907
ResultSetPtr collectAllDeviceShardedTopResults(ExecutionDispatch &execution_dispatch) const
Definition: Execute.cpp:1495
QueryDescriptionType getQueryDescriptionType() const
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ collectAllDeviceShardedTopResults()

ResultSetPtr Executor::collectAllDeviceShardedTopResults ( ExecutionDispatch execution_dispatch) const
private

Definition at line 1495 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LE, Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragmentResults(), and run-benchmark-import::result.

Referenced by collectAllDeviceResults().

1496  {
1497  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1498  auto& result_per_device = execution_dispatch.getFragmentResults();
1499  const auto first_result_set = result_per_device.front().first;
1500  CHECK(first_result_set);
1501  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
1502  CHECK(!top_query_mem_desc.didOutputColumnar());
1503  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
1504  const auto top_n = ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
1505  top_query_mem_desc.setEntryCount(0);
1506  for (auto& result : result_per_device) {
1507  const auto result_set = result.first;
1508  CHECK(result_set);
1509  result_set->sort(ra_exe_unit.sort_info.order_entries, top_n);
1510  size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
1511  top_query_mem_desc.setEntryCount(new_entry_cnt);
1512  }
1513  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
1514  first_result_set->getDeviceType(),
1515  top_query_mem_desc,
1516  first_result_set->getRowSetMemOwner(),
1517  this);
1518  auto top_storage = top_result_set->allocateStorage();
1519  const auto top_result_set_buffer = top_storage->getUnderlyingBuffer();
1520  size_t top_output_row_idx{0};
1521  for (auto& result : result_per_device) {
1522  const auto result_set = result.first;
1523  CHECK(result_set);
1524  const auto& top_permutation = result_set->getPermutationBuffer();
1525  CHECK_LE(top_permutation.size(), top_n);
1526  const auto result_set_buffer = result_set->getStorage()->getUnderlyingBuffer();
1527  for (const auto sorted_idx : top_permutation) {
1528  const auto row_ptr =
1529  result_set_buffer + sorted_idx * top_query_mem_desc.getRowSize();
1530  memcpy(top_result_set_buffer + top_output_row_idx * top_query_mem_desc.getRowSize(),
1531  row_ptr,
1532  top_query_mem_desc.getRowSize());
1533  ++top_output_row_idx;
1534  }
1535  }
1536  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
1537  return top_result_set;
1538 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
#define CHECK_LE(x, y)
Definition: Logger.h:198
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
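
Each device hands back an already sorted top-N result; the merge sizes the output by the summed row counts and copies the permuted rows of every device buffer into one ResultSet. A container-level sketch of the same idea using plain vectors:

#include <iostream>
#include <vector>

int main() {
  // Per-device top-n results, each already sorted by the device that produced it.
  std::vector<std::vector<int>> per_device{{9, 7, 5}, {8, 8, 2}};
  size_t total_rows = 0;
  for (const auto& rows : per_device) {
    total_rows += rows.size();  // setEntryCount() accumulation step
  }
  std::vector<int> merged;
  merged.reserve(total_rows);
  for (const auto& rows : per_device) {
    // One memcpy per permuted row in the real code; bulk append here.
    merged.insert(merged.end(), rows.begin(), rows.end());
  }
  std::cout << merged.size() << " rows in the combined result\n";
}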

◆ compileBody()

bool Executor::compileBody ( const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co 
)
private

Definition at line 1832 of file NativeCodegen.cpp.

References CHECK, CodeGenerator::codegen(), GroupByAndAggregate::codegen(), RelAlgExecutionUnit::join_quals, CodeGenerator::prioritizeQuals(), to_string(), CodeGenerator::toBool(), and VLOG.

1835  {
1836  // generate the code for the filter
1837  std::vector<Analyzer::Expr*> primary_quals;
1838  std::vector<Analyzer::Expr*> deferred_quals;
1839  bool short_circuited =
1840  CodeGenerator::prioritizeQuals(ra_exe_unit, primary_quals, deferred_quals);
1841  if (short_circuited) {
1842  VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
1843  << "short-circuited and deferred " << std::to_string(deferred_quals.size())
1844  << " quals";
1845  }
1846  llvm::Value* filter_lv = cgen_state_->llBool(true);
1847  CodeGenerator code_generator(this);
1848  for (auto expr : primary_quals) {
1849  // Generate the filter for primary quals
1850  auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
1851  filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
1852  }
1853  CHECK(filter_lv->getType()->isIntegerTy(1));
1854  llvm::BasicBlock* sc_false{nullptr};
1855  if (!deferred_quals.empty()) {
1856  auto sc_true = llvm::BasicBlock::Create(
1857  cgen_state_->context_, "sc_true", cgen_state_->row_func_);
1858  sc_false = llvm::BasicBlock::Create(
1859  cgen_state_->context_, "sc_false", cgen_state_->row_func_);
1860  cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
1861  cgen_state_->ir_builder_.SetInsertPoint(sc_false);
1862  if (ra_exe_unit.join_quals.empty()) {
1863  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
1864  }
1865  cgen_state_->ir_builder_.SetInsertPoint(sc_true);
1866  filter_lv = cgen_state_->llBool(true);
1867  }
1868  for (auto expr : deferred_quals) {
1869  filter_lv = cgen_state_->ir_builder_.CreateAnd(
1870  filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
1871  }
1872 
1873  CHECK(filter_lv->getType()->isIntegerTy(1));
1874  return group_by_and_aggregate.codegen(filter_lv, sc_false, query_mem_desc, co);
1875 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
std::string to_string(char const *&&v)
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const JoinQualsPerNestingLevel join_quals
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr *> &primary_quals, std::vector< Analyzer::Expr *> &deferred_quals)
Definition: LogicalIR.cpp:157
#define CHECK(condition)
Definition: Logger.h:187
#define VLOG(n)
Definition: Logger.h:277
+ Here is the call graph for this function:
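
prioritizeQuals splits the filter into cheap primary quals and deferred quals, and the generated sc_true/sc_false blocks ensure the deferred ones only run for rows that already passed the primary conjunction. A plain C++ rendering of that layout (the real code emits branches rather than evaluating predicates):

#include <functional>
#include <iostream>
#include <vector>

using Qual = std::function<bool(int)>;

// Evaluate primary quals first; deferred (expensive) quals run only for rows
// that survive, which is what the sc_true / sc_false branch structure encodes.
bool row_passes(const int row,
                const std::vector<Qual>& primary,
                const std::vector<Qual>& deferred) {
  for (const auto& q : primary) {
    if (!q(row)) return false;  // jump to sc_false / early return
  }
  for (const auto& q : deferred) {
    if (!q(row)) return false;
  }
  return true;
}

int main() {
  std::vector<Qual> primary{[](int r) { return r % 2 == 0; }};
  std::vector<Qual> deferred{[](int r) { return r > 4; }};  // pretend this is expensive
  for (int row = 0; row < 8; ++row) {
    std::cout << "row " << row
              << (row_passes(row, primary, deferred) ? " kept\n" : " filtered\n");
  }
}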

◆ compileWorkUnit()

std::tuple< Executor::CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache,
RenderInfo render_info = nullptr 
)
private

Definition at line 1562 of file NativeCodegen.cpp.

References ExecutionOptions::allow_multifrag, anonymous_namespace{NativeCodegen.cpp}::always_clone_runtime_function(), run-benchmark-import::args, anonymous_namespace{NativeCodegen.cpp}::bind_pos_placeholders(), anonymous_namespace{NativeCodegen.cpp}::bind_query(), CHECK, CPU, anonymous_namespace{NativeCodegen.cpp}::create_row_function(), CompilationOptions::device_type_, RelAlgExecutionUnit::estimator, CompilationOptions::explain_type_, anonymous_namespace{NativeCodegen.cpp}::get_agg_fnames(), get_arg_by_name(), get_gpu_data_layout(), get_gpu_target_triple_string(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, CompilationOptions::hoist_literals_, anonymous_namespace{OutputBufferInitialization.cpp}::init_agg_val_vec(), GroupByAndAggregate::initQueryMemoryDescriptor(), RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, Invalid, logger::IR, is_rt_udf_module_present(), is_udf_module_present(), RenderInfo::isPotentialInSituRender(), ExecutionOptions::just_explain, anonymous_namespace{NativeCodegen.cpp}::link_udf_module(), LOG, mark_function_always_inline(), CodeGenerator::markDeadRuntimeFuncs(), anonymous_namespace{NativeCodegen.cpp}::optimize_ir(), Optimized, ExecutionOptions::output_columnar_hint, RelAlgExecutionUnit::quals, query_group_by_template(), query_template(), rt_udf_cpu_module, rt_udf_gpu_module, RelAlgExecutionUnit::scan_limit, serialize_llvm_object(), StdSet, RelAlgExecutionUnit::target_exprs, udf_cpu_module, udf_gpu_module, verify_function_ir(), VLOG, and ExecutionOptions::with_dynamic_watchdog.

1573  {
1574  nukeOldState(allow_lazy_fetch, query_infos, ra_exe_unit);
1575 
1576  GroupByAndAggregate group_by_and_aggregate(
1577  this, co.device_type_, ra_exe_unit, query_infos, row_set_mem_owner);
1578  auto query_mem_desc =
1579  group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
1580  max_groups_buffer_entry_guess,
1581  crt_min_byte_width,
1582  render_info,
1583  eo.output_columnar_hint);
1584 
1585  if (query_mem_desc->getQueryDescriptionType() ==
1586  QueryDescriptionType::GroupByBaselineHash &&
1587  !has_cardinality_estimation &&
1588  (!render_info || !render_info->isPotentialInSituRender()) && !eo.just_explain) {
1590  }
1591 
1592  const bool output_columnar = query_mem_desc->didOutputColumnar();
1593 
1594  if (co.device_type_ == ExecutorDeviceType::GPU) {
1595  const size_t num_count_distinct_descs =
1596  query_mem_desc->getCountDistinctDescriptorsSize();
1597  for (size_t i = 0; i < num_count_distinct_descs; i++) {
1598  const auto& count_distinct_descriptor =
1599  query_mem_desc->getCountDistinctDescriptor(i);
1600  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::StdSet ||
1601  (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
1602  !co.hoist_literals_)) {
1603  throw QueryMustRunOnCpu();
1604  }
1605  }
1606  }
1607 
1608  // Read the module template and target either CPU or GPU
1609  // by binding the stream position functions to the right implementation:
1610  // stride access for GPU, contiguous for CPU
1611  auto rt_module_copy = llvm::CloneModule(
1612 #if LLVM_VERSION_MAJOR >= 7
1613  *g_rt_module.get(),
1614 #else
1615  g_rt_module.get(),
1616 #endif
1617  cgen_state_->vmap_,
1618  [](const llvm::GlobalValue* gv) {
1619  auto func = llvm::dyn_cast<llvm::Function>(gv);
1620  if (!func) {
1621  return true;
1622  }
1623  return (func->getLinkage() == llvm::GlobalValue::LinkageTypes::PrivateLinkage ||
1624  func->getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage ||
1625  always_clone_runtime_function(func));
1626  });
1627 
1628  if (co.device_type_ == ExecutorDeviceType::CPU) {
1629  if (is_udf_module_present(true)) {
1630  link_udf_module(udf_cpu_module, *rt_module_copy, cgen_state_.get());
1631  }
1632  if (is_rt_udf_module_present(true)) {
1633  link_udf_module(rt_udf_cpu_module, *rt_module_copy, cgen_state_.get());
1634  }
1635  } else {
1636  rt_module_copy->setDataLayout(get_gpu_data_layout());
1637  rt_module_copy->setTargetTriple(get_gpu_target_triple_string());
1638  if (is_udf_module_present()) {
1639  link_udf_module(udf_gpu_module, *rt_module_copy, cgen_state_.get());
1640  }
1641  if (is_rt_udf_module_present()) {
1642  link_udf_module(rt_udf_gpu_module, *rt_module_copy, cgen_state_.get());
1643  }
1644  }
1645 
1646  cgen_state_->module_ = rt_module_copy.release();
1647 
1648  auto agg_fnames =
1649  get_agg_fnames(ra_exe_unit.target_exprs, !ra_exe_unit.groupby_exprs.empty());
1650 
1651  const auto agg_slot_count = ra_exe_unit.estimator ? size_t(1) : agg_fnames.size();
1652 
1653  const bool is_group_by{query_mem_desc->isGroupBy()};
1654  auto query_func = is_group_by ? query_group_by_template(cgen_state_->module_,
1655  co.hoist_literals_,
1656  *query_mem_desc,
1657  co.device_type_,
1658  ra_exe_unit.scan_limit)
1659  : query_template(cgen_state_->module_,
1660  agg_slot_count,
1661  co.hoist_literals_,
1662  !!ra_exe_unit.estimator);
1663  bind_pos_placeholders("pos_start", true, query_func, cgen_state_->module_);
1664  bind_pos_placeholders("group_buff_idx", false, query_func, cgen_state_->module_);
1665  bind_pos_placeholders("pos_step", false, query_func, cgen_state_->module_);
1666 
1667  cgen_state_->query_func_ = query_func;
1668  cgen_state_->query_func_entry_ir_builder_.SetInsertPoint(
1669  &query_func->getEntryBlock().front());
1670 
1671  std::vector<llvm::Value*> col_heads;
1672  std::tie(cgen_state_->row_func_, col_heads) =
1673  create_row_function(ra_exe_unit.input_col_descs.size(),
1674  is_group_by ? 0 : agg_slot_count,
1675  co.hoist_literals_,
1676  query_func,
1677  cgen_state_->module_,
1678  cgen_state_->context_);
1679  CHECK(cgen_state_->row_func_);
1680  // make sure it's in-lined, we don't want register spills in the inner loop
1681  mark_function_always_inline(cgen_state_->row_func_);
1682  auto bb =
1683  llvm::BasicBlock::Create(cgen_state_->context_, "entry", cgen_state_->row_func_);
1684  cgen_state_->ir_builder_.SetInsertPoint(bb);
1685  preloadFragOffsets(ra_exe_unit.input_descs, query_infos);
1686  RelAlgExecutionUnit body_execution_unit = ra_exe_unit;
1687  const auto join_loops =
1688  buildJoinLoops(body_execution_unit, co, eo, query_infos, column_cache);
1689  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
1690  const auto is_not_deleted_bb = codegenSkipDeletedOuterTableRow(ra_exe_unit, co);
1691  if (is_not_deleted_bb) {
1692  bb = is_not_deleted_bb;
1693  }
1694  if (!join_loops.empty()) {
1695  codegenJoinLoops(join_loops,
1696  body_execution_unit,
1697  group_by_and_aggregate,
1698  query_func,
1699  bb,
1700  *(query_mem_desc.get()),
1701  co,
1702  eo);
1703  } else {
1704  const bool can_return_error =
1705  compileBody(ra_exe_unit, group_by_and_aggregate, *query_mem_desc, co);
1706  if (can_return_error || cgen_state_->needs_error_check_ || eo.with_dynamic_watchdog) {
1707  createErrorCheckControlFlow(query_func, eo.with_dynamic_watchdog, co.device_type_);
1708  }
1709  }
1710  std::vector<llvm::Value*> hoisted_literals;
1711 
1712  if (co.hoist_literals_) {
1713  VLOG(1) << "number of hoisted literals: "
1714  << cgen_state_->query_func_literal_loads_.size()
1715  << " / literal buffer usage: " << cgen_state_->getLiteralBufferUsage(0)
1716  << " bytes";
1717  }
1718 
1719  if (co.hoist_literals_ && !cgen_state_->query_func_literal_loads_.empty()) {
1720  // we have some hoisted literals...
1721  hoisted_literals = inlineHoistedLiterals();
1722  }
1723  // iterate through all the instructions in the query template function and
1724  // replace the call to the filter placeholder with the call to the actual filter
1725  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1726  ++it) {
1727  if (!llvm::isa<llvm::CallInst>(*it)) {
1728  continue;
1729  }
1730  auto& filter_call = llvm::cast<llvm::CallInst>(*it);
1731  if (std::string(filter_call.getCalledFunction()->getName()) == "row_process") {
1732  std::vector<llvm::Value*> args;
1733  for (size_t i = 0; i < filter_call.getNumArgOperands(); ++i) {
1734  args.push_back(filter_call.getArgOperand(i));
1735  }
1736  args.insert(args.end(), col_heads.begin(), col_heads.end());
1737  args.push_back(get_arg_by_name(query_func, "join_hash_tables"));
1738  // push hoisted literals arguments, if any
1739  args.insert(args.end(), hoisted_literals.begin(), hoisted_literals.end());
1740 
1741  llvm::ReplaceInstWithInst(&filter_call,
1742  llvm::CallInst::Create(cgen_state_->row_func_, args, ""));
1743  break;
1744  }
1745  }
1746  plan_state_->init_agg_vals_ =
1747  init_agg_val_vec(ra_exe_unit.target_exprs, ra_exe_unit.quals, *query_mem_desc);
1748 
1749  auto multifrag_query_func = cgen_state_->module_->getFunction(
1750  "multifrag_query" + std::string(co.hoist_literals_ ? "_hoisted_literals" : ""));
1751  CHECK(multifrag_query_func);
1752 
1753  bind_query(query_func,
1754  "query_stub" + std::string(co.hoist_literals_ ? "_hoisted_literals" : ""),
1755  multifrag_query_func,
1756  cgen_state_->module_);
1757 
1758  auto live_funcs =
1759  CodeGenerator::markDeadRuntimeFuncs(*cgen_state_->module_,
1760  {query_func, cgen_state_->row_func_},
1761  {multifrag_query_func});
1762 
1763  std::string llvm_ir;
1764  if (eo.just_explain) {
1765  if (co.explain_type_ == ExecutorExplainType::Optimized) {
1766 #ifdef WITH_JIT_DEBUG
1767  throw std::runtime_error(
1768  "Explain optimized not available when JIT runtime debug symbols are enabled");
1769 #else
1770  optimize_ir(query_func, cgen_state_->module_, live_funcs, co);
1771 #endif // WITH_JIT_DEBUG
1772  }
1773  llvm_ir =
1774  serialize_llvm_object(query_func) + serialize_llvm_object(cgen_state_->row_func_);
1775  }
1776  verify_function_ir(cgen_state_->row_func_);
1777 
1778  LOG(IR) << query_mem_desc->toString() << "\nGenerated IR\n"
1779  << serialize_llvm_object(query_func)
1780  << serialize_llvm_object(cgen_state_->row_func_) << "\nEnd of IR";
1781 
1782  return std::make_tuple(
1783  Executor::CompilationResult{
1784  co.device_type_ == ExecutorDeviceType::CPU
1785  ? optimizeAndCodegenCPU(query_func, multifrag_query_func, live_funcs, co)
1786  : optimizeAndCodegenGPU(query_func,
1787  multifrag_query_func,
1788  live_funcs,
1789  is_group_by || ra_exe_unit.estimator,
1790  cuda_mgr,
1791  co),
1792  cgen_state_->getLiterals(),
1793  output_columnar,
1794  llvm_ir},
1795  std::move(query_mem_desc));
1796 }
std::vector< Analyzer::Expr * > target_exprs
bool is_udf_module_present(bool cpu_only)
std::unique_ptr< llvm::Module > rt_udf_cpu_module
void link_udf_module(const std::unique_ptr< llvm::Module > &udf_module, llvm::Module &module, CgenState *cgen_state, llvm::Linker::Flags flags=llvm::Linker::Flags::None)
std::unique_ptr< llvm::Module > udf_cpu_module
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
Definition: IRCodegen.cpp:447
#define LOG(tag)
Definition: Logger.h:182
std::unique_ptr< llvm::Module > rt_udf_gpu_module
void mark_function_always_inline(llvm::Function *func)
static std::unordered_set< llvm::Function * > markDeadRuntimeFuncs(llvm::Module &module, const std::vector< llvm::Function *> &roots, const std::vector< llvm::Function *> &leaves)
llvm::StringRef get_gpu_data_layout()
void verify_function_ir(const llvm::Function *func)
std::vector< std::pair< void *, void * > > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function *> &, const CompilationOptions &)
std::vector< int64_t > init_agg_val_vec(const std::vector< TargetInfo > &targets, const QueryMemoryDescriptor &query_mem_desc)
llvm::Function * query_template(llvm::Module *module, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::unique_ptr< llvm::Module > udf_gpu_module
const std::vector< InputDescriptor > input_descs
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:2745
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:55
llvm::StringRef get_gpu_target_triple_string()
const bool allow_multifrag
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:114
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const bool with_dynamic_watchdog
std::unique_ptr< llvm::Module > g_rt_module(read_template_module(getGlobalLLVMContext()))
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:959
const bool output_columnar_hint
llvm::Function * query_group_by_template(llvm::Module *module, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit)
const std::shared_ptr< Analyzer::Estimator > estimator
std::vector< std::string > get_agg_fnames(const std::vector< Analyzer::Expr *> &target_exprs, const bool is_group_by)
ExecutorDeviceType device_type_
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, ExecutorDeviceType device_type)
const ExecutorExplainType explain_type_
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2731
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
void bind_pos_placeholders(const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
std::string serialize_llvm_object(const T *llvm_obj)
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:187
std::vector< llvm::Value * > inlineHoistedLiterals()
void optimize_ir(llvm::Function *query_func, llvm::Module *module, const std::unordered_set< llvm::Function *> &live_funcs, const CompilationOptions &co)
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:206
bool always_clone_runtime_function(const llvm::Function *func)
std::vector< std::pair< void *, void * > > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function *> &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
#define VLOG(n)
Definition: Logger.h:277
std::pair< llvm::Function *, std::vector< llvm::Value * > > create_row_function(const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Function *query_func, llvm::Module *module, llvm::LLVMContext &context)
void bind_query(llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)
bool is_rt_udf_module_present(bool cpu_only)
+ Here is the call graph for this function:
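
The CloneModule predicate decides which runtime functions are copied into the per-query module eagerly: private and internal-linkage helpers plus an explicit always-clone list, with everything else left to lazy linking. A standalone sketch of that predicate over a simplified function descriptor (FuncInfo and Linkage are illustrative stand-ins for the LLVM types):

#include <iostream>
#include <string>

// Stand-in for llvm::GlobalValue linkage kinds; only the ones the predicate
// cares about are modelled here.
enum class Linkage { Private, Internal, External };

struct FuncInfo {
  std::string name;
  Linkage linkage;
  bool always_clone;  // result of always_clone_runtime_function() in the real code
};

// Mirrors the lambda passed to llvm::CloneModule above: clone private and
// internal helpers eagerly, plus anything on the always-clone list.
bool should_clone(const FuncInfo& f) {
  return f.linkage == Linkage::Private || f.linkage == Linkage::Internal ||
         f.always_clone;
}

int main() {
  std::cout << should_clone({"pos_start_impl", Linkage::External, true}) << '\n';  // 1
  std::cout << should_clone({"some_helper", Linkage::Internal, false}) << '\n';    // 1
  std::cout << should_clone({"agg_sum", Linkage::External, false}) << '\n';        // 0
}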

◆ containsLeftDeepOuterJoin()

bool Executor::containsLeftDeepOuterJoin ( ) const
inline

Definition at line 434 of file Execute.h.

434  {
435  return cgen_state_->contains_left_deep_outer_join_;
436  }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944

◆ createErrorCheckControlFlow()

void Executor::createErrorCheckControlFlow ( llvm::Function *  query_func,
bool  run_with_dynamic_watchdog,
ExecutorDeviceType  device_type 
)
private

Definition at line 1318 of file NativeCodegen.cpp.

References CHECK, ERR_OUT_OF_TIME, get_arg_by_name(), and GPU.

1320  {
1321  // check whether the row processing was successful; currently, it can
1322  // fail by running out of group by buffer slots
1323 
1324  llvm::Value* row_count = nullptr;
1325  if (run_with_dynamic_watchdog && device_type == ExecutorDeviceType::GPU) {
1326  row_count =
1327  find_variable_in_basic_block<llvm::LoadInst>(query_func, ".entry", "row_count");
1328  }
1329 
1330  bool done_splitting = false;
1331  for (auto bb_it = query_func->begin(); bb_it != query_func->end() && !done_splitting;
1332  ++bb_it) {
1333  llvm::Value* pos = nullptr;
1334  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); ++inst_it) {
1335  if (run_with_dynamic_watchdog && llvm::isa<llvm::PHINode>(*inst_it)) {
1336  if (inst_it->getName() == "pos") {
1337  pos = &*inst_it;
1338  }
1339  continue;
1340  }
1341  if (!llvm::isa<llvm::CallInst>(*inst_it)) {
1342  continue;
1343  }
1344  auto& filter_call = llvm::cast<llvm::CallInst>(*inst_it);
1345  if (std::string(filter_call.getCalledFunction()->getName()) == "row_process") {
1346  auto next_inst_it = inst_it;
1347  ++next_inst_it;
1348  auto new_bb = bb_it->splitBasicBlock(next_inst_it);
1349  auto& br_instr = bb_it->back();
1350  llvm::IRBuilder<> ir_builder(&br_instr);
1351  llvm::Value* err_lv = &*inst_it;
1352  if (run_with_dynamic_watchdog) {
1353  CHECK(pos);
1354  llvm::Value* call_watchdog_lv = nullptr;
1355  if (device_type == ExecutorDeviceType::GPU) {
1356  // In order to make sure all threads within a block see the same barrier,
1357  // only those blocks none of whose threads have experienced the critical
1358  // edge will go through the dynamic watchdog computation
1359  CHECK(row_count);
1360  auto crit_edge_rem =
1361  (blockSize() & (blockSize() - 1))
1362  ? ir_builder.CreateSRem(
1363  row_count,
1364  cgen_state_->llInt(static_cast<int64_t>(blockSize())))
1365  : ir_builder.CreateAnd(
1366  row_count,
1367  cgen_state_->llInt(static_cast<int64_t>(blockSize() - 1)));
1368  auto crit_edge_threshold = ir_builder.CreateSub(row_count, crit_edge_rem);
1369  crit_edge_threshold->setName("crit_edge_threshold");
1370 
1371  // only those threads where pos < crit_edge_threshold go through dynamic
1372  // watchdog call
1373  call_watchdog_lv =
1374  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, pos, crit_edge_threshold);
1375  } else {
1376  // CPU path: run watchdog for every 64th row
1377  auto dw_predicate = ir_builder.CreateAnd(pos, uint64_t(0x3f));
1378  call_watchdog_lv = ir_builder.CreateICmp(
1379  llvm::ICmpInst::ICMP_EQ, dw_predicate, cgen_state_->llInt(int64_t(0LL)));
1380  }
1381  CHECK(call_watchdog_lv);
1382  auto error_check_bb = bb_it->splitBasicBlock(
1383  llvm::BasicBlock::iterator(br_instr), ".error_check");
1384  auto& watchdog_br_instr = bb_it->back();
1385 
1386  auto watchdog_check_bb = llvm::BasicBlock::Create(
1387  cgen_state_->context_, ".watchdog_check", query_func, error_check_bb);
1388  llvm::IRBuilder<> watchdog_ir_builder(watchdog_check_bb);
1389  auto detected_timeout = watchdog_ir_builder.CreateCall(
1390  cgen_state_->module_->getFunction("dynamic_watchdog"), {});
1391  auto timeout_err_lv = watchdog_ir_builder.CreateSelect(
1392  detected_timeout, cgen_state_->llInt(Executor::ERR_OUT_OF_TIME), err_lv);
1393  watchdog_ir_builder.CreateBr(error_check_bb);
1394 
1395  llvm::ReplaceInstWithInst(
1396  &watchdog_br_instr,
1397  llvm::BranchInst::Create(
1398  watchdog_check_bb, error_check_bb, call_watchdog_lv));
1399  ir_builder.SetInsertPoint(&br_instr);
1400  auto unified_err_lv = ir_builder.CreatePHI(err_lv->getType(), 2);
1401 
1402  unified_err_lv->addIncoming(timeout_err_lv, watchdog_check_bb);
1403  unified_err_lv->addIncoming(err_lv, &*bb_it);
1404  err_lv = unified_err_lv;
1405  }
1406  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1407  err_lv =
1408  ir_builder.CreateCall(cgen_state_->module_->getFunction("record_error_code"),
1409  std::vector<llvm::Value*>{err_lv, error_code_arg});
1410  if (device_type == ExecutorDeviceType::GPU) {
1411  // let kernel execution finish as expected, regardless of the observed error,
1412  // unless it is from the dynamic watchdog where all threads within that block
1413  // return together.
1414  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
1415  err_lv,
1416  cgen_state_->llInt(Executor::ERR_OUT_OF_TIME));
1417  } else {
1418  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_NE,
1419  err_lv,
1420  cgen_state_->llInt(static_cast<int32_t>(0)));
1421  }
1422  auto error_bb = llvm::BasicBlock::Create(
1423  cgen_state_->context_, ".error_exit", query_func, new_bb);
1424  llvm::ReturnInst::Create(cgen_state_->context_, error_bb);
1425  llvm::ReplaceInstWithInst(&br_instr,
1426  llvm::BranchInst::Create(error_bb, new_bb, err_lv));
1427  done_splitting = true;
1428  break;
1429  }
1430  }
1431  }
1432  CHECK(done_splitting);
1433 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:944
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:114
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1012
#define CHECK(condition)
Definition: Logger.h:187
unsigned blockSize() const
Definition: Execute.cpp:2839
+ Here is the call graph for this function:
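
On the CPU path the dynamic watchdog is consulted only for every 64th row, which the generated predicate expresses as (pos & 0x3f) == 0; a quick standalone check of that cadence:

#include <cstdint>
#include <iostream>

int main() {
  int checks = 0;
  for (uint64_t pos = 0; pos < 1000; ++pos) {
    // Same predicate the generated IR uses on the CPU path: only positions
    // divisible by 64 pay for the watchdog call.
    if ((pos & uint64_t(0x3f)) == 0) {
      ++checks;  // dynamic_watchdog() would run here
    }
  }
  std::cout << "watchdog consulted " << checks << " times for 1000 rows\n";  // 16
}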

◆ deviceCount()

int Executor::deviceCount ( const ExecutorDeviceType  device_type) const
private

Definition at line 584 of file Execute.cpp.

References catalog_, CHECK, Data_Namespace::DataMgr::getCudaMgr(), Catalog_Namespace::Catalog::getDataMgr(), and GPU.

Referenced by deviceCountForMemoryLevel(), and dispatchFragments().

584  {
585  if (device_type == ExecutorDeviceType::GPU) {
586  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
587  CHECK(cuda_mgr);
588  return cuda_mgr->getDeviceCount();
589  } else {
590  return 1;
591  }
592 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:116
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ deviceCountForMemoryLevel()

int Executor::deviceCountForMemoryLevel ( const Data_Namespace::MemoryLevel  memory_level) const
private

Definition at line 594 of file Execute.cpp.

References CPU, deviceCount(), GPU, and Data_Namespace::GPU_LEVEL.

Referenced by buildHashTableForQualifier().

595  {
596  return memory_level == GPU_LEVEL ? deviceCount(ExecutorDeviceType::GPU)
597  : deviceCount(ExecutorDeviceType::CPU);
598 }
ExecutorDeviceType
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:584
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ deviceCycles()

int64_t Executor::deviceCycles ( int  milliseconds) const
private

Definition at line 2847 of file Execute.cpp.

References catalog_, CHECK, Data_Namespace::DataMgr::getCudaMgr(), and Catalog_Namespace::Catalog::getDataMgr().

2847  {
2848  CHECK(catalog_);
2849  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
2850  CHECK(cuda_mgr);
2851  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
2852  return static_cast<int64_t>(dev_props.front().clockKhz) * milliseconds;
2853 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:116
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
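
The cycle budget is simply the device clock in kHz multiplied by the timeout in milliseconds, since the factor of 1000 in kHz cancels against the 1/1000 in milliseconds; a quick check of the arithmetic (the clock value below is illustrative):

#include <cstdint>
#include <iostream>

int main() {
  const int64_t clock_khz = 1'455'000;  // e.g. a 1.455 GHz GPU clock (illustrative)
  const int milliseconds = 100;
  // cycles = (clock_khz * 1000 cycles/s) * (ms / 1000 s) = clock_khz * ms
  const int64_t cycles = clock_khz * static_cast<int64_t>(milliseconds);
  std::cout << cycles << " cycles in " << milliseconds << " ms\n";  // 145500000
}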

◆ dispatchFragments()

void Executor::dispatchFragments ( const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)>  dispatch,
const ExecutionDispatch execution_dispatch,
const std::vector< InputTableInfo > &  table_infos,
const ExecutionOptions eo,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const size_t  context_count,
const QueryCompilationDescriptor query_comp_desc,
const QueryMemoryDescriptor query_mem_desc,
QueryFragmentDescriptor fragment_descriptor,
std::unordered_set< int > &  available_gpus,
int &  available_cpus 
)
private

Definition at line 1566 of file Execute.cpp.

References ExecutionOptions::allow_multifrag, QueryFragmentDescriptor::assignFragsToKernelDispatch(), QueryFragmentDescriptor::assignFragsToMultiDispatch(), QueryFragmentDescriptor::buildFragmentKernelMap(), catalog_, CHECK, CHECK_GE, CHECK_GT, anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), deviceCount(), g_inner_join_fragment_skipping, getColLazyFetchInfo(), QueryCompilationDescriptor::getDeviceType(), QueryMemoryDescriptor::getEntryCount(), Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragOffsets(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, anonymous_namespace{Execute.cpp}::has_lazy_fetched_columns(), logger::INFO, KernelPerFragment, LOG, MultifragmentKernel, plan_state_, Projection, QueryFragmentDescriptor::shouldCheckWorkUnitWatchdog(), QueryMemoryDescriptor::toString(), VLOG, and ExecutionOptions::with_watchdog.

Referenced by executeWorkUnitImpl().

1584  {
1585  std::vector<std::future<void>> query_threads;
1586  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1587  CHECK(!ra_exe_unit.input_descs.empty());
1588 
1589  const auto device_type = query_comp_desc.getDeviceType();
1590  const bool uses_lazy_fetch =
1591  plan_state_->allow_lazy_fetch_ &&
1592  has_lazy_fetched_columns(getColLazyFetchInfo(ra_exe_unit.target_exprs));
1593  const bool use_multifrag_kernel = (device_type == ExecutorDeviceType::GPU) &&
1594  eo.allow_multifrag && (!uses_lazy_fetch || is_agg);
1595 
1596  const auto device_count = deviceCount(device_type);
1597  CHECK_GT(device_count, 0);
1598 
1599  fragment_descriptor.buildFragmentKernelMap(ra_exe_unit,
1600  execution_dispatch.getFragOffsets(),
1601  device_count,
1602  device_type,
1603  use_multifrag_kernel,
1604  g_inner_join_fragment_skipping,
1605  this);
1606  if (eo.with_watchdog && fragment_descriptor.shouldCheckWorkUnitWatchdog()) {
1607  checkWorkUnitWatchdog(ra_exe_unit, table_infos, *catalog_, device_type, device_count);
1608  }
1609 
1610  if (use_multifrag_kernel) {
1611  VLOG(1) << "Dispatching multifrag kernels";
1612  VLOG(1) << query_mem_desc.toString();
1613 
1614  // NB: We should never be on this path when the query is retried because of running
1615  // out of group by slots; also, for scan only queries on CPU we want the
1616  // high-granularity, fragment by fragment execution instead. For scan only queries on
1617  // GPU, we want the multifrag kernel path to save the overhead of allocating an output
1618  // buffer per fragment.
1619  auto multifrag_kernel_dispatch =
1620  [&query_threads, &dispatch, query_comp_desc, query_mem_desc](
1621  const int device_id,
1622  const FragmentsList& frag_list,
1623  const int64_t rowid_lookup_key) {
1624  query_threads.push_back(std::async(std::launch::async,
1625  dispatch,
1627  device_id,
1628  query_comp_desc,
1629  query_mem_desc,
1630  frag_list,
1631  ExecutorDispatchMode::MultifragmentKernel,
1632  rowid_lookup_key));
1633  };
1634  fragment_descriptor.assignFragsToMultiDispatch(multifrag_kernel_dispatch);
1635  } else {
1636  VLOG(1) << "Dispatching kernel per fragment";
1637  VLOG(1) << query_mem_desc.toString();
1638 
1639  if (!ra_exe_unit.use_bump_allocator && allow_single_frag_table_opt &&
1640  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1641  table_infos.size() == 1 && table_infos.front().table_id > 0) {
1642  const auto max_frag_size =
1643  table_infos.front().info.getFragmentNumTuplesUpperBound();
1644  if (max_frag_size < query_mem_desc.getEntryCount()) {
1645  LOG(INFO) << "Lowering scan limit from " << query_mem_desc.getEntryCount()
1646  << " to match max fragment size " << max_frag_size
1647  << " for kernel per fragment execution path.";
1648  throw CompilationRetryNewScanLimit(max_frag_size);
1649  }
1650  }
1651 
1652  size_t frag_list_idx{0};
1653  auto fragment_per_kernel_dispatch = [&query_threads,
1654  &dispatch,
1655  &frag_list_idx,
1656  &device_type,
1657  query_comp_desc,
1658  query_mem_desc](const int device_id,
1659  const FragmentsList& frag_list,
1660  const int64_t rowid_lookup_key) {
1661  if (!frag_list.size()) {
1662  return;
1663  }
1664  CHECK_GE(device_id, 0);
1665 
1666  query_threads.push_back(std::async(std::launch::async,
1667  dispatch,
1668  device_type,
1669  device_id,
1670  query_comp_desc,
1671  query_mem_desc,
1672  frag_list,
1673  ExecutorDispatchMode::KernelPerFragment,
1674  rowid_lookup_key));
1675 
1676  ++frag_list_idx;
1677  };
1678 
1679  fragment_descriptor.assignFragsToKernelDispatch(fragment_per_kernel_dispatch,
1680  ra_exe_unit);
1681  }
1682  for (auto& child : query_threads) {
1683  child.wait();
1684  }
1685  for (auto& child : query_threads) {
1686  child.get();
1687  }
1688 }
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr *> &target_exprs) const
Definition: Execute.cpp:278
#define LOG(tag)
Definition: Logger.h:182
void assignFragsToMultiDispatch(DISPATCH_FCN f) const
#define CHECK_GE(x, y)
Definition: Logger.h:200
#define CHECK_GT(x, y)
Definition: Logger.h:199
std::vector< FragmentsPerTable > FragmentsList
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:76
const bool allow_multifrag
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:584
ExecutorDeviceType getDeviceType() const
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:959
void checkWorkUnitWatchdog(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const Catalog_Namespace::Catalog &cat, const ExecutorDeviceType device_type, const int device_count)
Definition: Execute.cpp:1008
void assignFragsToKernelDispatch(DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const
#define CHECK(condition)
Definition: Logger.h:187
QueryDescriptionType getQueryDescriptionType() const
void buildFragmentKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
bool has_lazy_fetched_columns(const std::vector< ColumnLazyFetchInfo > &fetched_cols)
Definition: Execute.cpp:1555
#define VLOG(n)
Definition: Logger.h:277
const bool with_watchdog
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
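
Every fragment (or fragment group) becomes a std::async task; the two loops at the end first wait() on every future and then get() each one, so an exception thrown inside a kernel dispatch is re-raised only after all tasks have finished. A stripped-down sketch of that pattern:

#include <future>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::future<void>> query_threads;
  for (int frag_id = 0; frag_id < 4; ++frag_id) {
    query_threads.push_back(std::async(std::launch::async, [frag_id] {
      // Stand-in for the per-fragment kernel dispatch.
      std::cout << "executing fragment " << frag_id << '\n';
    }));
  }
  for (auto& child : query_threads) {
    child.wait();  // let every task run to completion first
  }
  for (auto& child : query_threads) {
    child.get();   // then propagate any stored exception
  }
}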

◆ execute()

std::shared_ptr< ResultSet > Executor::execute ( const Planner::RootPlan root_plan,
const Catalog_Namespace::SessionInfo session,
const bool  hoist_literals,
const ExecutorDeviceType  device_type,
const ExecutorOptLevel  opt_level,
const bool  allow_multifrag,
const bool  allow_loop_joins,
RenderInfo render_query_data = nullptr 
)

Definition at line 32 of file LegacyExecute.cpp.

References CHECK, CPU, g_enable_dynamic_watchdog, Catalog_Namespace::SessionInfo::get_currentUser(), Planner::RootPlan::get_plan_dest(), Planner::RootPlan::get_result_table_id(), Planner::RootPlan::get_stmt_type(), Catalog_Namespace::SessionInfo::getCatalog(), Planner::RootPlan::getCatalog(), AccessPrivileges::INSERT_INTO_TABLE, Catalog_Namespace::SysCatalog::instance(), Planner::RootPlan::kEXPLAIN, kINSERT, kSELECT, DBObject::loadKey(), anonymous_namespace{DBObjectPrivilegesTest.cpp}::privObjects, DBObject::setPrivileges(), anonymous_namespace{DBObjectPrivilegesTest.cpp}::sys_cat, TableDBObjectType, timer_start(), and timer_stop().

40  {
41  catalog_ = &root_plan->getCatalog();
42  const auto stmt_type = root_plan->get_stmt_type();
43  // capture the lock acquisition time
44  auto clock_begin = timer_start();
45  std::lock_guard<std::mutex> lock(execute_mutex_);
46  if (g_enable_dynamic_watchdog) {
47  resetInterrupt();
48  }
49  ScopeGuard restore_metainfo_cache = [this] { clearMetaInfoCache(); };
50  int64_t queue_time_ms = timer_stop(clock_begin);
51  ScopeGuard row_set_holder = [this] { row_set_mem_owner_ = nullptr; };
52  switch (stmt_type) {
53  case kSELECT: {
54  throw std::runtime_error("The legacy SELECT path has been fully deprecated.");
55  }
56  case kINSERT: {
57  if (root_plan->get_plan_dest() == Planner::RootPlan::kEXPLAIN) {
58  auto explanation_rs = std::make_shared<ResultSet>("No explanation available.");
59  explanation_rs->setQueueTime(queue_time_ms);
60  return explanation_rs;
61  }
62  auto& cat = session.getCatalog();
63  auto& sys_cat = Catalog_Namespace::SysCatalog::instance();
64  auto user_metadata = session.get_currentUser();
65  const int table_id = root_plan->get_result_table_id();
66  auto td = cat.getMetadataForTable(table_id);
67  DBObject dbObject(td->tableName, TableDBObjectType);
68  dbObject.loadKey(cat);
69  dbObject.setPrivileges(AccessPrivileges::INSERT_INTO_TABLE);
70  std::vector<DBObject> privObjects;
71  privObjects.push_back(dbObject);
72  if (!sys_cat.checkPrivileges(user_metadata, privObjects)) {
73  throw std::runtime_error(
74  "Violation of access privileges: user " + user_metadata.userName +
75  " has no insert privileges for table " + td->tableName + ".");
76  break;
77  }
78  executeSimpleInsert(root_plan);
79  auto empty_rs = std::make_shared<ResultSet>(std::vector<TargetInfo>{},
80  ExecutorDeviceType::CPU,
81  QueryMemoryDescriptor(),
82  nullptr,
83  this);
84  empty_rs->setQueueTime(queue_time_ms);
85  return empty_rs;
86  }
87  default:
88  CHECK(false);
89  }
90  CHECK(false);
91  return nullptr;
92 }
int get_result_table_id() const
Definition: Planner.h:291
SQLStmtType get_stmt_type() const
Definition: Planner.h:290
static const AccessPrivileges INSERT_INTO_TABLE
Definition: DBObject.h:154
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
const UserMetadata & get_currentUser() const
Definition: SessionInfo.h:93
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:70
void executeSimpleInsert(const Planner::RootPlan *root_plan)
Definition: Execute.cpp:2453
static SysCatalog & instance()
Definition: SysCatalog.h:240
const Catalog_Namespace::Catalog & getCatalog() const
Definition: Planner.h:293
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:990
Catalog & getCatalog() const
Definition: SessionInfo.h:90
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:960
friend class QueryMemoryDescriptor
Definition: Execute.h:1024
void resetInterrupt()
#define CHECK(condition)
Definition: Logger.h:187
static std::mutex execute_mutex_
Definition: Execute.h:1001
Type timer_start()
Definition: measure.h:40
void clearMetaInfoCache()
Definition: Execute.cpp:319
Dest get_plan_dest() const
Definition: Planner.h:299
+ Here is the call graph for this function:
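The listing above times how long execute() waits for execute_mutex_ (timer_start before the lock, timer_stop after it) and attaches that value to the returned ResultSet as the queue time. Below is a minimal sketch of the same measurement using only the standard library; the mutex and variable names here are illustrative, not OmniSciDB's.

#include <chrono>
#include <iostream>
#include <mutex>

std::mutex execute_mutex;  // stand-in for the executor-wide serialization mutex

int main() {
  using Clock = std::chrono::steady_clock;
  const auto clock_begin = Clock::now();            // analogous to timer_start()
  std::lock_guard<std::mutex> lock(execute_mutex);  // may block behind another query
  const auto queue_time_ms =                        // analogous to timer_stop()
      std::chrono::duration_cast<std::chrono::milliseconds>(Clock::now() - clock_begin)
          .count();
  std::cout << "queued for " << queue_time_ms << " ms before executing\n";
  return 0;
}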

◆ executeExplain()

ResultSetPtr Executor::executeExplain ( const QueryCompilationDescriptor query_comp_desc)
private

Definition at line 1329 of file Execute.cpp.

References QueryCompilationDescriptor::getIR().

Referenced by executeWorkUnitImpl().

1329  {
1330  return std::make_shared<ResultSet>(query_comp_desc.getIR());
1331 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ executePlanWithGroupBy()

int32_t Executor::executePlanWithGroupBy ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationResult compilation_result,
const bool  hoist_literals,
ResultSetPtr results,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t *>> &  col_buffers,
const std::vector< size_t >  outer_tab_frag_ids,
QueryExecutionContext query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr data_mgr,
const int  device_id,
const int64_t  limit,
const uint32_t  start_rowid,
const uint32_t  num_tables,
RenderInfo render_info 
)
private

Definition at line 2274 of file Execute.cpp.

References blockSize(), CHECK, CHECK_NE, anonymous_namespace{Execute.cpp}::check_rows_less_than_needed(), CPU, ERR_DIV_BY_ZERO, ERR_INTERRUPTED, ERR_OUT_OF_GPU_MEM, ERR_OUT_OF_RENDER_MEM, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, ERR_SPECULATIVE_TOP_OOM, ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY, error_code, logger::FATAL, g_enable_dynamic_watchdog, getJoinHashTablePtrs(), QueryExecutionContext::getRowSet(), GPU, gridSize(), RelAlgExecutionUnit::groupby_exprs, INJECT_TIMER, interrupted_, QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), Executor::CompilationResult::literal_values, LOG, Executor::CompilationResult::native_functions, num_rows, QueryExecutionContext::query_mem_desc_, RenderInfo::render_allocator_map_ptr, serializeLiterals(), and RenderInfo::useCudaBuffers().

2290  {
2291  INJECT_TIMER(executePlanWithGroupBy);
2292  CHECK(!results);
2293  if (col_buffers.empty()) {
2294  return 0;
2295  }
2296  CHECK_NE(ra_exe_unit.groupby_exprs.size(), size_t(0));
2297  // TODO(alex):
2298  // 1. Optimize size (make keys more compact).
2299  // 2. Resize on overflow.
2300  // 3. Optimize runtime.
2301  auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
2302  int32_t error_code = device_type == ExecutorDeviceType::GPU ? 0 : start_rowid;
2303  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
2304  if (g_enable_dynamic_watchdog && interrupted_) {
2305  return ERR_INTERRUPTED;
2306  }
2307 
2308  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
2309  if (render_info && render_info->useCudaBuffers()) {
2310  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
2311  }
2312 
2313  if (device_type == ExecutorDeviceType::CPU) {
2314  query_exe_context->launchCpuCode(ra_exe_unit,
2315  compilation_result.native_functions,
2316  hoist_literals,
2317  hoist_buf,
2318  col_buffers,
2319  num_rows,
2320  frag_offsets,
2321  scan_limit,
2322  &error_code,
2323  num_tables,
2324  join_hash_table_ptrs);
2325  } else {
2326  try {
2327  query_exe_context->launchGpuCode(ra_exe_unit,
2328  compilation_result.native_functions,
2329  hoist_literals,
2330  hoist_buf,
2331  col_buffers,
2332  num_rows,
2333  frag_offsets,
2334  scan_limit,
2335  data_mgr,
2336  blockSize(),
2337  gridSize(),
2338  device_id,
2339  &error_code,
2340  num_tables,
2341  join_hash_table_ptrs,
2342  render_allocator_map_ptr);
2343  } catch (const OutOfMemory&) {
2344  return ERR_OUT_OF_GPU_MEM;
2345  } catch (const OutOfRenderMemory&) {
2346  return ERR_OUT_OF_RENDER_MEM;
2347  } catch (const StreamingTopNNotSupportedInRenderQuery&) {
2349  } catch (const std::bad_alloc&) {
2350  return ERR_SPECULATIVE_TOP_OOM;
2351  } catch (const std::exception& e) {
2352  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
2353  }
2354  }
2355 
2356  if (error_code == Executor::ERR_OVERFLOW_OR_UNDERFLOW ||
2357  error_code == Executor::ERR_DIV_BY_ZERO ||
2358  error_code == Executor::ERR_OUT_OF_TIME ||
2359  error_code == Executor::ERR_INTERRUPTED) {
2360  return error_code;
2361  }
2362 
2363  if (error_code != Executor::ERR_OVERFLOW_OR_UNDERFLOW &&
2364  error_code != Executor::ERR_DIV_BY_ZERO && !render_allocator_map_ptr) {
2365  results =
2366  query_exe_context->getRowSet(ra_exe_unit, query_exe_context->query_mem_desc_);
2367  CHECK(results);
2368  results->holdLiterals(hoist_buf);
2369  }
2370  if (error_code < 0 && render_allocator_map_ptr) {
2371  // More rows passed the filter than available slots. We don't have a count to check,
2372  // so assume we met the limit if a scan limit is set
2373  if (scan_limit != 0) {
2374  return 0;
2375  } else {
2376  return error_code;
2377  }
2378  }
2379  if (error_code && (!scan_limit || check_rows_less_than_needed(results, scan_limit))) {
2380  return error_code; // unlucky, not enough results and we ran out of slots
2381  }
2382 
2383  return 0;
2384 }
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void *>> &cu_functions, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t *>> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables, RenderAllocatorMap *render_allocator_map)
const int8_t const int64_t * num_rows
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1013
#define LOG(tag)
Definition: Logger.h:182
unsigned gridSize() const
Definition: Execute.cpp:2831
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:70
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void *>> &fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t *>> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables)
bool useCudaBuffers() const
Definition: RenderInfo.cpp:60
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:2386
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY
Definition: Execute.h:1017
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:1004
#define INJECT_TIMER(DESC)
Definition: measure.h:91
#define CHECK_NE(x, y)
Definition: Logger.h:196
static const int32_t ERR_OUT_OF_RENDER_MEM
Definition: Execute.h:1008
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:1010
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1012
ResultSetPtr getRowSet(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc) const
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:1005
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t *>> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2274
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:326
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:32
bool check_rows_less_than_needed(const ResultSetPtr &results, const size_t scan_limit)
Definition: Execute.cpp:2267
static const int32_t ERR_SPECULATIVE_TOP_OOM
Definition: Execute.h:1011
#define CHECK(condition)
Definition: Logger.h:187
const QueryMemoryDescriptor query_mem_desc_
bool interrupted_
Definition: Execute.h:968
unsigned blockSize() const
Definition: Execute.cpp:2839
+ Here is the call graph for this function:
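The GPU branch above converts launch failures into the executor's int32_t error codes: out-of-memory style exceptions become recoverable codes the caller can act on, while anything unexpected is fatal. A standalone sketch of that exception-to-error-code mapping follows; the exception type and the numeric constants are illustrative, not the real OmniSciDB definitions.

#include <cstdint>
#include <iostream>
#include <new>
#include <stdexcept>

// Illustrative error codes and exception type (not the real OmniSciDB ones).
constexpr int32_t ERR_OUT_OF_GPU_MEM_SKETCH = 2;
constexpr int32_t ERR_SPECULATIVE_TOP_OOM_SKETCH = 8;

struct OutOfGpuMemorySketch : std::runtime_error {
  OutOfGpuMemorySketch() : std::runtime_error("out of GPU memory") {}
};

int32_t launch_kernel_checked(const bool simulate_oom) {
  try {
    if (simulate_oom) {
      throw OutOfGpuMemorySketch();
    }
    // ... the actual kernel launch would go here ...
  } catch (const OutOfGpuMemorySketch&) {
    return ERR_OUT_OF_GPU_MEM_SKETCH;       // recoverable: the caller may retry on CPU
  } catch (const std::bad_alloc&) {
    return ERR_SPECULATIVE_TOP_OOM_SKETCH;  // recoverable: retry without speculative top-N
  }
  // Any other exception propagates; the member function above logs FATAL instead.
  return 0;
}

int main() {
  std::cout << launch_kernel_checked(true) << '\n';   // prints 2
  std::cout << launch_kernel_checked(false) << '\n';  // prints 0
  return 0;
}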

◆ executePlanWithoutGroupBy()

int32_t Executor::executePlanWithoutGroupBy ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationResult compilation_result,
const bool  hoist_literals,
ResultSetPtr results,
const std::vector< Analyzer::Expr *> &  target_exprs,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t *>> &  col_buffers,
QueryExecutionContext query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr data_mgr,
const int  device_id,
const uint32_t  start_rowid,
const uint32_t  num_tables,
RenderInfo render_info 
)
private

Definition at line 2132 of file Execute.cpp.

References blockSize(), CHECK, CHECK_EQ, CPU, ERR_DIV_BY_ZERO, ERR_INTERRUPTED, ERR_OUT_OF_GPU_MEM, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, error_code, RelAlgExecutionUnit::estimator, QueryExecutionContext::estimator_result_set_, logger::FATAL, g_bigint_count, g_enable_dynamic_watchdog, get_target_info(), getJoinHashTablePtrs(), Experimental::MetaTypeClassFactory< CLASSIFICATIONS_PACK >::getMetaTypeClass(), GPU, gridSize(), INJECT_TIMER, interrupted_, RenderInfo::isPotentialInSituRender(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), Executor::CompilationResult::literal_values, LOG, Executor::CompilationResult::native_functions, num_rows, out, QueryExecutionContext::query_buffers_, RenderInfo::render_allocator_map_ptr, serializeLiterals(), and RenderInfo::useCudaBuffers().

2147  {
2148  INJECT_TIMER(executePlanWithoutGroupBy);
2149  CHECK(!results);
2150  if (col_buffers.empty()) {
2151  return 0;
2152  }
2153 
2154  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
2155  if (render_info) {
2156  // TODO(adb): make sure that we either never get here in the CPU case, or if we do get
2157  // here, we are in non-insitu mode.
2158  CHECK(render_info->useCudaBuffers() || !render_info->isPotentialInSituRender())
2159  << "CUDA disabled rendering in the executePlanWithoutGroupBy query path is "
2160  "currently unsupported.";
2161  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
2162  }
2163 
2164  int32_t error_code = device_type == ExecutorDeviceType::GPU ? 0 : start_rowid;
2165  std::vector<int64_t*> out_vec;
2166  const auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
2167  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
2168  std::unique_ptr<OutVecOwner> output_memory_scope;
2169  if (g_enable_dynamic_watchdog && interrupted_) {
2170  return ERR_INTERRUPTED;
2171  }
2172  if (device_type == ExecutorDeviceType::CPU) {
2173  out_vec = query_exe_context->launchCpuCode(ra_exe_unit,
2174  compilation_result.native_functions,
2175  hoist_literals,
2176  hoist_buf,
2177  col_buffers,
2178  num_rows,
2179  frag_offsets,
2180  0,
2181  &error_code,
2182  num_tables,
2183  join_hash_table_ptrs);
2184  output_memory_scope.reset(new OutVecOwner(out_vec));
2185  } else {
2186  try {
2187  out_vec = query_exe_context->launchGpuCode(ra_exe_unit,
2188  compilation_result.native_functions,
2189  hoist_literals,
2190  hoist_buf,
2191  col_buffers,
2192  num_rows,
2193  frag_offsets,
2194  0,
2195  data_mgr,
2196  blockSize(),
2197  gridSize(),
2198  device_id,
2199  &error_code,
2200  num_tables,
2201  join_hash_table_ptrs,
2202  render_allocator_map_ptr);
2203  output_memory_scope.reset(new OutVecOwner(out_vec));
2204  } catch (const OutOfMemory&) {
2205  return ERR_OUT_OF_GPU_MEM;
2206  } catch (const std::exception& e) {
2207  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
2208  }
2209  }
2210  if (error_code == Executor::ERR_OVERFLOW_OR_UNDERFLOW ||
2211  error_code == Executor::ERR_DIV_BY_ZERO ||
2212  error_code == Executor::ERR_OUT_OF_TIME ||
2213  error_code == Executor::ERR_INTERRUPTED) {
2214  return error_code;
2215  }
2216  if (ra_exe_unit.estimator) {
2217  CHECK(!error_code);
2218  results =
2219  std::shared_ptr<ResultSet>(query_exe_context->estimator_result_set_.release());
2220  return 0;
2221  }
2222  std::vector<int64_t> reduced_outs;
2223  const auto num_frags = col_buffers.size();
2224  const size_t entry_count = device_type == ExecutorDeviceType::GPU
2225  ? num_frags * blockSize() * gridSize()
2226  : num_frags;
2227  if (size_t(1) == entry_count) {
2228  for (auto out : out_vec) {
2229  CHECK(out);
2230  reduced_outs.push_back(*out);
2231  }
2232  } else {
2233  size_t out_vec_idx = 0;
2234 
2235  for (const auto target_expr : target_exprs) {
2236  const auto agg_info = get_target_info(target_expr, g_bigint_count);
2237  CHECK(agg_info.is_agg);
2238 
2239  auto meta_class(
2241  auto agg_reduction_impl =
2243  agg_reduction_impl(meta_class,
2244  entry_count,
2245  error_code,
2246  agg_info,
2247  out_vec_idx,
2248  out_vec,
2249  reduced_outs,
2250  query_exe_context);
2251  if (error_code) {
2252  break;
2253  }
2254  }
2255  }
2256 
2257  CHECK_EQ(size_t(1), query_exe_context->query_buffers_->result_sets_.size());
2258  auto rows_ptr = std::shared_ptr<ResultSet>(
2259  query_exe_context->query_buffers_->result_sets_[0].release());
2260  rows_ptr->fillOneEntry(reduced_outs);
2261  results = std::move(rows_ptr);
2262  return error_code;
2263 }
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void *>> &cu_functions, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t *>> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables, RenderAllocatorMap *render_allocator_map)
#define CHECK_EQ(x, y)
Definition: Logger.h:195
static auto getMetaTypeClass(SQL_TYPE_INFO const &sql_type_info) -> MetaTypeClassContainer
const int8_t const int64_t * num_rows
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1013
#define LOG(tag)
Definition: Logger.h:182
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:65
unsigned gridSize() const
Definition: Execute.cpp:2831
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr *> &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t *>> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2132
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:70
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void *>> &fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t *>> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables)
bool useCudaBuffers() const
Definition: RenderInfo.cpp:60
std::unique_ptr< QueryMemoryInitializer > query_buffers_
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:2386
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:55
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:1004
#define INJECT_TIMER(DESC)
Definition: measure.h:91
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:1010
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1012
bool g_bigint_count
const std::shared_ptr< Analyzer::Estimator > estimator
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:1005
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:326
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:32
#define CHECK(condition)
Definition: Logger.h:187
MetaTypeClassHandler< SPECIALIZED_HANDLER, Geometry > GeoVsNonGeoClassHandler
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t ** out
std::unique_ptr< ResultSet > estimator_result_set_
bool interrupted_
Definition: Execute.h:968
unsigned blockSize() const
Definition: Execute.cpp:2839
+ Here is the call graph for this function:
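For non-grouped aggregates every kernel slot writes one partial value per target, so the host reduces entry_count partials (num_frags on CPU, num_frags * blockSize() * gridSize() on GPU) into a single output before filling the one-row ResultSet. A minimal sketch of that host-side reduction for a COUNT-like target; the sizes and names are illustrative.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  const std::size_t num_frags = 2, block_size = 4, grid_size = 2;
  const std::size_t entry_count = num_frags * block_size * grid_size;  // GPU-style slot count

  // One partial value per slot, e.g. a per-thread COUNT(*) partial sum.
  const std::vector<int64_t> partials(entry_count, 3);

  // Host-side reduction: collapse the partials into one value for the target.
  const int64_t reduced = std::accumulate(partials.begin(), partials.end(), int64_t{0});
  std::cout << "reduced count = " << reduced << '\n';  // 2 * 4 * 2 * 3 = 48
  return 0;
}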

◆ executeSimpleInsert()

void Executor::executeSimpleInsert ( const Planner::RootPlan root_plan)
private

Definition at line 2453 of file Execute.cpp.

References Importer_NS::appendDatum(), DataBlockPtr::arraysPtr, anonymous_namespace{ExecuteTest.cpp}::c(), CHECK, CHECK_EQ, checked_malloc(), Fragmenter_Namespace::InsertData::columnIds, Fragmenter_Namespace::InsertData::data, Fragmenter_Namespace::InsertData::databaseId, get_column_descriptor(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), Planner::RootPlan::get_plan(), Planner::RootPlan::get_result_col_list(), Planner::RootPlan::get_result_table_id(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), Planner::RootPlan::getCatalog(), Catalog_Namespace::Catalog::getMetadataForTable(), inline_fixed_encoding_null_val(), anonymous_namespace{Execute.cpp}::insert_one_dict_str(), anonymous_namespace{TypedDataAccessors.h}::is_null(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), kARRAY, kBIGINT, kBOOLEAN, kCAST, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kENCODING_NONE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, Fragmenter_Namespace::InsertData::numRows, anonymous_namespace{TypedDataAccessors.h}::put_null(), anonymous_namespace{TypedDataAccessors.h}::put_null_array(), row_set_mem_owner_, SHARD_FOR_KEY, DataBlockPtr::stringsPtr, Fragmenter_Namespace::InsertData::tableId, and to_string().

2453  {
2454  const auto plan = root_plan->get_plan();
2455  CHECK(plan);
2456  const auto values_plan = dynamic_cast<const Planner::ValuesScan*>(plan);
2457  if (!values_plan) {
2458  throw std::runtime_error(
2459  "Only simple INSERT of immediate tuples is currently supported");
2460  }
2461  row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>();
2462  const auto& targets = values_plan->get_targetlist();
2463  const int table_id = root_plan->get_result_table_id();
2464  const auto& col_id_list = root_plan->get_result_col_list();
2465  std::vector<const ColumnDescriptor*> col_descriptors;
2466  std::vector<int> col_ids;
2467  std::unordered_map<int, std::unique_ptr<uint8_t[]>> col_buffers;
2468  std::unordered_map<int, std::vector<std::string>> str_col_buffers;
2469  std::unordered_map<int, std::vector<ArrayDatum>> arr_col_buffers;
2470  auto& cat = root_plan->getCatalog();
2471  const auto table_descriptor = cat.getMetadataForTable(table_id);
2472  const auto shard_tables = cat.getPhysicalTablesDescriptors(table_descriptor);
2473  const TableDescriptor* shard{nullptr};
2474  for (const int col_id : col_id_list) {
2475  const auto cd = get_column_descriptor(col_id, table_id, cat);
2476  const auto col_enc = cd->columnType.get_compression();
2477  if (cd->columnType.is_string()) {
2478  switch (col_enc) {
2479  case kENCODING_NONE: {
2480  auto it_ok =
2481  str_col_buffers.insert(std::make_pair(col_id, std::vector<std::string>{}));
2482  CHECK(it_ok.second);
2483  break;
2484  }
2485  case kENCODING_DICT: {
2486  const auto dd = cat.getMetadataForDict(cd->columnType.get_comp_param());
2487  CHECK(dd);
2488  const auto it_ok = col_buffers.emplace(
2489  col_id, std::unique_ptr<uint8_t[]>(new uint8_t[cd->columnType.get_size()]));
2490  CHECK(it_ok.second);
2491  break;
2492  }
2493  default:
2494  CHECK(false);
2495  }
2496  } else if (cd->columnType.is_geometry()) {
2497  auto it_ok =
2498  str_col_buffers.insert(std::make_pair(col_id, std::vector<std::string>{}));
2499  CHECK(it_ok.second);
2500  } else if (cd->columnType.is_array()) {
2501  auto it_ok =
2502  arr_col_buffers.insert(std::make_pair(col_id, std::vector<ArrayDatum>{}));
2503  CHECK(it_ok.second);
2504  } else {
2505  const auto it_ok = col_buffers.emplace(
2506  col_id,
2507  std::unique_ptr<uint8_t[]>(
2508  new uint8_t[cd->columnType.get_logical_size()]())); // changed to zero-init
2509  // the buffer
2510  CHECK(it_ok.second);
2511  }
2512  col_descriptors.push_back(cd);
2513  col_ids.push_back(col_id);
2514  }
2515  size_t col_idx = 0;
2516  Fragmenter_Namespace::InsertData insert_data;
2517  insert_data.databaseId = cat.getCurrentDB().dbId;
2518  insert_data.tableId = table_id;
2519  int64_t int_col_val{0};
2520  for (auto target_entry : targets) {
2521  auto col_cv = dynamic_cast<const Analyzer::Constant*>(target_entry->get_expr());
2522  if (!col_cv) {
2523  auto col_cast = dynamic_cast<const Analyzer::UOper*>(target_entry->get_expr());
2524  CHECK(col_cast);
2525  CHECK_EQ(kCAST, col_cast->get_optype());
2526  col_cv = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
2527  }
2528  CHECK(col_cv);
2529  const auto cd = col_descriptors[col_idx];
2530  auto col_datum = col_cv->get_constval();
2531  auto col_type = cd->columnType.get_type();
2532  uint8_t* col_data_bytes{nullptr};
2533  if (!cd->columnType.is_array() && !cd->columnType.is_geometry() &&
2534  (!cd->columnType.is_string() ||
2535  cd->columnType.get_compression() == kENCODING_DICT)) {
2536  const auto col_data_bytes_it = col_buffers.find(col_ids[col_idx]);
2537  CHECK(col_data_bytes_it != col_buffers.end());
2538  col_data_bytes = col_data_bytes_it->second.get();
2539  }
2540  switch (col_type) {
2541  case kBOOLEAN: {
2542  auto col_data = col_data_bytes;
2543  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2544  : (col_datum.boolval ? 1 : 0);
2545  break;
2546  }
2547  case kTINYINT: {
2548  auto col_data = reinterpret_cast<int8_t*>(col_data_bytes);
2549  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2550  : col_datum.tinyintval;
2551  int_col_val = col_datum.tinyintval;
2552  break;
2553  }
2554  case kSMALLINT: {
2555  auto col_data = reinterpret_cast<int16_t*>(col_data_bytes);
2556  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2557  : col_datum.smallintval;
2558  int_col_val = col_datum.smallintval;
2559  break;
2560  }
2561  case kINT: {
2562  auto col_data = reinterpret_cast<int32_t*>(col_data_bytes);
2563  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2564  : col_datum.intval;
2565  int_col_val = col_datum.intval;
2566  break;
2567  }
2568  case kBIGINT:
2569  case kDECIMAL:
2570  case kNUMERIC: {
2571  auto col_data = reinterpret_cast<int64_t*>(col_data_bytes);
2572  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2573  : col_datum.bigintval;
2574  int_col_val = col_datum.bigintval;
2575  break;
2576  }
2577  case kFLOAT: {
2578  auto col_data = reinterpret_cast<float*>(col_data_bytes);
2579  *col_data = col_datum.floatval;
2580  break;
2581  }
2582  case kDOUBLE: {
2583  auto col_data = reinterpret_cast<double*>(col_data_bytes);
2584  *col_data = col_datum.doubleval;
2585  break;
2586  }
2587  case kTEXT:
2588  case kVARCHAR:
2589  case kCHAR: {
2590  switch (cd->columnType.get_compression()) {
2591  case kENCODING_NONE:
2592  str_col_buffers[col_ids[col_idx]].push_back(
2593  col_datum.stringval ? *col_datum.stringval : "");
2594  break;
2595  case kENCODING_DICT: {
2596  switch (cd->columnType.get_size()) {
2597  case 1:
2598  int_col_val = insert_one_dict_str(
2599  reinterpret_cast<uint8_t*>(col_data_bytes), cd, col_cv, cat);
2600  break;
2601  case 2:
2602  int_col_val = insert_one_dict_str(
2603  reinterpret_cast<uint16_t*>(col_data_bytes), cd, col_cv, cat);
2604  break;
2605  case 4:
2606  int_col_val = insert_one_dict_str(
2607  reinterpret_cast<int32_t*>(col_data_bytes), cd, col_cv, cat);
2608  break;
2609  default:
2610  CHECK(false);
2611  }
2612  break;
2613  }
2614  default:
2615  CHECK(false);
2616  }
2617  break;
2618  }
2619  case kTIME:
2620  case kTIMESTAMP:
2621  case kDATE: {
2622  auto col_data = reinterpret_cast<int64_t*>(col_data_bytes);
2623  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2624  : col_datum.bigintval;
2625  break;
2626  }
2627  case kARRAY: {
2628  const auto is_null = col_cv->get_is_null();
2629  const auto size = cd->columnType.get_size();
2630  const SQLTypeInfo elem_ti = cd->columnType.get_elem_type();
2631  if (is_null) {
2632  if (size > 0) {
2633  // NULL fixlen array: NULL_ARRAY sentinel followed by NULL sentinels
2634  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
2635  throw std::runtime_error("Column " + cd->columnName +
2636  " doesn't accept NULL values");
2637  }
2638  int8_t* buf = (int8_t*)checked_malloc(size);
2639  put_null_array(static_cast<void*>(buf), elem_ti, "");
2640  for (int8_t* p = buf + elem_ti.get_size(); (p - buf) < size;
2641  p += elem_ti.get_size()) {
2642  put_null(static_cast<void*>(p), elem_ti, "");
2643  }
2644  arr_col_buffers[col_ids[col_idx]].emplace_back(size, buf, is_null);
2645  } else {
2646  arr_col_buffers[col_ids[col_idx]].emplace_back(0, nullptr, is_null);
2647  }
2648  break;
2649  }
2650  const auto l = col_cv->get_value_list();
2651  size_t len = l.size() * elem_ti.get_size();
2652  if (size > 0 && static_cast<size_t>(size) != len) {
2653  throw std::runtime_error("Array column " + cd->columnName + " expects " +
2654  std::to_string(size / elem_ti.get_size()) +
2655  " values, " + "received " + std::to_string(l.size()));
2656  }
2657  if (elem_ti.is_string()) {
2658  CHECK(kENCODING_DICT == elem_ti.get_compression());
2659  CHECK(4 == elem_ti.get_size());
2660 
2661  int8_t* buf = (int8_t*)checked_malloc(len);
2662  int32_t* p = reinterpret_cast<int32_t*>(buf);
2663 
2664  int elemIndex = 0;
2665  for (auto& e : l) {
2666  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2667  CHECK(c);
2668 
2669  int_col_val =
2670  insert_one_dict_str(&p[elemIndex], cd->columnName, elem_ti, c.get(), cat);
2671 
2672  elemIndex++;
2673  }
2674  arr_col_buffers[col_ids[col_idx]].push_back(ArrayDatum(len, buf, is_null));
2675 
2676  } else {
2677  int8_t* buf = (int8_t*)checked_malloc(len);
2678  int8_t* p = buf;
2679  for (auto& e : l) {
2680  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2681  CHECK(c);
2682  p = Importer_NS::appendDatum(p, c->get_constval(), elem_ti);
2683  }
2684  arr_col_buffers[col_ids[col_idx]].push_back(ArrayDatum(len, buf, is_null));
2685  }
2686  break;
2687  }
2688  case kPOINT:
2689  case kLINESTRING:
2690  case kPOLYGON:
2691  case kMULTIPOLYGON:
2692  str_col_buffers[col_ids[col_idx]].push_back(
2693  col_datum.stringval ? *col_datum.stringval : "");
2694  break;
2695  default:
2696  CHECK(false);
2697  }
2698  ++col_idx;
2699  if (col_idx == static_cast<size_t>(table_descriptor->shardedColumnId)) {
2700  const auto shard_count = shard_tables.size();
2701  const size_t shard_idx = SHARD_FOR_KEY(int_col_val, shard_count);
2702  shard = shard_tables[shard_idx];
2703  }
2704  }
2705  for (const auto& kv : col_buffers) {
2706  insert_data.columnIds.push_back(kv.first);
2707  DataBlockPtr p;
2708  p.numbersPtr = reinterpret_cast<int8_t*>(kv.second.get());
2709  insert_data.data.push_back(p);
2710  }
2711  for (auto& kv : str_col_buffers) {
2712  insert_data.columnIds.push_back(kv.first);
2713  DataBlockPtr p;
2714  p.stringsPtr = &kv.second;
2715  insert_data.data.push_back(p);
2716  }
2717  for (auto& kv : arr_col_buffers) {
2718  insert_data.columnIds.push_back(kv.first);
2719  DataBlockPtr p;
2720  p.arraysPtr = &kv.second;
2721  insert_data.data.push_back(p);
2722  }
2723  insert_data.numRows = 1;
2724  if (shard) {
2725  shard->fragmenter->insertData(insert_data);
2726  } else {
2727  table_descriptor->fragmenter->insertData(insert_data);
2728  }
2729 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
HOST DEVICE int get_size() const
Definition: sqltypes.h:333
Definition: sqltypes.h:51
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:138
int get_result_table_id() const
Definition: Planner.h:291
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:139
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
int64_t insert_one_dict_str(T *col_data, const ColumnDescriptor *cd, const Analyzer::Constant *col_cv, const Catalog_Namespace::Catalog &catalog)
Definition: Execute.cpp:2438
void c(const std::string &query_string, const ExecutorDeviceType device_type)
Definition: sqldefs.h:49
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
std::string to_string(char const *&&v)
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:61
size_t numRows
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:63
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: Importer.cpp:323
const Catalog_Namespace::Catalog & getCatalog() const
Definition: Planner.h:293
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:960
void put_null_array(void *ndptr, const SQLTypeInfo &ntype, const std::string col_name)
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:632
void put_null(void *ndptr, const SQLTypeInfo &ntype, const std::string col_name)
Definition: sqltypes.h:54
Definition: sqltypes.h:55
const Plan * get_plan() const
Definition: Planner.h:289
bool is_null(const T &v, const SQLTypeInfo &t)
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:64
Definition: sqltypes.h:43
#define CHECK(condition)
Definition: Logger.h:187
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
const std::list< int > & get_result_col_list() const
Definition: Planner.h:292
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
Definition: sqltypes.h:47
specifies the content in-memory of a row in the table metadata table
int8_t * numbersPtr
Definition: sqltypes.h:137
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:62
bool is_string() const
Definition: sqltypes.h:450
#define SHARD_FOR_KEY(key, num_shards)
Definition: shard_key.h:20
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:119
const ColumnDescriptor * get_column_descriptor(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:139
+ Here is the call graph for this function:
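For sharded tables the loop above remembers the last integer value written (int_col_val) and, once the sharded column has been processed, routes the row to a physical shard with SHARD_FOR_KEY. The sketch below assumes the macro behaves like a plain modulo over the shard count; that assumption and every name here are illustrative.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Assumed behaviour of SHARD_FOR_KEY: key modulo shard count, kept non-negative.
inline std::size_t shard_for_key(const int64_t key, const std::size_t num_shards) {
  const int64_t m = key % static_cast<int64_t>(num_shards);
  return static_cast<std::size_t>(m < 0 ? m + static_cast<int64_t>(num_shards) : m);
}

int main() {
  const std::vector<std::string> shard_tables = {"t_shard_0", "t_shard_1", "t_shard_2"};
  const int64_t int_col_val = 42;  // value of the sharded column for this row
  const std::size_t shard_idx = shard_for_key(int_col_val, shard_tables.size());
  std::cout << "row routed to " << shard_tables[shard_idx] << '\n';  // t_shard_0
  return 0;
}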

◆ executeUpdate()

void Executor::executeUpdate ( const RelAlgExecutionUnit ra_exe_unit,
const InputTableInfo table_info,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const UpdateLogForFragment::Callback cb 
)
private

Definition at line 60 of file ExecuteUpdate.cpp.

References CHECK, CHECK_EQ, Executor::ExecutionDispatch::compile(), UpdateLogForFragment::count(), create_count_all_execution_unit(), CompilationOptions::device_type_, Fragmenter_Namespace::TableInfo::fragments, g_bigint_count, InputTableInfo::info, kBIGINT, kCOUNT, KernelPerFragment, kINT, and Executor::ExecutionDispatch::run().

66  {
67  CHECK(cb);
68  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in);
69  ColumnCacheMap column_cache;
70 
71  const auto count =
72  makeExpr<Analyzer::AggExpr>(SQLTypeInfo(g_bigint_count ? kBIGINT : kINT, false),
73  kCOUNT,
74  nullptr,
75  false,
76  nullptr);
77  const auto count_all_exe_unit = create_count_all_execution_unit(ra_exe_unit, count);
78 
79  std::vector<InputTableInfo> table_infos{table_info};
80  ExecutionDispatch execution_dispatch(
81  this, count_all_exe_unit, table_infos, cat, row_set_mem_owner, nullptr);
82  ColumnFetcher column_fetcher(this, column_cache);
83  const auto execution_descriptors =
84  execution_dispatch.compile(0, 8, co, eo, column_fetcher, false);
85  CHECK_EQ(size_t(1), ra_exe_unit.input_descs.size());
86  const auto table_id = ra_exe_unit.input_descs[0].getTableId();
87  const auto& outer_fragments = table_info.info.fragments;
88  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
89  ++fragment_index) {
90  // We may want to consider in the future allowing this to execute on devices other
91  // than CPU
92  execution_dispatch.run(
93  co.device_type_,
94  0,
95  eo,
96  column_fetcher,
97  *std::get<QueryCompilationDescriptorOwned>(execution_descriptors),
98  *std::get<QueryMemoryDescriptorOwned>(execution_descriptors),
99  {{table_id, {fragment_index}}},
100  ExecutorDispatchMode::KernelPerFragment,
101  -1);
102  }
103  // Further optimization possible here to skip fragments
104  CHECK_EQ(outer_fragments.size(), execution_dispatch.getFragmentResults().size());
105  // There could be benefit to multithreading this once we see where the bottlenecks really
106  // are
107  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
108  ++fragment_index) {
109  const auto& fragment_results =
110  execution_dispatch.getFragmentResults()[fragment_index];
111  const auto count_result_set = fragment_results.first;
112  CHECK(count_result_set);
113  const auto count_row = count_result_set->getNextRow(false, false);
114  CHECK_EQ(size_t(1), count_row.size());
115  const auto& count_tv = count_row.front();
116  const auto count_scalar_tv = boost::get<ScalarTargetValue>(&count_tv);
117  CHECK(count_scalar_tv);
118  const auto count_ptr = boost::get<int64_t>(count_scalar_tv);
119  CHECK(count_ptr);
120  ExecutionDispatch current_fragment_execution_dispatch(
121  this, ra_exe_unit, table_infos, cat, row_set_mem_owner, nullptr);
122  const auto execution_descriptors = current_fragment_execution_dispatch.compile(
123  *count_ptr, 8, co, eo, column_fetcher, false);
124  // We may want to consider in the future allowing this to execute on devices other
125  // than CPU
126  current_fragment_execution_dispatch.run(
127  co.device_type_,
128  0,
129  eo,
130  column_fetcher,
131  *std::get<QueryCompilationDescriptorOwned>(execution_descriptors),
132  *std::get<QueryMemoryDescriptorOwned>(execution_descriptors),
133  {FragmentsPerTable{table_id, {fragment_index}}},
134  ExecutorDispatchMode::KernelPerFragment,
135  -1);
136  const auto& proj_fragment_results =
137  current_fragment_execution_dispatch.getFragmentResults();
138  if (proj_fragment_results.empty()) {
139  continue;
140  }
141  const auto& proj_fragment_result = proj_fragment_results[0];
142  const auto proj_result_set = proj_fragment_result.first;
143  CHECK(proj_result_set);
144  cb({outer_fragments[fragment_index], fragment_index, proj_result_set});
145  }
146 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2905
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:167
const std::vector< InputDescriptor > input_descs
RelAlgExecutionUnit create_count_all_execution_unit(const RelAlgExecutionUnit &ra_exe_unit, std::shared_ptr< Analyzer::Expr > replacement_target)
bool g_bigint_count
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:823
ExecutorDeviceType device_type_
Definition: sqldefs.h:71
#define CHECK(condition)
Definition: Logger.h:187
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
Definition: sqltypes.h:47
+ Here is the call graph for this function:
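executeUpdate makes two passes over every outer fragment: a COUNT(*) execution unit first sizes the output, then the work unit is recompiled with that count and run as a projection whose per-fragment ResultSet is handed to the callback. A standalone sketch of that two-pass, callback-driven loop; every type and function name below is illustrative.

#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

struct FragmentSketch {
  std::vector<int> rows;
};

// Hypothetical per-fragment callback, loosely analogous to UpdateLogForFragment::Callback.
using FragmentCallback =
    std::function<void(std::size_t fragment_index, const std::vector<int>& projected)>;

void update_per_fragment(const std::vector<FragmentSketch>& fragments,
                         const FragmentCallback& cb) {
  for (std::size_t i = 0; i < fragments.size(); ++i) {
    // Pass 1: count the qualifying rows so the projection output can be sized.
    const std::size_t count = fragments[i].rows.size();
    // Pass 2: "project" exactly count rows (here: copy) and hand them to the callback.
    const std::vector<int> projected(fragments[i].rows.begin(),
                                     fragments[i].rows.begin() + count);
    if (projected.empty()) {
      continue;  // mirrors skipping fragments that produced no projection results
    }
    cb(i, projected);
  }
}

int main() {
  const std::vector<FragmentSketch> frags = {{{1, 2}}, {{}}, {{3}}};
  update_per_fragment(frags, [](std::size_t idx, const std::vector<int>& rows) {
    std::cout << "fragment " << idx << ": " << rows.size() << " updated rows\n";
  });
  return 0;
}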

◆ executeWorkUnit()

ResultSetPtr Executor::executeWorkUnit ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit_in,
const CompilationOptions co,
const ExecutionOptions options,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
RenderInfo render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache 
)
private

Definition at line 1096 of file Execute.cpp.

References executeWorkUnitImpl(), CompilationRetryNewScanLimit::new_scan_limit_, and anonymous_namespace{Execute.cpp}::replace_scan_limit().

1107  {
1108  try {
1109  return executeWorkUnitImpl(max_groups_buffer_entry_guess,
1110  is_agg,
1111  true,
1112  query_infos,
1113  ra_exe_unit_in,
1114  co,
1115  eo,
1116  cat,
1117  row_set_mem_owner,
1118  render_info,
1119  has_cardinality_estimation,
1120  column_cache);
1121  } catch (const CompilationRetryNewScanLimit& e) {
1122  return executeWorkUnitImpl(max_groups_buffer_entry_guess,
1123  is_agg,
1124  false,
1125  query_infos,
1126  replace_scan_limit(ra_exe_unit_in, e.new_scan_limit_),
1127  co,
1128  eo,
1129  cat,
1130  row_set_mem_owner,
1131  render_info,
1132  has_cardinality_estimation,
1133  column_cache);
1134  }
1135 }
RelAlgExecutionUnit replace_scan_limit(const RelAlgExecutionUnit &ra_exe_unit_in, const size_t new_scan_limit)
Definition: Execute.cpp:1078
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1137
+ Here is the call graph for this function:
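As the listing shows, executeWorkUnit is a thin wrapper: it forwards to executeWorkUnitImpl and, if compilation throws CompilationRetryNewScanLimit, retries once with the scan limit replaced and the single-fragment-table optimization disabled. Below is a standalone sketch of that catch-and-retry shape; the exception type and functions are illustrative stand-ins.

#include <cstddef>
#include <iostream>

// Illustrative stand-in for CompilationRetryNewScanLimit.
struct RetryWithNewScanLimit {
  std::size_t new_scan_limit;
};

int run_impl(const std::size_t scan_limit, const bool allow_single_frag_opt) {
  if (allow_single_frag_opt && scan_limit > 100) {
    // The first attempt decides the limit can be lowered and requests a retry.
    throw RetryWithNewScanLimit{100};
  }
  return static_cast<int>(scan_limit);
}

int run(const std::size_t scan_limit) {
  try {
    return run_impl(scan_limit, /*allow_single_frag_opt=*/true);
  } catch (const RetryWithNewScanLimit& e) {
    // Retry with the lowered limit; the optimization that triggered it is turned off.
    return run_impl(e.new_scan_limit, /*allow_single_frag_opt=*/false);
  }
}

int main() {
  std::cout << run(100000) << '\n';  // prints 100
  return 0;
}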

◆ executeWorkUnitImpl()

ResultSetPtr Executor::executeWorkUnitImpl ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit_in,
const CompilationOptions co,
const ExecutionOptions options,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
RenderInfo render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache 
)
private

Definition at line 1137 of file Execute.cpp.

References addDeletedColumn(), CHECK, collectAllDeviceResults(), Executor::ExecutionDispatch::compile(), anonymous_namespace{Execute.cpp}::compute_buffer_entry_guess(), CPU, cpu_threads(), CompilationOptions::device_type_, dispatchFragments(), ERR_INTERRUPTED, ERR_OUT_OF_SLOTS, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, executeExplain(), CompilationOptions::explain_type_, Data_Namespace::DataMgr::freeAllBuffers(), get_available_gpus(), get_context_count(), get_min_byte_width(), Catalog_Namespace::Catalog::getDataMgr(), getDeviceTypeForTargets(), QueryExecutionError::getErrorCode(), Data_Namespace::DataMgr::getMemoryInfo(), GPU, ExecutionOptions::gpu_input_mem_limit_percent, Data_Namespace::GPU_LEVEL, CompilationOptions::hoist_literals_, INJECT_TIMER, interrupted_, ExecutionOptions::just_explain, ExecutionOptions::just_validate, MAX_BYTE_WIDTH_SUPPORTED, CompilationOptions::opt_level_, plan_state_, QueryMemoryDescriptor, CompilationOptions::register_intel_jit_listener_, resultsUnion(), Executor::ExecutionDispatch::run(), ExecutionOptions::with_dynamic_watchdog, and CompilationOptions::with_dynamic_watchdog_.

Referenced by executeWorkUnit().

1149  {
1150  INJECT_TIMER(Exec_executeWorkUnit);
1151  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in);
1152  const auto device_type = getDeviceTypeForTargets(ra_exe_unit, co.device_type_);
1153  CHECK(!query_infos.empty());
1154  if (!max_groups_buffer_entry_guess) {
1155  // The query has failed the first execution attempt because of running out
1156  // of group by slots. Make the conservative choice: allocate fragment size
1157  // slots and run on the CPU.
1158  CHECK(device_type == ExecutorDeviceType::CPU);
1159  max_groups_buffer_entry_guess = compute_buffer_entry_guess(query_infos);
1160  }
1161 
1162  int8_t crt_min_byte_width{get_min_byte_width()};
1163  do {
1164  ExecutionDispatch execution_dispatch(
1165  this, ra_exe_unit, query_infos, cat, row_set_mem_owner, render_info);
1166  ColumnFetcher column_fetcher(this, column_cache);
1167  std::unique_ptr<QueryCompilationDescriptor> query_comp_desc_owned;
1168  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
1169  try {
1170  INJECT_TIMER(execution_dispatch_comp);
1171  std::tie(query_comp_desc_owned, query_mem_desc_owned) =
1172  execution_dispatch.compile(max_groups_buffer_entry_guess,
1173  crt_min_byte_width,
1174  {device_type,
1175  co.hoist_literals_,
1176  co.opt_level_,
1178  co.explain_type_,
1180  eo,
1181  column_fetcher,
1182  has_cardinality_estimation);
1183  CHECK(query_comp_desc_owned);
1184  crt_min_byte_width = query_comp_desc_owned->getMinByteWidth();
1185  } catch (CompilationRetryNoCompaction&) {
1186  crt_min_byte_width = MAX_BYTE_WIDTH_SUPPORTED;
1187  continue;
1188  }
1189  if (eo.just_explain) {
1190  return executeExplain(*query_comp_desc_owned);
1191  }
1192 
1193  for (const auto target_expr : ra_exe_unit.target_exprs) {
1194  plan_state_->target_exprs_.push_back(target_expr);
1195  }
1196 
1197  auto dispatch = [&execution_dispatch, &column_fetcher, &eo](
1198  const ExecutorDeviceType chosen_device_type,
1199  int chosen_device_id,
1200  const QueryCompilationDescriptor& query_comp_desc,
1201  const QueryMemoryDescriptor& query_mem_desc,
1202  const FragmentsList& frag_list,
1203  const ExecutorDispatchMode kernel_dispatch_mode,
1204  const int64_t rowid_lookup_key) {
1205  INJECT_TIMER(execution_dispatch_run);
1206  execution_dispatch.run(chosen_device_type,
1207  chosen_device_id,
1208  eo,
1209  column_fetcher,
1210  query_comp_desc,
1211  query_mem_desc,
1212  frag_list,
1213  kernel_dispatch_mode,
1214  rowid_lookup_key);
1215  };
1216 
1217  QueryFragmentDescriptor fragment_descriptor(
1218  ra_exe_unit,
1219  query_infos,
1220  query_comp_desc_owned->getDeviceType() == ExecutorDeviceType::GPU
1222  : std::vector<Data_Namespace::MemoryInfo>{},
1223  eo.gpu_input_mem_limit_percent);
1224 
1225  if (!eo.just_validate) {
1226  int available_cpus = cpu_threads();
1227  auto available_gpus = get_available_gpus(cat);
1228 
1229  const auto context_count =
1230  get_context_count(device_type, available_cpus, available_gpus.size());
1231  try {
1232  dispatchFragments(dispatch,
1233  execution_dispatch,
1234  query_infos,
1235  eo,
1236  is_agg,
1237  allow_single_frag_table_opt,
1238  context_count,
1239  *query_comp_desc_owned,
1240  *query_mem_desc_owned,
1241  fragment_descriptor,
1242  available_gpus,
1243  available_cpus);
1244  } catch (QueryExecutionError& e) {
1245  if (eo.with_dynamic_watchdog && interrupted_ &&
1246  e.getErrorCode() == ERR_OUT_OF_TIME) {
1247  throw QueryExecutionError(ERR_INTERRUPTED);
1248  }
1249  cat.getDataMgr().freeAllBuffers();
1250  if (e.getErrorCode() == ERR_OVERFLOW_OR_UNDERFLOW &&
1251  static_cast<size_t>(crt_min_byte_width << 1) <= sizeof(int64_t)) {
1252  crt_min_byte_width <<= 1;
1253  continue;
1254  }
1255  throw;
1256  }
1257  }
1258  cat.getDataMgr().freeAllBuffers();
1259  if (is_agg) {
1260  try {
1261  return collectAllDeviceResults(execution_dispatch,
1262  ra_exe_unit.target_exprs,
1263  *query_mem_desc_owned,
1264  query_comp_desc_owned->getDeviceType(),
1265  row_set_mem_owner);
1266  } catch (ReductionRanOutOfSlots&) {
1267  throw QueryExecutionError(ERR_OUT_OF_SLOTS);
1268  } catch (OverflowOrUnderflow&) {
1269  crt_min_byte_width <<= 1;
1270  continue;
1271  }
1272  }
1273  return resultsUnion(execution_dispatch);
1274 
1275  } while (static_cast<size_t>(crt_min_byte_width) <= sizeof(int64_t));
1276 
1277  return std::make_shared<ResultSet>(std::vector<TargetInfo>{},
1278  ExecutorDeviceType::CPU,
1279  QueryMemoryDescriptor(),
1280  nullptr,
1281  this);
1282 }
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1013
void dispatchFragments(const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)> dispatch, const ExecutionDispatch &execution_dispatch, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, QueryFragmentDescriptor &fragment_descriptor, std::unordered_set< int > &available_gpus, int &available_cpus)
Definition: Execute.cpp:1566
ExecutorDeviceType
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
std::unordered_set< int > get_available_gpus(const Catalog_Namespace::Catalog &cat)
Definition: Execute.cpp:934
const ExecutorOptLevel opt_level_
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2905
ResultSetPtr collectAllDeviceResults(ExecutionDispatch &execution_dispatch, const std::vector< Analyzer::Expr *> &target_exprs, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
Definition: Execute.cpp:1464
size_t compute_buffer_entry_guess(const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:958
std::vector< FragmentsPerTable > FragmentsList
int8_t get_min_byte_width()
ExecutorDispatchMode
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:946
#define INJECT_TIMER(DESC)
Definition: measure.h:91
friend class QueryMemoryDescriptor
Definition: Execute.h:1024
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:1010
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:959
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1012
const bool register_intel_jit_listener_
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel)
Definition: DataMgr.cpp:176
static ResultSetPtr resultsUnion(ExecutionDispatch &execution_dispatch)
Definition: Execute.cpp:798
ExecutorDeviceType device_type_
const ExecutorExplainType explain_type_
#define CHECK(condition)
Definition: Logger.h:187
static const int32_t ERR_OUT_OF_SLOTS
Definition: Execute.h:1006
constexpr int8_t MAX_BYTE_WIDTH_SUPPORTED
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Definition: Execute.cpp:1335
const bool with_dynamic_watchdog_
int cpu_threads()
Definition: thread_count.h:23
bool interrupted_
Definition: Execute.h:968
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
Definition: Execute.cpp:1329
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
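The do/while loop above is a widening retry: whenever execution or reduction reports overflow/underflow (or compilation asks for no compaction), crt_min_byte_width is doubled and the whole compile-and-dispatch cycle repeats, up to sizeof(int64_t)-wide aggregate slots. A minimal sketch of that loop; run_once and its overflow signal are illustrative.

#include <cstddef>
#include <cstdint>
#include <iostream>

// Hypothetical one-shot execution that "overflows" for narrow aggregate slots.
bool run_once(const int8_t byte_width, int64_t& result) {
  if (byte_width < 4) {
    return false;  // signals overflow/underflow, as the real path does via error codes
  }
  result = 1234567;  // fits once the slots are wide enough
  return true;
}

int main() {
  int8_t crt_min_byte_width = 1;  // analogous to get_min_byte_width()
  int64_t result = 0;
  do {
    if (run_once(crt_min_byte_width, result)) {
      std::cout << "result " << result << " at width "
                << static_cast<int>(crt_min_byte_width) << '\n';
      return 0;
    }
    crt_min_byte_width <<= 1;  // widen the slots and retry, mirroring the overflow path
  } while (static_cast<std::size_t>(crt_min_byte_width) <= sizeof(int64_t));
  std::cout << "gave up\n";
  return 1;
}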

◆ executeWorkUnitPerFragment()

void Executor::executeWorkUnitPerFragment ( const RelAlgExecutionUnit ra_exe_unit,
const InputTableInfo table_info,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
PerFragmentCB cb 
)
private

Compiles and dispatches a work unit per fragment, processing the results with the per-fragment callback. Currently used for computing metrics over fragments (metadata).

Definition at line 1284 of file Execute.cpp.

References addDeletedColumn(), CHECK_EQ, Executor::ExecutionDispatch::compile(), CompilationOptions::device_type_, Fragmenter_Namespace::TableInfo::fragments, InputTableInfo::info, KernelPerFragment, row_set_mem_owner_, and Executor::ExecutionDispatch::run().

1289  {
1290  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in);
1291  ColumnCacheMap column_cache;
1292 
1293  std::vector<InputTableInfo> table_infos{table_info};
1294  // TODO(adb): ensure this is under a try / catch
1295  ExecutionDispatch execution_dispatch(
1296  this, ra_exe_unit, table_infos, cat, row_set_mem_owner_, nullptr);
1297  ColumnFetcher column_fetcher(this, column_cache);
1298  std::unique_ptr<QueryCompilationDescriptor> query_comp_desc_owned;
1299  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
1300  std::tie(query_comp_desc_owned, query_mem_desc_owned) =
1301  execution_dispatch.compile(0, 8, co, eo, column_fetcher, false);
1302  CHECK_EQ(size_t(1), ra_exe_unit.input_descs.size());
1303  const auto table_id = ra_exe_unit.input_descs[0].getTableId();
1304  const auto& outer_fragments = table_info.info.fragments;
1305  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
1306  ++fragment_index) {
1307  // We may want to consider in the future allowing this to execute on devices other
1308  // than CPU
1309  execution_dispatch.run(co.device_type_,
1310  0,
1311  eo,
1312  column_fetcher,
1313  *query_comp_desc_owned,
1314  *query_mem_desc_owned,
1315  {{table_id, {fragment_index}}},
1316  ExecutorDispatchMode::KernelPerFragment,
1317  -1);
1318  }
1319 
1320  const auto& all_fragment_results = execution_dispatch.getFragmentResults();
1321 
1322  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
1323  ++fragment_index) {
1324  const auto fragment_results = all_fragment_results[fragment_index];
1325  cb(fragment_results.first, outer_fragments[fragment_index]);
1326  }
1327 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2905
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:167
const std::vector< InputDescriptor > input_descs
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:960
ExecutorDeviceType device_type_
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
+ Here is the call graph for this function:
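Here the work unit is compiled once and then run against each outer fragment in isolation, and the per-fragment result is handed to the callback together with the fragment it came from (today this drives recomputing fragment metadata). A minimal sketch of that per-fragment callback shape; the real PerFragmentCB signature may differ, so the types below are assumptions.

#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

struct FragmentInfoSketch {
  int fragment_id;
  std::vector<int> values;
};

// Assumed shape of a per-fragment callback: the fragment's result plus the fragment itself.
using PerFragmentCallback =
    std::function<void(const std::vector<int>& result, const FragmentInfoSketch& frag)>;

int main() {
  const std::vector<FragmentInfoSketch> outer_fragments = {{0, {5, 9, 1}}, {1, {7, 3}}};
  const PerFragmentCallback cb = [](const std::vector<int>& result,
                                    const FragmentInfoSketch& frag) {
    // Recompute simple per-fragment metadata (min/max) from the result.
    const auto mm = std::minmax_element(result.begin(), result.end());
    std::cout << "fragment " << frag.fragment_id << ": min=" << *mm.first
              << " max=" << *mm.second << '\n';
  };
  for (const auto& frag : outer_fragments) {
    cb(frag.values, frag);  // one execution per fragment, result fed to the callback
  }
  return 0;
}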

◆ fetchChunks()

Executor::FetchResult Executor::fetchChunks ( const ColumnFetcher column_fetcher,
const RelAlgExecutionUnit ra_exe_unit,
const int  device_id,
const Data_Namespace::MemoryLevel  memory_level,
const std::map< int, const TableFragments *> &  all_tables_fragments,
const FragmentsList selected_fragments,
const Catalog_Namespace::Catalog cat,
std::list< ChunkIter > &  chunk_iterators,
std::list< std::shared_ptr< Chunk_NS::Chunk >> &  chunks 
)
private

Definition at line 1888 of file Execute.cpp.

References buildSelectedFragsMapping(), CHECK, CHECK_EQ, CHECK_LT, Data_Namespace::CPU_LEVEL, ColumnFetcher::getAllTableColumnFragments(), ColumnFetcher::getOneTableColumnFragment(), ColumnFetcher::getResultSetColumn(), getRowCountAndOffsetForAllFrags(), INJECT_TIMER, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, needFetchAllFragments(), plan_state_, RESULT, and anonymous_namespace{Execute.cpp}::try_get_column_descriptor().

1897  {
1898  INJECT_TIMER(fetchChunks);
1899  const auto& col_global_ids = ra_exe_unit.input_col_descs;
1900  std::vector<std::vector<size_t>> selected_fragments_crossjoin;
1901  std::vector<size_t> local_col_to_frag_pos;
1902  buildSelectedFragsMapping(selected_fragments_crossjoin,
1903  local_col_to_frag_pos,
1904  col_global_ids,
1905  selected_fragments,
1906  ra_exe_unit);
1907 
1909  selected_fragments_crossjoin);
1910 
1911  std::vector<std::vector<const int8_t*>> all_frag_col_buffers;
1912  std::vector<std::vector<int64_t>> all_num_rows;
1913  std::vector<std::vector<uint64_t>> all_frag_offsets;
1914 
1915  for (const auto& selected_frag_ids : frag_ids_crossjoin) {
1916  std::vector<const int8_t*> frag_col_buffers(
1917  plan_state_->global_to_local_col_ids_.size());
1918  for (const auto& col_id : col_global_ids) {
1919  CHECK(col_id);
1920  const int table_id = col_id->getScanDesc().getTableId();
1921  const auto cd = try_get_column_descriptor(col_id.get(), cat);
1922  if (cd && cd->isVirtualCol) {
1923  CHECK_EQ("rowid", cd->columnName);
1924  continue;
1925  }
1926  const auto fragments_it = all_tables_fragments.find(table_id);
1927  CHECK(fragments_it != all_tables_fragments.end());
1928  const auto fragments = fragments_it->second;
1929  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
1930  CHECK(it != plan_state_->global_to_local_col_ids_.end());
1931  CHECK_LT(static_cast<size_t>(it->second),
1932  plan_state_->global_to_local_col_ids_.size());
1933  const size_t frag_id = selected_frag_ids[local_col_to_frag_pos[it->second]];
1934  if (!fragments->size()) {
1935  return {};
1936  }
1937  CHECK_LT(frag_id, fragments->size());
1938  auto memory_level_for_column = memory_level;
1939  if (plan_state_->columns_to_fetch_.find(
1940  std::make_pair(col_id->getScanDesc().getTableId(), col_id->getColId())) ==
1941  plan_state_->columns_to_fetch_.end()) {
1942  memory_level_for_column = Data_Namespace::CPU_LEVEL;
1943  }
1944  if (col_id->getScanDesc().getSourceType() == InputSourceType::RESULT) {
1945  frag_col_buffers[it->second] = column_fetcher.getResultSetColumn(
1946  col_id.get(), memory_level_for_column, device_id);
1947  } else {
1948  if (needFetchAllFragments(*col_id, ra_exe_unit, selected_fragments)) {
1949  frag_col_buffers[it->second] =
1950  column_fetcher.getAllTableColumnFragments(table_id,
1951  col_id->getColId(),
1952  all_tables_fragments,
1953  memory_level_for_column,
1954  device_id);
1955  } else {
1956  frag_col_buffers[it->second] =
1957  column_fetcher.getOneTableColumnFragment(table_id,
1958  frag_id,
1959  col_id->getColId(),
1960  all_tables_fragments,
1961  chunks,
1962  chunk_iterators,
1963  memory_level_for_column,
1964  device_id);
1965  }
1966  }
1967  }
1968  all_frag_col_buffers.push_back(frag_col_buffers);
1969  }
1970  std::tie(all_num_rows, all_frag_offsets) = getRowCountAndOffsetForAllFrags(
1971  ra_exe_unit, frag_ids_crossjoin, ra_exe_unit.input_descs, all_tables_fragments);
1972  return {all_frag_col_buffers, all_num_rows, all_frag_offsets};
1973 }
+ Here is the call graph for this function:
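
In the body above, buildSelectedFragsMapping() groups the selected fragment ids by input table, the CartesianProduct enumerates every combination of those ids, and the inner loop fetches one buffer per input column for each combination (getAllTableColumnFragments() when the whole table must be materialized, getOneTableColumnFragment() otherwise). The self-contained sketch below illustrates only the enumeration step, using plain nested loops in place of the CartesianProduct helper; the fragment ids are made up for the example.

#include <cstdio>
#include <vector>

// Illustrative only: a cross join over per-table fragment id lists yields one
// fetch unit per combination, mirroring frag_ids_crossjoin above.
int main() {
  // One entry per input table: the outer table owns fragments {0, 1}, the
  // inner (joined) table owns fragment {2}.
  std::vector<std::vector<size_t>> selected_fragments_crossjoin = {{0, 1}, {2}};
  for (size_t outer_frag : selected_fragments_crossjoin[0]) {
    for (size_t inner_frag : selected_fragments_crossjoin[1]) {
      // fetchChunks() would gather the column buffers for this pair here.
      std::printf("fetch buffers for outer fragment %zu, inner fragment %zu\n",
                  outer_frag, inner_frag);
    }
  }
  return 0;
}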

◆ generatePTX()

std::string Executor::generatePTX ( const std::string &  cuda_llir) const
private

Definition at line 876 of file NativeCodegen.cpp.

References CodeGenerator::generatePTX().

876  {
877  return CodeGenerator::generatePTX(
878  cuda_llir, nvptx_target_machine_.get(), cgen_state_.get());
879 }
+ Here is the call graph for this function:

◆ getCatalog()

const Catalog_Namespace::Catalog * Executor::getCatalog ( ) const

Definition at line 228 of file Execute.cpp.

References catalog_.

Referenced by skipFragmentPair().

228  {
229  return catalog_;
230 }
+ Here is the caller graph for this function:

◆ getCodeFromCache()

std::vector< std::pair< void *, void * > > Executor::getCodeFromCache ( const CodeCacheKey &  key,
const CodeCache &  cache 
)
private

Definition at line 170 of file NativeCodegen.cpp.

References addCodeToCache(), LruCache< key_t, value_t, hash_t >::cend(), CHECK, ExecutionEngineWrapper::ExecutionEngineWrapper(), LruCache< key_t, value_t, hash_t >::find(), GpuCompilationContext::module(), and LruCache< key_t, value_t, hash_t >::put().

171  {
172  auto it = cache.find(key);
173  if (it != cache.cend()) {
174  delete cgen_state_->module_;
175  cgen_state_->module_ = it->second.second;
176  std::vector<std::pair<void*, void*>> native_functions;
177  for (auto& native_code : it->second.first) {
178  GpuCompilationContext* gpu_context = std::get<2>(native_code).get();
179  native_functions.emplace_back(std::get<0>(native_code),
180  gpu_context ? gpu_context->module() : nullptr);
181  }
182  return native_functions;
183  }
184  return {};
185 }
+ Here is the call graph for this function:
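
A cache hit adopts the cached LLVM module into the current CgenState and returns the previously generated (CPU function, GPU module) pairs, so code generation can be skipped; an empty vector signals a miss. The sketch below shows the lookup-then-compile pattern a caller would follow. The key contents, the cpu_code_cache_ member name, and the compileKernels() helper are assumptions made for illustration; addCodeToCache() is listed in the references above, but its exact signature is not shown here.

// Hedged sketch of the call-site pattern around getCodeFromCache().
CodeCacheKey key{serialized_kernel_ir};             // hypothetical key contents
auto native_functions = getCodeFromCache(key, cpu_code_cache_);  // cache member assumed
if (native_functions.empty()) {
  // Miss: generate native code, then register it via addCodeToCache() so the
  // next query producing identical IR can reuse the compiled module.
  native_functions = compileKernels();              // hypothetical helper
}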

◆ getColLazyFetchInfo()

std::vector< ColumnLazyFetchInfo > Executor::getColLazyFetchInfo ( const std::vector< Analyzer::Expr *> &  target_exprs) const

Definition at line 278 of file Execute.cpp.

References catalog_, CHECK, ColumnDescriptor::columnType, get_column_descriptor(), IS_GEO, kNULLT, and plan_state_.

Referenced by dispatchFragments().

279  {
280  CHECK(plan_state_);
281  CHECK(catalog_);
282  std::vector<ColumnLazyFetchInfo> col_lazy_fetch_info;
283  for (const auto target_expr : target_exprs) {
284  if (!plan_state_->isLazyFetchColumn(target_expr)) {
285  col_lazy_fetch_info.emplace_back(
286  ColumnLazyFetchInfo{false, -1, SQLTypeInfo(kNULLT, false)});
287  } else {
288  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
289  CHECK(col_var);
290  auto col_id = col_var->get_column_id();
291  auto rte_idx = (col_var->get_rte_idx() == -1) ? 0 : col_var->get_rte_idx();
292  auto cd = (col_var->get_table_id() > 0)
293  ? get_column_descriptor(col_id, col_var->get_table_id(), *catalog_)
294  : nullptr;
295  if (cd && IS_GEO(cd->columnType.get_type())) {
296  // Geo coords cols will be processed in sequence. So we only need to track the
297  // first coords col in lazy fetch info.
298  {
299  auto cd0 =
300  get_column_descriptor(col_id + 1, col_var->get_table_id(), *catalog_);
301  auto col0_ti = cd0->columnType;
302  CHECK(!cd0->isVirtualCol);
303  auto col0_var = makeExpr<Analyzer::ColumnVar>(
304  col0_ti, col_var->get_table_id(), cd0->columnId, rte_idx);
305  auto local_col0_id = plan_state_->getLocalColumnId(col0_var.get(), false);
306  col_lazy_fetch_info.emplace_back(
307  ColumnLazyFetchInfo{true, local_col0_id, col0_ti});
308  }
309  } else {
310  auto local_col_id = plan_state_->getLocalColumnId(col_var, false);
311  const auto& col_ti = col_var->get_type_info();
312  col_lazy_fetch_info.emplace_back(ColumnLazyFetchInfo{true, local_col_id, col_ti});
313  }
314  }
315  }
316  return col_lazy_fetch_info;
317 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
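
The returned vector is parallel to target_exprs: non-lazy targets get a sentinel entry ({false, -1, kNULLT}), while lazily fetched columns record the local column id that must be decoded on demand when the result set is read (for geo columns, only the first coords column is tracked). A small sketch of inspecting that information follows; the member names is_lazily_fetched and local_col_id are assumptions inferred from the aggregate initializers above.

// Hedged sketch: report which projection targets will be decoded lazily.
const auto lazy_fetch_info = executor->getColLazyFetchInfo(ra_exe_unit.target_exprs);
for (size_t i = 0; i < lazy_fetch_info.size(); ++i) {
  if (lazy_fetch_info[i].is_lazily_fetched) {
    VLOG(1) << "target " << i << " is fetched lazily from local column "
            << lazy_fetch_info[i].local_col_id;
  }
}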

◆ getColRange()

ExpressionRange Executor::getColRange ( const PhysicalInput &  phys_input) const

Definition at line 248 of file Execute.cpp.

References agg_col_range_cache_, and AggregatedColRange::getColRange().

248  {
249  return agg_col_range_cache_.getColRange(phys_input);
250 }
+ Here is the call graph for this function:

◆ getColumnDescriptor()

const ColumnDescriptor * Executor::getColumnDescriptor ( const Analyzer::ColumnVar *  col_var) const

Definition at line 211 of file Execute.cpp.

References catalog_, get_column_descriptor_maybe(), Analyzer::ColumnVar::get_column_id(), and Analyzer::ColumnVar::get_table_id().

Referenced by getPhysicalColumnDescriptor().

212  {
213  return get_column_descriptor_maybe(
214  col_var->get_column_id(), col_var->get_table_id(), *catalog_);
215 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getDeviceTypeForTargets()

ExecutorDeviceType Executor::getDeviceTypeForTargets ( const RelAlgExecutionUnit &  ra_exe_unit,
const ExecutorDeviceType  requested_device_type 
)
private

Definition at line 1335 of file Execute.cpp.

References CPU, g_bigint_count, get_target_info(), RelAlgExecutionUnit::groupby_exprs, isArchPascalOrLater(), kAVG, kDOUBLE, kSUM, and RelAlgExecutionUnit::target_exprs.

Referenced by executeWorkUnitImpl().

1337  {
1338  for (const auto target_expr : ra_exe_unit.target_exprs) {
1339  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1340  if (!ra_exe_unit.groupby_exprs.empty() &&
1341  !isArchPascalOrLater(requested_device_type)) {
1342  if ((agg_info.agg_kind == kAVG || agg_info.agg_kind == kSUM) &&
1343  agg_info.agg_arg_type.get_type() == kDOUBLE) {
1344  return ExecutorDeviceType::CPU;
1345  }
1346  }
1347  if (dynamic_cast<const Analyzer::RegexpExpr*>(target_expr)) {
1348  return ExecutorDeviceType::CPU;
1349  }
1350  }
1351  return requested_device_type;
1352 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
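
In other words, a GPU request is downgraded to CPU in two cases: a grouped SUM or AVG over a DOUBLE argument when the device is older than Pascal (presumably because the grouped aggregate relies on native 64-bit floating-point atomics introduced with that architecture), and any REGEXP target expression, which is always routed to CPU. The stand-in below restates that decision; it is illustrative only, not the library code.

// Illustrative stand-in for the device selection above.
enum class Device { CPU, GPU };

Device device_for_targets(bool has_group_by,
                          bool has_double_sum_or_avg,
                          bool has_regexp_target,
                          bool gpu_is_pascal_or_later,
                          Device requested) {
  if (has_regexp_target) {
    return Device::CPU;  // REGEXP targets always run on CPU
  }
  if (has_group_by && has_double_sum_or_avg && !gpu_is_pascal_or_later) {
    return Device::CPU;  // grouped SUM/AVG over DOUBLE needs a Pascal-or-later GPU
  }
  return requested;
}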

◆ getExecutor()

std::shared_ptr< Executor > Executor::getExecutor ( const int  db_id,
const std::string &  debug_dir = "",
const std::string &  debug_file = "",
const MapDParameters