OmniSciDB  addbbd5075
Executor Class Reference

#include <Execute.h>


Classes

struct  CompilationResult
 
class  ExecutionDispatch
 
class  FetchCacheAnchor
 
struct  FetchResult
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

typedef std::tuple< std::string, const Analyzer::Expr *, int64_t, const size_t > AggInfo
 

Public Member Functions

 Executor (const int db_id, const size_t block_size_x, const size_t grid_size_x, const std::string &debug_dir, const std::string &debug_file)
 
std::shared_ptr< ResultSet > execute (const Planner::RootPlan *root_plan, const Catalog_Namespace::SessionInfo &session, const bool hoist_literals, const ExecutorDeviceType device_type, const ExecutorOptLevel, const bool allow_multifrag, const bool allow_loop_joins, RenderInfo *render_query_data=nullptr)
 
StringDictionaryProxy * getStringDictionaryProxy (const int dictId, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
const Catalog_Namespace::Catalog * getCatalog () const
 
void setCatalog (const Catalog_Namespace::Catalog *catalog)
 
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner () const
 
const TemporaryTables * getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const int table_id) const
 
const TableGeneration & getTableGeneration (const int table_id) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow () const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void registerActiveModule (void *module, const int device_id) const
 
void unregisterActiveModule (void *module, const int device_id) const
 
void interrupt ()
 
void resetInterrupt ()
 
int8_t warpSize () const
 
unsigned gridSize () const
 
unsigned blockSize () const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
 

Static Public Member Functions

static std::shared_ptr< Executor > getExecutor (const int db_id, const std::string &debug_dir="", const std::string &debug_file="", const MapDParameters mapd_parameters=MapDParameters())
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static std::pair< int64_t, int32_t > reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void addCodeToCache (const CodeCacheKey &, std::vector< std::tuple< void *, ExecutionEngineWrapper >>, llvm::Module *, CodeCache &)
 

Static Public Attributes

static const size_t high_scan_limit
 
static const int32_t ERR_DIV_BY_ZERO {1}
 
static const int32_t ERR_OUT_OF_GPU_MEM {2}
 
static const int32_t ERR_OUT_OF_SLOTS {3}
 
static const int32_t ERR_UNSUPPORTED_SELF_JOIN {4}
 
static const int32_t ERR_OUT_OF_RENDER_MEM {5}
 
static const int32_t ERR_OUT_OF_CPU_MEM {6}
 
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW {7}
 
static const int32_t ERR_SPECULATIVE_TOP_OOM {8}
 
static const int32_t ERR_OUT_OF_TIME {9}
 
static const int32_t ERR_INTERRUPTED {10}
 
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11}
 
static const int32_t ERR_TOO_MANY_LITERALS {12}
 
static const int32_t ERR_STRING_CONST_IN_RESULTSET {13}
 
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14}
 
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES {15}
 
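The error codes above are plain int32_t values produced by the generated query code and checked by the dispatch layer. The sketch below is illustrative only; the helper name and the message strings are assumptions, not part of Executor, and only a few of the codes listed above are shown.

#include <cstdint>
#include <string>

// Hypothetical helper; the numeric values mirror the constants above
// (ERR_DIV_BY_ZERO = 1, ERR_OUT_OF_GPU_MEM = 2, ...).
std::string error_code_to_message(const int32_t code) {
  switch (code) {
    case 1:   // Executor::ERR_DIV_BY_ZERO
      return "Division by zero";
    case 2:   // Executor::ERR_OUT_OF_GPU_MEM
      return "Query ran out of GPU memory";
    case 9:   // Executor::ERR_OUT_OF_TIME
      return "Query execution exceeded the time limit";
    case 10:  // Executor::ERR_INTERRUPTED
      return "Query execution was interrupted";
    default:
      return "Execution failed with code " + std::to_string(code);
  }
}
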

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCount (const ExecutorDeviceType) const
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenWindowFunctionAggregate (const CompilationOptions &co)
 
llvm::BasicBlock * codegenWindowResetStateControlFlow ()
 
void codegenWindowFunctionStateInit (llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
void codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
 
llvm::Value * codegenAggregateWindowState ()
 
llvm::Value * aggregateWindowStatePtr ()
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
void executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg=false)
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb)
 Compiles and dispatches a work unit per fragment, processing the results with the per-fragment callback. Currently used for computing metrics over fragments (metadata). More...
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
 Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps. More...
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (ExecutionDispatch &execution_dispatch, const std::vector< Analyzer::Expr * > &target_exprs, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (ExecutionDispatch &execution_dispatch) const
 
std::unordered_map< int, const Analyzer::BinOper * > getInnerTabIdToJoinCond () const
 
void dispatchFragments (const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)> dispatch, const ExecutionDispatch &execution_dispatch, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, QueryFragmentDescriptor &fragment_descriptor, std::unordered_set< int > &available_gpus, int &available_cpus)
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &)
 
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
 
ResultSetPtr resultsUnion (ExecutionDispatch &execution_dispatch)
 
std::vector< int64_t > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
void executeSimpleInsert (const Planner::RootPlan *root_plan)
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
std::tuple< Executor::CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoopbuildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< JoinHashTableInterface > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, ExecutorDeviceType device_type)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinHashTableInterface::HashType preferred_hash_type, ColumnCacheMap &column_cache)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit *ra_exe_unit)
 
std::vector< std::pair< void *, void * > > optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::vector< std::pair< void *, void * > > optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, GroupByAndAggregate::DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *val)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
RelAlgExecutionUnit addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit)
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (std::unordered_set< int > phys_table_ids)
 
std::vector< std::pair< void *, void * > > getCodeFromCache (const CodeCacheKey &, const CodeCache &)
 
void addCodeToCache (const CodeCacheKey &, const std::vector< std::tuple< void *, GpuCompilationContext * >> &, llvm::Module *, CodeCache &)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

std::unique_ptr< CgenState > cgen_state_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::mutex gpu_active_modules_mutex_
 
uint32_t gpu_active_modules_device_mask_
 
void * gpu_active_modules_ [max_gpu_count]
 
bool interrupted_
 
std::shared_ptr< StringDictionaryProxy > lit_str_dict_proxy_
 
std::mutex str_dict_mutex_
 
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
 
CodeCache cpu_code_cache_
 
CodeCache gpu_code_cache_
 
const unsigned block_size_x_
 
const unsigned grid_size_x_
 
const std::string debug_dir_
 
const std::string debug_file_
 
const int db_id_
 
const Catalog_Namespace::Catalog * catalog_
 
const TemporaryTables * temporary_tables_
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
StringDictionaryGenerations string_dictionary_generations_
 
TableGenerations table_generations_
 

Static Private Attributes

static const int max_gpu_count {16}
 
static const size_t baseline_threshold
 
static const size_t code_cache_size {10000}
 
static std::map< int, std::shared_ptr< Executor > > executors_
 
static std::mutex execute_mutex_
 
static mapd_shared_mutex executors_cache_mutex_
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
class OverlapsJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class JoinHashTable
 
class LeafAggregator
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
template<typename META_TYPE_CLASS >
class AggregateReductionEgress
 

Detailed Description

Definition at line 329 of file Execute.h.
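Executor instances are not constructed directly in normal use; they are obtained through the static getExecutor() factory, which caches one shared instance per database id in the static executors_ map. The sketch below is illustrative only: the database id is a placeholder, default MapDParameters are assumed, and actually running a query additionally requires a catalog, a session, and a Planner::RootPlan passed to execute().

#include "Execute.h"

// Hedged sketch: obtain (or create) the cached Executor for a database
// and check whether only CPU execution is available.
void executor_usage_sketch() {
  const int db_id = 1;  // placeholder database id, assumption for illustration
  std::shared_ptr<Executor> executor = Executor::getExecutor(db_id);
  if (executor->isCPUOnly()) {
    // no CUDA devices are available; execution falls back to CPU
  }
}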

Member Typedef Documentation

typedef std::tuple<std::string, const Analyzer::Expr*, int64_t, const size_t> Executor::AggInfo

Definition at line 357 of file Execute.h.

using Executor::PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
private

Definition at line 567 of file Execute.h.
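
As an illustration of the callback shape expected by executeWorkUnitPerFragment() (a hedged sketch; the lambda body and the logging are assumptions, not taken from the source):

// A per-fragment callback receives the per-fragment ResultSet together with
// that fragment's metadata; a caller could, for example, count rows per fragment.
Executor::PerFragmentCallBack per_fragment_cb =
    [](ResultSetPtr results, const Fragmenter_Namespace::FragmentInfo& fragment_info) {
      if (results) {
        LOG(INFO) << "fragment " << fragment_info.fragmentId << ": "
                  << results->rowCount() << " rows";
      }
    };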

Constructor & Destructor Documentation

Executor::Executor ( const int  db_id,
const size_t  block_size_x,
const size_t  grid_size_x,
const std::string &  debug_dir,
const std::string &  debug_file 
)

Definition at line 108 of file Execute.cpp.

113  : cgen_state_(new CgenState({}, false))
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914

Member Function Documentation

static void Executor::addCodeToCache ( const CodeCacheKey & ,
std::vector< std::tuple< void *, ExecutionEngineWrapper >>  ,
llvm::Module *  ,
CodeCache &  
)
static

Referenced by StubGenerator::generateStub().


void Executor::addCodeToCache ( const CodeCacheKey & ,
const std::vector< std::tuple< void *, GpuCompilationContext * >> &  ,
llvm::Module *  ,
CodeCache &  
)
private
RelAlgExecutionUnit Executor::addDeletedColumn ( const RelAlgExecutionUnit &  ra_exe_unit)
private

Definition at line 3052 of file Execute.cpp.

References catalog_(), CHECK(), and TABLE.

3052  {
3053  auto ra_exe_unit_with_deleted = ra_exe_unit;
3054  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
3055  if (input_table.getSourceType() != InputSourceType::TABLE) {
3056  continue;
3057  }
3058  const auto td = catalog_->getMetadataForTable(input_table.getTableId());
3059  CHECK(td);
3060  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
3061  if (!deleted_cd) {
3062  continue;
3063  }
3064  CHECK(deleted_cd->columnType.is_boolean());
3065  // check deleted column is not already present
3066  bool found = false;
3067  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
3068  if (input_col.get()->getColId() == deleted_cd->columnId &&
3069  input_col.get()->getScanDesc().getTableId() == deleted_cd->tableId &&
3070  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
3071  found = true;
3072  }
3073  }
3074  if (!found) {
3075  // add deleted column
3076  ra_exe_unit_with_deleted.input_col_descs.emplace_back(new InputColDescriptor(
3077  deleted_cd->columnId, deleted_cd->tableId, input_table.getNestLevel()));
3078  }
3079  }
3080  return ra_exe_unit_with_deleted;
3081 }
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:2336
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.


llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 442 of file IRCodegen.cpp.

References CHECK().

443  {
444  // Iterators are added for loop-outer joins when the head of the loop is generated,
445  // then once again when the body if generated. Allow this instead of special handling
446  // of call sites.
447  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
448  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
449  return it->second;
450  }
451  CHECK(!prev_iters.empty());
452  llvm::Value* matching_row_index = prev_iters.back();
453  const auto it_ok =
454  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
455  CHECK(it_ok.second);
456  return matching_row_index;
457 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
CHECK(cgen_state)


llvm::Value * Executor::aggregateWindowStatePtr ( )
private

Definition at line 122 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.

122  {
123  const auto window_func_context =
124  WindowProjectNodeContext::getActiveWindowFunctionContext();
125  const auto window_func = window_func_context->getWindowFunction();
126  const auto arg_ti = get_adjusted_window_type_info(window_func);
127  llvm::Type* aggregate_state_type =
128  arg_ti.get_type() == kFLOAT
129  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
130  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
131  const auto aggregate_state_i64 = cgen_state_->llInt(
132  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
133  return cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_i64,
134  aggregate_state_type);
135 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)


static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inline static private

Definition at line 906 of file Execute.h.

906  {
907  size_t off = off_in;
908  if (off % alignment != 0) {
909  off += (alignment - off % alignment);
910  }
911  return off;
912  }
unsigned Executor::blockSize ( ) const

Definition at line 2986 of file Execute.cpp.

References block_size_x_(), catalog_(), and CHECK().

2986  {
2987  CHECK(catalog_);
2988  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
2989  CHECK(cuda_mgr);
2990  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
2991  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
2992 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:117
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958
const unsigned block_size_x_
Definition: Execute.h:952


std::shared_ptr< JoinHashTableInterface > Executor::buildCurrentLevelHashTable ( const JoinCondition &  current_level_join_conditions,
RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 396 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type_, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, INNER, IS_EQUIVALENCE, JoinHashTableInterface::OneToOne, JoinCondition::quals, and JoinCondition::type.

402  {
403  if (current_level_join_conditions.type != JoinType::INNER &&
404  current_level_join_conditions.quals.size() > 1) {
405  fail_reasons.emplace_back("No equijoin expression found for outer join");
406  return nullptr;
407  }
408  std::shared_ptr<JoinHashTableInterface> current_level_hash_table;
409  for (const auto& join_qual : current_level_join_conditions.quals) {
410  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
411  if (!qual_bin_oper || !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
412  fail_reasons.emplace_back("No equijoin expression found");
413  if (current_level_join_conditions.type == JoinType::INNER) {
414  add_qualifier_to_execution_unit(ra_exe_unit, join_qual);
415  }
416  continue;
417  }
418  JoinHashTableOrError hash_table_or_error;
419  if (!current_level_hash_table) {
420  hash_table_or_error = buildHashTableForQualifier(
421  qual_bin_oper,
422  query_infos,
423  co.device_type_ == ExecutorDeviceType::GPU ? MemoryLevel::GPU_LEVEL
424  : MemoryLevel::CPU_LEVEL,
425  JoinHashTableInterface::HashType::OneToOne,
426  column_cache);
427  current_level_hash_table = hash_table_or_error.hash_table;
428  }
429  if (hash_table_or_error.hash_table) {
430  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
431  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
432  } else {
433  fail_reasons.push_back(hash_table_or_error.fail_reason);
434  if (current_level_join_conditions.type == JoinType::INNER) {
435  add_qualifier_to_execution_unit(ra_exe_unit, qual_bin_oper);
436  }
437  }
438  }
439  return current_level_hash_table;
440 }
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:67
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:929
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
Definition: IRCodegen.cpp:186
ExecutorDeviceType device_type_
std::list< std::shared_ptr< Analyzer::Expr > > quals
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinHashTableInterface::HashType preferred_hash_type, ColumnCacheMap &column_cache)
Definition: Execute.cpp:2912


Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinHashTableInterface::HashType  preferred_hash_type,
ColumnCacheMap &  column_cache 
)
private

Definition at line 2912 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_GT, coalesce_singleton_equi_join(), g_enable_overlaps_hashjoin, OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), and JoinHashTable::getInstance().

2917  {
2918  std::shared_ptr<JoinHashTableInterface> join_hash_table;
2919  const int device_count = deviceCountForMemoryLevel(memory_level);
2920  CHECK_GT(device_count, 0);
2921  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
2922  return {nullptr, "Overlaps hash join disabled, attempting to fall back to loop join"};
2923  }
2924  try {
2925  if (qual_bin_oper->is_overlaps_oper()) {
2926  join_hash_table = OverlapsJoinHashTable::getInstance(
2927  qual_bin_oper, query_infos, memory_level, device_count, column_cache, this);
2928  } else if (dynamic_cast<const Analyzer::ExpressionTuple*>(
2929  qual_bin_oper->get_left_operand())) {
2930  join_hash_table = BaselineJoinHashTable::getInstance(qual_bin_oper,
2931  query_infos,
2932  memory_level,
2933  preferred_hash_type,
2934  device_count,
2935  column_cache,
2936  this);
2937  } else {
2938  try {
2939  join_hash_table = JoinHashTable::getInstance(qual_bin_oper,
2940  query_infos,
2941  memory_level,
2942  preferred_hash_type,
2943  device_count,
2944  column_cache,
2945  this);
2946  } catch (TooManyHashEntries&) {
2947  const auto join_quals = coalesce_singleton_equi_join(qual_bin_oper);
2948  CHECK_EQ(join_quals.size(), size_t(1));
2949  const auto join_qual =
2950  std::dynamic_pointer_cast<Analyzer::BinOper>(join_quals.front());
2951  join_hash_table = BaselineJoinHashTable::getInstance(join_qual,
2952  query_infos,
2953  memory_level,
2954  preferred_hash_type,
2955  device_count,
2956  column_cache,
2957  this);
2958  }
2959  }
2960  CHECK(join_hash_table);
2961  return {join_hash_table, ""};
2962  } catch (const HashJoinFail& e) {
2963  return {nullptr, e.what()};
2964  }
2965  CHECK(false);
2966  return {nullptr, ""};
2967 }
#define CHECK_EQ(x, y)
Definition: Logger.h:201
std::list< std::shared_ptr< Analyzer::Expr > > coalesce_singleton_equi_join(const std::shared_ptr< Analyzer::BinOper > &join_qual)
static std::shared_ptr< OverlapsJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
#define CHECK_GT(x, y)
Definition: Logger.h:205
static std::shared_ptr< BaselineJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:87
CHECK(cgen_state)
static std::shared_ptr< JoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:604


std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  level_idx,
const CompilationOptions &  co 
)
private

Definition at line 340 of file IRCodegen.cpp.

References catalog_(), CHECK(), CHECK_LT, CodeGenerator::codegen(), RelAlgExecutionUnit::input_descs, TABLE, and CodeGenerator::toBool().

342  {
343  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
344  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
345  if (input_desc.getSourceType() != InputSourceType::TABLE) {
346  return nullptr;
347  }
348  const auto td = catalog_->getMetadataForTable(input_desc.getTableId());
349  CHECK(td);
350  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
351  if (!deleted_cd) {
352  return nullptr;
353  }
354  CHECK(deleted_cd->columnType.is_boolean());
355  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
356  input_desc.getTableId(),
357  deleted_cd->columnId,
358  input_desc.getNestLevel());
359  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
360  llvm::Value* have_more_inner_rows) {
361  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
362  // Avoid fetching the deleted column from a position which is not valid.
363  // An invalid position can be returned by a one to one hash lookup (negative)
364  // or at the end of iteration over a set of matching values.
365  llvm::Value* is_valid_it{nullptr};
366  if (have_more_inner_rows) {
367  is_valid_it = have_more_inner_rows;
368  } else {
369  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
370  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
371  }
372  const auto it_valid_bb = llvm::BasicBlock::Create(
373  cgen_state_->context_, "it_valid", cgen_state_->row_func_);
374  const auto it_not_valid_bb = llvm::BasicBlock::Create(
375  cgen_state_->context_, "it_not_valid", cgen_state_->row_func_);
376  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
377  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
378  cgen_state_->context_, "row_is_deleted", cgen_state_->row_func_);
379  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
380  CodeGenerator code_generator(this);
381  const auto row_is_deleted = code_generator.toBool(
382  code_generator.codegen(deleted_expr.get(), true, co).front());
383  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
384  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
385  const auto row_is_deleted_default = cgen_state_->llBool(false);
386  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
387  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
388  auto row_is_deleted_or_default =
389  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
390  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
391  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
392  return row_is_deleted_or_default;
393  };
394 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
const std::vector< InputDescriptor > input_descs
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:2336
#define CHECK_LT(x, y)
Definition: Logger.h:203
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:442
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.


std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const ExecutionOptions &  eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache 
)
private

Definition at line 218 of file IRCodegen.cpp.

References CodeGenerator::cgen_state_, CHECK(), CHECK_LT, INJECT_TIMER, RelAlgExecutionUnit::join_quals, LEFT, JoinHashTableInterface::OneToOne, CgenState::outer_join_match_found_per_level_, Set, Singleton, JoinLoopDomain::slot_lookup_result, and JoinLoopDomain::values_buffer.

223  {
225  std::vector<JoinLoop> join_loops;
226  for (size_t level_idx = 0, current_hash_table_idx = 0;
227  level_idx < ra_exe_unit.join_quals.size();
228  ++level_idx) {
229  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
230  std::vector<std::string> fail_reasons;
231  const auto current_level_hash_table =
232  buildCurrentLevelHashTable(current_level_join_conditions,
233  ra_exe_unit,
234  co,
235  query_infos,
236  column_cache,
237  fail_reasons);
238  const auto found_outer_join_matches_cb =
239  [this, level_idx](llvm::Value* found_outer_join_matches) {
240  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
241  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
242  cgen_state_->outer_join_match_found_per_level_[level_idx] =
243  found_outer_join_matches;
244  };
245  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
246  if (current_level_hash_table) {
247  if (current_level_hash_table->getHashType() == JoinHashTable::HashType::OneToOne) {
248  join_loops.emplace_back(
250  current_level_join_conditions.type,
251  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
252  const std::vector<llvm::Value*>& prev_iters) {
253  addJoinLoopIterator(prev_iters, level_idx);
254  JoinLoopDomain domain{{0}};
255  domain.slot_lookup_result =
256  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
257  return domain;
258  },
259  nullptr,
260  current_level_join_conditions.type == JoinType::LEFT
261  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
262  : nullptr,
263  is_deleted_cb);
264  } else {
265  join_loops.emplace_back(
267  current_level_join_conditions.type,
268  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
269  const std::vector<llvm::Value*>& prev_iters) {
270  addJoinLoopIterator(prev_iters, level_idx);
271  JoinLoopDomain domain{{0}};
272  const auto matching_set = current_level_hash_table->codegenMatchingSet(
273  co, current_hash_table_idx);
274  domain.values_buffer = matching_set.elements;
275  domain.element_count = matching_set.count;
276  return domain;
277  },
278  nullptr,
279  current_level_join_conditions.type == JoinType::LEFT
280  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
281  : nullptr,
282  is_deleted_cb);
283  }
284  ++current_hash_table_idx;
285  } else {
286  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
287  ? "No equijoin expression found"
288  : boost::algorithm::join(fail_reasons, " | ");
290  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
291  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
292  // condition.
293  VLOG(1) << "Unable to build hash table, falling back to loop join: "
294  << fail_reasons_str;
295  const auto outer_join_condition_cb =
296  [this, level_idx, &co, &current_level_join_conditions](
297  const std::vector<llvm::Value*>& prev_iters) {
298  // The values generated for the match path don't dominate all uses
299  // since on the non-match path nulls are generated. Reset the cache
300  // once the condition is generated to avoid incorrect reuse.
301  FetchCacheAnchor anchor(cgen_state_.get());
302  addJoinLoopIterator(prev_iters, level_idx + 1);
303  llvm::Value* left_join_cond = cgen_state_->llBool(true);
304  CodeGenerator code_generator(this);
305  for (auto expr : current_level_join_conditions.quals) {
306  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
307  left_join_cond,
308  code_generator.toBool(
309  code_generator.codegen(expr.get(), true, co).front()));
310  }
311  return left_join_cond;
312  };
313  join_loops.emplace_back(
315  current_level_join_conditions.type,
316  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
317  addJoinLoopIterator(prev_iters, level_idx);
318  JoinLoopDomain domain{{0}};
319  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
320  get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan"),
321  cgen_state_->llInt(int32_t(level_idx + 1)));
322  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(rows_per_scan_ptr,
323  "num_rows_per_scan");
324  return domain;
325  },
326  current_level_join_conditions.type == JoinType::LEFT
327  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
328  outer_join_condition_cb)
329  : nullptr,
330  current_level_join_conditions.type == JoinType::LEFT
331  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
332  : nullptr,
333  is_deleted_cb);
334  }
335  }
336  return join_loops;
337 }
llvm::Value * values_buffer
Definition: JoinLoop.h:47
std::string join(T const &container, std::string const &delim)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
CHECK(cgen_state)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:116
#define INJECT_TIMER(DESC)
Definition: measure.h:91
const JoinQualsPerNestingLevel join_quals
llvm::Value * slot_lookup_result
Definition: JoinLoop.h:45
#define CHECK_LT(x, y)
Definition: Logger.h:203
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:442
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
Definition: IRCodegen.cpp:196
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:218
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:340
#define VLOG(n)
Definition: Logger.h:283
std::shared_ptr< JoinHashTableInterface > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:396


void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList &  selected_fragments,
const RelAlgExecutionUnit &  ra_exe_unit 
)
private

Definition at line 2126 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LT, and RelAlgExecutionUnit::input_descs.

2131  {
2132  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
2133  size_t frag_pos{0};
2134  const auto& input_descs = ra_exe_unit.input_descs;
2135  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
2136  const int table_id = input_descs[scan_idx].getTableId();
2137  CHECK_EQ(selected_fragments[scan_idx].table_id, table_id);
2138  selected_fragments_crossjoin.push_back(
2139  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
2140  for (const auto& col_id : col_global_ids) {
2141  CHECK(col_id);
2142  const auto& input_desc = col_id->getScanDesc();
2143  if (input_desc.getTableId() != table_id ||
2144  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
2145  continue;
2146  }
2147  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
2148  CHECK(it != plan_state_->global_to_local_col_ids_.end());
2149  CHECK_LT(static_cast<size_t>(it->second),
2150  plan_state_->global_to_local_col_ids_.size());
2151  local_col_to_frag_pos[it->second] = frag_pos;
2152  }
2153  ++frag_pos;
2154  }
2155 }
#define CHECK_EQ(x, y)
Definition: Logger.h:201
const std::vector< InputDescriptor > input_descs
CHECK(cgen_state)
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:929
#define CHECK_LT(x, y)
Definition: Logger.h:203
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2112


llvm::Value * Executor::castToFP ( llvm::Value *  val)
private

Definition at line 3002 of file Execute.cpp.

References CHECK().

3002  {
3003  if (!val->getType()->isIntegerTy()) {
3004  return val;
3005  }
3006 
3007  auto val_width = static_cast<llvm::IntegerType*>(val->getType())->getBitWidth();
3008  llvm::Type* dest_ty{nullptr};
3009  switch (val_width) {
3010  case 32:
3011  dest_ty = llvm::Type::getFloatTy(cgen_state_->context_);
3012  break;
3013  case 64:
3014  dest_ty = llvm::Type::getDoubleTy(cgen_state_->context_);
3015  break;
3016  default:
3017  CHECK(false);
3018  }
3019  return cgen_state_->ir_builder_.CreateSIToFP(val, dest_ty);
3020 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
CHECK(cgen_state)


llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 3022 of file Execute.cpp.

References CHECK(), CHECK_LT, and get_int_type().

3022  {
3023  CHECK(val->getType()->isPointerTy());
3024 
3025  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
3026  const auto val_type = val_ptr_type->getElementType();
3027  size_t val_width = 0;
3028  if (val_type->isIntegerTy()) {
3029  val_width = val_type->getIntegerBitWidth();
3030  } else {
3031  if (val_type->isFloatTy()) {
3032  val_width = 32;
3033  } else {
3034  CHECK(val_type->isDoubleTy());
3035  val_width = 64;
3036  }
3037  }
3038  CHECK_LT(size_t(0), val_width);
3039  if (bitWidth == val_width) {
3040  return val;
3041  }
3042  return cgen_state_->ir_builder_.CreateBitCast(
3043  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
3044 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
CHECK(cgen_state)
#define CHECK_LT(x, y)
Definition: Logger.h:203


void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 157 of file Execute.cpp.

References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by MapDHandler::clear_cpu_memory(), MapDHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

157  {
158  switch (memory_level) {
159  case Data_Namespace::MemoryLevel::CPU_LEVEL:
160  case Data_Namespace::MemoryLevel::GPU_LEVEL: {
161  std::lock_guard<std::mutex> flush_lock(
162  execute_mutex_); // Don't flush memory while queries are running
163 
164  Catalog_Namespace::SysCatalog::instance().getDataMgr().clearMemory(memory_level);
165  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
166  // The hash table cache uses CPU memory not managed by the buffer manager. In the
167  // future, we should manage these allocations with the buffer manager directly.
168  // For now, assume the user wants to purge the hash table cache when they clear
169  // CPU memory (currently used in ExecuteTest to lower memory pressure)
170  JoinHashTableCacheInvalidator::invalidateCaches();
171  }
172  break;
173  }
174  default: {
175  throw std::runtime_error(
176  "Clearing memory levels other than the CPU level or GPU level is not "
177  "supported.");
178  }
179  }
180 }
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:300
static void invalidateCaches()
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:190
static SysCatalog & instance()
Definition: SysCatalog.h:257
static std::mutex execute_mutex_
Definition: Execute.h:967

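A usage sketch for clearMemory() (illustrative only; it assumes the embedding application has already initialized the system catalog and data manager, and the wrapper function name is an assumption):

#include "Execute.h"

// Flush CPU-level buffer pools; per the comment in the source above, clearing
// the CPU level also purges the hash table cache.
void flush_cpu_memory() {
  Executor::clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);
}
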

void Executor::clearMetaInfoCache ( )
private

Definition at line 325 of file Execute.cpp.

References input_table_info_cache_().

325  {
326  input_table_info_cache_.clear();
327  agg_col_range_cache_.clear();
328  string_dictionary_generations_.clear();
329  table_generations_.clear();
330 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:962
StringDictionaryGenerations string_dictionary_generations_
Definition: Execute.h:963
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:961
TableGenerations table_generations_
Definition: Execute.h:964


llvm::Value * Executor::codegenAggregateWindowState ( )
private

Definition at line 318 of file WindowFunctionIR.cpp.

References AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.

318  {
319  const auto pi32_type =
320  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
321  const auto pi64_type =
322  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
323  const auto window_func_context =
324  WindowProjectNodeContext::getActiveWindowFunctionContext();
325  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
326  const auto window_func_ti = get_adjusted_window_type_info(window_func);
327  const auto aggregate_state_type =
328  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
329  auto aggregate_state = aggregateWindowStatePtr();
330  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
331  const auto aggregate_state_count_i64 = cgen_state_->llInt(
332  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
333  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
334  aggregate_state_count_i64, aggregate_state_type);
335  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
336  switch (window_func_ti.get_type()) {
337  case kFLOAT: {
338  return cgen_state_->emitCall(
339  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
340  }
341  case kDOUBLE: {
342  return cgen_state_->emitCall(
343  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
344  }
345  case kDECIMAL: {
346  return cgen_state_->emitCall(
347  "load_avg_decimal",
348  {aggregate_state,
349  aggregate_state_count,
350  double_null_lv,
351  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
352  }
353  default: {
354  return cgen_state_->emitCall(
355  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
356  }
357  }
358  }
359  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
360  return cgen_state_->ir_builder_.CreateLoad(aggregate_state);
361  }
362  switch (window_func_ti.get_type()) {
363  case kFLOAT: {
364  return cgen_state_->emitCall("load_float", {aggregate_state});
365  }
366  case kDOUBLE: {
367  return cgen_state_->emitCall("load_double", {aggregate_state});
368  }
369  default: {
370  return cgen_state_->ir_builder_.CreateLoad(aggregate_state);
371  }
372  }
373 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:1396
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:852
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)


void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit &  ra_exe_unit,
GroupByAndAggregate &  group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const ExecutionOptions &  eo 
)
private

Definition at line 459 of file IRCodegen.cpp.

References JoinLoop::codegen(), CompilationOptions::device_type_, CodeGenerator::posArg(), and ExecutionOptions::with_dynamic_watchdog.

466  {
467  const auto exit_bb =
468  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->row_func_);
469  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
470  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
471  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
472  CodeGenerator code_generator(this);
473  const auto loops_entry_bb = JoinLoop::codegen(
474  join_loops,
475  [this,
476  query_func,
477  &query_mem_desc,
478  &co,
479  &eo,
480  &group_by_and_aggregate,
481  &join_loops,
482  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
483  addJoinLoopIterator(prev_iters, join_loops.size());
484  auto& builder = cgen_state_->ir_builder_;
485  const auto loop_body_bb = llvm::BasicBlock::Create(
486  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
487  builder.SetInsertPoint(loop_body_bb);
488  const bool can_return_error =
489  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
490  if (can_return_error || cgen_state_->needs_error_check_ ||
491  eo.with_dynamic_watchdog) {
492  createErrorCheckControlFlow(
493  query_func, eo.with_dynamic_watchdog, co.device_type_);
494  }
495  return loop_body_bb;
496  },
497  code_generator.posArg(nullptr),
498  exit_bb,
499  cgen_state_->ir_builder_);
500  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
501  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
502 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const bool with_dynamic_watchdog
ExecutorDeviceType device_type_
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, ExecutorDeviceType device_type)
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, llvm::IRBuilder<> &builder)
Definition: JoinLoop.cpp:45
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:442


llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co 
)
private

Definition at line 1818 of file NativeCodegen.cpp.

1820  {
1821  CHECK(!ra_exe_unit.input_descs.empty());
1822  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
1823  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
1824  return nullptr;
1825  }
1826  const auto td = catalog_->getMetadataForTable(outer_input_desc.getTableId());
1827  CHECK(td);
1828  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
1829  if (!deleted_cd) {
1830  return nullptr;
1831  }
1832  CHECK(deleted_cd->columnType.is_boolean());
1833  const auto deleted_expr =
1834  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
1835  outer_input_desc.getTableId(),
1836  deleted_cd->columnId,
1837  outer_input_desc.getNestLevel());
1838  CodeGenerator code_generator(this);
1839  const auto is_deleted =
1840  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
1841  const auto is_deleted_bb = llvm::BasicBlock::Create(
1842  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
1843  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
1844  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
1845  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
1846  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
1847  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1848  cgen_state_->ir_builder_.SetInsertPoint(bb);
1849  return bb;
1850 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
const std::vector< InputDescriptor > input_descs
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:2336
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
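The generated branch amounts to a per-row early return whenever the outer table's hidden delete-marker column is set. A minimal stand-alone C++ sketch of that effect (do_rest_of_row_function and the bool parameter are illustrative stand-ins, not names from the codebase):

#include <cstdint>

static int32_t do_rest_of_row_function() { return 0; }  // stand-in for the remaining row code

// Per-row effect of the "is_deleted" / "is_not_deleted" blocks emitted above.
int32_t process_row(const bool deleted /* value of the delete-marker column */) {
  if (deleted) {
    return 0;  // "is_deleted": skip this row and report success
  }
  // "is_not_deleted": subsequent codegen for the row function continues here
  return do_rest_of_row_function();
}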
void Executor::codegenWindowAvgEpilogue ( llvm::Value *  crt_val,
llvm::Value *  window_func_null_val,
llvm::Value *  multiplicity_lv 
)
private

Definition at line 282 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

284  {
285  const auto window_func_context =
286  WindowProjectNodeContext::getActiveWindowFunctionContext();
287  const auto window_func = window_func_context->getWindowFunction();
288  const auto window_func_ti = get_adjusted_window_type_info(window_func);
289  const auto pi32_type =
290  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
291  const auto pi64_type =
292  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
293  const auto aggregate_state_type =
294  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
295  const auto aggregate_state_count_i64 = cgen_state_->llInt(
296  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
297  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
298  aggregate_state_count_i64, aggregate_state_type);
299  std::string agg_count_func_name = "agg_count";
300  switch (window_func_ti.get_type()) {
301  case kFLOAT: {
302  agg_count_func_name += "_float";
303  break;
304  }
305  case kDOUBLE: {
306  agg_count_func_name += "_double";
307  break;
308  }
309  default: {
310  break;
311  }
312  }
313  agg_count_func_name += "_skip_val";
314  cgen_state_->emitCall(agg_count_func_name,
315  {aggregate_state_count, crt_val, window_func_null_val});
316 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:
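AVG needs a second state slot holding the number of non-null inputs; the epilogue emits an agg_count_*_skip_val call against aggregateStateCount() to maintain it. A minimal sketch of the assumed skip-val counting semantics (name and signature are illustrative, not the runtime library's):

#include <cstdint>

// Count an input only when it is not the null sentinel, so AVG can later divide the
// running sum by the number of non-null rows (assumed semantics of the call above).
uint64_t count_skip_val(uint64_t* count, const int64_t val, const int64_t skip_val) {
  if (val != skip_val) {
    ++*count;
  }
  return *count;
}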

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 21 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AVG, CHECK(), CHECK_EQ, CodeGenerator::codegen(), COUNT, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAST_VALUE, LEAD, LOG, MAX, MIN, NTILE, PERCENT_RANK, CodeGenerator::posArg(), RANK, ROW_NUMBER, and SUM.

22  {
23  CodeGenerator code_generator(this);
24  const auto window_func_context =
25  WindowProjectNodeContext::get()->activateWindowFunctionContext(target_index);
26  const auto window_func = window_func_context->getWindowFunction();
27  switch (window_func->getKind()) {
28  case SqlWindowFunctionKind::ROW_NUMBER:
29  case SqlWindowFunctionKind::RANK:
30  case SqlWindowFunctionKind::DENSE_RANK:
31  case SqlWindowFunctionKind::NTILE: {
32  return cgen_state_->emitCall("row_number_window_func",
33  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
34  window_func_context->output())),
35  code_generator.posArg(nullptr)});
36  }
37  case SqlWindowFunctionKind::PERCENT_RANK:
38  case SqlWindowFunctionKind::CUME_DIST: {
39  return cgen_state_->emitCall("percent_window_func",
40  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
41  window_func_context->output())),
42  code_generator.posArg(nullptr)});
43  }
44  case SqlWindowFunctionKind::LAG:
45  case SqlWindowFunctionKind::LEAD:
46  case SqlWindowFunctionKind::FIRST_VALUE:
47  case SqlWindowFunctionKind::LAST_VALUE: {
49  const auto& args = window_func->getArgs();
50  CHECK(!args.empty());
51  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
52  CHECK_EQ(arg_lvs.size(), size_t(1));
53  return arg_lvs.front();
54  }
55  case SqlWindowFunctionKind::AVG:
56  case SqlWindowFunctionKind::MIN:
57  case SqlWindowFunctionKind::MAX:
58  case SqlWindowFunctionKind::SUM:
59  case SqlWindowFunctionKind::COUNT: {
60  return codegenWindowFunctionAggregate(co);
61  }
62  default: {
63  LOG(FATAL) << "Invalid window function kind";
64  }
65  }
66  return nullptr;
67 }
#define CHECK_EQ(x, y)
Definition: Logger.h:201
#define LOG(tag)
Definition: Logger.h:188
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
const WindowFunctionContext * activateWindowFunctionContext(const size_t target_index) const
CHECK(cgen_state)
static const WindowProjectNodeContext * get()
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregate ( const CompilationOptions co)
private

Definition at line 137 of file WindowFunctionIR.cpp.

References AVG, CHECK(), WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().

137  {
138  const auto reset_state_false_bb = codegenWindowResetStateControlFlow();
139  auto aggregate_state = aggregateWindowStatePtr();
140  llvm::Value* aggregate_state_count = nullptr;
141  const auto window_func_context =
142  WindowProjectNodeContext::getActiveWindowFunctionContext();
143  const auto window_func = window_func_context->getWindowFunction();
144  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
145  const auto aggregate_state_count_i64 = cgen_state_->llInt(
146  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
147  const auto pi64_type =
148  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
149  aggregate_state_count =
150  cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_count_i64, pi64_type);
151  }
152  codegenWindowFunctionStateInit(aggregate_state);
153  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
154  const auto count_zero = cgen_state_->llInt(int64_t(0));
155  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
156  }
157  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
158  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
159  CHECK(WindowProjectNodeContext::get());
160  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
161 }
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
CHECK(cgen_state)
static const WindowProjectNodeContext * get()
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
static WindowFunctionContext * getActiveWindowFunctionContext()
llvm::BasicBlock * codegenWindowResetStateControlFlow()

+ Here is the call graph for this function:
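For AVG the function therefore manages two slots: the running aggregate (initialized by codegenWindowFunctionStateInit) and an explicit count, zeroed with agg_id before branching to the accumulation block. A plain-data sketch of that state, under the assumption that AVG is finalized elsewhere as sum divided by count:

#include <cstdint>

struct WindowAvgState {       // illustrative stand-in, not a type from the codebase
  double sum;                 // aggregate_state
  int64_t count;              // aggregate_state_count
};

void reset_avg_state(WindowAvgState& state, const double null_sentinel) {
  state.sum = null_sentinel;  // non-COUNT aggregates start at the type's null value
  state.count = 0;            // the agg_id(aggregate_state_count, 0) call above
}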

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 240 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AVG, CHECK(), CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kFLOAT, and SUM.

241  {
242  const auto window_func_context =
243  WindowProjectNodeContext::getActiveWindowFunctionContext();
244  const auto window_func = window_func_context->getWindowFunction();
245  const auto window_func_ti = get_adjusted_window_type_info(window_func);
246  const auto window_func_null_val =
247  window_func_ti.is_fp()
248  ? cgen_state_->inlineFpNull(window_func_ti)
249  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
250  const auto& args = window_func->getArgs();
251  llvm::Value* crt_val;
252  if (args.empty()) {
253  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
254  crt_val = cgen_state_->llInt(int64_t(1));
255  } else {
256  CodeGenerator code_generator(this);
257  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
258  CHECK_EQ(arg_lvs.size(), size_t(1));
259  if (window_func->getKind() == SqlWindowFunctionKind::SUM && !window_func_ti.is_fp()) {
260  crt_val = code_generator.codegenCastBetweenIntTypes(
261  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
262  } else {
263  crt_val = window_func_ti.get_type() == kFLOAT
264  ? arg_lvs.front()
265  : cgen_state_->castToTypeIn(arg_lvs.front(), 64);
266  }
267  }
268  const auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
269  llvm::Value* multiplicity_lv = nullptr;
270  if (args.empty()) {
271  cgen_state_->emitCall(agg_name, {aggregate_state, crt_val});
272  } else {
273  cgen_state_->emitCall(agg_name + "_skip_val",
274  {aggregate_state, crt_val, window_func_null_val});
275  }
276  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
277  codegenWindowAvgEpilogue(crt_val, window_func_null_val, multiplicity_lv);
278  }
279  return codegenAggregateWindowState();
280 }
#define CHECK_EQ(x, y)
Definition: Logger.h:201
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
std::string get_window_agg_name(const SqlWindowFunctionKind kind, const SQLTypeInfo &window_func_ti)
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
CHECK(cgen_state)
llvm::Value * codegenAggregateWindowState()
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

void Executor::codegenWindowFunctionStateInit ( llvm::Value *  aggregate_state)
private

Definition at line 191 of file WindowFunctionIR.cpp.

References COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

191  {
192  const auto window_func_context =
193  WindowProjectNodeContext::getActiveWindowFunctionContext();
194  const auto window_func = window_func_context->getWindowFunction();
195  const auto window_func_ti = get_adjusted_window_type_info(window_func);
196  const auto window_func_null_val =
197  window_func_ti.is_fp()
198  ? cgen_state_->inlineFpNull(window_func_ti)
199  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
200  llvm::Value* window_func_init_val;
201  if (window_func_context->getWindowFunction()->getKind() ==
202  SqlWindowFunctionKind::COUNT) {
203  switch (window_func_ti.get_type()) {
204  case kFLOAT: {
205  window_func_init_val = cgen_state_->llFp(float(0));
206  break;
207  }
208  case kDOUBLE: {
209  window_func_init_val = cgen_state_->llFp(double(0));
210  break;
211  }
212  default: {
213  window_func_init_val = cgen_state_->llInt(int64_t(0));
214  break;
215  }
216  }
217  } else {
218  window_func_init_val = window_func_null_val;
219  }
220  const auto pi32_type =
221  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
222  switch (window_func_ti.get_type()) {
223  case kDOUBLE: {
224  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
225  break;
226  }
227  case kFLOAT: {
228  aggregate_state =
229  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
230  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
231  break;
232  }
233  default: {
234  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
235  break;
236  }
237  }
238 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:
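The initialization rule is: COUNT starts from zero of the matching type, every other window aggregate starts from the type's null sentinel, and the store is routed to agg_id, agg_id_float, or agg_id_double by the adjusted type. A minimal restatement of those two choices (enums and function names are illustrative):

#include <string>

enum class WindowAggKind { COUNT, SUM, MIN, MAX, AVG };
enum class SlotType { INT64, FLOAT, DOUBLE };

// COUNT starts at zero; everything else starts at the type's null sentinel.
double initial_window_state(const WindowAggKind kind, const double null_sentinel) {
  return kind == WindowAggKind::COUNT ? 0.0 : null_sentinel;
}

// The initializing store goes through the runtime helper matching the slot type.
std::string init_store_function(const SlotType type) {
  switch (type) {
    case SlotType::DOUBLE:
      return "agg_id_double";
    case SlotType::FLOAT:
      return "agg_id_float";  // the state pointer is first bitcast to i32*
    default:
      return "agg_id";
  }
}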

llvm::BasicBlock * Executor::codegenWindowResetStateControlFlow ( )
private

Definition at line 163 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::getActiveWindowFunctionContext(), CodeGenerator::posArg(), and CodeGenerator::toBool().

163  {
164  const auto window_func_context =
165  WindowProjectNodeContext::getActiveWindowFunctionContext();
166  const auto bitset = cgen_state_->llInt(
167  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
168  const auto min_val = cgen_state_->llInt(int64_t(0));
169  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
170  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
171  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
172  CodeGenerator code_generator(this);
173  const auto reset_state =
174  code_generator.toBool(cgen_state_->emitCall("bit_is_set",
175  {bitset,
176  code_generator.posArg(nullptr),
177  min_val,
178  max_val,
179  null_val,
180  null_bool_val}));
181  const auto reset_state_true_bb = llvm::BasicBlock::Create(
182  cgen_state_->context_, "reset_state.true", cgen_state_->row_func_);
183  const auto reset_state_false_bb = llvm::BasicBlock::Create(
184  cgen_state_->context_, "reset_state.false", cgen_state_->row_func_);
185  cgen_state_->ir_builder_.CreateCondBr(
186  reset_state, reset_state_true_bb, reset_state_false_bb);
187  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
188  return reset_state_false_bb;
189 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
static WindowFunctionContext * getActiveWindowFunctionContext()

+ Here is the call graph for this function:
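The bit_is_set call consults partitionStart() at the current row position; a set bit marks the first row of a partition, which takes the reset_state.true path before falling through to accumulation. A stand-alone sketch of the assumed bitset layout and the per-row decision (helper names are illustrative):

#include <cstddef>
#include <cstdint>

// Assumed layout: one bit per output row, set when the row starts a new partition.
inline bool bit_is_set(const int8_t* bitset, const size_t pos) {
  return (bitset[pos >> 3] >> (pos & 7)) & 1;
}

void accumulate_partitioned(const int8_t* partition_start, const size_t row_count) {
  for (size_t pos = 0; pos < row_count; ++pos) {
    if (bit_is_set(partition_start, pos)) {
      // "reset_state.true": re-initialize the window aggregate state
    }
    // "reset_state.false": accumulate the current row into the state
  }
}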

ResultSetPtr Executor::collectAllDeviceResults ( ExecutionDispatch execution_dispatch,
const std::vector< Analyzer::Expr * > &  target_exprs,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
)
private

Definition at line 1517 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), catalog_(), DEBUG_TIMER, Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, GroupByAndAggregate::shard_count_for_top_groups(), and use_speculative_top_n().

1522  {
1523  auto timer = DEBUG_TIMER(__func__);
1524  auto& result_per_device = execution_dispatch.getFragmentResults();
1525  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
1526  QueryDescriptionType::NonGroupedAggregate) {
1527  return build_row_for_empty_input(target_exprs, query_mem_desc, device_type);
1528  }
1529  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1530  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
1531  return reduceSpeculativeTopN(
1532  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
1533  }
1534  const auto shard_count =
1535  device_type == ExecutorDeviceType::GPU
1536  ? GroupByAndAggregate::shard_count_for_top_groups(ra_exe_unit, *catalog_)
1537  : 0;
1538 
1539  if (shard_count && !result_per_device.empty()) {
1540  return collectAllDeviceShardedTopResults(execution_dispatch);
1541  }
1542  return reduceMultiDeviceResults(
1543  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
1544 }
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:930
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:847
QueryDescriptionType getQueryDescriptionType() const
ResultSetPtr collectAllDeviceShardedTopResults(ExecutionDispatch &execution_dispatch) const
Definition: Execute.cpp:1628
ResultSetPtr build_row_for_empty_input(const std::vector< Analyzer::Expr * > &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
Definition: Execute.cpp:1480
#define DEBUG_TIMER(name)
Definition: Logger.h:299
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)

+ Here is the call graph for this function:
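Restating the dispatch above as a stand-alone decision function (the enum and parameter names are illustrative, not from the codebase):

#include <cstddef>

enum class ReductionPath { EmptyInputRow, SpeculativeTopN, ShardedTopN, MultiDevice };

ReductionPath choose_reduction_path(const bool results_empty,
                                    const bool non_grouped_aggregate,
                                    const bool speculative_top_n,
                                    const size_t shard_count) {
  if (results_empty && non_grouped_aggregate) {
    return ReductionPath::EmptyInputRow;    // build_row_for_empty_input
  }
  if (speculative_top_n) {
    return ReductionPath::SpeculativeTopN;  // reduceSpeculativeTopN
  }
  if (shard_count > 0 && !results_empty) {
    return ReductionPath::ShardedTopN;      // collectAllDeviceShardedTopResults (GPU only)
  }
  return ReductionPath::MultiDevice;        // reduceMultiDeviceResults
}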

ResultSetPtr Executor::collectAllDeviceShardedTopResults ( ExecutionDispatch execution_dispatch) const
private

Definition at line 1628 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LE, Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragmentResults(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), and run_benchmark_import::result.

1629  {
1630  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1631  auto& result_per_device = execution_dispatch.getFragmentResults();
1632  const auto first_result_set = result_per_device.front().first;
1633  CHECK(first_result_set);
1634  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
1635  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
1636  const auto top_n = ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
1637  top_query_mem_desc.setEntryCount(0);
1638  for (auto& result : result_per_device) {
1639  const auto result_set = result.first;
1640  CHECK(result_set);
1641  result_set->sort(ra_exe_unit.sort_info.order_entries, top_n);
1642  size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
1643  top_query_mem_desc.setEntryCount(new_entry_cnt);
1644  }
1645  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
1646  first_result_set->getDeviceType(),
1647  top_query_mem_desc,
1648  first_result_set->getRowSetMemOwner(),
1649  this);
1650  auto top_storage = top_result_set->allocateStorage();
1651  size_t top_output_row_idx{0};
1652  for (auto& result : result_per_device) {
1653  const auto result_set = result.first;
1654  CHECK(result_set);
1655  const auto& top_permutation = result_set->getPermutationBuffer();
1656  CHECK_LE(top_permutation.size(), top_n);
1657  if (top_query_mem_desc.didOutputColumnar()) {
1658  top_output_row_idx = permute_storage_columnar(result_set->getStorage(),
1659  result_set->getQueryMemDesc(),
1660  top_storage,
1661  top_output_row_idx,
1662  top_query_mem_desc,
1663  top_permutation);
1664  } else {
1665  top_output_row_idx = permute_storage_row_wise(result_set->getStorage(),
1666  top_storage,
1667  top_output_row_idx,
1668  top_query_mem_desc,
1669  top_permutation);
1670  }
1671  }
1672  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
1673  return top_result_set;
1674 }
#define CHECK_EQ(x, y)
Definition: Logger.h:201
size_t permute_storage_row_wise(const ResultSetStorage *input_storage, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:1607
CHECK(cgen_state)
#define CHECK_LE(x, y)
Definition: Logger.h:204
size_t permute_storage_columnar(const ResultSetStorage *input_storage, const QueryMemoryDescriptor &input_query_mem_desc, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:1557

+ Here is the call graph for this function:
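Each per-device result already holds a candidate top-n for its shards, so the merge only sorts and truncates each input and concatenates the permuted rows into one buffer; no global re-sort happens inside this function. A simplified sketch with integers standing in for result rows:

#include <algorithm>
#include <cstddef>
#include <functional>
#include <vector>

std::vector<int> merge_sharded_top(std::vector<std::vector<int>> per_device_rows,
                                   const size_t top_n) {
  std::vector<int> merged;
  for (auto& rows : per_device_rows) {
    std::sort(rows.begin(), rows.end(), std::greater<int>());  // per-result sort
    if (rows.size() > top_n) {
      rows.resize(top_n);  // keep at most LIMIT + OFFSET rows per input
    }
    merged.insert(merged.end(), rows.begin(), rows.end());  // the permuted copy
  }
  return merged;
}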

bool Executor::compileBody ( const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co 
)
private

Definition at line 1852 of file NativeCodegen.cpp.

1855  {
1856  // generate the code for the filter
1857  std::vector<Analyzer::Expr*> primary_quals;
1858  std::vector<Analyzer::Expr*> deferred_quals;
1859  bool short_circuited =
1860  CodeGenerator::prioritizeQuals(ra_exe_unit, primary_quals, deferred_quals);
1861  if (short_circuited) {
1862  VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
1863  << "short-circuited and deferred " << std::to_string(deferred_quals.size())
1864  << " quals";
1865  }
1866  llvm::Value* filter_lv = cgen_state_->llBool(true);
1867  CodeGenerator code_generator(this);
1868  for (auto expr : primary_quals) {
1869  // Generate the filter for primary quals
1870  auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
1871  filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
1872  }
1873  CHECK(filter_lv->getType()->isIntegerTy(1));
1874  llvm::BasicBlock* sc_false{nullptr};
1875  if (!deferred_quals.empty()) {
1876  auto sc_true = llvm::BasicBlock::Create(
1877  cgen_state_->context_, "sc_true", cgen_state_->row_func_);
1878  sc_false = llvm::BasicBlock::Create(
1879  cgen_state_->context_, "sc_false", cgen_state_->row_func_);
1880  cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
1881  cgen_state_->ir_builder_.SetInsertPoint(sc_false);
1882  if (ra_exe_unit.join_quals.empty()) {
1883  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
1884  }
1885  cgen_state_->ir_builder_.SetInsertPoint(sc_true);
1886  filter_lv = cgen_state_->llBool(true);
1887  }
1888  for (auto expr : deferred_quals) {
1889  filter_lv = cgen_state_->ir_builder_.CreateAnd(
1890  filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
1891  }
1892 
1893  CHECK(filter_lv->getType()->isIntegerTy(1));
1894  return group_by_and_aggregate.codegen(filter_lv, sc_false, query_mem_desc, co);
1895 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr * > &primary_quals, std::vector< Analyzer::Expr * > &deferred_quals)
Definition: LogicalIR.cpp:157
std::string to_string(char const *&&v)
CHECK(cgen_state)
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const JoinQualsPerNestingLevel join_quals
#define VLOG(n)
Definition: Logger.h:283
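The emitted filter has two tiers: the prioritized (cheap) quals are AND-ed unconditionally, and only when they all pass does control reach the deferred (expensive) quals; when there are no join quals, a failing primary filter returns from the row function immediately. A plain C++ sketch of that shape (Qual and row_passes are illustrative stand-ins):

#include <cstdint>
#include <functional>
#include <vector>

using Qual = std::function<bool(int64_t)>;

bool row_passes(const int64_t row,
                const std::vector<Qual>& primary_quals,     // cheap, evaluated first
                const std::vector<Qual>& deferred_quals) {  // expensive, short-circuited
  bool filter = true;
  for (const auto& qual : primary_quals) {
    filter = filter && qual(row);  // the CreateAnd chain over primary quals
  }
  if (!filter) {
    return false;  // "sc_false": deferred quals are never evaluated
  }
  bool deferred = true;  // "sc_true": the filter restarts at true
  for (const auto& qual : deferred_quals) {
    deferred = deferred && qual(row);
  }
  return deferred;
}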
std::tuple< Executor::CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache,
RenderInfo render_info = nullptr 
)
private

Definition at line 1572 of file NativeCodegen.cpp.

1583  {
1584  auto timer = DEBUG_TIMER(__func__);
1585  nukeOldState(allow_lazy_fetch, query_infos, &ra_exe_unit);
1586 
1587  GroupByAndAggregate group_by_and_aggregate(
1588  this, co.device_type_, ra_exe_unit, query_infos, row_set_mem_owner);
1589  auto query_mem_desc =
1590  group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
1591  max_groups_buffer_entry_guess,
1592  crt_min_byte_width,
1593  render_info,
1594  eo.output_columnar_hint);
1595 
1596  if (query_mem_desc->getQueryDescriptionType() ==
1598  !has_cardinality_estimation &&
1599  (!render_info || !render_info->isPotentialInSituRender()) && !eo.just_explain) {
1601  }
1602 
1603  const bool output_columnar = query_mem_desc->didOutputColumnar();
1604 
1606  const size_t num_count_distinct_descs =
1607  query_mem_desc->getCountDistinctDescriptorsSize();
1608  for (size_t i = 0; i < num_count_distinct_descs; i++) {
1609  const auto& count_distinct_descriptor =
1610  query_mem_desc->getCountDistinctDescriptor(i);
1611  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::StdSet ||
1612  (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
1613  !co.hoist_literals_)) {
1614  throw QueryMustRunOnCpu();
1615  }
1616  }
1617  }
1618 
1619  // Read the module template and target either CPU or GPU
1620  // by binding the stream position functions to the right implementation:
1621  // stride access for GPU, contiguous for CPU
1622  auto rt_module_copy = llvm::CloneModule(
1623 #if LLVM_VERSION_MAJOR >= 7
1624  *g_rt_module.get(),
1625 #else
1626  g_rt_module.get(),
1627 #endif
1628  cgen_state_->vmap_,
1629  [](const llvm::GlobalValue* gv) {
1630  auto func = llvm::dyn_cast<llvm::Function>(gv);
1631  if (!func) {
1632  return true;
1633  }
1634  return (func->getLinkage() == llvm::GlobalValue::LinkageTypes::PrivateLinkage ||
1635  func->getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage ||
1636  CodeGenerator::alwaysCloneRuntimeFunction(func));
1637  });
1638 
1639  if (co.device_type_ == ExecutorDeviceType::CPU) {
1640  if (is_udf_module_present(true)) {
1641  link_udf_module(udf_cpu_module, *rt_module_copy, cgen_state_.get());
1642  }
1643  if (is_rt_udf_module_present(true)) {
1644  link_udf_module(
1645  rt_udf_cpu_module, *rt_module_copy, cgen_state_.get());
1646  }
1647  } else {
1648  rt_module_copy->setDataLayout(get_gpu_data_layout());
1649  rt_module_copy->setTargetTriple(get_gpu_target_triple_string());
1650 
1651  if (is_udf_module_present()) {
1652  llvm::Triple gpu_triple(udf_gpu_module->getTargetTriple());
1653 
1654  if (!gpu_triple.isNVPTX()) {
1655  throw QueryMustRunOnCpu();
1656  }
1657 
1658  link_udf_module(udf_gpu_module, *rt_module_copy, cgen_state_.get());
1659  }
1660  if (is_rt_udf_module_present()) {
1661  link_udf_module(
1662  rt_udf_gpu_module, *rt_module_copy, cgen_state_.get());
1663  }
1664  }
1665 
1666  cgen_state_->module_ = rt_module_copy.release();
1667 
1668  auto agg_fnames =
1669  get_agg_fnames(ra_exe_unit.target_exprs, !ra_exe_unit.groupby_exprs.empty());
1670 
1671  const auto agg_slot_count = ra_exe_unit.estimator ? size_t(1) : agg_fnames.size();
1672 
1673  const bool is_group_by{query_mem_desc->isGroupBy()};
1674  auto query_func = is_group_by ? query_group_by_template(cgen_state_->module_,
1675  co.hoist_literals_,
1676  *query_mem_desc,
1677  co.device_type_,
1678  ra_exe_unit.scan_limit)
1679  : query_template(cgen_state_->module_,
1680  agg_slot_count,
1681  co.hoist_literals_,
1682  !!ra_exe_unit.estimator);
1683  bind_pos_placeholders("pos_start", true, query_func, cgen_state_->module_);
1684  bind_pos_placeholders("group_buff_idx", false, query_func, cgen_state_->module_);
1685  bind_pos_placeholders("pos_step", false, query_func, cgen_state_->module_);
1686 
1687  cgen_state_->query_func_ = query_func;
1688  cgen_state_->query_func_entry_ir_builder_.SetInsertPoint(
1689  &query_func->getEntryBlock().front());
1690 
1691  std::vector<llvm::Value*> col_heads;
1692  std::tie(cgen_state_->row_func_, col_heads) =
1693  create_row_function(ra_exe_unit.input_col_descs.size(),
1694  is_group_by ? 0 : agg_slot_count,
1695  co.hoist_literals_,
1696  query_func,
1697  cgen_state_->module_,
1698  cgen_state_->context_);
1699  CHECK(cgen_state_->row_func_);
1700  // make sure it's in-lined, we don't want register spills in the inner loop
1701  mark_function_always_inline(cgen_state_->row_func_);
1702  auto bb =
1703  llvm::BasicBlock::Create(cgen_state_->context_, "entry", cgen_state_->row_func_);
1704  cgen_state_->ir_builder_.SetInsertPoint(bb);
1705  preloadFragOffsets(ra_exe_unit.input_descs, query_infos);
1706  RelAlgExecutionUnit body_execution_unit = ra_exe_unit;
1707  const auto join_loops =
1708  buildJoinLoops(body_execution_unit, co, eo, query_infos, column_cache);
1709  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
1710  const auto is_not_deleted_bb = codegenSkipDeletedOuterTableRow(ra_exe_unit, co);
1711  if (is_not_deleted_bb) {
1712  bb = is_not_deleted_bb;
1713  }
1714  if (!join_loops.empty()) {
1715  codegenJoinLoops(join_loops,
1716  body_execution_unit,
1717  group_by_and_aggregate,
1718  query_func,
1719  bb,
1720  *(query_mem_desc.get()),
1721  co,
1722  eo);
1723  } else {
1724  const bool can_return_error =
1725  compileBody(ra_exe_unit, group_by_and_aggregate, *query_mem_desc, co);
1726  if (can_return_error || cgen_state_->needs_error_check_ || eo.with_dynamic_watchdog) {
1727  createErrorCheckControlFlow(query_func, eo.with_dynamic_watchdog, co.device_type_);
1728  }
1729  }
1730  std::vector<llvm::Value*> hoisted_literals;
1731 
1732  if (co.hoist_literals_) {
1733  VLOG(1) << "number of hoisted literals: "
1734  << cgen_state_->query_func_literal_loads_.size()
1735  << " / literal buffer usage: " << cgen_state_->getLiteralBufferUsage(0)
1736  << " bytes";
1737  }
1738 
1739  if (co.hoist_literals_ && !cgen_state_->query_func_literal_loads_.empty()) {
1740  // we have some hoisted literals...
1741  hoisted_literals = inlineHoistedLiterals();
1742  }
1743  // iterate through all the instructions in the query template function and
1744  // replace the call to the filter placeholder with the call to the actual filter
1745  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1746  ++it) {
1747  if (!llvm::isa<llvm::CallInst>(*it)) {
1748  continue;
1749  }
1750  auto& filter_call = llvm::cast<llvm::CallInst>(*it);
1751  if (std::string(filter_call.getCalledFunction()->getName()) == "row_process") {
1752  std::vector<llvm::Value*> args;
1753  for (size_t i = 0; i < filter_call.getNumArgOperands(); ++i) {
1754  args.push_back(filter_call.getArgOperand(i));
1755  }
1756  args.insert(args.end(), col_heads.begin(), col_heads.end());
1757  args.push_back(get_arg_by_name(query_func, "join_hash_tables"));
1758  // push hoisted literals arguments, if any
1759  args.insert(args.end(), hoisted_literals.begin(), hoisted_literals.end());
1760 
1761  llvm::ReplaceInstWithInst(&filter_call,
1762  llvm::CallInst::Create(cgen_state_->row_func_, args, ""));
1763  break;
1764  }
1765  }
1766  plan_state_->init_agg_vals_ =
1767  init_agg_val_vec(ra_exe_unit.target_exprs, ra_exe_unit.quals, *query_mem_desc);
1768 
1769  auto multifrag_query_func = cgen_state_->module_->getFunction(
1770  "multifrag_query" + std::string(co.hoist_literals_ ? "_hoisted_literals" : ""));
1771  CHECK(multifrag_query_func);
1772 
1773  bind_query(query_func,
1774  "query_stub" + std::string(co.hoist_literals_ ? "_hoisted_literals" : ""),
1775  multifrag_query_func,
1776  cgen_state_->module_);
1777 
1778  auto live_funcs =
1779  CodeGenerator::markDeadRuntimeFuncs(*cgen_state_->module_,
1780  {query_func, cgen_state_->row_func_},
1781  {multifrag_query_func});
1782 
1783  std::string llvm_ir;
1784  if (eo.just_explain) {
1785  if (co.explain_type_ == ExecutorExplainType::Optimized) {
1786 #ifdef WITH_JIT_DEBUG
1787  throw std::runtime_error(
1788  "Explain optimized not available when JIT runtime debug symbols are enabled");
1789 #else
1790  optimize_ir(query_func, cgen_state_->module_, live_funcs, co);
1791 #endif // WITH_JIT_DEBUG
1792  }
1793  llvm_ir =
1794  serialize_llvm_object(query_func) + serialize_llvm_object(cgen_state_->row_func_);
1795  }
1796  verify_function_ir(cgen_state_->row_func_);
1797 
1798  LOG(IR) << query_mem_desc->toString() << "\nGenerated IR\n"
1799  << serialize_llvm_object(query_func)
1800  << serialize_llvm_object(cgen_state_->row_func_) << "\nEnd of IR";
1801 
1802  return std::make_tuple(
1803  Executor::CompilationResult{
1804  co.device_type_ == ExecutorDeviceType::CPU
1805  ? optimizeAndCodegenCPU(query_func, multifrag_query_func, live_funcs, co)
1806  : optimizeAndCodegenGPU(query_func,
1807  multifrag_query_func,
1808  live_funcs,
1809  is_group_by || ra_exe_unit.estimator,
1810  cuda_mgr,
1811  co),
1812  cgen_state_->getLiterals(),
1813  output_columnar,
1814  llvm_ir},
1815  std::move(query_mem_desc));
1816 }
std::vector< Analyzer::Expr * > target_exprs
std::unique_ptr< llvm::Module > rt_udf_cpu_module
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
Definition: IRCodegen.cpp:459
std::unique_ptr< llvm::Module > udf_gpu_module
#define LOG(tag)
Definition: Logger.h:188
std::unique_ptr< llvm::Module > rt_udf_gpu_module
void mark_function_always_inline(llvm::Function *func)
llvm::StringRef get_gpu_data_layout()
bool is_udf_module_present(bool cpu_only=false)
std::vector< int64_t > init_agg_val_vec(const std::vector< TargetInfo > &targets, const QueryMemoryDescriptor &query_mem_desc)
llvm::Function * query_template(llvm::Module *module, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
const std::vector< InputDescriptor > input_descs
std::vector< std::string > get_agg_fnames(const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
std::vector< std::pair< void *, void * > > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:2892
llvm::StringRef get_gpu_target_triple_string()
void verify_function_ir(const llvm::Function *func)
const bool allow_multifrag
CHECK(cgen_state)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:116
static std::unordered_set< llvm::Function * > markDeadRuntimeFuncs(llvm::Module &module, const std::vector< llvm::Function * > &roots, const std::vector< llvm::Function * > &leaves)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const bool with_dynamic_watchdog
std::unique_ptr< llvm::Module > g_rt_module
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:929
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:2878
const bool output_columnar_hint
llvm::Function * query_group_by_template(llvm::Module *module, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit)
static void link_udf_module(const std::unique_ptr< llvm::Module > &udf_module, llvm::Module &module, CgenState *cgen_state, llvm::Linker::Flags flags=llvm::Linker::Flags::None)
const std::shared_ptr< Analyzer::Estimator > estimator
ExecutorDeviceType device_type_
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, ExecutorDeviceType device_type)
const ExecutorExplainType explain_type_
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
std::string serialize_llvm_object(const T *llvm_obj)
static bool alwaysCloneRuntimeFunction(const llvm::Function *func)
std::unique_ptr< llvm::Module > udf_cpu_module
void bind_pos_placeholders(const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
std::list< std::shared_ptr< Analyzer::Expr > > quals
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:61
#define DEBUG_TIMER(name)
Definition: Logger.h:299
std::vector< std::pair< void *, void * > > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
std::vector< llvm::Value * > inlineHoistedLiterals()
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:218
void optimize_ir(llvm::Function *query_func, llvm::Module *module, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
cgen_state module_
bool is_rt_udf_module_present(bool cpu_only=false)
#define VLOG(n)
Definition: Logger.h:283
std::pair< llvm::Function *, std::vector< llvm::Value * > > create_row_function(const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Function *query_func, llvm::Module *module, llvm::LLVMContext &context)
void bind_query(llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)
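The CloneModule filter near the top decides which runtime functions are materialized in the per-query module: function bodies are cloned only when they have private or internal linkage or are on the always-clone list; other globals are cloned unconditionally. Restated as a stand-alone predicate (the enum is an illustrative stand-in for llvm::GlobalValue linkage):

enum class Linkage { Private, Internal, External };

bool should_clone_definition(const bool is_function,
                             const Linkage linkage,
                             const bool always_clone_runtime_function) {
  if (!is_function) {
    return true;  // non-function globals are always materialized in the copy
  }
  return linkage == Linkage::Private || linkage == Linkage::Internal ||
         always_clone_runtime_function;
}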
AggregatedColRange Executor::computeColRangesCache ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 3262 of file Execute.cpp.

References catalog_(), CHECK(), getLeafColumnRange(), AggregatedColRange::setColRange(), and ExpressionRange::typeSupportsRange().

3263  {
3264  AggregatedColRange agg_col_range_cache;
3265  CHECK(catalog_);
3266  std::unordered_set<int> phys_table_ids;
3267  for (const auto& phys_input : phys_inputs) {
3268  phys_table_ids.insert(phys_input.table_id);
3269  }
3270  std::vector<InputTableInfo> query_infos;
3271  for (const int table_id : phys_table_ids) {
3272  query_infos.emplace_back(InputTableInfo{table_id, getTableInfo(table_id)});
3273  }
3274  for (const auto& phys_input : phys_inputs) {
3275  const auto cd =
3276  catalog_->getMetadataForColumn(phys_input.table_id, phys_input.col_id);
3277  CHECK(cd);
3278  if (ExpressionRange::typeSupportsRange(cd->columnType)) {
3279  const auto col_var = boost::make_unique<Analyzer::ColumnVar>(
3280  cd->columnType, phys_input.table_id, phys_input.col_id, 0);
3281  const auto col_range = getLeafColumnRange(col_var.get(), query_infos, this, false);
3282  agg_col_range_cache.setColRange(phys_input, col_range);
3283  }
3284  }
3285  return agg_col_range_cache;
3286 }
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const
Definition: Execute.cpp:246
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
ExpressionRange getLeafColumnRange(const Analyzer::ColumnVar *col_expr, const std::vector< InputTableInfo > &query_infos, const Executor *executor, const bool is_outer_join_proj)
void setColRange(const PhysicalInput &, const ExpressionRange &)
static bool typeSupportsRange(const SQLTypeInfo &ti)

+ Here is the call graph for this function:

StringDictionaryGenerations Executor::computeStringDictionaryGenerations ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 3288 of file Execute.cpp.

References catalog_(), CHECK(), kENCODING_DICT, and StringDictionaryGenerations::setGeneration().

3289  {
3290  StringDictionaryGenerations string_dictionary_generations;
3291  CHECK(catalog_);
3292  for (const auto& phys_input : phys_inputs) {
3293  const auto cd =
3294  catalog_->getMetadataForColumn(phys_input.table_id, phys_input.col_id);
3295  CHECK(cd);
3296  const auto& col_ti =
3297  cd->columnType.is_array() ? cd->columnType.get_elem_type() : cd->columnType;
3298  if (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) {
3299  const int dict_id = col_ti.get_comp_param();
3300  const auto dd = catalog_->getMetadataForDict(dict_id);
3301  CHECK(dd && dd->stringDict);
3302  string_dictionary_generations.setGeneration(dict_id,
3303  dd->stringDict->storageEntryCount());
3304  }
3305  }
3306  return string_dictionary_generations;
3307 }
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
void setGeneration(const uint32_t id, const size_t generation)
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1377

+ Here is the call graph for this function:
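A generation here is simply the dictionary's entry count captured up front via storageEntryCount(); the assumption is that it lets later stages distinguish entries that existed at query start from ones appended afterwards. A minimal sketch of the bookkeeping:

#include <cstdint>
#include <unordered_map>

using DictionaryGenerations = std::unordered_map<int, uint64_t>;  // dict_id -> generation

void set_generation(DictionaryGenerations& generations,
                    const int dict_id,
                    const uint64_t storage_entry_count) {
  generations[dict_id] = storage_entry_count;
}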

TableGenerations Executor::computeTableGenerations ( std::unordered_set< int >  phys_table_ids)
private

Definition at line 3309 of file Execute.cpp.

References TableGenerations::setGeneration().

3310  {
3311  TableGenerations table_generations;
3312  for (const int table_id : phys_table_ids) {
3313  const auto table_info = getTableInfo(table_id);
3314  table_generations.setGeneration(
3315  table_id, TableGeneration{table_info.getPhysicalNumTuples(), 0});
3316  }
3317  return table_generations;
3318 }
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const
Definition: Execute.cpp:246
void setGeneration(const uint32_t id, const TableGeneration &generation)

+ Here is the call graph for this function:

bool Executor::containsLeftDeepOuterJoin ( ) const
inline

Definition at line 377 of file Execute.h.

References cgen_state_.

377  {
378  return cgen_state_->contains_left_deep_outer_join_;
379  }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
void Executor::createErrorCheckControlFlow ( llvm::Function *  query_func,
bool  run_with_dynamic_watchdog,
ExecutorDeviceType  device_type 
)
private

Definition at line 1347 of file NativeCodegen.cpp.

1349  {
1350  // check whether the row processing was successful; currently, it can
1351  // fail by running out of group by buffer slots
1352 
1353  llvm::Value* row_count = nullptr;
1354  if (run_with_dynamic_watchdog && device_type == ExecutorDeviceType::GPU) {
1355  row_count =
1356  find_variable_in_basic_block<llvm::LoadInst>(query_func, ".entry", "row_count");
1357  }
1358 
1359  bool done_splitting = false;
1360  for (auto bb_it = query_func->begin(); bb_it != query_func->end() && !done_splitting;
1361  ++bb_it) {
1362  llvm::Value* pos = nullptr;
1363  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); ++inst_it) {
1364  if (run_with_dynamic_watchdog && llvm::isa<llvm::PHINode>(*inst_it)) {
1365  if (inst_it->getName() == "pos") {
1366  pos = &*inst_it;
1367  }
1368  continue;
1369  }
1370  if (!llvm::isa<llvm::CallInst>(*inst_it)) {
1371  continue;
1372  }
1373  auto& filter_call = llvm::cast<llvm::CallInst>(*inst_it);
1374  if (std::string(filter_call.getCalledFunction()->getName()) == "row_process") {
1375  auto next_inst_it = inst_it;
1376  ++next_inst_it;
1377  auto new_bb = bb_it->splitBasicBlock(next_inst_it);
1378  auto& br_instr = bb_it->back();
1379  llvm::IRBuilder<> ir_builder(&br_instr);
1380  llvm::Value* err_lv = &*inst_it;
1381  if (run_with_dynamic_watchdog) {
1382  CHECK(pos);
1383  llvm::Value* call_watchdog_lv = nullptr;
1384  if (device_type == ExecutorDeviceType::GPU) {
1385  // In order to make sure all threads within a block see the same barrier,
1386  // only those blocks none of whose threads have experienced the critical
1387  // edge will go through the dynamic watchdog computation
1388  CHECK(row_count);
1389  auto crit_edge_rem =
1390  (blockSize() & (blockSize() - 1))
1391  ? ir_builder.CreateSRem(
1392  row_count,
1393  cgen_state_->llInt(static_cast<int64_t>(blockSize())))
1394  : ir_builder.CreateAnd(
1395  row_count,
1396  cgen_state_->llInt(static_cast<int64_t>(blockSize() - 1)));
1397  auto crit_edge_threshold = ir_builder.CreateSub(row_count, crit_edge_rem);
1398  crit_edge_threshold->setName("crit_edge_threshold");
1399 
1400  // only those threads where pos < crit_edge_threshold go through dynamic
1401  // watchdog call
1402  call_watchdog_lv =
1403  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, pos, crit_edge_threshold);
1404  } else {
1405  // CPU path: run watchdog for every 64th row
1406  auto dw_predicate = ir_builder.CreateAnd(pos, uint64_t(0x3f));
1407  call_watchdog_lv = ir_builder.CreateICmp(
1408  llvm::ICmpInst::ICMP_EQ, dw_predicate, cgen_state_->llInt(int64_t(0LL)));
1409  }
1410  CHECK(call_watchdog_lv);
1411  auto error_check_bb = bb_it->splitBasicBlock(
1412  llvm::BasicBlock::iterator(br_instr), ".error_check");
1413  auto& watchdog_br_instr = bb_it->back();
1414 
1415  auto watchdog_check_bb = llvm::BasicBlock::Create(
1416  cgen_state_->context_, ".watchdog_check", query_func, error_check_bb);
1417  llvm::IRBuilder<> watchdog_ir_builder(watchdog_check_bb);
1418  auto detected_timeout = watchdog_ir_builder.CreateCall(
1419  cgen_state_->module_->getFunction("dynamic_watchdog"), {});
1420  auto timeout_err_lv = watchdog_ir_builder.CreateSelect(
1421  detected_timeout, cgen_state_->llInt(Executor::ERR_OUT_OF_TIME), err_lv);
1422  watchdog_ir_builder.CreateBr(error_check_bb);
1423 
1424  llvm::ReplaceInstWithInst(
1425  &watchdog_br_instr,
1426  llvm::BranchInst::Create(
1427  watchdog_check_bb, error_check_bb, call_watchdog_lv));
1428  ir_builder.SetInsertPoint(&br_instr);
1429  auto unified_err_lv = ir_builder.CreatePHI(err_lv->getType(), 2);
1430 
1431  unified_err_lv->addIncoming(timeout_err_lv, watchdog_check_bb);
1432  unified_err_lv->addIncoming(err_lv, &*bb_it);
1433  err_lv = unified_err_lv;
1434  }
1435  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1436  err_lv =
1437  ir_builder.CreateCall(cgen_state_->module_->getFunction("record_error_code"),
1438  std::vector<llvm::Value*>{err_lv, error_code_arg});
1439  if (device_type == ExecutorDeviceType::GPU) {
1440  // let kernel execution finish as expected, regardless of the observed error,
1441  // unless it is from the dynamic watchdog where all threads within that block
1442  // return together.
1443  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
1444  err_lv,
1445  cgen_state_->llInt(Executor::ERR_OUT_OF_TIME));
1446  } else {
1447  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_NE,
1448  err_lv,
1449  cgen_state_->llInt(static_cast<int32_t>(0)));
1450  }
1451  auto error_bb = llvm::BasicBlock::Create(
1452  cgen_state_->context_, ".error_exit", query_func, new_bb);
1453  llvm::ReturnInst::Create(cgen_state_->context_, error_bb);
1454  llvm::ReplaceInstWithInst(&br_instr,
1455  llvm::BranchInst::Create(error_bb, new_bb, err_lv));
1456  done_splitting = true;
1457  break;
1458  }
1459  }
1460  }
1461  CHECK(done_splitting);
1462 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:914
CHECK(cgen_state)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:116
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:979
unsigned blockSize() const
Definition: Execute.cpp:2986
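The crit_edge_rem computation is the usual power-of-two remainder trick: blockSize() & (blockSize() - 1) is zero exactly when the block size is a power of two, in which case the cheaper bitwise AND replaces the signed remainder. The threshold then rounds row_count down to a whole number of blocks, and only threads with pos below it run the dynamic watchdog. In plain C++:

#include <cstdint>

int64_t crit_edge_threshold(const int64_t row_count, const int64_t block_size) {
  const bool power_of_two = (block_size & (block_size - 1)) == 0;
  const int64_t crit_edge_rem =
      power_of_two ? (row_count & (block_size - 1))  // cheap mask
                   : (row_count % block_size);       // general remainder
  return row_count - crit_edge_rem;  // threads with pos < threshold call the watchdog
}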
int Executor::deviceCount ( const ExecutorDeviceType  device_type) const
private

Definition at line 594 of file Execute.cpp.

References catalog_(), CHECK(), and GPU.

594  {
595  if (device_type == ExecutorDeviceType::GPU) {
596  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
597  CHECK(cuda_mgr);
598  return cuda_mgr->getDeviceCount();
599  } else {
600  return 1;
601  }
602 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:117
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958

+ Here is the call graph for this function:

int Executor::deviceCountForMemoryLevel ( const Data_Namespace::MemoryLevel  memory_level) const
private

Definition at line 604 of file Execute.cpp.

References CPU, GPU, and Data_Namespace::GPU_LEVEL.

605  {
606  return memory_level == GPU_LEVEL ? deviceCount(ExecutorDeviceType::GPU)
607  : deviceCount(ExecutorDeviceType::CPU);
608 }
ExecutorDeviceType
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:594
int64_t Executor::deviceCycles ( int  milliseconds) const
private

Definition at line 2994 of file Execute.cpp.

References catalog_(), and CHECK().

2994  {
2995  CHECK(catalog_);
2996  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
2997  CHECK(cuda_mgr);
2998  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
2999  return static_cast<int64_t>(dev_props.front().clockKhz) * milliseconds;
3000 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:117
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958

+ Here is the call graph for this function:
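Since one kHz is one clock cycle per millisecond, the conversion is a single multiply. A compile-time check of the arithmetic with an illustrative clock rate:

#include <cstdint>

constexpr int64_t device_cycles(const int64_t clock_khz, const int64_t milliseconds) {
  return clock_khz * milliseconds;
}

// e.g. a 1.455 GHz device (clockKhz = 1455000) runs 145,500,000 cycles in 100 ms
static_assert(device_cycles(1455000, 100) == 145500000, "1 kHz == 1 cycle per ms");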

void Executor::dispatchFragments ( const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)>  dispatch,
const ExecutionDispatch execution_dispatch,
const std::vector< InputTableInfo > &  table_infos,
const ExecutionOptions eo,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const size_t  context_count,
const QueryCompilationDescriptor query_comp_desc,
const QueryMemoryDescriptor query_mem_desc,
QueryFragmentDescriptor fragment_descriptor,
std::unordered_set< int > &  available_gpus,
int &  available_cpus 
)
private

Definition at line 1702 of file Execute.cpp.

References ExecutionOptions::allow_multifrag, QueryFragmentDescriptor::assignFragsToKernelDispatch(), QueryFragmentDescriptor::assignFragsToMultiDispatch(), QueryFragmentDescriptor::buildFragmentKernelMap(), catalog_(), CHECK(), CHECK_GE, CHECK_GT, anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), g_inner_join_fragment_skipping, QueryCompilationDescriptor::getDeviceType(), QueryMemoryDescriptor::getEntryCount(), Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragOffsets(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, anonymous_namespace{Execute.cpp}::has_lazy_fetched_columns(), logger::INFO, KernelPerFragment, LOG, MultifragmentKernel, Projection, query_mem_desc, QueryFragmentDescriptor::shouldCheckWorkUnitWatchdog(), QueryMemoryDescriptor::toString(), VLOG, and ExecutionOptions::with_watchdog.

1720  {
1721  std::vector<std::future<void>> query_threads;
1722  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1723  CHECK(!ra_exe_unit.input_descs.empty());
1724 
1725  const auto device_type = query_comp_desc.getDeviceType();
1726  const bool uses_lazy_fetch =
1727  plan_state_->allow_lazy_fetch_ &&
1728  has_lazy_fetched_columns(getColLazyFetchInfo(ra_exe_unit.target_exprs));
1729  const bool use_multifrag_kernel = (device_type == ExecutorDeviceType::GPU) &&
1730  eo.allow_multifrag && (!uses_lazy_fetch || is_agg);
1731 
1732  const auto device_count = deviceCount(device_type);
1733  CHECK_GT(device_count, 0);
1734 
1735  fragment_descriptor.buildFragmentKernelMap(ra_exe_unit,
1736  execution_dispatch.getFragOffsets(),
1737  device_count,
1738  device_type,
1739  use_multifrag_kernel,
1740  g_inner_join_fragment_skipping,
1741  this);
1742  if (eo.with_watchdog && fragment_descriptor.shouldCheckWorkUnitWatchdog()) {
1743  checkWorkUnitWatchdog(ra_exe_unit, table_infos, *catalog_, device_type, device_count);
1744  }
1745 
1746  if (use_multifrag_kernel) {
1747  VLOG(1) << "Dispatching multifrag kernels";
1748  VLOG(1) << query_mem_desc.toString();
1749 
1750  // NB: We should never be on this path when the query is retried because of running
1751  // out of group by slots; also, for scan only queries on CPU we want the
1752  // high-granularity, fragment by fragment execution instead. For scan only queries on
1753  // GPU, we want the multifrag kernel path to save the overhead of allocating an output
1754  // buffer per fragment.
1755  auto multifrag_kernel_dispatch =
1756  [&query_threads, &dispatch, query_comp_desc, query_mem_desc](
1757  const int device_id,
1758  const FragmentsList& frag_list,
1759  const int64_t rowid_lookup_key) {
1760  query_threads.push_back(std::async(std::launch::async,
1761  dispatch,
1762  ExecutorDeviceType::GPU,
1763  device_id,
1764  query_comp_desc,
1765  query_mem_desc,
1766  frag_list,
1767  ExecutorDispatchMode::MultifragmentKernel,
1768  rowid_lookup_key));
1769  };
1770  fragment_descriptor.assignFragsToMultiDispatch(multifrag_kernel_dispatch);
1771  } else {
1772  VLOG(1) << "Dispatching kernel per fragment";
1773  VLOG(1) << query_mem_desc.toString();
1774 
1775  if (!ra_exe_unit.use_bump_allocator && allow_single_frag_table_opt &&
1776  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1777  table_infos.size() == 1 && table_infos.front().table_id > 0) {
1778  const auto max_frag_size =
1779  table_infos.front().info.getFragmentNumTuplesUpperBound();
1780  if (max_frag_size < query_mem_desc.getEntryCount()) {
1781  LOG(INFO) << "Lowering scan limit from " << query_mem_desc.getEntryCount()
1782  << " to match max fragment size " << max_frag_size
1783  << " for kernel per fragment execution path.";
1784  throw CompilationRetryNewScanLimit(max_frag_size);
1785  }
1786  }
1787 
1788  size_t frag_list_idx{0};
1789  auto fragment_per_kernel_dispatch = [&query_threads,
1790  &dispatch,
1791  &frag_list_idx,
1792  &device_type,
1793  query_comp_desc,
1794  query_mem_desc](const int device_id,
1795  const FragmentsList& frag_list,
1796  const int64_t rowid_lookup_key) {
1797  if (!frag_list.size()) {
1798  return;
1799  }
1800  CHECK_GE(device_id, 0);
1801 
1802  query_threads.push_back(std::async(std::launch::async,
1803  dispatch,
1804  device_type,
1805  device_id,
1806  query_comp_desc,
1807  query_mem_desc,
1808  frag_list,
1809  ExecutorDispatchMode::KernelPerFragment,
1810  rowid_lookup_key));
1811 
1812  ++frag_list_idx;
1813  };
1814 
1815  fragment_descriptor.assignFragsToKernelDispatch(fragment_per_kernel_dispatch,
1816  ra_exe_unit);
1817  }
1818  for (auto& child : query_threads) {
1819  child.wait();
1820  }
1821  for (auto& child : query_threads) {
1822  child.get();
1823  }
1824 }
bool is_agg(const Analyzer::Expr *expr)
ExecutorDeviceType getDeviceType() const
std::string toString() const
#define LOG(tag)
Definition: Logger.h:188
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:284
#define CHECK_GE(x, y)
Definition: Logger.h:206
void assignFragsToKernelDispatch(DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:594
#define CHECK_GT(x, y)
Definition: Logger.h:205
std::vector< FragmentsPerTable > FragmentsList
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:78
const bool allow_multifrag
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:929
void checkWorkUnitWatchdog(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const Catalog_Namespace::Catalog &cat, const ExecutorDeviceType device_type, const int device_count)
Definition: Execute.cpp:1031
QueryDescriptionType getQueryDescriptionType() const
void assignFragsToMultiDispatch(DISPATCH_FCN f) const
void buildFragmentKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
bool has_lazy_fetched_columns(const std::vector< ColumnLazyFetchInfo > &fetched_cols)
Definition: Execute.cpp:1691
#define VLOG(n)
Definition: Logger.h:283
const bool with_watchdog

+ Here is the call graph for this function:
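Both dispatch paths follow the same pattern: one std::async task per kernel, then a wait() pass so every task finishes, followed by a get() pass so any stored exception is rethrown. A self-contained sketch of the kernel-per-fragment loop (the int fragment id stands in for the real FragmentsList):

#include <functional>
#include <future>
#include <vector>

void dispatch_per_fragment(const std::vector<int>& frag_lists,
                           const std::function<void(int)>& dispatch) {
  std::vector<std::future<void>> query_threads;
  for (const int frag_list : frag_lists) {
    query_threads.push_back(std::async(std::launch::async, dispatch, frag_list));
  }
  for (auto& child : query_threads) {
    child.wait();  // let every kernel finish
  }
  for (auto& child : query_threads) {
    child.get();   // rethrow the first stored exception, if any
  }
}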

std::shared_ptr< ResultSet > Executor::execute ( const Planner::RootPlan root_plan,
const Catalog_Namespace::SessionInfo session,
const bool  hoist_literals,
const ExecutorDeviceType  device_type,
const ExecutorOptLevel  opt_level,
const bool  allow_multifrag,
const bool  allow_loop_joins,
RenderInfo render_query_data = nullptr 
)

Definition at line 32 of file LegacyExecute.cpp.

References catalog_(), CHECK(), CPU, g_enable_dynamic_watchdog, Catalog_Namespace::SessionInfo::get_currentUser(), Planner::RootPlan::get_plan_dest(), Planner::RootPlan::get_result_table_id(), Planner::RootPlan::get_stmt_type(), Catalog_Namespace::SessionInfo::getCatalog(), Planner::RootPlan::getCatalog(), AccessPrivileges::INSERT_INTO_TABLE, Catalog_Namespace::SysCatalog::instance(), Planner::RootPlan::kEXPLAIN, kINSERT, kSELECT, DBObject::loadKey(), DBObject::setPrivileges(), TableDBObjectType, timer_start(), and timer_stop().

40  {
41  catalog_ = &root_plan->getCatalog();
42  const auto stmt_type = root_plan->get_stmt_type();
43  // capture the lock acquisition time
44  auto clock_begin = timer_start();
45  std::lock_guard<std::mutex> lock(execute_mutex_);
46  if (g_enable_dynamic_watchdog) {
47  resetInterrupt();
48  }
49  ScopeGuard restore_metainfo_cache = [this] { clearMetaInfoCache(); };
50  int64_t queue_time_ms = timer_stop(clock_begin);
51  ScopeGuard row_set_holder = [this] { row_set_mem_owner_ = nullptr; };
52  switch (stmt_type) {
53  case kSELECT: {
54  throw std::runtime_error("The legacy SELECT path has been fully deprecated.");
55  }
56  case kINSERT: {
57  if (root_plan->get_plan_dest() == Planner::RootPlan::kEXPLAIN) {
58  auto explanation_rs = std::make_shared<ResultSet>("No explanation available.");
59  explanation_rs->setQueueTime(queue_time_ms);
60  return explanation_rs;
61  }
62  auto& cat = session.getCatalog();
63  auto& sys_cat = Catalog_Namespace::SysCatalog::instance();
64  auto user_metadata = session.get_currentUser();
65  const int table_id = root_plan->get_result_table_id();
66  auto td = cat.getMetadataForTable(table_id);
67  DBObject dbObject(td->tableName, TableDBObjectType);
68  dbObject.loadKey(cat);
69  dbObject.setPrivileges(AccessPrivileges::INSERT_INTO_TABLE);
70  std::vector<DBObject> privObjects;
71  privObjects.push_back(dbObject);
72  if (!sys_cat.checkPrivileges(user_metadata, privObjects)) {
73  throw std::runtime_error(
74  "Violation of access privileges: user " + user_metadata.userName +
75  " has no insert privileges for table " + td->tableName + ".");
76  break;
77  }
78  executeSimpleInsert(root_plan);
79  auto empty_rs = std::make_shared<ResultSet>(std::vector<TargetInfo>{},
82  nullptr,
83  this);
84  empty_rs->setQueueTime(queue_time_ms);
85  return empty_rs;
86  }
87  default:
88  CHECK(false);
89  }
90  CHECK(false);
91  return nullptr;
92 }
int get_result_table_id() const
Definition: Planner.h:291
SQLStmtType get_stmt_type() const
Definition: Planner.h:290
static const AccessPrivileges INSERT_INTO_TABLE
Definition: DBObject.h:154
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
const Catalog_Namespace::Catalog & getCatalog() const
Definition: Planner.h:293
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
void executeSimpleInsert(const Planner::RootPlan *root_plan)
Definition: Execute.cpp:2600
static SysCatalog & instance()
Definition: SysCatalog.h:257
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:958
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:930
friend class QueryMemoryDescriptor
Definition: Execute.h:993
Catalog & getCatalog() const
Definition: SessionInfo.h:90
Dest get_plan_dest() const
Definition: Planner.h:299
void resetInterrupt()
static std::mutex execute_mutex_
Definition: Execute.h:967
const UserMetadata & get_currentUser() const
Definition: SessionInfo.h:93
Type timer_start()
Definition: measure.h:40
void clearMetaInfoCache()
Definition: Execute.cpp:325

+ Here is the call graph for this function:

ResultSetPtr Executor::executeExplain ( const QueryCompilationDescriptor query_comp_desc)
private

Definition at line 1382 of file Execute.cpp.

References QueryCompilationDescriptor::getIR().

1382  {
1383  return std::make_shared<ResultSet>(query_comp_desc.getIR());
1384 }

+ Here is the call graph for this function:

int32_t Executor::executePlanWithGroupBy ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationResult compilation_result,
const bool  hoist_literals,
ResultSetPtr results,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< size_t >  outer_tab_frag_ids,
QueryExecutionContext query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr data_mgr,
const int  device_id,
const int64_t  limit,
const uint32_t  start_rowid,
const uint32_t  num_tables,
RenderInfo render_info 
)
private

Definition at line 2417 of file Execute.cpp.

References CHECK(), CHECK_NE, anonymous_namespace{Execute.cpp}::check_rows_less_than_needed(), CPU, DEBUG_TIMER, ERR_DIV_BY_ZERO, ERR_INTERRUPTED, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES, error_code, logger::FATAL, g_enable_dynamic_watchdog, QueryExecutionContext::getRowSet(), GPU, RelAlgExecutionUnit::groupby_exprs, INJECT_TIMER, interrupted_(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), Executor::CompilationResult::literal_values, LOG, Executor::CompilationResult::native_functions, num_rows, QueryExecutionContext::query_mem_desc_, RenderInfo::render_allocator_map_ptr, and RenderInfo::useCudaBuffers().

2433  {
2434  auto timer = DEBUG_TIMER(__func__);
2436  CHECK(!results);
2437  if (col_buffers.empty()) {
2438  return 0;
2439  }
2440  CHECK_NE(ra_exe_unit.groupby_exprs.size(), size_t(0));
2441  // TODO(alex):
2442  // 1. Optimize size (make keys more compact).
2443  // 2. Resize on overflow.
2444  // 3. Optimize runtime.
2445  auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
2446  int32_t error_code = device_type == ExecutorDeviceType::GPU ? 0 : start_rowid;
2447  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
2449  return ERR_INTERRUPTED;
2450  }
2451 
2452  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
2453  if (render_info && render_info->useCudaBuffers()) {
2454  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
2455  }
2456 
2457  if (device_type == ExecutorDeviceType::CPU) {
2458  query_exe_context->launchCpuCode(ra_exe_unit,
2459  compilation_result.native_functions,
2460  hoist_literals,
2461  hoist_buf,
2462  col_buffers,
2463  num_rows,
2464  frag_offsets,
2465  scan_limit,
2466  &error_code,
2467  num_tables,
2468  join_hash_table_ptrs);
2469  } else {
2470  try {
2471  query_exe_context->launchGpuCode(ra_exe_unit,
2472  compilation_result.native_functions,
2473  hoist_literals,
2474  hoist_buf,
2475  col_buffers,
2476  num_rows,
2477  frag_offsets,
2478  scan_limit,
2479  data_mgr,
2480  blockSize(),
2481  gridSize(),
2482  device_id,
2483  &error_code,
2484  num_tables,
2485  join_hash_table_ptrs,
2486  render_allocator_map_ptr);
2487  } catch (const OutOfMemory&) {
2488  return ERR_OUT_OF_GPU_MEM;
2489  } catch (const OutOfRenderMemory&) {
2490  return ERR_OUT_OF_RENDER_MEM;
2491  } catch (const StreamingTopNNotSupportedInRenderQuery&) {
2493  } catch (const std::bad_alloc&) {
2494  return ERR_SPECULATIVE_TOP_OOM;
2495  } catch (const std::exception& e) {
2496  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
2497  }
2498  }
2499 
2500  if (error_code == Executor::ERR_OVERFLOW_OR_UNDERFLOW ||
2501  error_code == Executor::ERR_DIV_BY_ZERO ||
2502  error_code == Executor::ERR_OUT_OF_TIME ||
2503  error_code == Executor::ERR_INTERRUPTED ||
2505  return error_code;
2506  }
2507 
2508  if (error_code != Executor::ERR_OVERFLOW_OR_UNDERFLOW &&
2509  error_code != Executor::ERR_DIV_BY_ZERO && !render_allocator_map_ptr) {
2510  results =
2511  query_exe_context->getRowSet(ra_exe_unit, query_exe_context->query_mem_desc_);
2512  CHECK(results);
2513  results->holdLiterals(hoist_buf);
2514  }
2515  if (error_code < 0 && render_allocator_map_ptr) {
2516  // More rows passed the filter than available slots. We don't have a count to check,
2517  // so assume we met the limit if a scan limit is set
2518  if (scan_limit != 0) {
2519  return 0;
2520  } else {
2521  return error_code;
2522  }
2523  }
2524  if (error_code && (!scan_limit || check_rows_less_than_needed(results, scan_limit))) {
2525  return error_code; // unlucky, not enough results and we ran out of slots
2526  }
2527 
2528  return 0;
2529 }
bool useCudaBuffers() const
Definition: RenderInfo.cpp:66
const int8_t const int64_t * num_rows
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:980
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables)
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &cu_functions, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables, RenderAllocatorMap *render_allocator_map)
#define LOG(tag)
Definition: Logger.h:188
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:2531
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY
Definition: Execute.h:984
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:971
ResultSetPtr getRowSet(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc) const
CHECK(cgen_state)
#define INJECT_TIMER(DESC)
Definition: measure.h:91
#define CHECK_NE(x, y)
Definition: Logger.h:202
static const int32_t ERR_OUT_OF_RENDER_MEM
Definition: Execute.h:975
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:977
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:979
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
Definition: Execute.h:985
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:972
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:332
unsigned gridSize() const
Definition: Execute.cpp:2978
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:32
bool check_rows_less_than_needed(const ResultSetPtr &results, const size_t scan_limit)
Definition: Execute.cpp:2410
static const int32_t ERR_SPECULATIVE_TOP_OOM
Definition: Execute.h:978
#define DEBUG_TIMER(name)
Definition: Logger.h:299
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2417
unsigned blockSize() const
Definition: Execute.cpp:2986
const QueryMemoryDescriptor query_mem_desc_
bool interrupted_
Definition: Execute.h:938
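The int32_t return value follows the Executor error-code convention: zero means success, and the ERR_* constants identify the failure. A hedged helper sketch (not the engine's actual policy) showing how a caller might translate a code into an exception:

void throw_on_error(const int32_t error_code) {  // illustrative helper, not part of Executor
  if (error_code == Executor::ERR_DIV_BY_ZERO) {
    throw std::runtime_error("Division by zero");
  }
  if (error_code == Executor::ERR_OUT_OF_GPU_MEM) {
    throw std::runtime_error("Query ran out of GPU memory");
  }
  if (error_code == Executor::ERR_OVERFLOW_OR_UNDERFLOW) {
    throw std::runtime_error("Integer overflow or underflow");
  }
  if (error_code != 0) {
    throw std::runtime_error("Query failed with error code " + std::to_string(error_code));
  }
}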

+ Here is the call graph for this function:

int32_t Executor::executePlanWithoutGroupBy ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationResult compilation_result,
const bool  hoist_literals,
ResultSetPtr results,
const std::vector< Analyzer::Expr * > &  target_exprs,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t * >> &  col_buffers,
QueryExecutionContext query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr data_mgr,
const int  device_id,
const uint32_t  start_rowid,
const uint32_t  num_tables,
RenderInfo render_info 
)
private

Definition at line 2269 of file Execute.cpp.

References CHECK(), CHECK_EQ, CPU, DEBUG_TIMER, ERR_DIV_BY_ZERO, ERR_INTERRUPTED, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES, error_code, RelAlgExecutionUnit::estimator, QueryExecutionContext::estimator_result_set_, logger::FATAL, g_bigint_count, g_enable_dynamic_watchdog, get_target_info(), Experimental::MetaTypeClassFactory< CLASSIFICATIONS_PACK >::getMetaTypeClass(), GPU, INJECT_TIMER, interrupted_(), RenderInfo::isPotentialInSituRender(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), Executor::CompilationResult::literal_values, LOG, Executor::CompilationResult::native_functions, num_rows, out, QueryExecutionContext::query_buffers_, RenderInfo::render_allocator_map_ptr, and RenderInfo::useCudaBuffers().

2284  {
2286  auto timer = DEBUG_TIMER(__func__);
2287  CHECK(!results);
2288  if (col_buffers.empty()) {
2289  return 0;
2290  }
2291 
2292  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
2293  if (render_info) {
2294  // TODO(adb): make sure that we either never get here in the CPU case, or if we do get
2295  // here, we are in non-insitu mode.
2296  CHECK(render_info->useCudaBuffers() || !render_info->isPotentialInSituRender())
2297  << "CUDA disabled rendering in the executePlanWithoutGroupBy query path is "
2298  "currently unsupported.";
2299  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
2300  }
2301 
2302  int32_t error_code = device_type == ExecutorDeviceType::GPU ? 0 : start_rowid;
2303  std::vector<int64_t*> out_vec;
2304  const auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
2305  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
2306  std::unique_ptr<OutVecOwner> output_memory_scope;
2308  return ERR_INTERRUPTED;
2309  }
2310  if (device_type == ExecutorDeviceType::CPU) {
2311  out_vec = query_exe_context->launchCpuCode(ra_exe_unit,
2312  compilation_result.native_functions,
2313  hoist_literals,
2314  hoist_buf,
2315  col_buffers,
2316  num_rows,
2317  frag_offsets,
2318  0,
2319  &error_code,
2320  num_tables,
2321  join_hash_table_ptrs);
2322  output_memory_scope.reset(new OutVecOwner(out_vec));
2323  } else {
2324  try {
2325  out_vec = query_exe_context->launchGpuCode(ra_exe_unit,
2326  compilation_result.native_functions,
2327  hoist_literals,
2328  hoist_buf,
2329  col_buffers,
2330  num_rows,
2331  frag_offsets,
2332  0,
2333  data_mgr,
2334  blockSize(),
2335  gridSize(),
2336  device_id,
2337  &error_code,
2338  num_tables,
2339  join_hash_table_ptrs,
2340  render_allocator_map_ptr);
2341  output_memory_scope.reset(new OutVecOwner(out_vec));
2342  } catch (const OutOfMemory&) {
2343  return ERR_OUT_OF_GPU_MEM;
2344  } catch (const std::exception& e) {
2345  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
2346  }
2347  }
2348  if (error_code == Executor::ERR_OVERFLOW_OR_UNDERFLOW ||
2349  error_code == Executor::ERR_DIV_BY_ZERO ||
2350  error_code == Executor::ERR_OUT_OF_TIME ||
2351  error_code == Executor::ERR_INTERRUPTED ||
2353  return error_code;
2354  }
2355  if (ra_exe_unit.estimator) {
2356  CHECK(!error_code);
2357  results =
2358  std::shared_ptr<ResultSet>(query_exe_context->estimator_result_set_.release());
2359  return 0;
2360  }
2361  std::vector<int64_t> reduced_outs;
2362  const auto num_frags = col_buffers.size();
2363  const size_t entry_count = device_type == ExecutorDeviceType::GPU
2364  ? num_frags * blockSize() * gridSize()
2365  : num_frags;
2366  if (size_t(1) == entry_count) {
2367  for (auto out : out_vec) {
2368  CHECK(out);
2369  reduced_outs.push_back(*out);
2370  }
2371  } else {
2372  size_t out_vec_idx = 0;
2373 
2374  for (const auto target_expr : target_exprs) {
2375  const auto agg_info = get_target_info(target_expr, g_bigint_count);
2376  CHECK(agg_info.is_agg);
2377 
2378  auto meta_class(
2380  auto agg_reduction_impl =
2382  agg_reduction_impl(meta_class,
2383  entry_count,
2384  error_code,
2385  agg_info,
2386  out_vec_idx,
2387  out_vec,
2388  reduced_outs,
2389  query_exe_context);
2390  if (error_code) {
2391  break;
2392  }
2393  }
2394  }
2395 
2396  if (error_code) {
2397  return error_code;
2398  }
2399 
2400  CHECK_EQ(size_t(1), query_exe_context->query_buffers_->result_sets_.size());
2401  auto rows_ptr = std::shared_ptr<ResultSet>(
2402  query_exe_context->query_buffers_->result_sets_[0].release());
2403  rows_ptr->fillOneEntry(reduced_outs);
2404  results = std::move(rows_ptr);
2405  return error_code;
2406 }
#define CHECK_EQ(x, y)
Definition: Logger.h:201
bool useCudaBuffers() const
Definition: RenderInfo.cpp:66
static auto getMetaTypeClass(SQL_TYPE_INFO const &sql_type_info) -> MetaTypeClassContainer
const int8_t const int64_t * num_rows
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:980
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables)
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &cu_functions, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables, RenderAllocatorMap *render_allocator_map)
#define LOG(tag)
Definition: Logger.h:188
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:66
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
std::unique_ptr< QueryMemoryInitializer > query_buffers_
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:2531
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:971
CHECK(cgen_state)
#define INJECT_TIMER(DESC)
Definition: measure.h:91
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:977
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:979
bool g_bigint_count
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
Definition: Execute.h:985
const std::shared_ptr< Analyzer::Estimator > estimator
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:972
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2269
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:332
unsigned gridSize() const
Definition: Execute.cpp:2978
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:32
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:61
#define DEBUG_TIMER(name)
Definition: Logger.h:299
MetaTypeClassHandler< SPECIALIZED_HANDLER, Geometry > GeoVsNonGeoClassHandler
unsigned blockSize() const
Definition: Execute.cpp:2986
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t ** out
std::unique_ptr< ResultSet > estimator_result_set_
bool interrupted_
Definition: Execute.h:938
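For non-grouped aggregates the launch produces one partial value per entry: num_frags entries on CPU and num_frags * blockSize() * gridSize() on GPU. A minimal sketch of reducing a SUM/COUNT style target over those partials (illustrative only; the real path goes through Executor::reduceResults and also handles nulls, floating point, MIN/MAX and AVG):

int64_t reduce_sum_partials(const int64_t* partials, const size_t entry_count) {
  int64_t acc = 0;
  for (size_t i = 0; i < entry_count; ++i) {
    acc += partials[i];  // SUM/COUNT partials reduce by simple addition
  }
  return acc;
}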

+ Here is the call graph for this function:

void Executor::executeSimpleInsert ( const Planner::RootPlan root_plan)
private

Definition at line 2600 of file Execute.cpp.

References Importer_NS::appendDatum(), DataBlockPtr::arraysPtr, CHECK(), CHECK_EQ, checked_malloc(), Fragmenter_Namespace::InsertData::columnIds, Fragmenter_Namespace::InsertData::data, Fragmenter_Namespace::InsertData::databaseId, get_column_descriptor(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), Planner::RootPlan::get_plan(), Planner::RootPlan::get_result_col_list(), Planner::RootPlan::get_result_table_id(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), Planner::RootPlan::getCatalog(), Catalog_Namespace::Catalog::getMetadataForTable(), inline_fixed_encoding_null_val(), anonymous_namespace{Execute.cpp}::insert_one_dict_str(), anonymous_namespace{TypedDataAccessors.h}::is_null(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), kARRAY, kBIGINT, kBOOLEAN, kCAST, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kENCODING_NONE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, Fragmenter_Namespace::InsertData::numRows, anonymous_namespace{TypedDataAccessors.h}::put_null(), anonymous_namespace{TypedDataAccessors.h}::put_null_array(), SHARD_FOR_KEY, DataBlockPtr::stringsPtr, Fragmenter_Namespace::InsertData::tableId, and to_string().

2600  {
2601  const auto plan = root_plan->get_plan();
2602  CHECK(plan);
2603  const auto values_plan = dynamic_cast<const Planner::ValuesScan*>(plan);
2604  if (!values_plan) {
2605  throw std::runtime_error(
2606  "Only simple INSERT of immediate tuples is currently supported");
2607  }
2608  row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>();
2609  const auto& targets = values_plan->get_targetlist();
2610  const int table_id = root_plan->get_result_table_id();
2611  const auto& col_id_list = root_plan->get_result_col_list();
2612  std::vector<const ColumnDescriptor*> col_descriptors;
2613  std::vector<int> col_ids;
2614  std::unordered_map<int, std::unique_ptr<uint8_t[]>> col_buffers;
2615  std::unordered_map<int, std::vector<std::string>> str_col_buffers;
2616  std::unordered_map<int, std::vector<ArrayDatum>> arr_col_buffers;
2617  auto& cat = root_plan->getCatalog();
2618  const auto table_descriptor = cat.getMetadataForTable(table_id);
2619  const auto shard_tables = cat.getPhysicalTablesDescriptors(table_descriptor);
2620  const TableDescriptor* shard{nullptr};
2621  for (const int col_id : col_id_list) {
2622  const auto cd = get_column_descriptor(col_id, table_id, cat);
2623  const auto col_enc = cd->columnType.get_compression();
2624  if (cd->columnType.is_string()) {
2625  switch (col_enc) {
2626  case kENCODING_NONE: {
2627  auto it_ok =
2628  str_col_buffers.insert(std::make_pair(col_id, std::vector<std::string>{}));
2629  CHECK(it_ok.second);
2630  break;
2631  }
2632  case kENCODING_DICT: {
2633  const auto dd = cat.getMetadataForDict(cd->columnType.get_comp_param());
2634  CHECK(dd);
2635  const auto it_ok = col_buffers.emplace(
2636  col_id, std::unique_ptr<uint8_t[]>(new uint8_t[cd->columnType.get_size()]));
2637  CHECK(it_ok.second);
2638  break;
2639  }
2640  default:
2641  CHECK(false);
2642  }
2643  } else if (cd->columnType.is_geometry()) {
2644  auto it_ok =
2645  str_col_buffers.insert(std::make_pair(col_id, std::vector<std::string>{}));
2646  CHECK(it_ok.second);
2647  } else if (cd->columnType.is_array()) {
2648  auto it_ok =
2649  arr_col_buffers.insert(std::make_pair(col_id, std::vector<ArrayDatum>{}));
2650  CHECK(it_ok.second);
2651  } else {
2652  const auto it_ok = col_buffers.emplace(
2653  col_id,
2654  std::unique_ptr<uint8_t[]>(
2655  new uint8_t[cd->columnType.get_logical_size()]())); // changed to zero-init
2656  // the buffer
2657  CHECK(it_ok.second);
2658  }
2659  col_descriptors.push_back(cd);
2660  col_ids.push_back(col_id);
2661  }
2662  size_t col_idx = 0;
2664  insert_data.databaseId = cat.getCurrentDB().dbId;
2665  insert_data.tableId = table_id;
2666  int64_t int_col_val{0};
2667  for (auto target_entry : targets) {
2668  auto col_cv = dynamic_cast<const Analyzer::Constant*>(target_entry->get_expr());
2669  if (!col_cv) {
2670  auto col_cast = dynamic_cast<const Analyzer::UOper*>(target_entry->get_expr());
2671  CHECK(col_cast);
2672  CHECK_EQ(kCAST, col_cast->get_optype());
2673  col_cv = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
2674  }
2675  CHECK(col_cv);
2676  const auto cd = col_descriptors[col_idx];
2677  auto col_datum = col_cv->get_constval();
2678  auto col_type = cd->columnType.get_type();
2679  uint8_t* col_data_bytes{nullptr};
2680  if (!cd->columnType.is_array() && !cd->columnType.is_geometry() &&
2681  (!cd->columnType.is_string() ||
2682  cd->columnType.get_compression() == kENCODING_DICT)) {
2683  const auto col_data_bytes_it = col_buffers.find(col_ids[col_idx]);
2684  CHECK(col_data_bytes_it != col_buffers.end());
2685  col_data_bytes = col_data_bytes_it->second.get();
2686  }
2687  switch (col_type) {
2688  case kBOOLEAN: {
2689  auto col_data = col_data_bytes;
2690  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2691  : (col_datum.boolval ? 1 : 0);
2692  break;
2693  }
2694  case kTINYINT: {
2695  auto col_data = reinterpret_cast<int8_t*>(col_data_bytes);
2696  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2697  : col_datum.tinyintval;
2698  int_col_val = col_datum.tinyintval;
2699  break;
2700  }
2701  case kSMALLINT: {
2702  auto col_data = reinterpret_cast<int16_t*>(col_data_bytes);
2703  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2704  : col_datum.smallintval;
2705  int_col_val = col_datum.smallintval;
2706  break;
2707  }
2708  case kINT: {
2709  auto col_data = reinterpret_cast<int32_t*>(col_data_bytes);
2710  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2711  : col_datum.intval;
2712  int_col_val = col_datum.intval;
2713  break;
2714  }
2715  case kBIGINT:
2716  case kDECIMAL:
2717  case kNUMERIC: {
2718  auto col_data = reinterpret_cast<int64_t*>(col_data_bytes);
2719  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2720  : col_datum.bigintval;
2721  int_col_val = col_datum.bigintval;
2722  break;
2723  }
2724  case kFLOAT: {
2725  auto col_data = reinterpret_cast<float*>(col_data_bytes);
2726  *col_data = col_datum.floatval;
2727  break;
2728  }
2729  case kDOUBLE: {
2730  auto col_data = reinterpret_cast<double*>(col_data_bytes);
2731  *col_data = col_datum.doubleval;
2732  break;
2733  }
2734  case kTEXT:
2735  case kVARCHAR:
2736  case kCHAR: {
2737  switch (cd->columnType.get_compression()) {
2738  case kENCODING_NONE:
2739  str_col_buffers[col_ids[col_idx]].push_back(
2740  col_datum.stringval ? *col_datum.stringval : "");
2741  break;
2742  case kENCODING_DICT: {
2743  switch (cd->columnType.get_size()) {
2744  case 1:
2745  int_col_val = insert_one_dict_str(
2746  reinterpret_cast<uint8_t*>(col_data_bytes), cd, col_cv, cat);
2747  break;
2748  case 2:
2749  int_col_val = insert_one_dict_str(
2750  reinterpret_cast<uint16_t*>(col_data_bytes), cd, col_cv, cat);
2751  break;
2752  case 4:
2753  int_col_val = insert_one_dict_str(
2754  reinterpret_cast<int32_t*>(col_data_bytes), cd, col_cv, cat);
2755  break;
2756  default:
2757  CHECK(false);
2758  }
2759  break;
2760  }
2761  default:
2762  CHECK(false);
2763  }
2764  break;
2765  }
2766  case kTIME:
2767  case kTIMESTAMP:
2768  case kDATE: {
2769  auto col_data = reinterpret_cast<int64_t*>(col_data_bytes);
2770  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2771  : col_datum.bigintval;
2772  break;
2773  }
2774  case kARRAY: {
2775  const auto is_null = col_cv->get_is_null();
2776  const auto size = cd->columnType.get_size();
2777  const SQLTypeInfo elem_ti = cd->columnType.get_elem_type();
2778  if (is_null) {
2779  if (size > 0) {
2780  // NULL fixlen array: NULL_ARRAY sentinel followed by NULL sentinels
2781  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
2782  throw std::runtime_error("Column " + cd->columnName +
2783  " doesn't accept NULL values");
2784  }
2785  int8_t* buf = (int8_t*)checked_malloc(size);
2786  put_null_array(static_cast<void*>(buf), elem_ti, "");
2787  for (int8_t* p = buf + elem_ti.get_size(); (p - buf) < size;
2788  p += elem_ti.get_size()) {
2789  put_null(static_cast<void*>(p), elem_ti, "");
2790  }
2791  arr_col_buffers[col_ids[col_idx]].emplace_back(size, buf, is_null);
2792  } else {
2793  arr_col_buffers[col_ids[col_idx]].emplace_back(0, nullptr, is_null);
2794  }
2795  break;
2796  }
2797  const auto l = col_cv->get_value_list();
2798  size_t len = l.size() * elem_ti.get_size();
2799  if (size > 0 && static_cast<size_t>(size) != len) {
2800  throw std::runtime_error("Array column " + cd->columnName + " expects " +
2801  std::to_string(size / elem_ti.get_size()) +
2802  " values, " + "received " + std::to_string(l.size()));
2803  }
2804  if (elem_ti.is_string()) {
2805  CHECK(kENCODING_DICT == elem_ti.get_compression());
2806  CHECK(4 == elem_ti.get_size());
2807 
2808  int8_t* buf = (int8_t*)checked_malloc(len);
2809  int32_t* p = reinterpret_cast<int32_t*>(buf);
2810 
2811  int elemIndex = 0;
2812  for (auto& e : l) {
2813  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2814  CHECK(c);
2815 
2816  int_col_val =
2817  insert_one_dict_str(&p[elemIndex], cd->columnName, elem_ti, c.get(), cat);
2818 
2819  elemIndex++;
2820  }
2821  arr_col_buffers[col_ids[col_idx]].push_back(ArrayDatum(len, buf, is_null));
2822 
2823  } else {
2824  int8_t* buf = (int8_t*)checked_malloc(len);
2825  int8_t* p = buf;
2826  for (auto& e : l) {
2827  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2828  CHECK(c);
2829  p = Importer_NS::appendDatum(p, c->get_constval(), elem_ti);
2830  }
2831  arr_col_buffers[col_ids[col_idx]].push_back(ArrayDatum(len, buf, is_null));
2832  }
2833  break;
2834  }
2835  case kPOINT:
2836  case kLINESTRING:
2837  case kPOLYGON:
2838  case kMULTIPOLYGON:
2839  str_col_buffers[col_ids[col_idx]].push_back(
2840  col_datum.stringval ? *col_datum.stringval : "");
2841  break;
2842  default:
2843  CHECK(false);
2844  }
2845  ++col_idx;
2846  if (col_idx == static_cast<size_t>(table_descriptor->shardedColumnId)) {
2847  const auto shard_count = shard_tables.size();
2848  const size_t shard_idx = SHARD_FOR_KEY(int_col_val, shard_count);
2849  shard = shard_tables[shard_idx];
2850  }
2851  }
2852  for (const auto& kv : col_buffers) {
2853  insert_data.columnIds.push_back(kv.first);
2854  DataBlockPtr p;
2855  p.numbersPtr = reinterpret_cast<int8_t*>(kv.second.get());
2856  insert_data.data.push_back(p);
2857  }
2858  for (auto& kv : str_col_buffers) {
2859  insert_data.columnIds.push_back(kv.first);
2860  DataBlockPtr p;
2861  p.stringsPtr = &kv.second;
2862  insert_data.data.push_back(p);
2863  }
2864  for (auto& kv : arr_col_buffers) {
2865  insert_data.columnIds.push_back(kv.first);
2866  DataBlockPtr p;
2867  p.arraysPtr = &kv.second;
2868  insert_data.data.push_back(p);
2869  }
2870  insert_data.numRows = 1;
2871  if (shard) {
2872  shard->fragmenter->insertData(insert_data);
2873  } else {
2874  table_descriptor->fragmenter->insertData(insert_data);
2875  }
2876 }
const Plan * get_plan() const
Definition: Planner.h:289
#define CHECK_EQ(x, y)
Definition: Logger.h:201
int get_result_table_id() const
Definition: Planner.h:291
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:334
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:141
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:142
HOST DEVICE int get_size() const
Definition: sqltypes.h:336
Definition: sqldefs.h:49
const Catalog_Namespace::Catalog & getCatalog() const
Definition: Planner.h:293
std::string to_string(char const *&&v)
int tableId
identifies the table into which the data is being inserted
Definition: Fragmenter.h:61
size_t numRows
the number of rows being inserted
Definition: Fragmenter.h:63
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: Importer.cpp:232
CHECK(cgen_state)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
int64_t insert_one_dict_str(T *col_data, const std::string &columnName, const SQLTypeInfo &columnType, const Analyzer::Constant *col_cv, const Catalog_Namespace::Catalog &catalog)
Definition: Execute.cpp:2549
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:930
void put_null_array(void *ndptr, const SQLTypeInfo &ntype, const std::string col_name)
const std::list< int > & get_result_col_list() const
Definition: Planner.h:292
void put_null(void *ndptr, const SQLTypeInfo &ntype, const std::string col_name)
Definition: sqltypes.h:55
Definition: sqltypes.h:56
bool is_null(const T &v, const SQLTypeInfo &t)
std::vector< DataBlockPtr > data
the data blocks (one per column) for the row(s) being inserted
Definition: Fragmenter.h:64
Definition: sqltypes.h:44
bool is_string() const
Definition: sqltypes.h:477
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
Definition: sqltypes.h:48
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:659
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
specifies the content in-memory of a row in the table metadata table
int8_t * numbersPtr
Definition: sqltypes.h:140
std::vector< int > columnIds
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:62
#define SHARD_FOR_KEY(key, num_shards)
Definition: shard_key.h:20
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:122
const ColumnDescriptor * get_column_descriptor(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:141
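A hedged sketch of the Fragmenter_Namespace::InsertData payload this function assembles, reduced to a single row with one integer column; the identifiers are illustrative, and the real code also handles strings, arrays, geo types and sharding:

Fragmenter_Namespace::InsertData insert_data;
insert_data.databaseId = db_id;                     // illustrative ids
insert_data.tableId = table_id;
int32_t int_value = 42;                             // the single INT column value
insert_data.columnIds.push_back(col_id);
DataBlockPtr block;
block.numbersPtr = reinterpret_cast<int8_t*>(&int_value);
insert_data.data.push_back(block);
insert_data.numRows = 1;
// td->fragmenter->insertData(insert_data);         // hand off to the table's fragmenter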

+ Here is the call graph for this function:

ResultSetPtr Executor::executeTableFunction ( const TableFunctionExecutionUnit  exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat 
)
private

Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet that can be consumed by subsequent execution steps.

Definition at line 1356 of file Execute.cpp.

References CHECK_EQ, TableFunctionCompilationContext::compile(), CompilationOptions::device_type_, TableFunctionExecutionContext::execute(), and INJECT_TIMER.

1361  {
1362  INJECT_TIMER(Exec_executeTableFunction);
1363  nukeOldState(false, table_infos, nullptr);
1364 
1365  ColumnCacheMap column_cache; // Note: if we add retries to the table function
1366  // framework, we may want to move this up a level
1367 
1368  ColumnFetcher column_fetcher(this, column_cache);
1369  TableFunctionCompilationContext compilation_context;
1370  compilation_context.compile(exe_unit, co, this);
1371 
1373  CHECK_EQ(table_infos.size(), size_t(1));
1374  return exe_context.execute(exe_unit,
1375  table_infos.front(),
1376  &compilation_context,
1377  column_fetcher,
1378  co.device_type_,
1379  this);
1380 }
#define CHECK_EQ(x, y)
Definition: Logger.h:201
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:238
#define INJECT_TIMER(DESC)
Definition: measure.h:91
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:2878
ExecutorDeviceType device_type_
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
void compile(const TableFunctionExecutionUnit &exe_unit, const CompilationOptions &co, Executor *executor)

+ Here is the call graph for this function:

void Executor::executeUpdate ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const UpdateLogForFragment::Callback cb,
const bool  is_agg = false 
)
private

Definition at line 60 of file ExecuteUpdate.cpp.

References CHECK(), CHECK_EQ, CHECK_GT, Executor::ExecutionDispatch::compile(), CompilationOptions::device_type_, FragmentsPerTable::fragment_ids, Executor::ExecutionDispatch::getFragmentResults(), KernelPerFragment, and Executor::ExecutionDispatch::run().

67  {
68  CHECK(cb);
69  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in);
70  ColumnCacheMap column_cache;
71 
72  ColumnFetcher column_fetcher(this, column_cache);
73  CHECK_GT(ra_exe_unit.input_descs.size(), size_t(0));
74  const auto table_id = ra_exe_unit.input_descs[0].getTableId();
75  const auto& outer_fragments = table_infos.front().info.fragments;
76 
77  std::vector<FragmentsPerTable> fragments = {{0, {0}}};
78  for (size_t tab_idx = 1; tab_idx < ra_exe_unit.input_descs.size(); tab_idx++) {
79  int table_id = ra_exe_unit.input_descs[tab_idx].getTableId();
80  CHECK_EQ(table_infos[tab_idx].table_id, table_id);
81  const auto& fragmentsPerTable = table_infos[tab_idx].info.fragments;
82  FragmentsPerTable entry = {table_id, {}};
83  for (size_t innerFragId = 0; innerFragId < fragmentsPerTable.size(); innerFragId++) {
84  entry.fragment_ids.push_back(innerFragId);
85  }
86  fragments.push_back(entry);
87  }
88 
89  // There could be benefit to multithreading this once we see where the bottlenecks
90  // really are
91  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
92  ++fragment_index) {
93  ExecutionDispatch current_fragment_execution_dispatch(
94  this, ra_exe_unit, table_infos, cat, row_set_mem_owner, nullptr);
95 
96  const int64_t crt_fragment_tuple_count =
97  outer_fragments[fragment_index].getNumTuples();
98  int64_t max_groups_buffer_entry_guess = crt_fragment_tuple_count;
99  if (is_agg) {
100  max_groups_buffer_entry_guess =
101  std::min(2 * max_groups_buffer_entry_guess, static_cast<int64_t>(100'000'000));
102  }
103 
104  const auto execution_descriptors = current_fragment_execution_dispatch.compile(
105  max_groups_buffer_entry_guess, 8, co, eo, column_fetcher, true);
106  // We may want to consider in the future allowing this to execute on devices other
107  // than CPU
108 
109  fragments[0] = {table_id, {fragment_index}};
110 
111  current_fragment_execution_dispatch.run(
112  co.device_type_,
113  0,
114  eo,
115  column_fetcher,
116  *std::get<QueryCompilationDescriptorOwned>(execution_descriptors),
117  *std::get<QueryMemoryDescriptorOwned>(execution_descriptors),
118  fragments,
120  -1);
121  const auto& proj_fragment_results =
122  current_fragment_execution_dispatch.getFragmentResults();
123  if (proj_fragment_results.empty()) {
124  continue;
125  }
126  const auto& proj_fragment_result = proj_fragment_results[0];
127  const auto proj_result_set = proj_fragment_result.first;
128  CHECK(proj_result_set);
129  cb({outer_fragments[fragment_index], fragment_index, proj_result_set});
130  }
131 }
bool is_agg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:201
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3052
const std::vector< InputDescriptor > input_descs
#define CHECK_GT(x, y)
Definition: Logger.h:205
CHECK(cgen_state)
ExecutorDeviceType device_type_
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
std::vector< size_t > fragment_ids
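A hedged illustration of the FragmentsList shape dispatched on each iteration of the loop above: entry 0 names the single outer fragment being updated, and every additional entry selects all fragments of an inner table (the table and fragment ids below are made up):

std::vector<FragmentsPerTable> fragments = {
    {outer_table_id, {fragment_index}},   // one outer fragment per kernel
    {inner_table_id, {0, 1, 2}}};         // all fragments of a hypothetical inner table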

+ Here is the call graph for this function:

ResultSetPtr Executor::executeWorkUnit ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit_in,
const CompilationOptions co,
const ExecutionOptions options,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
RenderInfo render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache 
)
private

Definition at line 1119 of file Execute.cpp.

References CompilationRetryNewScanLimit::new_scan_limit_, and anonymous_namespace{Execute.cpp}::replace_scan_limit().

1130  {
1131  try {
1132  return executeWorkUnitImpl(max_groups_buffer_entry_guess,
1133  is_agg,
1134  true,
1135  query_infos,
1136  ra_exe_unit_in,
1137  co,
1138  eo,
1139  cat,
1140  row_set_mem_owner,
1141  render_info,
1142  has_cardinality_estimation,
1143  column_cache);
1144  } catch (const CompilationRetryNewScanLimit& e) {
1145  return executeWorkUnitImpl(max_groups_buffer_entry_guess,
1146  is_agg,
1147  false,
1148  query_infos,
1149  replace_scan_limit(ra_exe_unit_in, e.new_scan_limit_),
1150  co,
1151  eo,
1152  cat,
1153  row_set_mem_owner,
1154  render_info,
1155  has_cardinality_estimation,
1156  column_cache);
1157  }
1158 }
bool is_agg(const Analyzer::Expr *expr)
RelAlgExecutionUnit replace_scan_limit(const RelAlgExecutionUnit &ra_exe_unit_in, const size_t new_scan_limit)
Definition: Execute.cpp:1101
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1160
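The only logic here is the retry: when compilation throws CompilationRetryNewScanLimit, the work unit is re-run once with the scan limit discovered by the first attempt. A generic sketch of the pattern, where run_once is a hypothetical stand-in for executeWorkUnitImpl:

try {
  return run_once(ra_exe_unit_in);                                        // first attempt
} catch (const CompilationRetryNewScanLimit& e) {
  return run_once(replace_scan_limit(ra_exe_unit_in, e.new_scan_limit_)); // retry with new limit
}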

+ Here is the call graph for this function:

ResultSetPtr Executor::executeWorkUnitImpl ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit_in,
const CompilationOptions co,
const ExecutionOptions options,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
RenderInfo render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache 
)
private

Definition at line 1160 of file Execute.cpp.

References CHECK(), Executor::ExecutionDispatch::compile(), anonymous_namespace{Execute.cpp}::compute_buffer_entry_guess(), CPU, cpu_threads(), DEBUG_TIMER_NEW_THREAD, CompilationOptions::device_type_, CompilationOptions::explain_type_, Data_Namespace::DataMgr::freeAllBuffers(), get_available_gpus(), get_context_count(), get_min_byte_width(), Catalog_Namespace::Catalog::getDataMgr(), QueryExecutionError::getErrorCode(), Data_Namespace::DataMgr::getMemoryInfo(), GPU, Data_Namespace::GPU_LEVEL, CompilationOptions::hoist_literals_, INJECT_TIMER, interrupted_(), ExecutionOptions::just_explain, MAX_BYTE_WIDTH_SUPPORTED, CompilationOptions::opt_level_, query_mem_desc, CompilationOptions::register_intel_jit_listener_, Executor::ExecutionDispatch::run(), and CompilationOptions::with_dynamic_watchdog_.

1172  {
1173  INJECT_TIMER(Exec_executeWorkUnit);
1174  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in);
1175  const auto device_type = getDeviceTypeForTargets(ra_exe_unit, co.device_type_);
1176  CHECK(!query_infos.empty());
1177  if (!max_groups_buffer_entry_guess) {
1178  // The query has failed the first execution attempt because of running out
1179  // of group by slots. Make the conservative choice: allocate fragment size
1180  // slots and run on the CPU.
1181  CHECK(device_type == ExecutorDeviceType::CPU);
1182  max_groups_buffer_entry_guess = compute_buffer_entry_guess(query_infos);
1183  }
1184 
1185  int8_t crt_min_byte_width{get_min_byte_width()};
1186  do {
1187  ExecutionDispatch execution_dispatch(
1188  this, ra_exe_unit, query_infos, cat, row_set_mem_owner, render_info);
1189  ColumnFetcher column_fetcher(this, column_cache);
1190  std::unique_ptr<QueryCompilationDescriptor> query_comp_desc_owned;
1191  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
1192  try {
1193  INJECT_TIMER(execution_dispatch_comp);
1194  std::tie(query_comp_desc_owned, query_mem_desc_owned) =
1195  execution_dispatch.compile(max_groups_buffer_entry_guess,
1196  crt_min_byte_width,
1197  {device_type,
1198  co.hoist_literals_,
1199  co.opt_level_,
1201  co.explain_type_,
1203  eo,
1204  column_fetcher,
1205  has_cardinality_estimation);
1206  CHECK(query_comp_desc_owned);
1207  crt_min_byte_width = query_comp_desc_owned->getMinByteWidth();
1208  } catch (CompilationRetryNoCompaction&) {
1209  crt_min_byte_width = MAX_BYTE_WIDTH_SUPPORTED;
1210  continue;
1211  }
1212  if (eo.just_explain) {
1213  return executeExplain(*query_comp_desc_owned);
1214  }
1215 
1216  for (const auto target_expr : ra_exe_unit.target_exprs) {
1217  plan_state_->target_exprs_.push_back(target_expr);
1218  }
1219 
1220  auto dispatch = [&execution_dispatch,
1221  &column_fetcher,
1222  &eo,
1223  parent_thread_id = std::this_thread::get_id()](
1224  const ExecutorDeviceType chosen_device_type,
1225  int chosen_device_id,
1226  const QueryCompilationDescriptor& query_comp_desc,
1228  const FragmentsList& frag_list,
1229  const ExecutorDispatchMode kernel_dispatch_mode,
1230  const int64_t rowid_lookup_key) {
1231  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
1232  INJECT_TIMER(execution_dispatch_run);
1233  execution_dispatch.run(chosen_device_type,
1234  chosen_device_id,
1235  eo,
1236  column_fetcher,
1237  query_comp_desc,
1238  query_mem_desc,
1239  frag_list,
1240  kernel_dispatch_mode,
1241  rowid_lookup_key);
1242  };
1243 
1244  QueryFragmentDescriptor fragment_descriptor(
1245  ra_exe_unit,
1246  query_infos,
1247  query_comp_desc_owned->getDeviceType() == ExecutorDeviceType::GPU
1249  : std::vector<Data_Namespace::MemoryInfo>{},
1250  eo.gpu_input_mem_limit_percent);
1251 
1252  if (!eo.just_validate) {
1253  int available_cpus = cpu_threads();
1254  auto available_gpus = get_available_gpus(cat);
1255 
1256  const auto context_count =
1257  get_context_count(device_type, available_cpus, available_gpus.size());
1258  try {
1259  dispatchFragments(dispatch,
1260  execution_dispatch,
1261  query_infos,
1262  eo,
1263  is_agg,
1264  allow_single_frag_table_opt,
1265  context_count,
1266  *query_comp_desc_owned,
1267  *query_mem_desc_owned,
1268  fragment_descriptor,
1269  available_gpus,
1270  available_cpus);
1271  } catch (QueryExecutionError& e) {
1272  if (eo.with_dynamic_watchdog && interrupted_ &&
1273  e.getErrorCode() == ERR_OUT_OF_TIME) {
1275  }
1276  cat.getDataMgr().freeAllBuffers();
1278  static_cast<size_t>(crt_min_byte_width << 1) <= sizeof(int64_t)) {
1279  crt_min_byte_width <<= 1;
1280  continue;
1281  }
1282  throw;
1283  }
1284  }
1285  cat.getDataMgr().freeAllBuffers();
1286  if (is_agg) {
1287  try {
1288  return collectAllDeviceResults(execution_dispatch,
1289  ra_exe_unit.target_exprs,
1290  *query_mem_desc_owned,
1291  query_comp_desc_owned->getDeviceType(),
1292  row_set_mem_owner);
1293  } catch (ReductionRanOutOfSlots&) {
1295  } catch (OverflowOrUnderflow&) {
1296  crt_min_byte_width <<= 1;
1297  continue;
1298  }
1299  }
1300  return resultsUnion(execution_dispatch);
1301 
1302  } while (static_cast<size_t>(crt_min_byte_width) <= sizeof(int64_t));
1303 
1304  return std::make_shared<ResultSet>(std::vector<TargetInfo>{},
1307  nullptr,
1308  this);
1309 }
bool is_agg(const Analyzer::Expr *expr)
ResultSetPtr collectAllDeviceResults(ExecutionDispatch &execution_dispatch, const std::vector< Analyzer::Expr * > &target_exprs, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
Definition: Execute.cpp:1517
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:980
void dispatchFragments(const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)> dispatch, const ExecutionDispatch &execution_dispatch, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, QueryFragmentDescriptor &fragment_descriptor, std::unordered_set< int > &available_gpus, int &available_cpus)
Definition: Execute.cpp:1702
ExecutorDeviceType
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
std::unordered_set< int > get_available_gpus(const Catalog_Namespace::Catalog &cat)
Definition: Execute.cpp:957
const ExecutorOptLevel opt_level_
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3052
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:301
size_t compute_buffer_entry_guess(const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:981
std::vector< FragmentsPerTable > FragmentsList
int8_t get_min_byte_width()
ExecutorDispatchMode
CHECK(cgen_state)
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:969
#define INJECT_TIMER(DESC)
Definition: measure.h:91
friend class QueryMemoryDescriptor
Definition: Execute.h:993
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:977
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:929
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:979
const bool register_intel_jit_listener_
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel)
Definition: DataMgr.cpp:176
ResultSetPtr resultsUnion(ExecutionDispatch &execution_dispatch)
Definition: Execute.cpp:821
ExecutorDeviceType device_type_
const ExecutorExplainType explain_type_
static const int32_t ERR_OUT_OF_SLOTS
Definition: Execute.h:973
constexpr int8_t MAX_BYTE_WIDTH_SUPPORTED
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Definition: Execute.cpp:1388
const bool with_dynamic_watchdog_
int cpu_threads()
Definition: thread_count.h:25
bool interrupted_
Definition: Execute.h:938
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
Definition: Execute.cpp:1382
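A hedged sketch of the compaction-retry loop above: start from the minimum compact byte width for aggregate slots and double it whenever execution reports overflow/underflow (or compilation requests no compaction), up to the full 8-byte width; run_with_width is a hypothetical stand-in for one compile-and-dispatch pass:

int8_t crt_min_byte_width = get_min_byte_width();
do {
  try {
    return run_with_width(crt_min_byte_width);   // compile, dispatch, collect results
  } catch (const OverflowOrUnderflow&) {
    crt_min_byte_width <<= 1;                    // widen the aggregate slots and retry
  }
} while (static_cast<size_t>(crt_min_byte_width) <= sizeof(int64_t));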

+ Here is the call graph for this function:

void Executor::executeWorkUnitPerFragment ( const RelAlgExecutionUnit ra_exe_unit,
const InputTableInfo table_info,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
PerFragmentCallBack cb 
)
private

Compiles and dispatches a work unit per fragment, processing the results with the per-fragment callback. Currently used for computing metrics over fragments (metadata).

Definition at line 1311 of file Execute.cpp.

References CHECK_EQ, Executor::ExecutionDispatch::compile(), CompilationOptions::device_type_, Fragmenter_Namespace::TableInfo::fragments, InputTableInfo::info, and Executor::ExecutionDispatch::run().

1316  {
1317  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in);
1318  ColumnCacheMap column_cache;
1319 
1320  std::vector<InputTableInfo> table_infos{table_info};
1321  // TODO(adb): ensure this is under a try / catch
1322  ExecutionDispatch execution_dispatch(
1323  this, ra_exe_unit, table_infos, cat, row_set_mem_owner_, nullptr);
1324  ColumnFetcher column_fetcher(this, column_cache);
1325  std::unique_ptr<QueryCompilationDescriptor> query_comp_desc_owned;
1326  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
1327  std::tie(query_comp_desc_owned, query_mem_desc_owned) =
1328  execution_dispatch.compile(0, 8, co, eo, column_fetcher, false);
1329  CHECK_EQ(size_t(1), ra_exe_unit.input_descs.size());
1330  const auto table_id = ra_exe_unit.input_descs[0].getTableId();
1331  const auto& outer_fragments = table_info.info.fragments;
1332  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
1333  ++fragment_index) {
1334  // We may want to consider in the future allowing this to execute on devices other
1335  // than CPU
1336  execution_dispatch.run(co.device_type_,
1337  0,
1338  eo,
1339  column_fetcher,
1340  *query_comp_desc_owned,
1341  *query_mem_desc_owned,
1342  {{table_id, {fragment_index}}},
1344  -1);
1345  }
1346 
1347  const auto& all_fragment_results = execution_dispatch.getFragmentResults();
1348 
1349  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
1350  ++fragment_index) {
1351  const auto fragment_results = all_fragment_results[fragment_index];
1352  cb(fragment_results.first, outer_fragments[fragment_index]);
1353  }
1354 }
#define CHECK_EQ(x, y)
Definition: Logger.h:201
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3052
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:167
const std::vector< InputDescriptor > input_descs
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:930
ExecutorDeviceType device_type_
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
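A hedged sketch of a per-fragment callback as invoked from the loop above; the parameter types are inferred from the call site cb(fragment_results.first, outer_fragments[fragment_index]) and may not match the actual PerFragmentCallBack alias exactly:

auto cb = [](ResultSetPtr fragment_results,
             const Fragmenter_Namespace::FragmentInfo& fragment_info) {
  // e.g. derive refreshed chunk metadata for fragment_info from fragment_results
};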

+ Here is the call graph for this function:

Executor::FetchResult Executor::fetchChunks ( const ColumnFetcher column_fetcher,
const RelAlgExecutionUnit ra_exe_unit,
const int  device_id,
const Data_Namespace::MemoryLevel  memory_level,
const std::map< int, const TableFragments * > &  all_tables_fragments,
const FragmentsList selected_fragments,
const Catalog_Namespace::Catalog cat,
std::list< ChunkIter > &  chunk_iterators,
std::list< std::shared_ptr< Chunk_NS::Chunk >> &  chunks 
)
private

Definition at line 2024 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LT, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, ColumnFetcher::getAllTableColumnFragments(), ColumnFetcher::getOneTableColumnFragment(), ColumnFetcher::getResultSetColumn(), INJECT_TIMER, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, RESULT, and anonymous_namespace{Execute.cpp}::try_get_column_descriptor().

2033  {
2034  auto timer = DEBUG_TIMER(__func__);
2036  const auto& col_global_ids = ra_exe_unit.input_col_descs;
2037  std::vector<std::vector<size_t>> selected_fragments_crossjoin;
2038  std::vector<size_t> local_col_to_frag_pos;
2039  buildSelectedFragsMapping(selected_fragments_crossjoin,
2040  local_col_to_frag_pos,
2041  col_global_ids,
2042  selected_fragments,
2043  ra_exe_unit);
2044 
2046  selected_fragments_crossjoin);
2047 
2048  std::vector<std::vector<const int8_t*>> all_frag_col_buffers;
2049  std::vector<std::vector<int64_t>> all_num_rows;
2050  std::vector<std::vector<uint64_t>> all_frag_offsets;
2051 
2052  for (const auto& selected_frag_ids : frag_ids_crossjoin) {
2053  std::vector<const int8_t*> frag_col_buffers(
2054  plan_state_->global_to_local_col_ids_.size());
2055  for (const auto& col_id : col_global_ids) {
2056  CHECK(col_id);
2057  const int table_id = col_id->getScanDesc().getTableId();
2058  const auto cd = try_get_column_descriptor(col_id.get(), cat);
2059  if (cd && cd->isVirtualCol) {
2060  CHECK_EQ("rowid", cd->columnName);
2061  continue;
2062  }
2063  const auto fragments_it = all_tables_fragments.find(table_id);
2064  CHECK(fragments_it != all_tables_fragments.end());
2065  const auto fragments = fragments_it->second;
2066  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
2067  CHECK(it != plan_state_->global_to_local_col_ids_.end());
2068  CHECK_LT(static_cast<size_t>(it->second),
2069  plan_state_->global_to_local_col_ids_.size());
2070  const size_t frag_id = selected_frag_ids[local_col_to_frag_pos[it->second]];
2071  if (!fragments->size()) {
2072  return {};
2073  }
2074  CHECK_LT(frag_id, fragments->size());
2075  auto memory_level_for_column = memory_level;
2076  if (plan_state_->columns_to_fetch_.find(
2077  std::make_pair(col_id->getScanDesc().getTableId(), col_id->getColId())) ==
2078  plan_state_->columns_to_fetch_.end()) {
2079  memory_level_for_column = Data_Namespace::CPU_LEVEL;
2080  }
2081  if (col_id->getScanDesc().getSourceType() == InputSourceType::RESULT) {
2082  frag_col_buffers[it->second] = column_fetcher.getResultSetColumn(
2083  col_id.get(), memory_level_for_column, device_id);
2084  } else {
2085  if (needFetchAllFragments(*col_id, ra_exe_unit, selected_fragments)) {
2086  frag_col_buffers[it->second] =
2087  column_fetcher.getAllTableColumnFragments(table_id,
2088  col_id->getColId(),
2089  all_tables_fragments,
2090  memory_level_for_column,
2091  device_id);
2092  } else {
2093  frag_col_buffers[it->second] =
2094  column_fetcher.getOneTableColumnFragment(table_id,
2095  frag_id,
2096  col_id->getColId(),
2097  all_tables_fragments,
2098  chunks,
2099  chunk_iterators,
2100  memory_level_for_column,
2101  device_id);
2102  }
2103  }
2104  }
2105  all_frag_col_buffers.push_back(frag_col_buffers);
2106  }
2107  std::tie(all_num_rows, all_frag_offsets) = getRowCountAndOffsetForAllFrags(
2108  ra_exe_unit, frag_ids_crossjoin, ra_exe_unit.input_descs, all_tables_fragments);
2109  return {all_frag_col_buffers, all_num_rows, all_frag_offsets};
2110 }

+ Here is the call graph for this function:
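
The fetch loop above consumes frag_ids_crossjoin, a Cartesian product over the per-table fragment id lists assembled by buildSelectedFragsMapping(): each combination picks one fragment per input table, and local_col_to_frag_pos maps every input column back to its table's position in that combination. The following stand-alone sketch (plain C++ with std::vector standing in for the CartesianProduct helper; all names are illustrative, not OmniSciDB's) shows the enumeration order the loop relies on:

// Stand-alone sketch of the fragment cross join that drives the fetch loop.
// Fragment ids are plain std::size_t values; nothing here is OmniSciDB's API.
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

using FragList = std::vector<std::vector<std::size_t>>;

// Enumerate every combination that picks one fragment id per input table,
// in the same nesting order as selected_fragments_crossjoin.
std::vector<std::vector<std::size_t>> cross_join(const FragList& per_table_frag_ids) {
  std::vector<std::vector<std::size_t>> result{{}};
  for (const auto& frag_ids : per_table_frag_ids) {
    std::vector<std::vector<std::size_t>> next;
    for (const auto& partial : result) {
      for (const auto frag_id : frag_ids) {
        auto combo = partial;
        combo.push_back(frag_id);
        next.push_back(std::move(combo));
      }
    }
    result = std::move(next);
  }
  return result;
}

int main() {
  // Two tables: the outer one has fragments {0, 1}, the inner one only {0}.
  const FragList selected{{0, 1}, {0}};
  for (const auto& combo : cross_join(selected)) {
    std::cout << combo[0] << ", " << combo[1] << '\n';  // prints "0, 0" then "1, 0"
  }
  return 0;
}

Because several columns of the same table share one position in the combination, a single frag_id serves all of those columns within the same iteration of the loop.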

std::string Executor::generatePTX ( const std::string &  cuda_llir) const
private

Definition at line 899 of file NativeCodegen.cpp.

899  {
900  return CodeGenerator::generatePTX(
901  cuda_llir, nvptx_target_machine_.get(), cgen_state_.get());
902 }
const Catalog_Namespace::Catalog * Executor::getCatalog ( ) const

Definition at line 230 of file Execute.cpp.

References catalog_().

230  {
231  return catalog_;
232 }

+ Here is the call graph for this function:

std::vector< std::pair< void *, void * > > Executor::getCodeFromCache ( const CodeCacheKey key,
const CodeCache cache 
)
private

Definition at line 171 of file NativeCodegen.cpp.

References LruCache< key_t, value_t, hash_t >::cend(), LruCache< key_t, value_t, hash_t >::find(), and GpuCompilationContext::module().

172  {
173  auto it = cache.find(key);
174  if (it != cache.cend()) {
175  delete cgen_state_->module_;
176  cgen_state_->module_ = it->second.second;
177  std::vector<std::pair<void*, void*>> native_functions;
178  for (auto& native_code : it->second.first) {
179  GpuCompilationContext* gpu_context = std::get<2>(native_code).get();
180  native_functions.emplace_back(std::get<0>(native_code),
181  gpu_context ? gpu_context->module() : nullptr);
182  }
183  return native_functions;
184  }
185  return {};
186 }

+ Here is the call graph for this function:
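
The cache probe above either adopts the previously compiled module into cgen_state_ and returns the cached (CPU entry point, GPU context module) pairs, or returns an empty vector so the caller knows it must compile. A minimal sketch of that hit-or-miss contract, with std::unordered_map standing in for LruCache and void* for the opaque entry points (all names here are illustrative, not OmniSciDB's):

// Hit-or-miss probe of a code cache; std::unordered_map and void* are
// illustrative stand-ins for LruCache, CodeCacheKey and the compiled artifacts.
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using CacheKey = std::string;                                  // stand-in for CodeCacheKey
using NativeFunctions = std::vector<std::pair<void*, void*>>;  // (CPU fn, GPU module)

std::unordered_map<CacheKey, NativeFunctions> g_code_cache;    // stand-in for CodeCache

// Returns the cached entry points on a hit, or an empty vector on a miss,
// mirroring the control flow of Executor::getCodeFromCache().
NativeFunctions get_code_from_cache(const CacheKey& key) {
  const auto it = g_code_cache.find(key);
  if (it != g_code_cache.end()) {
    return it->second;  // hit: reuse previously generated code
  }
  return {};            // miss: caller compiles and then adds the result to the cache
}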

std::vector< ColumnLazyFetchInfo > Executor::getColLazyFetchInfo ( const std::vector< Analyzer::Expr * > &  target_exprs) const

Definition at line 284 of file Execute.cpp.

References catalog_(), CHECK(), ColumnDescriptor::columnType, get_column_descriptor(), IS_GEO, and kNULLT.

285  {
287  CHECK(catalog_);
288  std::vector<ColumnLazyFetchInfo> col_lazy_fetch_info;
289  for (const auto target_expr : target_exprs) {
290  if (!plan_state_->isLazyFetchColumn(target_expr)) {
291  col_lazy_fetch_info.emplace_back(
292  ColumnLazyFetchInfo{false, -1, SQLTypeInfo(kNULLT, false)});
293  } else {
294  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
295  CHECK(col_var);
296  auto col_id = col_var->get_column_id();
297  auto rte_idx = (col_var->get_rte_idx() == -1) ? 0 : col_var->get_rte_idx();
298  auto cd = (col_var->get_table_id() > 0)
299  ? get_column_descriptor(col_id, col_var->get_table_id(), *catalog_)
300  : nullptr;
301  if (cd && IS_GEO(cd->columnType.get_type())) {
302  // Geo coords cols will be processed in sequence. So we only need to track the
303  // first coords col in lazy fetch info.
304  {
305  auto cd0 =
306  get_column_descriptor(col_id + 1, col_var->get_table_id(), *catalog_);
307  auto col0_ti = cd0->columnType;
308  CHECK(!cd0->isVirtualCol);
309  auto col0_var = makeExpr<Analyzer::ColumnVar>(
310  col0_ti, col_var->get_table_id(), cd0->columnId, rte_idx);
311  auto local_col0_id = plan_state_->getLocalColumnId(col0_var.get(), false);
312  col_lazy_fetch_info.emplace_back(
313  ColumnLazyFetchInfo{true, local_col0_id, col0_ti});
314  }
315  } else {
316  auto local_col_id = plan_state_->getLocalColumnId(col_var, false);
317  const auto& col_ti = col_var->get_type_info();
318  col_lazy_fetch_info.emplace_back(ColumnLazyFetchInfo{true, local_col_id, col_ti});
319  }
320  }
321  }
322  return col_lazy_fetch_info;
323 }

+ Here is the call graph for this function:
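
Note that the result stays index-aligned with target_exprs: non-lazy targets still receive a placeholder entry ({false, -1, kNULLT}), while lazy targets record the local column id and type needed to materialize the column later; for geo columns only the first coords column is recorded, because its physical companions follow it in sequence. A simplified sketch of that per-target decision, with plain structs standing in for Analyzer::ColumnVar and ColumnLazyFetchInfo (all names are illustrative):

// Simplified per-target decision; TargetColumn and LazyFetchInfo are
// illustrative stand-ins for Analyzer::ColumnVar and ColumnLazyFetchInfo.
#include <vector>

struct TargetColumn {
  bool lazily_fetched;  // decided by the plan state in the real code
  int local_col_id;     // only meaningful when lazily_fetched is true
  int type_tag;         // stand-in for SQLTypeInfo
};

struct LazyFetchInfo {
  bool is_lazily_fetched;
  int local_col_id;
  int type_tag;
};

std::vector<LazyFetchInfo> collect_lazy_fetch_info(
    const std::vector<TargetColumn>& targets) {
  std::vector<LazyFetchInfo> result;
  result.reserve(targets.size());
  for (const auto& t : targets) {
    if (!t.lazily_fetched) {
      result.push_back({false, -1, /*null type*/ 0});  // placeholder keeps alignment
    } else {
      result.push_back({true, t.local_col_id, t.type_tag});
    }
  }
  return result;  // index-aligned with the input targets
}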

ExpressionRange Executor::getColRange ( const PhysicalInput phys_input) const

Definition at line 254 of file Execute.cpp.

254  {
255  return agg_col_range_cache_.getColRange(phys_input);
256 }
const ColumnDescriptor * Executor::getColumnDescriptor ( const Analyzer::ColumnVar col_var) const

Definition at line 213 of file Execute.cpp.

References catalog_(), get_column_descriptor_maybe(), Analyzer::ColumnVar::get_column_id(), and Analyzer::ColumnVar::get_table_id().

214  {
215  return get_column_descriptor_maybe(
216  col_var->get_column_id(), col_var->get_table_id(), *catalog_);
217 }

+ Here is the call graph for this function:

ExecutorDeviceType Executor::getDeviceTypeForTargets ( const RelAlgExecutionUnit ra_exe_unit,
const ExecutorDeviceType  requested_device_type 
)
private

Definition at line 1388 of file Execute.cpp.

References CPU, g_bigint_count, get_target_info(), RelAlgExecutionUnit::groupby_exprs, kAVG, kDOUBLE, kSUM, and RelAlgExecutionUnit::target_exprs.

1390  {
1391  for (const auto target_expr : ra_exe_unit.target_exprs) {
1392  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1393  if (!ra_exe_unit.groupby_exprs.empty() &&
1394  !isArchPascalOrLater(requested_device_type)) {
1395  if ((agg_info.agg_kind == kAVG || agg_info.agg_kind == kSUM) &&
1396  agg_info.agg_arg_type.get_type() == kDOUBLE) {
1397  return ExecutorDeviceType::CPU;
1398  }
1399  }
1400  if (dynamic_cast<const Analyzer::RegexpExpr*>(target_expr)) {
1401  return ExecutorDeviceType::CPU;
1402  }
1403  }
1404  return requested_device_type;
1405 }

+ Here is the call graph for this function:
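
In short, the requested device is downgraded to CPU in two cases: a grouped SUM or AVG over DOUBLE when the GPU is older than Pascal (consistent with native double-precision atomicAdd requiring compute capability 6.0 or later), and any REGEXP target expression, which is forced onto the CPU. A self-contained sketch of the same decision, with small enums and flags standing in for the real target info and isArchPascalOrLater() (names are illustrative):

// Stand-alone sketch of the device fallback rule; enums and flags are
// illustrative stand-ins for TargetInfo / isArchPascalOrLater().
#include <vector>

enum class Device { CPU, GPU };
enum class Agg { kSUM, kAVG, kCOUNT, kMIN, kMAX, kNONE };

struct Target {
  Agg agg;
  bool arg_is_double;
  bool is_regexp;
};

Device device_for_targets(const std::vector<Target>& targets,
                          const bool has_group_by,
                          const bool pascal_or_later,
                          const Device requested) {
  for (const auto& t : targets) {
    if (has_group_by && !pascal_or_later &&
        (t.agg == Agg::kAVG || t.agg == Agg::kSUM) && t.arg_is_double) {
      return Device::CPU;  // grouped double SUM/AVG punts to CPU on pre-Pascal devices
    }
    if (t.is_regexp) {
      return Device::CPU;  // REGEXP targets are evaluated on the CPU
    }
  }
  return requested;
}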

std::shared_ptr< Executor > Executor::getExecutor ( const int  db_id,
const std::string &  debug_dir = "",
const std::string &  debug_file = "",
const MapDParameters  mapd_parameters = MapDParameters() 
)
static

Definition at line 127 of file Execute.cpp.

References CHECK(), MapDParameters::cuda_block_size, MapDParameters::cuda_grid_size, and INJECT_TIMER.

Referenced by Catalog_Namespace::Catalog::checkDateInDaysColumnMigration(), MapDHandler::execute_rel_alg(), MapDHandler::execute_rel_alg_df(), MapDHandler::execute_root_plan(), Parser::getResultSet(), MapDHandler::interrupt(), Parser::InsertIntoTableAsSelectStmt::populateData(), QueryRunner::anonymous_namespace{QueryRunner.cpp}::run_select_query_with_filter_push_down(), QueryRunner::QueryRunner::runSelectQuery(), QueryRunner::QueryRunner::runSQL(), MapDHandler::sql_execute_impl(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumns().

130  {
131  INJECT_TIMER(getExecutor);
132  const auto executor_key = db_id;
133  {
134  mapd_shared_lock<mapd_shared_mutex> read_lock(executors_cache_mutex_);
135  auto it = executors_.find(executor_key);
136  if (it != executors_.end()) {
137  return it->second;
138  }
139  }
140  {
141  mapd_unique_lock<mapd_shared_mutex> write_lock(executors_cache_mutex_);
142  auto it = executors_.find(executor_key);
143  if (it != executors_.end()) {
144  return it->second;
145  }
146  auto executor = std::make_shared<Executor>(db_id,
147  mapd_parameters.cuda_block_size,
148  mapd_parameters.cuda_grid_size,
149  debug_dir,
150  debug_file);
151  auto it_ok = executors_.insert(std::make_pair(executor_key, executor));
152  CHECK(it_ok.second);
153  return executor;
154  }
155 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
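
getExecutor() keeps one Executor per database id in a process-wide map guarded by a shared mutex: a shared (read) lock serves the common hit path, and only a miss takes the unique (write) lock, re-checks, and constructs the instance. The same double-checked pattern in a stand-alone sketch using std::shared_mutex (Engine, g_engines and get_engine are illustrative stand-ins, not OmniSciDB names):

// Stand-alone sketch of a per-database singleton cache with a shared mutex.
#include <map>
#include <memory>
#include <mutex>
#include <shared_mutex>

struct Engine {
  explicit Engine(const int db) : db_id(db) {}
  int db_id;
};

std::map<int, std::shared_ptr<Engine>> g_engines;
std::shared_mutex g_engines_mutex;

std::shared_ptr<Engine> get_engine(const int db_id) {
  {
    std::shared_lock<std::shared_mutex> read_lock(g_engines_mutex);
    const auto it = g_engines.find(db_id);
    if (it != g_engines.end()) {
      return it->second;  // fast path: instance already constructed
    }
  }
  std::unique_lock<std::shared_mutex> write_lock(g_engines_mutex);
  const auto it = g_engines.find(db_id);  // re-check: another thread may have won
  if (it != g_engines.end()) {
    return it->second;
  }
  auto engine = std::make_shared<Engine>(db_id);
  g_engines.emplace(db_id, engine);
  return engine;
}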

std::vector< size_t > Executor::getFragmentCount ( const FragmentsList selected_fragments,
const size_t  scan_idx,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 2112 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs, and RelAlgExecutionUnit::join_quals.

2114  {
2115  if ((ra_exe_unit.input_descs.size() > size_t(2) || !ra_exe_unit.join_quals.empty()) &&
2116  scan_idx > 0 &&
2117  !plan_state_->join_info_.sharded_range_table_indices_.count(scan_idx) &&
2118  !selected_fragments[scan_idx].fragment_ids.empty()) {
2119  // Fetch all fragments
2120  return {size_t(0)};
2121  }
2122 
2123  return selected_fragments[scan_idx].fragment_ids;
2124 }
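
For the inner side of a join (scan_idx > 0) on a table that is not among the sharded range-table indices, the method collapses the selection to the single sentinel {0}; combined with needFetchAllFragments()/getAllTableColumnFragments() in fetchChunks() above, the whole inner table is then fetched as one unit and contributes only one entry to the fragment cross join. A small stand-alone sketch of the rule (simplified flags instead of the real execution unit and plan state):

// Simplified version of the rule; the flags replace the checks against
// ra_exe_unit and plan_state_ in the real code.
#include <cstddef>
#include <vector>

std::vector<std::size_t> fragments_for_scan(
    const std::vector<std::size_t>& selected_frag_ids,
    const std::size_t scan_idx,
    const bool is_join,
    const bool table_is_sharded) {
  if (is_join && scan_idx > 0 && !table_is_sharded && !selected_frag_ids.empty()) {
    return {0};  // sentinel: the whole inner table is fetched as one unit
  }
  return selected_frag_ids;  // otherwise iterate the selected fragments one by one
}
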
std::unordered_map< int, const Analyzer::BinOper * > Executor::getInnerTabIdToJoinCond ( ) const
private

Definition at line 1676 of file Execute.cpp.

References CHECK_EQ.

1677  {
1678  std::unordered_map<int, const Analyzer::BinOper*> id_to_cond;
1679  const auto& join_info = plan_state_->join_info_;
1680  CHECK_EQ(join_info.equi_join_tautologies_.size(), join_info.join_hash_tables_.size());
1681  for (size_t i = 0; i < join_info.join_hash_tables_.size(); ++i) {
1682  int inner_table_id = join_info.join_hash_tables_[i]->getInnerTableId();
1683  id_to_cond.insert(
1684  std::make_pair(inner_table_id, join_info.equi_join_tautologies_[i].get()));
1685  }
1686  return id_to_cond;
1687 }
std::vector< int64_t > Executor::getJoinHashTablePtrs ( const ExecutorDeviceType  device_type,
const int  device_id 
)
private

Definition at line 2531 of file Execute.cpp.

References CHECK(), GPU, and join_hash_tables.

2532  {
2533  std::vector<int64_t> table_ptrs;
2534  const auto& join_hash_tables = plan_state_->join_info_.join_hash_tables_;
2535  for (auto hash_table : join_hash_tables) {
2536  if (!hash_table) {
2537  CHECK(table_ptrs.empty());
2538  return {};
2539  }
2540  table_ptrs.push_back(hash_table->getJoinHashBuffer(
2541  device_type, device_type == ExecutorDeviceType::GPU ? device_id : 0));
2542  }
2543  return table_ptrs;
2544 }

+ Here is the call graph for this function:
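
One buffer pointer is collected per join nesting level, and if any level lacks a hash table the whole vector is discarded, so the generated code either sees every hash table or none at all. A simplified all-or-nothing collection sketch (HashTable is an illustrative stand-in; the real code asks each table for its buffer on the given device via getJoinHashBuffer()):

// All-or-nothing collection of per-level hash table buffers; HashTable is an
// illustrative stand-in for the real join hash table interface.
#include <cstdint>
#include <memory>
#include <vector>

struct HashTable {
  int64_t device_buffer;  // stand-in for getJoinHashBuffer(device_type, device_id)
};

std::vector<int64_t> join_hash_table_ptrs(
    const std::vector<std::shared_ptr<HashTable>>& per_level_tables) {
  std::vector<int64_t> ptrs;
  for (const auto& ht : per_level_tables) {
    if (!ht) {
      return {};  // one missing table disables hash joins for the whole unit
    }
    ptrs.push_back(ht->device_buffer);
  }
  return ptrs;
}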

size_t Executor::getNumBytesForFetchedRow ( ) const

Definition at line 258 of file Execute.cpp.

References catalog_(), kENCODING_DICT, and kTEXT.

258  {
259  size_t num_bytes = 0;
260  if (!plan_state_) {
261  return 0;
262  }
263  for (const auto& fetched_col_pair : plan_state_->columns_to_fetch_) {
264  if (fetched_col_pair.first < 0) {
265  num_bytes += 8;
266  } else {
267  const auto cd =
268  catalog_->getMetadataForColumn(fetched_col_pair.first, fetched_col_pair.second);
269  const auto& ti = cd->columnType;
270  const auto sz = ti.get_type() == kTEXT && ti.get_compression() == kENCODING_DICT
271  ? 4
272  : ti.get_size();
273  if (sz < 0) {
274  // for varlen types, only account for the pointer/size for each row, for now
275  num_bytes += 16;
276  } else {
277  num_bytes += sz;
278  }
279  }
280  }
281  return num_bytes;
282 }

+ Here is the call graph for this function:
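
The estimate adds a flat 8 bytes for inputs with a negative table id, 4 bytes for dictionary-encoded TEXT columns, the declared size for other fixed-width types, and a flat 16 bytes (pointer plus length) for variable-length columns. For example, fetching a dictionary-encoded TEXT column, a BIGINT column, and a none-encoded TEXT column would be estimated at 4 + 8 + 16 = 28 bytes per row. A stand-alone sketch of the same accounting (FetchedColumn is an illustrative stand-in for the catalog and column-type lookups):

// Stand-alone sketch of the per-row byte accounting; FetchedColumn is an
// illustrative stand-in for the catalog/column metadata lookups.
#include <cstddef>
#include <vector>

struct FetchedColumn {
  bool is_synthetic;          // negative table id in the real code
  bool is_dict_encoded_text;  // kTEXT with kENCODING_DICT
  int declared_size;          // size in bytes; negative means variable-length
};

std::size_t bytes_per_fetched_row(const std::vector<FetchedColumn>& cols) {
  std::size_t num_bytes = 0;
  for (const auto& col : cols) {
    if (col.is_synthetic) {
      num_bytes += 8;
    } else if (col.is_dict_encoded_text) {
      num_bytes += 4;   // the dictionary id, not the string itself
    } else if (col.declared_size < 0) {
      num_bytes += 16;  // varlen: pointer plus length only, for now
    } else {
      num_bytes += col.declared_size;
    }
  }
  return num_bytes;
}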

const ColumnDescriptor * Executor::getPhysicalColumnDescriptor ( const Analyzer::ColumnVar col_var,
int  n 
) const

Definition at line 219 of file Execute.cpp.

References catalog_(), get_column_descriptor_maybe(), Analyzer::ColumnVar::get_column_id(), and Analyzer::ColumnVar::get_table_id().

221  {
222  const auto cd = getColumnDescriptor(col_var);
223  if (!cd || n > cd->columnType.get_physical_cols()) {
224  return nullptr;
225  }
226  return get_column_descriptor_maybe(
227  col_var->get_column_id() + n, col_var->get_table_id(), *catalog_);
228 }

+ Here is the call graph for this function:

std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > Executor::getRowCountAndOffsetForAllFrags ( const RelAlgExecutionUnit ra_exe_unit,
const CartesianProduct< std::vector< std::vector< size_t >>> &  frag_ids_crossjoin,
const std::vector< InputDescriptor > &  input_descs,
const std::map< int, const TableFragments * > &  all_tables_fragments