OmniSciDB  7bf56492aa
Executor Class Reference

#include <Execute.h>


Classes

struct  CompilationResult
 
class  ExecutionDispatch
 
class  FetchCacheAnchor
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Member Functions

 Executor (const int db_id, const size_t block_size_x, const size_t grid_size_x, const std::string &debug_dir, const std::string &debug_file)
 
StringDictionaryProxy * getStringDictionaryProxy (const int dictId, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
const Catalog_Namespace::Catalog * getCatalog () const
 
void setCatalog (const Catalog_Namespace::Catalog *catalog)
 
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner () const
 
const TemporaryTables * getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const int table_id) const
 
const TableGeneration & getTableGeneration (const int table_id) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow () const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void registerActiveModule (void *module, const int device_id) const
 
void unregisterActiveModule (void *module, const int device_id) const
 
void interrupt (std::string query_session="", std::string interrupt_session="")
 
void resetInterrupt ()
 
int8_t warpSize () const
 
unsigned gridSize () const
 
unsigned blockSize () const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
 
void setCurrentQuerySession (const std::string &query_session)
 
std::string & getCurrentQuerySession ()
 
bool checkCurrentQuerySession (const std::string &candidate_query_session)
 
void invalidateQuerySession ()
 
bool addToQuerySessionList (const std::string &query_session)
 
bool removeFromQuerySessionList (const std::string &query_session)
 
void setQuerySessionAsInterrupted (const std::string &query_session)
 
bool checkIsQuerySessionInterrupted (const std::string &query_session)
 
template<typename THREAD_POOL >
void dispatchFragments (const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)> dispatch, const ExecutionDispatch &execution_dispatch, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, QueryFragmentDescriptor &fragment_descriptor, std::unordered_set< int > &available_gpus, int &available_cpus)
 

Static Public Member Functions

static std::shared_ptr< Executor > getExecutor (const int db_id, const std::string &debug_dir="", const std::string &debug_file="", const MapDParameters mapd_parameters=MapDParameters())
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static std::pair< int64_t, int32_t > reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void addCodeToCache (const CodeCacheKey &, std::vector< std::tuple< void *, ExecutionEngineWrapper >>, llvm::Module *, CodeCache &)
 

Static Public Attributes

static const size_t high_scan_limit
 
static const int32_t ERR_DIV_BY_ZERO {1}
 
static const int32_t ERR_OUT_OF_GPU_MEM {2}
 
static const int32_t ERR_OUT_OF_SLOTS {3}
 
static const int32_t ERR_UNSUPPORTED_SELF_JOIN {4}
 
static const int32_t ERR_OUT_OF_RENDER_MEM {5}
 
static const int32_t ERR_OUT_OF_CPU_MEM {6}
 
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW {7}
 
static const int32_t ERR_SPECULATIVE_TOP_OOM {8}
 
static const int32_t ERR_OUT_OF_TIME {9}
 
static const int32_t ERR_INTERRUPTED {10}
 
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11}
 
static const int32_t ERR_TOO_MANY_LITERALS {12}
 
static const int32_t ERR_STRING_CONST_IN_RESULTSET {13}
 
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14}
 
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES {15}
 

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCount (const ExecutorDeviceType) const
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenWindowFunctionAggregate (const CompilationOptions &co)
 
llvm::BasicBlock * codegenWindowResetStateControlFlow ()
 
void codegenWindowFunctionStateInit (llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
void codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
 
llvm::Value * codegenAggregateWindowState ()
 
llvm::Value * aggregateWindowStatePtr ()
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
void executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb)
 Compiles and dispatches a work unit once per fragment, processing the results with the per-fragment callback. Currently used for computing metrics over fragments (metadata); see the callback sketch under Member Typedef Documentation below. More...
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
 Compiles and dispatches a table function; that is, a function that takes one or more columns as input and returns a ResultSet that can be consumed by subsequent execution steps. More...
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (ExecutionDispatch &execution_dispatch, const std::vector< Analyzer::Expr * > &target_exprs, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (ExecutionDispatch &execution_dispatch) const
 
std::unordered_map< int, const Analyzer::BinOper * > getInnerTabIdToJoinCond () const
 
template<typename THREAD_POOL >
void dispatchFragments (const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)> dispatch, const ExecutionDispatch &execution_dispatch, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, QueryFragmentDescriptor &fragment_descriptor, std::unordered_set< int > &available_gpus, int &available_cpus)
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &)
 
std::pair< std::vector< std::vector< int64_t >>, std::vector< std::vector< uint64_t >>> getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
 
ResultSetPtr resultsUnion (ExecutionDispatch &execution_dispatch)
 
std::vector< int64_t > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
std::tuple< Executor::CompilationResult, std::unique_ptr< QueryMemoryDescriptor >> compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoopbuildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< JoinHashTableInterface > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, ExecutorDeviceType device_type)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinHashTableInterface::HashType preferred_hash_type, ColumnCacheMap &column_cache)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit *ra_exe_unit)
 
std::vector< std::pair< void *, void * > > optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::vector< std::pair< void *, void * > > optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, GroupByAndAggregate::DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *val)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
RelAlgExecutionUnit addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (std::unordered_set< int > phys_table_ids)
 
std::vector< std::pair< void *, void * > > getCodeFromCache (const CodeCacheKey &, const CodeCache &)
 
void addCodeToCache (const CodeCacheKey &, const std::vector< std::tuple< void *, GpuCompilationContext * >> &, llvm::Module *, CodeCache &)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

std::unique_ptr< CgenState > cgen_state_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::mutex gpu_active_modules_mutex_
 
uint32_t gpu_active_modules_device_mask_
 
void * gpu_active_modules_ [max_gpu_count]
 
bool interrupted_
 
std::shared_ptr< StringDictionaryProxy > lit_str_dict_proxy_
 
std::mutex str_dict_mutex_
 
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
 
CodeCache cpu_code_cache_
 
CodeCache gpu_code_cache_
 
const unsigned block_size_x_
 
const unsigned grid_size_x_
 
const std::string debug_dir_
 
const std::string debug_file_
 
const int db_id_
 
const Catalog_Namespace::Catalog * catalog_
 
const TemporaryTables * temporary_tables_
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
StringDictionaryGenerations string_dictionary_generations_
 
TableGenerations table_generations_
 

Static Private Attributes

static const int max_gpu_count {16}
 
static const size_t baseline_threshold
 
static const size_t code_cache_size {10000}
 
static std::mutex executor_session_mutex_
 
static std::string current_query_session_ {""}
 
static std::map< std::string, bool > queries_interrupt_flag_
 
static std::map< int, std::shared_ptr< Executor > > executors_
 
static std::atomic_flag execute_spin_lock_ = ATOMIC_FLAG_INIT
 
static std::mutex execute_mutex_
 
static mapd_shared_mutex executors_cache_mutex_
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
class OverlapsJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class JoinHashTable
 
class LeafAggregator
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 

Detailed Description

Definition at line 338 of file Execute.h.

Member Typedef Documentation

using Executor::PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
private

Definition at line 559 of file Execute.h.
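
A minimal sketch of a callable matching this signature, as it would be passed to executeWorkUnitPerFragment. The lambda body, including the rowCount() and fragmentId accessors, is an illustrative assumption rather than something documented on this page:

    // Hypothetical per-fragment callback: log how many rows each fragment produced.
    auto per_fragment_cb = [](ResultSetPtr result,
                              const Fragmenter_Namespace::FragmentInfo& fragment_info) {
      const size_t row_count = result ? result->rowCount() : size_t(0);
      VLOG(1) << "fragment " << fragment_info.fragmentId << " produced "
              << row_count << " rows";
    };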

Constructor & Destructor Documentation

Executor::Executor ( const int  db_id,
const size_t  block_size_x,
const size_t  grid_size_x,
const std::string &  debug_dir,
const std::string &  debug_file 
)

Definition at line 111 of file Execute.cpp.

116  : cgen_state_(new CgenState({}, false))

Member Function Documentation

static void Executor::addCodeToCache ( const CodeCacheKey & ,
std::vector< std::tuple< void *, ExecutionEngineWrapper >>  ,
llvm::Module *  ,
CodeCache &  
)
static

Referenced by StubGenerator::generateStub().


void Executor::addCodeToCache ( const CodeCacheKey & ,
const std::vector< std::tuple< void *, GpuCompilationContext * >> &  ,
llvm::Module *  ,
CodeCache &  
)
private
RelAlgExecutionUnit Executor::addDeletedColumn ( const RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co 
)
private

Definition at line 2679 of file Execute.cpp.

References CompilationOptions::add_delete_column, catalog_(), CHECK(), and TABLE.

2680  {
2681  if (!co.add_delete_column) {
2682  return ra_exe_unit;
2683  }
2684  auto ra_exe_unit_with_deleted = ra_exe_unit;
2685  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
2686  if (input_table.getSourceType() != InputSourceType::TABLE) {
2687  continue;
2688  }
2689  const auto td = catalog_->getMetadataForTable(input_table.getTableId());
2690  CHECK(td);
2691  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
2692  if (!deleted_cd) {
2693  continue;
2694  }
2695  CHECK(deleted_cd->columnType.is_boolean());
2696  // check deleted column is not already present
2697  bool found = false;
2698  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
2699  if (input_col.get()->getColId() == deleted_cd->columnId &&
2700  input_col.get()->getScanDesc().getTableId() == deleted_cd->tableId &&
2701  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
2702  found = true;
2703  }
2704  }
2705  if (!found) {
2706  // add deleted column
2707  ra_exe_unit_with_deleted.input_col_descs.emplace_back(new InputColDescriptor(
2708  deleted_cd->columnId, deleted_cd->tableId, input_table.getNestLevel()));
2709  }
2710  }
2711  return ra_exe_unit_with_deleted;
2712 }

llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 449 of file IRCodegen.cpp.

References CodeGenerator::cgen_state_, CHECK(), and CgenState::scan_idx_to_hash_pos_.

450  {
451  // Iterators are added for loop-outer joins when the head of the loop is generated,
 452  // then once again when the body is generated. Allow this instead of special handling
 453  // of call sites.
454  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
455  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
456  return it->second;
457  }
458  CHECK(!prev_iters.empty());
459  llvm::Value* matching_row_index = prev_iters.back();
460  const auto it_ok =
461  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
462  CHECK(it_ok.second);
463  return matching_row_index;
464 }

bool Executor::addToQuerySessionList ( const std::string &  query_session)

Definition at line 2982 of file Execute.cpp.

References CHECK().

2982  {
2983  std::lock_guard<std::mutex> session_access_lock(executor_session_mutex_);
2984  auto emplace_query = queries_interrupt_flag_.emplace(query_session, false);
2985  if (!emplace_query.second) {
2986  // initialize the existing session's interrupt flag
2987  auto it = queries_interrupt_flag_.find(query_session);
2988  CHECK(it != queries_interrupt_flag_.end());
2989  it->second = false;
2990  }
2991  return emplace_query.second;
2992 }
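Together with setQuerySessionAsInterrupted and checkIsQuerySessionInterrupted above, the per-session interrupt bookkeeping is meant to be used roughly as follows. The session id, the surrounding control flow, and obtaining the executor via getExecutor are illustrative assumptions:

    // Obtain an executor and register the session before execution starts.
    auto executor = Executor::getExecutor(db_id);  // db_id assumed to be known
    executor->addToQuerySessionList("session-1234");
    // From a thread handling an interrupt request for that session:
    executor->setQuerySessionAsInterrupted("session-1234");
    // Worker code can then poll the flag and stop dispatching work:
    if (executor->checkIsQuerySessionInterrupted("session-1234")) {
      // abort further fragment dispatch for this session
    }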

llvm::Value * Executor::aggregateWindowStatePtr ( )
private

Definition at line 122 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.

122  {
123  const auto window_func_context =
125  const auto window_func = window_func_context->getWindowFunction();
126  const auto arg_ti = get_adjusted_window_type_info(window_func);
127  llvm::Type* aggregate_state_type =
128  arg_ti.get_type() == kFLOAT
129  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
130  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
131  const auto aggregate_state_i64 = cgen_state_->llInt(
132  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
133  return cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_i64,
134  aggregate_state_type);
135 }

static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inlinestaticprivate

Definition at line 909 of file Execute.h.

909  {
910  size_t off = off_in;
911  if (off % alignment != 0) {
912  off += (alignment - off % alignment);
913  }
914  return off;
915  }
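
For illustration, the helper rounds an offset up to the next multiple of the alignment: align(13, 8) returns 16, while an already aligned offset such as align(16, 8) is returned unchanged.
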
unsigned Executor::blockSize ( ) const

Definition at line 2613 of file Execute.cpp.

References block_size_x_(), catalog_(), and CHECK().

2613  {
2614  CHECK(catalog_);
2615  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
2616  CHECK(cuda_mgr);
2617  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
2618  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
2619 }
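In other words, an explicit block_size_x supplied at construction takes precedence; an Executor constructed with block_size_x set to 0 falls back to the device's maxThreadsPerBlock.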

std::shared_ptr< JoinHashTableInterface > Executor::buildCurrentLevelHashTable ( const JoinCondition &  current_level_join_conditions,
RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 403 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, INNER, IS_EQUIVALENCE, PlanState::join_info_, JoinHashTableInterface::OneToOne, CodeGenerator::plan_state_, JoinCondition::quals, and JoinCondition::type.

409  {
410  if (current_level_join_conditions.type != JoinType::INNER &&
411  current_level_join_conditions.quals.size() > 1) {
412  fail_reasons.emplace_back("No equijoin expression found for outer join");
413  return nullptr;
414  }
415  std::shared_ptr<JoinHashTableInterface> current_level_hash_table;
416  for (const auto& join_qual : current_level_join_conditions.quals) {
417  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
418  if (!qual_bin_oper || !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
419  fail_reasons.emplace_back("No equijoin expression found");
420  if (current_level_join_conditions.type == JoinType::INNER) {
421  add_qualifier_to_execution_unit(ra_exe_unit, join_qual);
422  }
423  continue;
424  }
425  JoinHashTableOrError hash_table_or_error;
426  if (!current_level_hash_table) {
427  hash_table_or_error = buildHashTableForQualifier(
428  qual_bin_oper,
429  query_infos,
433  column_cache);
434  current_level_hash_table = hash_table_or_error.hash_table;
435  }
436  if (hash_table_or_error.hash_table) {
437  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
438  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
439  } else {
440  fail_reasons.push_back(hash_table_or_error.fail_reason);
441  if (current_level_join_conditions.type == JoinType::INNER) {
442  add_qualifier_to_execution_unit(ra_exe_unit, qual_bin_oper);
443  }
444  }
445  }
446  return current_level_hash_table;
447 }

Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinHashTableInterface::HashType  preferred_hash_type,
ColumnCacheMap &  column_cache 
)
private

Definition at line 2567 of file Execute.cpp.

References g_enable_dynamic_watchdog, g_enable_overlaps_hashjoin, g_enable_runtime_query_interrupt, JoinHashTableInterface::getInstance(), and interrupted_().

2572  {
2573  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
2574  return {nullptr, "Overlaps hash join disabled, attempting to fall back to loop join"};
2575  }
2576  // check whether the interrupt flag turns on (non kernel-time query interrupt)
2578  resetInterrupt();
2580  }
2581  try {
2582  auto tbl =
2584  query_infos,
2585  memory_level,
2586  preferred_hash_type,
2587  deviceCountForMemoryLevel(memory_level),
2588  column_cache,
2589  this);
2590  return {tbl, ""};
2591  } catch (const HashJoinFail& e) {
2592  return {nullptr, e.what()};
2593  }
2594 }

std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  level_idx,
const CompilationOptions &  co 
)
private

Definition at line 344 of file IRCodegen.cpp.

References CompilationOptions::add_delete_column, catalog_(), CodeGenerator::cgen_state_, CHECK(), CHECK_LT, CodeGenerator::codegen(), CgenState::context_, RelAlgExecutionUnit::input_descs, CgenState::ir_builder_, CgenState::llBool(), CgenState::llInt(), CgenState::row_func_, TABLE, and CodeGenerator::toBool().

346  {
347  if (!co.add_delete_column) {
348  return nullptr;
349  }
350  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
351  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
352  if (input_desc.getSourceType() != InputSourceType::TABLE) {
353  return nullptr;
354  }
355  const auto td = catalog_->getMetadataForTable(input_desc.getTableId());
356  CHECK(td);
357  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
358  if (!deleted_cd) {
359  return nullptr;
360  }
361  CHECK(deleted_cd->columnType.is_boolean());
362  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
363  input_desc.getTableId(),
364  deleted_cd->columnId,
365  input_desc.getNestLevel());
366  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
367  llvm::Value* have_more_inner_rows) {
368  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
369  // Avoid fetching the deleted column from a position which is not valid.
370  // An invalid position can be returned by a one to one hash lookup (negative)
371  // or at the end of iteration over a set of matching values.
372  llvm::Value* is_valid_it{nullptr};
373  if (have_more_inner_rows) {
374  is_valid_it = have_more_inner_rows;
375  } else {
376  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
377  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
378  }
379  const auto it_valid_bb = llvm::BasicBlock::Create(
380  cgen_state_->context_, "it_valid", cgen_state_->row_func_);
381  const auto it_not_valid_bb = llvm::BasicBlock::Create(
382  cgen_state_->context_, "it_not_valid", cgen_state_->row_func_);
383  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
384  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
385  cgen_state_->context_, "row_is_deleted", cgen_state_->row_func_);
386  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
387  CodeGenerator code_generator(this);
388  const auto row_is_deleted = code_generator.toBool(
389  code_generator.codegen(deleted_expr.get(), true, co).front());
390  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
391  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
392  const auto row_is_deleted_default = cgen_state_->llBool(false);
393  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
394  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
395  auto row_is_deleted_or_default =
396  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
397  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
398  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
399  return row_is_deleted_or_default;
400  };
401 }

std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const ExecutionOptions &  eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache 
)
private

Definition at line 222 of file IRCodegen.cpp.

References CodeGenerator::cgen_state_, CHECK(), CHECK_LT, INJECT_TIMER, RelAlgExecutionUnit::join_quals, LEFT, JoinHashTableInterface::OneToOne, CgenState::outer_join_match_found_per_level_, Set, Singleton, JoinLoopDomain::slot_lookup_result, and JoinLoopDomain::values_buffer.

227  {
229  std::vector<JoinLoop> join_loops;
230  for (size_t level_idx = 0, current_hash_table_idx = 0;
231  level_idx < ra_exe_unit.join_quals.size();
232  ++level_idx) {
233  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
234  std::vector<std::string> fail_reasons;
235  const auto current_level_hash_table =
236  buildCurrentLevelHashTable(current_level_join_conditions,
237  ra_exe_unit,
238  co,
239  query_infos,
240  column_cache,
241  fail_reasons);
242  const auto found_outer_join_matches_cb =
243  [this, level_idx](llvm::Value* found_outer_join_matches) {
244  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
245  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
246  cgen_state_->outer_join_match_found_per_level_[level_idx] =
247  found_outer_join_matches;
248  };
249  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
250  if (current_level_hash_table) {
251  if (current_level_hash_table->getHashType() == JoinHashTable::HashType::OneToOne) {
252  join_loops.emplace_back(
254  current_level_join_conditions.type,
255  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
256  const std::vector<llvm::Value*>& prev_iters) {
257  addJoinLoopIterator(prev_iters, level_idx);
258  JoinLoopDomain domain{{0}};
259  domain.slot_lookup_result =
260  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
261  return domain;
262  },
263  nullptr,
264  current_level_join_conditions.type == JoinType::LEFT
265  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
266  : nullptr,
267  is_deleted_cb);
268  } else {
269  join_loops.emplace_back(
271  current_level_join_conditions.type,
272  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
273  const std::vector<llvm::Value*>& prev_iters) {
274  addJoinLoopIterator(prev_iters, level_idx);
275  JoinLoopDomain domain{{0}};
276  const auto matching_set = current_level_hash_table->codegenMatchingSet(
277  co, current_hash_table_idx);
278  domain.values_buffer = matching_set.elements;
279  domain.element_count = matching_set.count;
280  return domain;
281  },
282  nullptr,
283  current_level_join_conditions.type == JoinType::LEFT
284  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
285  : nullptr,
286  is_deleted_cb);
287  }
288  ++current_hash_table_idx;
289  } else {
290  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
291  ? "No equijoin expression found"
292  : boost::algorithm::join(fail_reasons, " | ");
294  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
295  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
 296  // condition.
297  VLOG(1) << "Unable to build hash table, falling back to loop join: "
298  << fail_reasons_str;
299  const auto outer_join_condition_cb =
300  [this, level_idx, &co, &current_level_join_conditions](
301  const std::vector<llvm::Value*>& prev_iters) {
302  // The values generated for the match path don't dominate all uses
 303  // since on the non-match path nulls are generated. Reset the cache
 304  // once the condition is generated to avoid incorrect reuse.
305  FetchCacheAnchor anchor(cgen_state_.get());
306  addJoinLoopIterator(prev_iters, level_idx + 1);
307  llvm::Value* left_join_cond = cgen_state_->llBool(true);
308  CodeGenerator code_generator(this);
309  for (auto expr : current_level_join_conditions.quals) {
310  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
311  left_join_cond,
312  code_generator.toBool(
313  code_generator.codegen(expr.get(), true, co).front()));
314  }
315  return left_join_cond;
316  };
317  join_loops.emplace_back(
319  current_level_join_conditions.type,
320  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
321  addJoinLoopIterator(prev_iters, level_idx);
322  JoinLoopDomain domain{{0}};
323  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
324  get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan"),
325  cgen_state_->llInt(int32_t(level_idx + 1)));
326  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(rows_per_scan_ptr,
327  "num_rows_per_scan");
328  return domain;
329  },
330  current_level_join_conditions.type == JoinType::LEFT
331  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
332  outer_join_condition_cb)
333  : nullptr,
334  current_level_join_conditions.type == JoinType::LEFT
335  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
336  : nullptr,
337  is_deleted_cb);
338  }
339  }
340  return join_loops;
341 }

void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList &  selected_fragments,
const RelAlgExecutionUnit &  ra_exe_unit 
)
private

Definition at line 2171 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LT, and RelAlgExecutionUnit::input_descs.

2176  {
2177  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
2178  size_t frag_pos{0};
2179  const auto& input_descs = ra_exe_unit.input_descs;
2180  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
2181  const int table_id = input_descs[scan_idx].getTableId();
2182  CHECK_EQ(selected_fragments[scan_idx].table_id, table_id);
2183  selected_fragments_crossjoin.push_back(
2184  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
2185  for (const auto& col_id : col_global_ids) {
2186  CHECK(col_id);
2187  const auto& input_desc = col_id->getScanDesc();
2188  if (input_desc.getTableId() != table_id ||
2189  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
2190  continue;
2191  }
2192  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
2193  CHECK(it != plan_state_->global_to_local_col_ids_.end());
2194  CHECK_LT(static_cast<size_t>(it->second),
2195  plan_state_->global_to_local_col_ids_.size());
2196  local_col_to_frag_pos[it->second] = frag_pos;
2197  }
2198  ++frag_pos;
2199  }
2200 }

llvm::Value * Executor::castToFP ( llvm::Value *  val)
private

Definition at line 2629 of file Execute.cpp.

References logger::FATAL, LOG, and to_string().

2629  {
2630  if (!val->getType()->isIntegerTy()) {
2631  return val;
2632  }
2633 
2634  auto val_width = static_cast<llvm::IntegerType*>(val->getType())->getBitWidth();
2635  llvm::Type* dest_ty{nullptr};
2636  switch (val_width) {
2637  case 32:
2638  dest_ty = llvm::Type::getFloatTy(cgen_state_->context_);
2639  break;
2640  case 64:
2641  dest_ty = llvm::Type::getDoubleTy(cgen_state_->context_);
2642  break;
2643  default:
2644  LOG(FATAL) << "Unsupported FP width: " << std::to_string(val_width);
2645  }
2646  return cgen_state_->ir_builder_.CreateSIToFP(val, dest_ty);
2647 }
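In short, a 32-bit integer value is converted to float and a 64-bit integer to double via CreateSIToFP; non-integer values are returned unchanged, and any other integer width aborts with LOG(FATAL).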

llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 2649 of file Execute.cpp.

References CHECK(), CHECK_LT, and get_int_type().

2649  {
2650  CHECK(val->getType()->isPointerTy());
2651 
2652  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
2653  const auto val_type = val_ptr_type->getElementType();
2654  size_t val_width = 0;
2655  if (val_type->isIntegerTy()) {
2656  val_width = val_type->getIntegerBitWidth();
2657  } else {
2658  if (val_type->isFloatTy()) {
2659  val_width = 32;
2660  } else {
2661  CHECK(val_type->isDoubleTy());
2662  val_width = 64;
2663  }
2664  }
2665  CHECK_LT(size_t(0), val_width);
2666  if (bitWidth == val_width) {
2667  return val;
2668  }
2669  return cgen_state_->ir_builder_.CreateBitCast(
2670  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
2671 }
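For example, a pointer to a 64-bit element requested at a 64-bit width is returned as-is, while a pointer to a 32-bit float requested at 64 bits is bitcast to a pointer to a 64-bit integer.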

bool Executor::checkCurrentQuerySession ( const std::string &  candidate_query_session)

Definition at line 2972 of file Execute.cpp.

2972  {
 2973  // returns whether the candidate_query_session matches the
 2974  // current query session
2975  return (current_query_session_ == candidate_query_session);
2976 }
bool Executor::checkIsQuerySessionInterrupted ( const std::string &  query_session)

Definition at line 3016 of file Execute.cpp.

3016  {
3017  std::lock_guard<std::mutex> session_access_lock(executor_session_mutex_);
3018  auto it = queries_interrupt_flag_.find(query_session);
3019  if (it != queries_interrupt_flag_.end()) {
3020  return it->second;
3021  }
3022  return false;
3023 }
void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 160 of file Execute.cpp.

References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by MapDHandler::clear_cpu_memory(), MapDHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

160  {
161  switch (memory_level) {
164  std::lock_guard<std::mutex> flush_lock(
165  execute_mutex_); // Don't flush memory while queries are running
166 
168  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
169  // The hash table cache uses CPU memory not managed by the buffer manager. In the
 170  // future, we should manage these allocations with the buffer manager directly.
 171  // For now, assume the user wants to purge the hash table cache when they clear
 172  // CPU memory (currently used in ExecuteTest to lower memory pressure)
174  }
175  break;
176  }
177  default: {
178  throw std::runtime_error(
179  "Clearing memory levels other than the CPU level or GPU level is not "
180  "supported.");
181  }
182  }
183 }
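A minimal usage sketch; the call site is an assumption, and the enumerator comes from the Data_Namespace references above:

    // Flush CPU-level buffers. As the code above shows, this also purges the
    // hash table cache and takes execute_mutex_ so it does not overlap running queries.
    Executor::clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);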

void Executor::clearMetaInfoCache ( )
private

Definition at line 328 of file Execute.cpp.

References input_table_info_cache_().

328  {
333 }

llvm::Value * Executor::codegenAggregateWindowState ( )
private

Definition at line 318 of file WindowFunctionIR.cpp.

References AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.

318  {
319  const auto pi32_type =
320  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
321  const auto pi64_type =
322  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
323  const auto window_func_context =
325  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
326  const auto window_func_ti = get_adjusted_window_type_info(window_func);
327  const auto aggregate_state_type =
328  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
329  auto aggregate_state = aggregateWindowStatePtr();
330  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
331  const auto aggregate_state_count_i64 = cgen_state_->llInt(
332  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
333  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
334  aggregate_state_count_i64, aggregate_state_type);
335  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
336  switch (window_func_ti.get_type()) {
337  case kFLOAT: {
338  return cgen_state_->emitCall(
339  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
340  }
341  case kDOUBLE: {
342  return cgen_state_->emitCall(
343  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
344  }
345  case kDECIMAL: {
346  return cgen_state_->emitCall(
347  "load_avg_decimal",
348  {aggregate_state,
349  aggregate_state_count,
350  double_null_lv,
351  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
352  }
353  default: {
354  return cgen_state_->emitCall(
355  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
356  }
357  }
358  }
359  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
360  return cgen_state_->ir_builder_.CreateLoad(aggregate_state);
361  }
362  switch (window_func_ti.get_type()) {
363  case kFLOAT: {
364  return cgen_state_->emitCall("load_float", {aggregate_state});
365  }
366  case kDOUBLE: {
367  return cgen_state_->emitCall("load_double", {aggregate_state});
368  }
369  default: {
370  return cgen_state_->ir_builder_.CreateLoad(aggregate_state);
371  }
372  }
373 }

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit &  ra_exe_unit,
GroupByAndAggregate &  group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const ExecutionOptions &  eo 
)
private

Definition at line 466 of file IRCodegen.cpp.

References ExecutionOptions::allow_runtime_query_interrupt, CodeGenerator::cgen_state_, JoinLoop::codegen(), CgenState::context_, CompilationOptions::device_type, CgenState::ir_builder_, CgenState::llInt(), CgenState::needs_error_check_, CodeGenerator::posArg(), CgenState::row_func_, and ExecutionOptions::with_dynamic_watchdog.

473  {
474  const auto exit_bb =
475  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->row_func_);
476  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
477  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
478  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
479  CodeGenerator code_generator(this);
480  const auto loops_entry_bb = JoinLoop::codegen(
481  join_loops,
482  [this,
483  query_func,
484  &query_mem_desc,
485  &co,
486  &eo,
487  &group_by_and_aggregate,
488  &join_loops,
489  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
490  addJoinLoopIterator(prev_iters, join_loops.size());
491  auto& builder = cgen_state_->ir_builder_;
492  const auto loop_body_bb = llvm::BasicBlock::Create(
493  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
494  builder.SetInsertPoint(loop_body_bb);
495  const bool can_return_error =
496  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
497  if (can_return_error || cgen_state_->needs_error_check_ ||
499  createErrorCheckControlFlow(query_func,
502  co.device_type);
503  }
504  return loop_body_bb;
505  },
506  code_generator.posArg(nullptr),
507  exit_bb,
508  cgen_state_->ir_builder_);
509  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
510  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
511 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const bool with_dynamic_watchdog
ExecutorDeviceType device_type
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, ExecutorDeviceType device_type)
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, llvm::IRBuilder<> &builder)
Definition: JoinLoop.cpp:45
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:449
const bool allow_runtime_query_interrupt

+ Here is the call graph for this function:
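Note: the following is a minimal, self-contained sketch (plain C++, not OmniSciDB code) of the control structure codegenJoinLoops() emits: JoinLoop::codegen nests one loop per join level and calls back into the supplied lambda at the innermost position, which is where compileBody() generates the per-row work. All names in the sketch are hypothetical stand-ins.

#include <cstdio>
#include <functional>
#include <vector>

// Hypothetical stand-in for a list of join loop levels: one iteration domain per level.
using LoopLevels = std::vector<int>;

void run_nested(const LoopLevels& levels,
                std::vector<int>& iterators,
                const std::function<void(const std::vector<int>&)>& body,
                size_t depth = 0) {
  if (depth == levels.size()) {  // innermost position: run the loop body
    body(iterators);
    return;
  }
  for (int i = 0; i < levels[depth]; ++i) {
    iterators[depth] = i;
    run_nested(levels, iterators, body, depth + 1);
  }
}

int main() {
  LoopLevels levels{2, 3};  // two join levels
  std::vector<int> iterators(levels.size());
  run_nested(levels, iterators, [](const std::vector<int>& iters) {
    std::printf("body at (%d, %d)\n", iters[0], iters[1]);
  });
  return 0;
}

The real function builds the same shape in LLVM IR, with the exit block returning 0 and the entry block branching into the outermost generated loop.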

llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 1883 of file NativeCodegen.cpp.

1885  {
1886  if (!co.add_delete_column) {
1887  return nullptr;
1888  }
1889  CHECK(!ra_exe_unit.input_descs.empty());
1890  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
1891  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
1892  return nullptr;
1893  }
1894  const auto td = catalog_->getMetadataForTable(outer_input_desc.getTableId());
1895  CHECK(td);
1896  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
1897  if (!deleted_cd) {
1898  return nullptr;
1899  }
1900  CHECK(deleted_cd->columnType.is_boolean());
1901  const auto deleted_expr =
1902  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
1903  outer_input_desc.getTableId(),
1904  deleted_cd->columnId,
1905  outer_input_desc.getNestLevel());
1906  CodeGenerator code_generator(this);
1907  const auto is_deleted =
1908  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
1909  const auto is_deleted_bb = llvm::BasicBlock::Create(
1910  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
1911  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
1912  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
1913  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
1914  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
1915  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1916  cgen_state_->ir_builder_.SetInsertPoint(bb);
1917  return bb;
1918 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
const std::vector< InputDescriptor > input_descs
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:2571
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
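Note: the branch generated by codegenSkipDeletedOuterTableRow() is conceptually a per-row filter on the table's hidden delete column. The sketch below (plain C++, not OmniSciDB code, with hypothetical names) shows the equivalent behavior.

#include <cstdio>
#include <vector>

struct Row {
  int id;
  bool deleted;  // stand-in for the hidden boolean delete column
};

int main() {
  const std::vector<Row> rows{{1, false}, {2, true}, {3, false}};
  for (const auto& row : rows) {
    if (row.deleted) {
      continue;  // equivalent of branching to the generated "is_deleted" early-return block
    }
    std::printf("processing row %d\n", row.id);
  }
  return 0;
}
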
void Executor::codegenWindowAvgEpilogue ( llvm::Value *  crt_val,
llvm::Value *  window_func_null_val,
llvm::Value *  multiplicity_lv 
)
private

Definition at line 282 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

284  {
285  const auto window_func_context =
286  WindowProjectNodeContext::getActiveWindowFunctionContext();
287  const auto window_func = window_func_context->getWindowFunction();
288  const auto window_func_ti = get_adjusted_window_type_info(window_func);
289  const auto pi32_type =
290  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
291  const auto pi64_type =
292  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
293  const auto aggregate_state_type =
294  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
295  const auto aggregate_state_count_i64 = cgen_state_->llInt(
296  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
297  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
298  aggregate_state_count_i64, aggregate_state_type);
299  std::string agg_count_func_name = "agg_count";
300  switch (window_func_ti.get_type()) {
301  case kFLOAT: {
302  agg_count_func_name += "_float";
303  break;
304  }
305  case kDOUBLE: {
306  agg_count_func_name += "_double";
307  break;
308  }
309  default: {
310  break;
311  }
312  }
313  agg_count_func_name += "_skip_val";
314  cgen_state_->emitCall(agg_count_func_name,
315  {aggregate_state_count, crt_val, window_func_null_val});
316 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:
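Note: the epilogue exists because AVG is maintained as two pieces of state, a running sum and a running count, where the count is only bumped for non-null inputs (the "_skip_val" call). A minimal sketch of that behavior in plain C++ (not OmniSciDB code):

#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

int main() {
  std::vector<std::optional<double>> values{1.0, std::nullopt, 3.0};
  double sum = 0;
  int64_t count = 0;
  for (const auto& v : values) {
    if (!v) {
      continue;  // skip the null sentinel, as agg_count_*_skip_val does
    }
    sum += *v;
    ++count;
  }
  std::printf("avg = %f\n", count ? sum / count : 0.0);
  return 0;
}

The division itself happens later, when the aggregate state is read back.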

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 21 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AVG, CHECK(), CHECK_EQ, CodeGenerator::codegen(), COUNT, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAST_VALUE, LEAD, LOG, MAX, MIN, NTILE, PERCENT_RANK, CodeGenerator::posArg(), RANK, ROW_NUMBER, and SUM.

22  {
23  CodeGenerator code_generator(this);
24  const auto window_func_context =
25  WindowProjectNodeContext::get()->activateWindowFunctionContext(target_index);
26  const auto window_func = window_func_context->getWindowFunction();
27  switch (window_func->getKind()) {
28  case SqlWindowFunctionKind::ROW_NUMBER:
29  case SqlWindowFunctionKind::RANK:
30  case SqlWindowFunctionKind::DENSE_RANK:
31  case SqlWindowFunctionKind::NTILE: {
32  return cgen_state_->emitCall("row_number_window_func",
33  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
34  window_func_context->output())),
35  code_generator.posArg(nullptr)});
36  }
37  case SqlWindowFunctionKind::PERCENT_RANK:
38  case SqlWindowFunctionKind::CUME_DIST: {
39  return cgen_state_->emitCall("percent_window_func",
40  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
41  window_func_context->output())),
42  code_generator.posArg(nullptr)});
43  }
44  case SqlWindowFunctionKind::LAG:
45  case SqlWindowFunctionKind::LEAD:
46  case SqlWindowFunctionKind::FIRST_VALUE:
47  case SqlWindowFunctionKind::LAST_VALUE: {
49  const auto& args = window_func->getArgs();
50  CHECK(!args.empty());
51  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
52  CHECK_EQ(arg_lvs.size(), size_t(1));
53  return arg_lvs.front();
54  }
55  case SqlWindowFunctionKind::AVG:
56  case SqlWindowFunctionKind::MIN:
57  case SqlWindowFunctionKind::MAX:
58  case SqlWindowFunctionKind::SUM:
59  case SqlWindowFunctionKind::COUNT: {
60  return codegenWindowFunctionAggregate(co);
61  }
62  default: {
63  LOG(FATAL) << "Invalid window function kind";
64  }
65  }
66  return nullptr;
67 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define LOG(tag)
Definition: Logger.h:188
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
const WindowFunctionContext * activateWindowFunctionContext(const size_t target_index) const
CHECK(cgen_state)
static const WindowProjectNodeContext * get()
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregate ( const CompilationOptions co)
private

Definition at line 137 of file WindowFunctionIR.cpp.

References AVG, CHECK(), WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().

137  {
138  const auto reset_state_false_bb = codegenWindowResetStateControlFlow();
139  auto aggregate_state = aggregateWindowStatePtr();
140  llvm::Value* aggregate_state_count = nullptr;
141  const auto window_func_context =
142  WindowProjectNodeContext::getActiveWindowFunctionContext();
143  const auto window_func = window_func_context->getWindowFunction();
144  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
145  const auto aggregate_state_count_i64 = cgen_state_->llInt(
146  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
147  const auto pi64_type =
148  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
149  aggregate_state_count =
150  cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_count_i64, pi64_type);
151  }
152  codegenWindowFunctionStateInit(aggregate_state);
153  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
154  const auto count_zero = cgen_state_->llInt(int64_t(0));
155  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
156  }
157  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
158  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
160  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
161 }
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
CHECK(cgen_state)
static const WindowProjectNodeContext * get()
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
static WindowFunctionContext * getActiveWindowFunctionContext()
llvm::BasicBlock * codegenWindowResetStateControlFlow()

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 240 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AVG, CHECK(), CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kFLOAT, and SUM.

241  {
242  const auto window_func_context =
243  WindowProjectNodeContext::getActiveWindowFunctionContext();
244  const auto window_func = window_func_context->getWindowFunction();
245  const auto window_func_ti = get_adjusted_window_type_info(window_func);
246  const auto window_func_null_val =
247  window_func_ti.is_fp()
248  ? cgen_state_->inlineFpNull(window_func_ti)
249  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
250  const auto& args = window_func->getArgs();
251  llvm::Value* crt_val;
252  if (args.empty()) {
253  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
254  crt_val = cgen_state_->llInt(int64_t(1));
255  } else {
256  CodeGenerator code_generator(this);
257  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
258  CHECK_EQ(arg_lvs.size(), size_t(1));
259  if (window_func->getKind() == SqlWindowFunctionKind::SUM && !window_func_ti.is_fp()) {
260  crt_val = code_generator.codegenCastBetweenIntTypes(
261  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
262  } else {
263  crt_val = window_func_ti.get_type() == kFLOAT
264  ? arg_lvs.front()
265  : cgen_state_->castToTypeIn(arg_lvs.front(), 64);
266  }
267  }
268  const auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
269  llvm::Value* multiplicity_lv = nullptr;
270  if (args.empty()) {
271  cgen_state_->emitCall(agg_name, {aggregate_state, crt_val});
272  } else {
273  cgen_state_->emitCall(agg_name + "_skip_val",
274  {aggregate_state, crt_val, window_func_null_val});
275  }
276  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
277  codegenWindowAvgEpilogue(crt_val, window_func_null_val, multiplicity_lv);
278  }
278  }
279  return codegenAggregateWindowState();
280 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
std::string get_window_agg_name(const SqlWindowFunctionKind kind, const SQLTypeInfo &window_func_ti)
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
CHECK(cgen_state)
llvm::Value * codegenAggregateWindowState()
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

void Executor::codegenWindowFunctionStateInit ( llvm::Value *  aggregate_state)
private

Definition at line 191 of file WindowFunctionIR.cpp.

References COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

191  {
192  const auto window_func_context =
193  WindowProjectNodeContext::getActiveWindowFunctionContext();
194  const auto window_func = window_func_context->getWindowFunction();
195  const auto window_func_ti = get_adjusted_window_type_info(window_func);
196  const auto window_func_null_val =
197  window_func_ti.is_fp()
198  ? cgen_state_->inlineFpNull(window_func_ti)
199  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
200  llvm::Value* window_func_init_val;
201  if (window_func_context->getWindowFunction()->getKind() ==
202  SqlWindowFunctionKind::COUNT) {
203  switch (window_func_ti.get_type()) {
204  case kFLOAT: {
205  window_func_init_val = cgen_state_->llFp(float(0));
206  break;
207  }
208  case kDOUBLE: {
209  window_func_init_val = cgen_state_->llFp(double(0));
210  break;
211  }
212  default: {
213  window_func_init_val = cgen_state_->llInt(int64_t(0));
214  break;
215  }
216  }
217  } else {
218  window_func_init_val = window_func_null_val;
219  }
220  const auto pi32_type =
221  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
222  switch (window_func_ti.get_type()) {
223  case kDOUBLE: {
224  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
225  break;
226  }
227  case kFLOAT: {
228  aggregate_state =
229  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
230  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
231  break;
232  }
233  default: {
234  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
235  break;
236  }
237  }
238 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:
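Note: the initialization rule implemented here is that COUNT starts from zero while every other window aggregate starts from the null sentinel of its (adjusted) type, so an empty partition yields NULL rather than a spurious value. A minimal sketch in plain C++ (not OmniSciDB code; the sentinel value is illustrative only):

#include <cstdint>
#include <cstdio>
#include <limits>

enum class Kind { COUNT, SUM, MIN, MAX, AVG };

int64_t initial_state(Kind kind) {
  constexpr int64_t kNullSentinel = std::numeric_limits<int64_t>::min();  // illustrative sentinel
  return kind == Kind::COUNT ? int64_t(0) : kNullSentinel;
}

int main() {
  std::printf("COUNT starts at %lld, SUM starts at the null sentinel %lld\n",
              static_cast<long long>(initial_state(Kind::COUNT)),
              static_cast<long long>(initial_state(Kind::SUM)));
  return 0;
}
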

llvm::BasicBlock * Executor::codegenWindowResetStateControlFlow ( )
private

Definition at line 163 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::getActiveWindowFunctionContext(), CodeGenerator::posArg(), and CodeGenerator::toBool().

163  {
164  const auto window_func_context =
165  WindowProjectNodeContext::getActiveWindowFunctionContext();
166  const auto bitset = cgen_state_->llInt(
167  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
168  const auto min_val = cgen_state_->llInt(int64_t(0));
169  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
170  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
171  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
172  CodeGenerator code_generator(this);
173  const auto reset_state =
174  code_generator.toBool(cgen_state_->emitCall("bit_is_set",
175  {bitset,
176  code_generator.posArg(nullptr),
177  min_val,
178  max_val,
179  null_val,
180  null_bool_val}));
181  const auto reset_state_true_bb = llvm::BasicBlock::Create(
182  cgen_state_->context_, "reset_state.true", cgen_state_->row_func_);
183  const auto reset_state_false_bb = llvm::BasicBlock::Create(
184  cgen_state_->context_, "reset_state.false", cgen_state_->row_func_);
185  cgen_state_->ir_builder_.CreateCondBr(
186  reset_state, reset_state_true_bb, reset_state_false_bb);
187  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
188  return reset_state_false_bb;
189 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
static WindowFunctionContext * getActiveWindowFunctionContext()

+ Here is the call graph for this function:
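Note: the generated branch uses a partition-start bitset to decide whether the aggregate state must be reset before the current row is processed. A minimal sketch of that logic in plain C++ (not OmniSciDB code, hypothetical data):

#include <cstdio>
#include <vector>

int main() {
  const std::vector<bool> partition_start{true, false, false, true, false};
  const std::vector<int> values{1, 2, 3, 10, 20};
  int running_sum = 0;
  for (size_t pos = 0; pos < values.size(); ++pos) {
    if (partition_start[pos]) {
      running_sum = 0;  // the "reset_state.true" path
    }
    running_sum += values[pos];
    std::printf("pos %zu: sum so far %d\n", pos, running_sum);
  }
  return 0;
}
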

ResultSetPtr Executor::collectAllDeviceResults ( ExecutionDispatch execution_dispatch,
const std::vector< Analyzer::Expr * > &  target_exprs,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
)
private

Definition at line 1562 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), catalog_(), DEBUG_TIMER, Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, GroupByAndAggregate::shard_count_for_top_groups(), and use_speculative_top_n().

1567  {
1568  auto timer = DEBUG_TIMER(__func__);
1569  auto& result_per_device = execution_dispatch.getFragmentResults();
1570  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
1571  QueryDescriptionType::NonGroupedAggregate) {
1572  return build_row_for_empty_input(target_exprs, query_mem_desc, device_type);
1573  }
1574  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1575  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
1576  return reduceSpeculativeTopN(
1577  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
1578  }
1579  const auto shard_count =
1580  device_type == ExecutorDeviceType::GPU
1581  ? GroupByAndAggregate::shard_count_for_top_groups(ra_exe_unit, *catalog_)
1582  : 0;
1583 
1584  if (shard_count && !result_per_device.empty()) {
1585  return collectAllDeviceShardedTopResults(execution_dispatch);
1586  }
1587  return reduceMultiDeviceResults(
1588  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
1589 }
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:933
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:850
QueryDescriptionType getQueryDescriptionType() const
ResultSetPtr collectAllDeviceShardedTopResults(ExecutionDispatch &execution_dispatch) const
Definition: Execute.cpp:1673
ResultSetPtr build_row_for_empty_input(const std::vector< Analyzer::Expr * > &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
Definition: Execute.cpp:1525
#define DEBUG_TIMER(name)
Definition: Logger.h:313
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)

+ Here is the call graph for this function:

ResultSetPtr Executor::collectAllDeviceShardedTopResults ( ExecutionDispatch execution_dispatch) const
private

Definition at line 1673 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LE, Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragmentResults(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), and run_benchmark_import::result.

1674  {
1675  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1676  auto& result_per_device = execution_dispatch.getFragmentResults();
1677  const auto first_result_set = result_per_device.front().first;
1678  CHECK(first_result_set);
1679  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
1680  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
1681  const auto top_n = ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
1682  top_query_mem_desc.setEntryCount(0);
1683  for (auto& result : result_per_device) {
1684  const auto result_set = result.first;
1685  CHECK(result_set);
1686  result_set->sort(ra_exe_unit.sort_info.order_entries, top_n);
1687  size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
1688  top_query_mem_desc.setEntryCount(new_entry_cnt);
1689  }
1690  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
1691  first_result_set->getDeviceType(),
1692  top_query_mem_desc,
1693  first_result_set->getRowSetMemOwner(),
1694  this);
1695  auto top_storage = top_result_set->allocateStorage();
1696  size_t top_output_row_idx{0};
1697  for (auto& result : result_per_device) {
1698  const auto result_set = result.first;
1699  CHECK(result_set);
1700  const auto& top_permutation = result_set->getPermutationBuffer();
1701  CHECK_LE(top_permutation.size(), top_n);
1702  if (top_query_mem_desc.didOutputColumnar()) {
1703  top_output_row_idx = permute_storage_columnar(result_set->getStorage(),
1704  result_set->getQueryMemDesc(),
1705  top_storage,
1706  top_output_row_idx,
1707  top_query_mem_desc,
1708  top_permutation);
1709  } else {
1710  top_output_row_idx = permute_storage_row_wise(result_set->getStorage(),
1711  top_storage,
1712  top_output_row_idx,
1713  top_query_mem_desc,
1714  top_permutation);
1715  }
1716  }
1717  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
1718  return top_result_set;
1719 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t permute_storage_row_wise(const ResultSetStorage *input_storage, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:1652
CHECK(cgen_state)
#define CHECK_LE(x, y)
Definition: Logger.h:208
size_t permute_storage_columnar(const ResultSetStorage *input_storage, const QueryMemoryDescriptor &input_query_mem_desc, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:1602

+ Here is the call graph for this function:
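Note: conceptually, each device contributes its own sorted top-n permutation, and the merged result is built by appending those permutations into a single output buffer sized to the sum of the per-device row counts. A minimal sketch in plain C++ (not OmniSciDB code; vectors stand in for ResultSet storage):

#include <algorithm>
#include <cstdio>
#include <functional>
#include <vector>

int main() {
  const size_t top_n = 3;
  std::vector<std::vector<int>> per_device{{9, 7, 4, 1}, {8, 6, 2}};
  std::vector<int> merged;
  for (auto& result : per_device) {
    std::sort(result.begin(), result.end(), std::greater<int>());
    result.resize(std::min(top_n, result.size()));                // per-device top-n
    merged.insert(merged.end(), result.begin(), result.end());    // permute into the output buffer
  }
  for (int v : merged) {
    std::printf("%d ", v);
  }
  std::printf("\n");
  return 0;
}

The caller can then apply the final sort and limit over the merged buffer.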

bool Executor::compileBody ( const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co 
)
private

Definition at line 1920 of file NativeCodegen.cpp.

1923  {
1924  // generate the code for the filter
1925  std::vector<Analyzer::Expr*> primary_quals;
1926  std::vector<Analyzer::Expr*> deferred_quals;
1927  bool short_circuited =
1928  CodeGenerator::prioritizeQuals(ra_exe_unit, primary_quals, deferred_quals);
1929  if (short_circuited) {
1930  VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
1931  << "short-circuited and deferred " << std::to_string(deferred_quals.size())
1932  << " quals";
1933  }
1934  llvm::Value* filter_lv = cgen_state_->llBool(true);
1935  CodeGenerator code_generator(this);
1936  for (auto expr : primary_quals) {
1937  // Generate the filter for primary quals
1938  auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
1939  filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
1940  }
1941  CHECK(filter_lv->getType()->isIntegerTy(1));
1942  llvm::BasicBlock* sc_false{nullptr};
1943  if (!deferred_quals.empty()) {
1944  auto sc_true = llvm::BasicBlock::Create(
1945  cgen_state_->context_, "sc_true", cgen_state_->row_func_);
1946  sc_false = llvm::BasicBlock::Create(
1947  cgen_state_->context_, "sc_false", cgen_state_->row_func_);
1948  cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
1949  cgen_state_->ir_builder_.SetInsertPoint(sc_false);
1950  if (ra_exe_unit.join_quals.empty()) {
1951  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
1952  }
1953  cgen_state_->ir_builder_.SetInsertPoint(sc_true);
1954  filter_lv = cgen_state_->llBool(true);
1955  }
1956  for (auto expr : deferred_quals) {
1957  filter_lv = cgen_state_->ir_builder_.CreateAnd(
1958  filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
1959  }
1960 
1961  CHECK(filter_lv->getType()->isIntegerTy(1));
1962  return group_by_and_aggregate.codegen(filter_lv, sc_false, query_mem_desc, co);
1963 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr * > &primary_quals, std::vector< Analyzer::Expr * > &deferred_quals)
Definition: LogicalIR.cpp:157
std::string to_string(char const *&&v)
CHECK(cgen_state)
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const JoinQualsPerNestingLevel join_quals
#define VLOG(n)
Definition: Logger.h:291
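Note: the sc_true/sc_false blocks give the generated row function a short-circuit structure: cheap "primary" quals are evaluated first, and the remaining "deferred" quals only run when the primary ones pass. A minimal sketch of that evaluation order in plain C++ (not OmniSciDB code, hypothetical quals):

#include <cstdio>
#include <functional>
#include <vector>

using Qual = std::function<bool(int)>;

bool passes(int row, const std::vector<Qual>& primary, const std::vector<Qual>& deferred) {
  for (const auto& q : primary) {
    if (!q(row)) {
      return false;  // sc_false: the expensive quals are skipped entirely
    }
  }
  for (const auto& q : deferred) {
    if (!q(row)) {
      return false;
    }
  }
  return true;
}

int main() {
  std::vector<Qual> primary{[](int r) { return r > 0; }};
  std::vector<Qual> deferred{[](int r) { return r % 7 == 0; }};
  std::printf("%d %d\n", passes(14, primary, deferred), passes(-3, primary, deferred));
  return 0;
}
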
std::tuple< Executor::CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache,
RenderInfo render_info = nullptr 
)
private

Definition at line 1633 of file NativeCodegen.cpp.

References run_benchmark_import::args, and get_arg_by_name().

1644  {
1645  auto timer = DEBUG_TIMER(__func__);
1646  nukeOldState(allow_lazy_fetch, query_infos, &ra_exe_unit);
1647 
1648  GroupByAndAggregate group_by_and_aggregate(
1649  this, co.device_type, ra_exe_unit, query_infos, row_set_mem_owner);
1650  auto query_mem_desc =
1651  group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
1652  max_groups_buffer_entry_guess,
1653  crt_min_byte_width,
1654  render_info,
1656 
1657  if (query_mem_desc->getQueryDescriptionType() ==
1659  !has_cardinality_estimation &&
1660  (!render_info || !render_info->isPotentialInSituRender()) && !eo.just_explain) {
1662  }
1663 
1664  const bool output_columnar = query_mem_desc->didOutputColumnar();
1665 
1667  const size_t num_count_distinct_descs =
1668  query_mem_desc->getCountDistinctDescriptorsSize();
1669  for (size_t i = 0; i < num_count_distinct_descs; i++) {
1670  const auto& count_distinct_descriptor =
1671  query_mem_desc->getCountDistinctDescriptor(i);
1672  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::StdSet ||
1673  (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
1674  !co.hoist_literals)) {
1675  throw QueryMustRunOnCpu();
1676  }
1677  }
1678  }
1679 
1680  // Read the module template and target either CPU or GPU
1681  // by binding the stream position functions to the right implementation:
1682  // stride access for GPU, contiguous for CPU
1683  auto rt_module_copy = llvm::CloneModule(
1684 #if LLVM_VERSION_MAJOR >= 7
1685  *g_rt_module.get(),
1686 #else
1687  g_rt_module.get(),
1688 #endif
1689  cgen_state_->vmap_,
1690  [](const llvm::GlobalValue* gv) {
1691  auto func = llvm::dyn_cast<llvm::Function>(gv);
1692  if (!func) {
1693  return true;
1694  }
1695  return (func->getLinkage() == llvm::GlobalValue::LinkageTypes::PrivateLinkage ||
1696  func->getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage ||
1698  });
1699 
1701  if (is_udf_module_present(true)) {
1703  }
1704  if (is_rt_udf_module_present(true)) {
1706  rt_udf_cpu_module, *rt_module_copy, cgen_state_.get());
1707  }
1708  } else {
1709  rt_module_copy->setDataLayout(get_gpu_data_layout());
1710  rt_module_copy->setTargetTriple(get_gpu_target_triple_string());
1711 
1712  if (is_udf_module_present()) {
1713  llvm::Triple gpu_triple(udf_gpu_module->getTargetTriple());
1714 
1715  if (!gpu_triple.isNVPTX()) {
1716  throw QueryMustRunOnCpu();
1717  }
1718 
1720  }
1721  if (is_rt_udf_module_present()) {
1723  rt_udf_gpu_module, *rt_module_copy, cgen_state_.get());
1724  }
1725  }
1726 
1727  cgen_state_->module_ = rt_module_copy.release();
1728 
1729  auto agg_fnames =
1730  get_agg_fnames(ra_exe_unit.target_exprs, !ra_exe_unit.groupby_exprs.empty());
1731 
1732  const auto agg_slot_count = ra_exe_unit.estimator ? size_t(1) : agg_fnames.size();
1733 
1734  const bool is_group_by{query_mem_desc->isGroupBy()};
1735  auto query_func = is_group_by ? query_group_by_template(cgen_state_->module_,
1736  co.hoist_literals,
1737  *query_mem_desc,
1738  co.device_type,
1739  ra_exe_unit.scan_limit)
1741  agg_slot_count,
1742  co.hoist_literals,
1743  !!ra_exe_unit.estimator);
1744  bind_pos_placeholders("pos_start", true, query_func, cgen_state_->module_);
1745  bind_pos_placeholders("group_buff_idx", false, query_func, cgen_state_->module_);
1746  bind_pos_placeholders("pos_step", false, query_func, cgen_state_->module_);
1747 
1748  cgen_state_->query_func_ = query_func;
1749  cgen_state_->query_func_entry_ir_builder_.SetInsertPoint(
1750  &query_func->getEntryBlock().front());
1751 
1752  std::vector<llvm::Value*> col_heads;
1753  std::tie(cgen_state_->row_func_, col_heads) =
1754  create_row_function(ra_exe_unit.input_col_descs.size(),
1755  is_group_by ? 0 : agg_slot_count,
1756  co.hoist_literals,
1757  query_func,
1758  cgen_state_->module_,
1759  cgen_state_->context_);
1760  CHECK(cgen_state_->row_func_);
1761  // make sure it's in-lined, we don't want register spills in the inner loop
1763  auto bb =
1764  llvm::BasicBlock::Create(cgen_state_->context_, "entry", cgen_state_->row_func_);
1765  cgen_state_->ir_builder_.SetInsertPoint(bb);
1766  preloadFragOffsets(ra_exe_unit.input_descs, query_infos);
1767  RelAlgExecutionUnit body_execution_unit = ra_exe_unit;
1768  const auto join_loops =
1769  buildJoinLoops(body_execution_unit, co, eo, query_infos, column_cache);
1770  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
1771  const auto is_not_deleted_bb = codegenSkipDeletedOuterTableRow(ra_exe_unit, co);
1772  if (is_not_deleted_bb) {
1773  bb = is_not_deleted_bb;
1774  }
1775  if (!join_loops.empty()) {
1776  codegenJoinLoops(join_loops,
1777  body_execution_unit,
1778  group_by_and_aggregate,
1779  query_func,
1780  bb,
1781  *(query_mem_desc.get()),
1782  co,
1783  eo);
1784  } else {
1785  const bool can_return_error =
1786  compileBody(ra_exe_unit, group_by_and_aggregate, *query_mem_desc, co);
1787  if (can_return_error || cgen_state_->needs_error_check_ || eo.with_dynamic_watchdog ||
1788  eo.allow_runtime_query_interrupt) {
1789  createErrorCheckControlFlow(query_func,
1790  eo.with_dynamic_watchdog,
1791  eo.allow_runtime_query_interrupt,
1792  co.device_type);
1793  }
1794  }
1795  std::vector<llvm::Value*> hoisted_literals;
1796 
1797  if (co.hoist_literals) {
1798  VLOG(1) << "number of hoisted literals: "
1799  << cgen_state_->query_func_literal_loads_.size()
1800  << " / literal buffer usage: " << cgen_state_->getLiteralBufferUsage(0)
1801  << " bytes";
1802  }
1803 
1804  if (co.hoist_literals && !cgen_state_->query_func_literal_loads_.empty()) {
1805  // we have some hoisted literals...
1806  hoisted_literals = inlineHoistedLiterals();
1807  }
1808  // iterate through all the instruction in the query template function and
1809  // replace the call to the filter placeholder with the call to the actual filter
1810  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1811  ++it) {
1812  if (!llvm::isa<llvm::CallInst>(*it)) {
1813  continue;
1814  }
1815  auto& filter_call = llvm::cast<llvm::CallInst>(*it);
1816  if (std::string(filter_call.getCalledFunction()->getName()) == "row_process") {
1817  std::vector<llvm::Value*> args;
1818  for (size_t i = 0; i < filter_call.getNumArgOperands(); ++i) {
1819  args.push_back(filter_call.getArgOperand(i));
1820  }
1821  args.insert(args.end(), col_heads.begin(), col_heads.end());
1822  args.push_back(get_arg_by_name(query_func, "join_hash_tables"));
1823  // push hoisted literals arguments, if any
1824  args.insert(args.end(), hoisted_literals.begin(), hoisted_literals.end());
1825 
1826  llvm::ReplaceInstWithInst(&filter_call,
1827  llvm::CallInst::Create(cgen_state_->row_func_, args, ""));
1828  break;
1829  }
1830  }
1831  plan_state_->init_agg_vals_ =
1832  init_agg_val_vec(ra_exe_unit.target_exprs, ra_exe_unit.quals, *query_mem_desc);
1833 
1834  auto multifrag_query_func = cgen_state_->module_->getFunction(
1835  "multifrag_query" + std::string(co.hoist_literals ? "_hoisted_literals" : ""));
1836  CHECK(multifrag_query_func);
1837 
1838  bind_query(query_func,
1839  "query_stub" + std::string(co.hoist_literals ? "_hoisted_literals" : ""),
1840  multifrag_query_func,
1841  cgen_state_->module_);
1842 
1843  auto live_funcs =
1845  {query_func, cgen_state_->row_func_},
1846  {multifrag_query_func});
1847 
1848  std::string llvm_ir;
1849  if (eo.just_explain) {
1851 #ifdef WITH_JIT_DEBUG
1852  throw std::runtime_error(
1853  "Explain optimized not available when JIT runtime debug symbols are enabled");
1854 #else
1855  optimize_ir(query_func, cgen_state_->module_, live_funcs, co);
1856 #endif // WITH_JIT_DEBUG
1857  }
1858  llvm_ir =
1859  serialize_llvm_object(query_func) + serialize_llvm_object(cgen_state_->row_func_);
1860  }
1861  verify_function_ir(cgen_state_->row_func_);
1862 
1863  LOG(IR) << query_mem_desc->toString() << "\nGenerated IR\n"
1864  << serialize_llvm_object(query_func)
1865  << serialize_llvm_object(cgen_state_->row_func_) << "\nEnd of IR";
1866 
1867  return std::make_tuple(
1870  ? optimizeAndCodegenCPU(query_func, multifrag_query_func, live_funcs, co)
1871  : optimizeAndCodegenGPU(query_func,
1872  multifrag_query_func,
1873  live_funcs,
1874  is_group_by || ra_exe_unit.estimator,
1875  cuda_mgr,
1876  co),
1877  cgen_state_->getLiterals(),
1878  output_columnar,
1879  llvm_ir},
1880  std::move(query_mem_desc));
1881 }
std::vector< Analyzer::Expr * > target_exprs
std::unique_ptr< llvm::Module > rt_udf_cpu_module
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
Definition: IRCodegen.cpp:466
std::unique_ptr< llvm::Module > udf_gpu_module
#define LOG(tag)
Definition: Logger.h:188
std::unique_ptr< llvm::Module > rt_udf_gpu_module
void mark_function_always_inline(llvm::Function *func)
llvm::StringRef get_gpu_data_layout()
bool is_udf_module_present(bool cpu_only=false)
std::vector< int64_t > init_agg_val_vec(const std::vector< TargetInfo > &targets, const QueryMemoryDescriptor &query_mem_desc)
llvm::Function * query_template(llvm::Module *module, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
const std::vector< InputDescriptor > input_descs
std::vector< std::string > get_agg_fnames(const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
std::vector< std::pair< void *, void * > > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:2547
llvm::StringRef get_gpu_target_triple_string()
void verify_function_ir(const llvm::Function *func)
const bool allow_multifrag
CHECK(cgen_state)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:117
static std::unordered_set< llvm::Function * > markDeadRuntimeFuncs(llvm::Module &module, const std::vector< llvm::Function * > &roots, const std::vector< llvm::Function * > &leaves)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const bool with_dynamic_watchdog
std::unique_ptr< llvm::Module > g_rt_module
ExecutorExplainType explain_type
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:2533
llvm::Function * query_group_by_template(llvm::Module *module, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit)
static void link_udf_module(const std::unique_ptr< llvm::Module > &udf_module, llvm::Module &module, CgenState *cgen_state, llvm::Linker::Flags flags=llvm::Linker::Flags::None)
const std::shared_ptr< Analyzer::Estimator > estimator
ExecutorDeviceType device_type
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
std::string serialize_llvm_object(const T *llvm_obj)
static bool alwaysCloneRuntimeFunction(const llvm::Function *func)
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, ExecutorDeviceType device_type)
std::unique_ptr< llvm::Module > udf_cpu_module
void bind_pos_placeholders(const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
std::list< std::shared_ptr< Analyzer::Expr > > quals
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:61
#define DEBUG_TIMER(name)
Definition: Logger.h:313
std::vector< std::pair< void *, void * > > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
std::vector< llvm::Value * > inlineHoistedLiterals()
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:222
const bool allow_runtime_query_interrupt
void optimize_ir(llvm::Function *query_func, llvm::Module *module, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
cgen_state module_
bool is_rt_udf_module_present(bool cpu_only=false)
#define VLOG(n)
Definition: Logger.h:291
std::pair< llvm::Function *, std::vector< llvm::Value * > > create_row_function(const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Function *query_func, llvm::Module *module, llvm::LLVMContext &context)
void bind_query(llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)

+ Here is the call graph for this function:

AggregatedColRange Executor::computeColRangesCache ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 2893 of file Execute.cpp.

References catalog_(), CHECK(), getLeafColumnRange(), AggregatedColRange::setColRange(), and ExpressionRange::typeSupportsRange().

2894  {
2895  AggregatedColRange agg_col_range_cache;
2896  CHECK(catalog_);
2897  std::unordered_set<int> phys_table_ids;
2898  for (const auto& phys_input : phys_inputs) {
2899  phys_table_ids.insert(phys_input.table_id);
2900  }
2901  std::vector<InputTableInfo> query_infos;
2902  for (const int table_id : phys_table_ids) {
2903  query_infos.emplace_back(InputTableInfo{table_id, getTableInfo(table_id)});
2904  }
2905  for (const auto& phys_input : phys_inputs) {
2906  const auto cd =
2907  catalog_->getMetadataForColumn(phys_input.table_id, phys_input.col_id);
2908  CHECK(cd);
2909  if (ExpressionRange::typeSupportsRange(cd->columnType)) {
2910  const auto col_var = boost::make_unique<Analyzer::ColumnVar>(
2911  cd->columnType, phys_input.table_id, phys_input.col_id, 0);
2912  const auto col_range = getLeafColumnRange(col_var.get(), query_infos, this, false);
2913  agg_col_range_cache.setColRange(phys_input, col_range);
2914  }
2915  }
2916  return agg_col_range_cache;
2917 }
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const
Definition: Execute.cpp:249
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
ExpressionRange getLeafColumnRange(const Analyzer::ColumnVar *col_expr, const std::vector< InputTableInfo > &query_infos, const Executor *executor, const bool is_outer_join_proj)
void setColRange(const PhysicalInput &, const ExpressionRange &)
static bool typeSupportsRange(const SQLTypeInfo &ti)

+ Here is the call graph for this function:

StringDictionaryGenerations Executor::computeStringDictionaryGenerations ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 2919 of file Execute.cpp.

References catalog_(), CHECK(), kENCODING_DICT, and StringDictionaryGenerations::setGeneration().

2920  {
2921  StringDictionaryGenerations string_dictionary_generations;
2922  CHECK(catalog_);
2923  for (const auto& phys_input : phys_inputs) {
2924  const auto cd =
2925  catalog_->getMetadataForColumn(phys_input.table_id, phys_input.col_id);
2926  CHECK(cd);
2927  const auto& col_ti =
2928  cd->columnType.is_array() ? cd->columnType.get_elem_type() : cd->columnType;
2929  if (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) {
2930  const int dict_id = col_ti.get_comp_param();
2931  const auto dd = catalog_->getMetadataForDict(dict_id);
2932  CHECK(dd && dd->stringDict);
2933  string_dictionary_generations.setGeneration(dict_id,
2934  dd->stringDict->storageEntryCount());
2935  }
2936  }
2937  return string_dictionary_generations;
2938 }
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
void setGeneration(const uint32_t id, const size_t generation)
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1444

+ Here is the call graph for this function:
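Note: a "generation" here is simply the dictionary's entry count captured when the query starts, so later checks can detect that a dictionary has grown concurrently. A minimal sketch of the snapshot step in plain C++ (not OmniSciDB code, hypothetical data):

#include <cstdio>
#include <unordered_map>

int main() {
  // dict_id -> current number of entries (stand-in for StringDictionary::storageEntryCount())
  std::unordered_map<int, size_t> dict_sizes{{1, 100}, {2, 42}};
  std::unordered_map<int, size_t> generations;
  for (const auto& [dict_id, entry_count] : dict_sizes) {
    generations[dict_id] = entry_count;  // setGeneration(dict_id, entry_count)
  }
  std::printf("generation of dict 1: %zu\n", generations[1]);
  return 0;
}
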

TableGenerations Executor::computeTableGenerations ( std::unordered_set< int >  phys_table_ids)
private

Definition at line 2940 of file Execute.cpp.

References TableGenerations::setGeneration().

2941  {
2942  TableGenerations table_generations;
2943  for (const int table_id : phys_table_ids) {
2944  const auto table_info = getTableInfo(table_id);
2945  table_generations.setGeneration(
2946  table_id, TableGeneration{table_info.getPhysicalNumTuples(), 0});
2947  }
2948  return table_generations;
2949 }
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const
Definition: Execute.cpp:249
void setGeneration(const uint32_t id, const TableGeneration &generation)

+ Here is the call graph for this function:

bool Executor::containsLeftDeepOuterJoin ( ) const
inline

Definition at line 375 of file Execute.h.

References cgen_state_.

375  {
376  return cgen_state_->contains_left_deep_outer_join_;
377  }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
void Executor::createErrorCheckControlFlow ( llvm::Function *  query_func,
bool  run_with_dynamic_watchdog,
bool  run_with_allowing_runtime_interrupt,
ExecutorDeviceType  device_type 
)
private

Definition at line 1336 of file NativeCodegen.cpp.

1339  {
1340  // check whether the row processing was successful; currently, it can
1341  // fail by running out of group by buffer slots
1342 
1343  if (run_with_dynamic_watchdog && run_with_allowing_runtime_interrupt) {
1344  // when both dynamic watchdog and runtime interrupt turns on
1345  // we use dynamic watchdog
1346  run_with_allowing_runtime_interrupt = false;
1347  }
1348 
1349  llvm::Value* row_count = nullptr;
1350  if ((run_with_dynamic_watchdog || run_with_allowing_runtime_interrupt) &&
1351  device_type == ExecutorDeviceType::GPU) {
1352  row_count =
1353  find_variable_in_basic_block<llvm::LoadInst>(query_func, ".entry", "row_count");
1354  }
1355 
1356  bool done_splitting = false;
1357  for (auto bb_it = query_func->begin(); bb_it != query_func->end() && !done_splitting;
1358  ++bb_it) {
1359  llvm::Value* pos = nullptr;
1360  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); ++inst_it) {
1361  if ((run_with_dynamic_watchdog || run_with_allowing_runtime_interrupt) &&
1362  llvm::isa<llvm::PHINode>(*inst_it)) {
1363  if (inst_it->getName() == "pos") {
1364  pos = &*inst_it;
1365  }
1366  continue;
1367  }
1368  if (!llvm::isa<llvm::CallInst>(*inst_it)) {
1369  continue;
1370  }
1371  auto& filter_call = llvm::cast<llvm::CallInst>(*inst_it);
1372  if (std::string(filter_call.getCalledFunction()->getName()) == "row_process") {
1373  auto next_inst_it = inst_it;
1374  ++next_inst_it;
1375  auto new_bb = bb_it->splitBasicBlock(next_inst_it);
1376  auto& br_instr = bb_it->back();
1377  llvm::IRBuilder<> ir_builder(&br_instr);
1378  llvm::Value* err_lv = &*inst_it;
1379  if (run_with_dynamic_watchdog) {
1380  CHECK(pos);
1381  llvm::Value* call_watchdog_lv = nullptr;
1382  if (device_type == ExecutorDeviceType::GPU) {
1383  // In order to make sure all threads within a block see the same barrier,
1384  // only those blocks whose none of their threads have experienced the critical
1385  // edge will go through the dynamic watchdog computation
1386  CHECK(row_count);
1387  auto crit_edge_rem =
1388  (blockSize() & (blockSize() - 1))
1389  ? ir_builder.CreateSRem(
1390  row_count,
1391  cgen_state_->llInt(static_cast<int64_t>(blockSize())))
1392  : ir_builder.CreateAnd(
1393  row_count,
1394  cgen_state_->llInt(static_cast<int64_t>(blockSize() - 1)));
1395  auto crit_edge_threshold = ir_builder.CreateSub(row_count, crit_edge_rem);
1396  crit_edge_threshold->setName("crit_edge_threshold");
1397 
1398  // only those threads where pos < crit_edge_threshold go through dynamic
1399  // watchdog call
1400  call_watchdog_lv =
1401  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, pos, crit_edge_threshold);
1402  } else {
1403  // CPU path: run watchdog for every 64th row
1404  auto dw_predicate = ir_builder.CreateAnd(pos, uint64_t(0x3f));
1405  call_watchdog_lv = ir_builder.CreateICmp(
1406  llvm::ICmpInst::ICMP_EQ, dw_predicate, cgen_state_->llInt(int64_t(0LL)));
1407  }
1408  CHECK(call_watchdog_lv);
1409  auto error_check_bb = bb_it->splitBasicBlock(
1410  llvm::BasicBlock::iterator(br_instr), ".error_check");
1411  auto& watchdog_br_instr = bb_it->back();
1412 
1413  auto watchdog_check_bb = llvm::BasicBlock::Create(
1414  cgen_state_->context_, ".watchdog_check", query_func, error_check_bb);
1415  llvm::IRBuilder<> watchdog_ir_builder(watchdog_check_bb);
1416  auto detected_timeout = watchdog_ir_builder.CreateCall(
1417  cgen_state_->module_->getFunction("dynamic_watchdog"), {});
1418  auto timeout_err_lv = watchdog_ir_builder.CreateSelect(
1419  detected_timeout, cgen_state_->llInt(Executor::ERR_OUT_OF_TIME), err_lv);
1420  watchdog_ir_builder.CreateBr(error_check_bb);
1421 
1422  llvm::ReplaceInstWithInst(
1423  &watchdog_br_instr,
1424  llvm::BranchInst::Create(
1425  watchdog_check_bb, error_check_bb, call_watchdog_lv));
1426  ir_builder.SetInsertPoint(&br_instr);
1427  auto unified_err_lv = ir_builder.CreatePHI(err_lv->getType(), 2);
1428 
1429  unified_err_lv->addIncoming(timeout_err_lv, watchdog_check_bb);
1430  unified_err_lv->addIncoming(err_lv, &*bb_it);
1431  err_lv = unified_err_lv;
1432  } else if (run_with_allowing_runtime_interrupt) {
1433  CHECK(pos);
1434  llvm::Value* call_check_interrupt_lv = nullptr;
1435  if (device_type == ExecutorDeviceType::GPU) {
1436  // approximate how many times the %pos variable
1437  // is increased --> the number of iteration
1438  int32_t num_shift_by_gridDim = getExpOfTwo(gridSize());
1439  int32_t num_shift_by_blockDim = getExpOfTwo(blockSize());
1440  if (!isPowOfTwo(gridSize())) {
1441  num_shift_by_gridDim++;
1442  }
1443  if (!isPowOfTwo(blockSize())) {
1444  num_shift_by_blockDim++;
1445  }
1446  int total_num_shift = num_shift_by_gridDim + num_shift_by_blockDim;
1447  // check the interrupt flag for every 64th iteration
1448  llvm::Value* pos_shifted_per_iteration =
1449  ir_builder.CreateLShr(pos, cgen_state_->llInt(total_num_shift));
1450  auto interrupt_predicate =
1451  ir_builder.CreateAnd(pos_shifted_per_iteration, uint64_t(0x3f));
1452  call_check_interrupt_lv =
1453  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
1454  interrupt_predicate,
1455  cgen_state_->llInt(int64_t(0LL)));
1456  } else {
1457  // CPU path: run interrupt checker for every 64th row
1458  auto interrupt_predicate = ir_builder.CreateAnd(pos, uint64_t(0x3f));
1459  call_check_interrupt_lv =
1460  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
1461  interrupt_predicate,
1462  cgen_state_->llInt(int64_t(0LL)));
1463  }
1464  CHECK(call_check_interrupt_lv);
1465  auto error_check_bb = bb_it->splitBasicBlock(
1466  llvm::BasicBlock::iterator(br_instr), ".error_check");
1467  auto& check_interrupt_br_instr = bb_it->back();
1468 
1469  auto interrupt_check_bb = llvm::BasicBlock::Create(
1470  cgen_state_->context_, ".interrupt_check", query_func, error_check_bb);
1471  llvm::IRBuilder<> interrupt_checker_ir_builder(interrupt_check_bb);
1472  auto detected_interrupt = interrupt_checker_ir_builder.CreateCall(
1473  cgen_state_->module_->getFunction("check_interrupt"), {});
1474  auto interrupt_err_lv = interrupt_checker_ir_builder.CreateSelect(
1475  detected_interrupt, cgen_state_->llInt(Executor::ERR_INTERRUPTED), err_lv);
1476  interrupt_checker_ir_builder.CreateBr(error_check_bb);
1477 
1478  llvm::ReplaceInstWithInst(
1479  &check_interrupt_br_instr,
1480  llvm::BranchInst::Create(
1481  interrupt_check_bb, error_check_bb, call_check_interrupt_lv));
1482  ir_builder.SetInsertPoint(&br_instr);
1483  auto unified_err_lv = ir_builder.CreatePHI(err_lv->getType(), 2);
1484 
1485  unified_err_lv->addIncoming(interrupt_err_lv, interrupt_check_bb);
1486  unified_err_lv->addIncoming(err_lv, &*bb_it);
1487  err_lv = unified_err_lv;
1488  }
1489  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1490  err_lv =
1491  ir_builder.CreateCall(cgen_state_->module_->getFunction("record_error_code"),
1492  std::vector<llvm::Value*>{err_lv, error_code_arg});
1493  if (device_type == ExecutorDeviceType::GPU) {
1494  // let kernel execution finish as expected, regardless of the observed error,
1495  // unless it is from the dynamic watchdog where all threads within that block
1496  // return together.
1497  if (run_with_allowing_runtime_interrupt) {
1498  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
1499  err_lv,
1500  cgen_state_->llInt(Executor::ERR_INTERRUPTED));
1501  } else {
1502  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
1503  err_lv,
1504  cgen_state_->llInt(Executor::ERR_OUT_OF_TIME));
1505  }
1506 
1507  } else {
1508  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_NE,
1509  err_lv,
1510  cgen_state_->llInt(static_cast<int32_t>(0)));
1511  }
1512  auto error_bb = llvm::BasicBlock::Create(
1513  cgen_state_->context_, ".error_exit", query_func, new_bb);
1514  llvm::ReturnInst::Create(cgen_state_->context_, error_bb);
1515  llvm::ReplaceInstWithInst(&br_instr,
1516  llvm::BranchInst::Create(error_bb, new_bb, err_lv));
1517  done_splitting = true;
1518  break;
1519  }
1520  }
1521  }
1522  CHECK(done_splitting);
1523 }
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:988
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
unsigned getExpOfTwo(unsigned n)
Definition: MathUtils.h:24
bool isPowOfTwo(unsigned n)
Definition: MathUtils.h:20
CHECK(cgen_state)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:117
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:987
unsigned gridSize() const
Definition: Execute.cpp:2605
unsigned blockSize() const
Definition: Execute.cpp:2613
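Note: on the CPU path the generated code does not probe the watchdog (or the interrupt flag) on every iteration; it masks the row position and checks only every 64th row. A minimal sketch of that pattern in plain C++ (not OmniSciDB code; the error code and the check function are hypothetical stand-ins):

#include <cstdint>
#include <cstdio>

bool watchdog_or_interrupt_fired(uint64_t /*pos*/) {
  return false;  // stand-in for the runtime's dynamic_watchdog() / check_interrupt()
}

int main() {
  constexpr int32_t kErrOutOfTime = 9;  // illustrative error code only
  int32_t err = 0;
  for (uint64_t pos = 0; pos < 1000; ++pos) {
    if ((pos & 0x3f) == 0 && watchdog_or_interrupt_fired(pos)) {
      err = kErrOutOfTime;  // recorded, then execution leaves through the error block
      break;
    }
    // ... row_process(pos) ...
  }
  std::printf("err = %d\n", err);
  return 0;
}

On GPU the same idea applies, but the predicate is derived from the block/grid geometry so that all threads of a block take the check together.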
int Executor::deviceCount ( const ExecutorDeviceType  device_type) const
private

Definition at line 597 of file Execute.cpp.

References catalog_(), CHECK(), and GPU.

597  {
598  if (device_type == ExecutorDeviceType::GPU) {
599  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
600  CHECK(cuda_mgr);
601  return cuda_mgr->getDeviceCount();
602  } else {
603  return 1;
604  }
605 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:149
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961

+ Here is the call graph for this function:

int Executor::deviceCountForMemoryLevel ( const Data_Namespace::MemoryLevel  memory_level) const
private

Definition at line 607 of file Execute.cpp.

References CPU, GPU, and Data_Namespace::GPU_LEVEL.

608  {
609  return memory_level == GPU_LEVEL ? deviceCount(ExecutorDeviceType::GPU)
610  : deviceCount(ExecutorDeviceType::CPU);
611 }
ExecutorDeviceType
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:597
int64_t Executor::deviceCycles ( int  milliseconds) const
private

Definition at line 2621 of file Execute.cpp.

References catalog_(), and CHECK().

2621  {
2622  CHECK(catalog_);
2623  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
2624  CHECK(cuda_mgr);
2625  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
2626  return static_cast<int64_t>(dev_props.front().clockKhz) * milliseconds;
2627 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:149
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961

+ Here is the call graph for this function:
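Note: since a clock rate reported in kHz completes exactly that many cycles per millisecond, the conversion is a single multiplication, cycles = clockKhz * milliseconds. A tiny sketch in plain C++ (not OmniSciDB code, hypothetical clock rate):

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t clock_khz = 1'350'000;  // hypothetical 1.35 GHz device clock
  const int milliseconds = 100;
  const int64_t cycles = clock_khz * static_cast<int64_t>(milliseconds);
  std::printf("%lld cycles\n", static_cast<long long>(cycles));
  return 0;
}
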

template<typename THREAD_POOL >
void Executor::dispatchFragments ( const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)>  dispatch,
const ExecutionDispatch execution_dispatch,
const std::vector< InputTableInfo > &  table_infos,
const ExecutionOptions eo,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const size_t  context_count,
const QueryCompilationDescriptor query_comp_desc,
const QueryMemoryDescriptor query_mem_desc,
QueryFragmentDescriptor fragment_descriptor,
std::unordered_set< int > &  available_gpus,
int &  available_cpus 
)
private

Definition at line 1748 of file Execute.cpp.

References ExecutionOptions::allow_multifrag, QueryFragmentDescriptor::assignFragsToKernelDispatch(), QueryFragmentDescriptor::assignFragsToMultiDispatch(), QueryFragmentDescriptor::buildFragmentKernelMap(), catalog_(), CHECK(), CHECK_GE, CHECK_GT, anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), g_inner_join_fragment_skipping, QueryCompilationDescriptor::getDeviceType(), QueryMemoryDescriptor::getEntryCount(), Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragOffsets(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, anonymous_namespace{Execute.cpp}::has_lazy_fetched_columns(), logger::INFO, KernelPerFragment, LOG, MultifragmentKernel, Projection, query_mem_desc, QueryFragmentDescriptor::shouldCheckWorkUnitWatchdog(), QueryMemoryDescriptor::toString(), VLOG, and ExecutionOptions::with_watchdog.

1766  {
1767  THREAD_POOL query_threads;
1768  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1769  CHECK(!ra_exe_unit.input_descs.empty());
1770 
1771  const auto device_type = query_comp_desc.getDeviceType();
1772  const bool uses_lazy_fetch =
1773  plan_state_->allow_lazy_fetch_ &&
1774  has_lazy_fetched_columns(getColLazyFetchInfo(ra_exe_unit.target_exprs));
1775  const bool use_multifrag_kernel = (device_type == ExecutorDeviceType::GPU) &&
1776  eo.allow_multifrag && (!uses_lazy_fetch || is_agg);
1777 
1778  const auto device_count = deviceCount(device_type);
1779  CHECK_GT(device_count, 0);
1780 
1781  fragment_descriptor.buildFragmentKernelMap(ra_exe_unit,
1782  execution_dispatch.getFragOffsets(),
1783  device_count,
1784  device_type,
1785  use_multifrag_kernel,
1786  g_inner_join_fragment_skipping,
1787  this);
1788  if (eo.with_watchdog && fragment_descriptor.shouldCheckWorkUnitWatchdog()) {
1789  checkWorkUnitWatchdog(ra_exe_unit, table_infos, *catalog_, device_type, device_count);
1790  }
1791 
1792  if (use_multifrag_kernel) {
1793  VLOG(1) << "Dispatching multifrag kernels";
1794  VLOG(1) << query_mem_desc.toString();
1795 
1796  // NB: We should never be on this path when the query is retried because of running
1797  // out of group by slots; also, for scan only queries on CPU we want the
1798  // high-granularity, fragment by fragment execution instead. For scan only queries on
1799  // GPU, we want the multifrag kernel path to save the overhead of allocating an output
1800  // buffer per fragment.
1801  auto multifrag_kernel_dispatch =
1802  [&query_threads, &dispatch, query_comp_desc, query_mem_desc](
1803  const int device_id,
1804  const FragmentsList& frag_list,
1805  const int64_t rowid_lookup_key) {
1806  query_threads.append(dispatch,
1807  ExecutorDeviceType::GPU,
1808  device_id,
1809  query_comp_desc,
1810  query_mem_desc,
1811  frag_list,
1812  ExecutorDispatchMode::MultifragmentKernel,
1813  rowid_lookup_key);
1814  };
1815  fragment_descriptor.assignFragsToMultiDispatch(multifrag_kernel_dispatch);
1816  } else {
1817  VLOG(1) << "Dispatching kernel per fragment";
1818  VLOG(1) << query_mem_desc.toString();
1819 
1820  if (!ra_exe_unit.use_bump_allocator && allow_single_frag_table_opt &&
1821  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1822  table_infos.size() == 1 && table_infos.front().table_id > 0) {
1823  const auto max_frag_size =
1824  table_infos.front().info.getFragmentNumTuplesUpperBound();
1825  if (max_frag_size < query_mem_desc.getEntryCount()) {
1826  LOG(INFO) << "Lowering scan limit from " << query_mem_desc.getEntryCount()
1827  << " to match max fragment size " << max_frag_size
1828  << " for kernel per fragment execution path.";
1829  throw CompilationRetryNewScanLimit(max_frag_size);
1830  }
1831  }
1832 
1833  size_t frag_list_idx{0};
1834  auto fragment_per_kernel_dispatch = [&query_threads,
1835  &dispatch,
1836  &frag_list_idx,
1837  &device_type,
1838  query_comp_desc,
1839  query_mem_desc](const int device_id,
1840  const FragmentsList& frag_list,
1841  const int64_t rowid_lookup_key) {
1842  if (!frag_list.size()) {
1843  return;
1844  }
1845  CHECK_GE(device_id, 0);
1846 
1847  query_threads.append(dispatch,
1848  device_type,
1849  device_id,
1850  query_comp_desc,
1851  query_mem_desc,
1852  frag_list,
1853  ExecutorDispatchMode::KernelPerFragment,
1854  rowid_lookup_key);
1855 
1856  ++frag_list_idx;
1857  };
1858 
1859  fragment_descriptor.assignFragsToKernelDispatch(fragment_per_kernel_dispatch,
1860  ra_exe_unit);
1861  }
1862  query_threads.join();
1863 }
bool is_agg(const Analyzer::Expr *expr)
ExecutorDeviceType getDeviceType() const
std::string toString() const
#define LOG(tag)
Definition: Logger.h:188
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:287
#define CHECK_GE(x, y)
Definition: Logger.h:210
void assignFragsToKernelDispatch(DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:597
#define CHECK_GT(x, y)
Definition: Logger.h:209
std::vector< FragmentsPerTable > FragmentsList
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:79
const bool allow_multifrag
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932
void checkWorkUnitWatchdog(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const Catalog_Namespace::Catalog &cat, const ExecutorDeviceType device_type, const int device_count)
Definition: Execute.cpp:1034
QueryDescriptionType getQueryDescriptionType() const
void assignFragsToMultiDispatch(DISPATCH_FCN f) const
void buildFragmentKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
bool has_lazy_fetched_columns(const std::vector< ColumnLazyFetchInfo > &fetched_cols)
Definition: Execute.cpp:1736
#define VLOG(n)
Definition: Logger.h:291
const bool with_watchdog
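
The THREAD_POOL template parameter only needs to provide append() and join(), as the query_threads usage in the listing above shows; the concrete pools are FuturesThreadPool and, when built with TBB, TbbThreadPool. As a rough illustration only (not part of the OmniSciDB sources), a minimal stand-in with the same surface could look like this:

  // Hypothetical thread pool exposing the append()/join() surface used by
  // dispatchFragments above. The real implementations live in the threadpool
  // headers (FuturesThreadPool, TbbThreadPool); this sketch simply runs each
  // appended task (assumed to return void) on a std::async thread and waits
  // for all of them in join().
  #include <future>
  #include <utility>
  #include <vector>

  struct SimpleFuturesPool {
    template <typename Fn, typename... Args>
    void append(Fn&& fn, Args&&... args) {
      futures_.emplace_back(std::async(std::launch::async,
                                       std::forward<Fn>(fn),
                                       std::forward<Args>(args)...));
    }
    void join() {
      for (auto& f : futures_) {
        f.get();  // rethrows any exception raised by a kernel task
      }
    }

   private:
    std::vector<std::future<void>> futures_;
  };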


ResultSetPtr Executor::executeExplain ( const QueryCompilationDescriptor &  query_comp_desc)
private

Definition at line 1427 of file Execute.cpp.

References QueryCompilationDescriptor::getIR().

1427  {
1428  return std::make_shared<ResultSet>(query_comp_desc.getIR());
1429 }


int32_t Executor::executePlanWithGroupBy ( const RelAlgExecutionUnit &  ra_exe_unit,
const CompilationResult &  compilation_result,
const bool  hoist_literals,
ResultSetPtr &  results,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< size_t >  outer_tab_frag_ids,
QueryExecutionContext *  query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr *  data_mgr,
const int  device_id,
const int64_t  limit,
const uint32_t  start_rowid,
const uint32_t  num_tables,
RenderInfo *  render_info 
)
private

Definition at line 2405 of file Execute.cpp.

References CHECK(), CHECK_NE, anonymous_namespace{Execute.cpp}::check_rows_less_than_needed(), CPU, DEBUG_TIMER, ERR_DIV_BY_ZERO, ERR_INTERRUPTED, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES, error_code, logger::FATAL, g_enable_dynamic_watchdog, g_enable_runtime_query_interrupt, QueryExecutionContext::getRowSet(), GPU, RelAlgExecutionUnit::groupby_exprs, INJECT_TIMER, interrupted_(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), Executor::CompilationResult::literal_values, LOG, Executor::CompilationResult::native_functions, num_rows, QueryExecutionContext::query_mem_desc_, RenderInfo::render_allocator_map_ptr, and RenderInfo::useCudaBuffers().

2421  {
2422  auto timer = DEBUG_TIMER(__func__);
2424  CHECK(!results);
2425  if (col_buffers.empty()) {
2426  return 0;
2427  }
2428  CHECK_NE(ra_exe_unit.groupby_exprs.size(), size_t(0));
2429  // TODO(alex):
2430  // 1. Optimize size (make keys more compact).
2431  // 2. Resize on overflow.
2432  // 3. Optimize runtime.
2433  auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
2434  int32_t error_code = device_type == ExecutorDeviceType::GPU ? 0 : start_rowid;
2435  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
2436  if ((g_enable_dynamic_watchdog || g_enable_runtime_query_interrupt) && interrupted_) {
2437  return ERR_INTERRUPTED;
2438  }
2439 
2440  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
2441  if (render_info && render_info->useCudaBuffers()) {
2442  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
2443  }
2444  if (device_type == ExecutorDeviceType::CPU) {
2445  query_exe_context->launchCpuCode(ra_exe_unit,
2446  compilation_result.native_functions,
2447  hoist_literals,
2448  hoist_buf,
2449  col_buffers,
2450  num_rows,
2451  frag_offsets,
2452  scan_limit,
2453  &error_code,
2454  num_tables,
2455  join_hash_table_ptrs);
2456  } else {
2457  try {
2458  query_exe_context->launchGpuCode(ra_exe_unit,
2459  compilation_result.native_functions,
2460  hoist_literals,
2461  hoist_buf,
2462  col_buffers,
2463  num_rows,
2464  frag_offsets,
2465  scan_limit,
2466  data_mgr,
2467  blockSize(),
2468  gridSize(),
2469  device_id,
2470  &error_code,
2471  num_tables,
2472  join_hash_table_ptrs,
2473  render_allocator_map_ptr);
2474  } catch (const OutOfMemory&) {
2475  return ERR_OUT_OF_GPU_MEM;
2476  } catch (const OutOfRenderMemory&) {
2477  return ERR_OUT_OF_RENDER_MEM;
2478  } catch (const StreamingTopNNotSupportedInRenderQuery&) {
2479  return ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY;
2480  } catch (const std::bad_alloc&) {
2481  return ERR_SPECULATIVE_TOP_OOM;
2482  } catch (const std::exception& e) {
2483  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
2484  }
2485  }
2486 
2487  if (error_code == Executor::ERR_OVERFLOW_OR_UNDERFLOW ||
2488  error_code == Executor::ERR_DIV_BY_ZERO ||
2489  error_code == Executor::ERR_OUT_OF_TIME ||
2490  error_code == Executor::ERR_INTERRUPTED ||
2491  error_code == Executor::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES) {
2492  return error_code;
2493  }
2494 
2495  if (error_code != Executor::ERR_OVERFLOW_OR_UNDERFLOW &&
2496  error_code != Executor::ERR_DIV_BY_ZERO && !render_allocator_map_ptr) {
2497  results =
2498  query_exe_context->getRowSet(ra_exe_unit, query_exe_context->query_mem_desc_);
2499  CHECK(results);
2500  results->holdLiterals(hoist_buf);
2501  }
2502  if (error_code < 0 && render_allocator_map_ptr) {
2503  // More rows passed the filter than available slots. We don't have a count to check,
2504  // so assume we met the limit if a scan limit is set
2505  if (scan_limit != 0) {
2506  return 0;
2507  } else {
2508  return error_code;
2509  }
2510  }
2511  if (error_code && (!scan_limit || check_rows_less_than_needed(results, scan_limit))) {
2512  return error_code; // unlucky, not enough results and we ran out of slots
2513  }
2514 
2515  return 0;
2516 }
bool useCudaBuffers() const
Definition: RenderInfo.cpp:66
const int8_t const int64_t * num_rows
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:988
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables)
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &cu_functions, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables, RenderAllocatorMap *render_allocator_map)
#define LOG(tag)
Definition: Logger.h:188
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:2518
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY
Definition: Execute.h:992
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:979
ResultSetPtr getRowSet(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc) const
CHECK(cgen_state)
#define INJECT_TIMER(DESC)
Definition: measure.h:91
#define CHECK_NE(x, y)
Definition: Logger.h:206
static const int32_t ERR_OUT_OF_RENDER_MEM
Definition: Execute.h:983
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:985
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:987
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
Definition: Execute.h:993
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:980
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:335
unsigned gridSize() const
Definition: Execute.cpp:2605
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:32
bool check_rows_less_than_needed(const ResultSetPtr &results, const size_t scan_limit)
Definition: Execute.cpp:2398
static const int32_t ERR_SPECULATIVE_TOP_OOM
Definition: Execute.h:986
#define DEBUG_TIMER(name)
Definition: Logger.h:313
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2405
unsigned blockSize() const
Definition: Execute.cpp:2613
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:104
const QueryMemoryDescriptor query_mem_desc_
bool interrupted_
Definition: Execute.h:941


int32_t Executor::executePlanWithoutGroupBy ( const RelAlgExecutionUnit &  ra_exe_unit,
const CompilationResult &  compilation_result,
const bool  hoist_literals,
ResultSetPtr &  results,
const std::vector< Analyzer::Expr * > &  target_exprs,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t * >> &  col_buffers,
QueryExecutionContext *  query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr *  data_mgr,
const int  device_id,
const uint32_t  start_rowid,
const uint32_t  num_tables,
RenderInfo *  render_info 
)
private

Definition at line 2218 of file Execute.cpp.

References CHECK(), CHECK_EQ, CPU, DEBUG_TIMER, ERR_DIV_BY_ZERO, ERR_INTERRUPTED, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES, error_code, RelAlgExecutionUnit::estimator, QueryExecutionContext::estimator_result_set_, logger::FATAL, g_bigint_count, g_enable_dynamic_watchdog, g_enable_runtime_query_interrupt, get_target_info(), QueryExecutionContext::getAggInitValForIndex(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), GPU, INJECT_TIMER, interrupted_(), is_distinct_target(), RenderInfo::isPotentialInSituRender(), kAPPROX_COUNT_DISTINCT, kAVG, kCOUNT, kSAMPLE, QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), Executor::CompilationResult::literal_values, LOG, Executor::CompilationResult::native_functions, num_rows, out, QueryExecutionContext::query_buffers_, QueryExecutionContext::query_mem_desc_, reduceResults(), RenderInfo::render_allocator_map_ptr, takes_float_argument(), and RenderInfo::useCudaBuffers().

2233  {
2235  auto timer = DEBUG_TIMER(__func__);
2236  CHECK(!results);
2237  if (col_buffers.empty()) {
2238  return 0;
2239  }
2240 
2241  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
2242  if (render_info) {
2243  // TODO(adb): make sure that we either never get here in the CPU case, or if we do get
2244  // here, we are in non-insitu mode.
2245  CHECK(render_info->useCudaBuffers() || !render_info->isPotentialInSituRender())
2246  << "CUDA disabled rendering in the executePlanWithoutGroupBy query path is "
2247  "currently unsupported.";
2248  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
2249  }
2250 
2251  int32_t error_code = device_type == ExecutorDeviceType::GPU ? 0 : start_rowid;
2252  std::vector<int64_t*> out_vec;
2253  const auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
2254  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
2255  std::unique_ptr<OutVecOwner> output_memory_scope;
2256  if ((g_enable_dynamic_watchdog || g_enable_runtime_query_interrupt) && interrupted_) {
2257  resetInterrupt();
2258  return ERR_INTERRUPTED;
2259  }
2260  if (device_type == ExecutorDeviceType::CPU) {
2261  out_vec = query_exe_context->launchCpuCode(ra_exe_unit,
2262  compilation_result.native_functions,
2263  hoist_literals,
2264  hoist_buf,
2265  col_buffers,
2266  num_rows,
2267  frag_offsets,
2268  0,
2269  &error_code,
2270  num_tables,
2271  join_hash_table_ptrs);
2272  output_memory_scope.reset(new OutVecOwner(out_vec));
2273  } else {
2274  try {
2275  out_vec = query_exe_context->launchGpuCode(ra_exe_unit,
2276  compilation_result.native_functions,
2277  hoist_literals,
2278  hoist_buf,
2279  col_buffers,
2280  num_rows,
2281  frag_offsets,
2282  0,
2283  data_mgr,
2284  blockSize(),
2285  gridSize(),
2286  device_id,
2287  &error_code,
2288  num_tables,
2289  join_hash_table_ptrs,
2290  render_allocator_map_ptr);
2291  output_memory_scope.reset(new OutVecOwner(out_vec));
2292  } catch (const OutOfMemory&) {
2293  return ERR_OUT_OF_GPU_MEM;
2294  } catch (const std::exception& e) {
2295  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
2296  }
2297  }
2298  if (error_code == Executor::ERR_OVERFLOW_OR_UNDERFLOW ||
2299  error_code == Executor::ERR_DIV_BY_ZERO ||
2300  error_code == Executor::ERR_OUT_OF_TIME ||
2301  error_code == Executor::ERR_INTERRUPTED ||
2302  error_code == Executor::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES) {
2303  return error_code;
2304  }
2305  if (ra_exe_unit.estimator) {
2306  CHECK(!error_code);
2307  results =
2308  std::shared_ptr<ResultSet>(query_exe_context->estimator_result_set_.release());
2309  return 0;
2310  }
2311  std::vector<int64_t> reduced_outs;
2312  const auto num_frags = col_buffers.size();
2313  const size_t entry_count = device_type == ExecutorDeviceType::GPU
2314  ? num_frags * blockSize() * gridSize()
2315  : num_frags;
2316  if (size_t(1) == entry_count) {
2317  for (auto out : out_vec) {
2318  CHECK(out);
2319  reduced_outs.push_back(*out);
2320  }
2321  } else {
2322  size_t out_vec_idx = 0;
2323 
2324  for (const auto target_expr : target_exprs) {
2325  const auto agg_info = get_target_info(target_expr, g_bigint_count);
2326  CHECK(agg_info.is_agg);
2327 
2328  const int num_iterations = agg_info.sql_type.is_geometry()
2329  ? agg_info.sql_type.get_physical_coord_cols()
2330  : 1;
2331 
2332  for (int i = 0; i < num_iterations; i++) {
2333  int64_t val1;
2334  const bool float_argument_input = takes_float_argument(agg_info);
2335  if (is_distinct_target(agg_info)) {
2336  CHECK(agg_info.agg_kind == kCOUNT ||
2337  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
2338  val1 = out_vec[out_vec_idx][0];
2339  error_code = 0;
2340  } else {
2341  const auto chosen_bytes = static_cast<size_t>(
2342  query_exe_context->query_mem_desc_.getPaddedSlotWidthBytes(out_vec_idx));
2343  std::tie(val1, error_code) = Executor::reduceResults(
2344  agg_info.agg_kind,
2345  agg_info.sql_type,
2346  query_exe_context->getAggInitValForIndex(out_vec_idx),
2347  float_argument_input ? sizeof(int32_t) : chosen_bytes,
2348  out_vec[out_vec_idx],
2349  entry_count,
2350  false,
2351  float_argument_input);
2352  }
2353  if (error_code) {
2354  break;
2355  }
2356  reduced_outs.push_back(val1);
2357  if (agg_info.agg_kind == kAVG ||
2358  (agg_info.agg_kind == kSAMPLE &&
2359  (agg_info.sql_type.is_varlen() || agg_info.sql_type.is_geometry()))) {
2360  const auto chosen_bytes = static_cast<size_t>(
2361  query_exe_context->query_mem_desc_.getPaddedSlotWidthBytes(out_vec_idx +
2362  1));
2363  int64_t val2;
2364  std::tie(val2, error_code) = Executor::reduceResults(
2365  agg_info.agg_kind == kAVG ? kCOUNT : agg_info.agg_kind,
2366  agg_info.sql_type,
2367  query_exe_context->getAggInitValForIndex(out_vec_idx + 1),
2368  float_argument_input ? sizeof(int32_t) : chosen_bytes,
2369  out_vec[out_vec_idx + 1],
2370  entry_count,
2371  false,
2372  false);
2373  if (error_code) {
2374  break;
2375  }
2376  reduced_outs.push_back(val2);
2377  ++out_vec_idx;
2378  }
2379  ++out_vec_idx;
2380  }
2381  }
2382  }
2383 
2384  if (error_code) {
2385  return error_code;
2386  }
2387 
2388  CHECK_EQ(size_t(1), query_exe_context->query_buffers_->result_sets_.size());
2389  auto rows_ptr = std::shared_ptr<ResultSet>(
2390  query_exe_context->query_buffers_->result_sets_[0].release());
2391  rows_ptr->fillOneEntry(reduced_outs);
2392  results = std::move(rows_ptr);
2393  return error_code;
2394 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
bool useCudaBuffers() const
Definition: RenderInfo.cpp:66
const int8_t const int64_t * num_rows
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:988
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables)
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &cu_functions, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables, RenderAllocatorMap *render_allocator_map)
#define LOG(tag)
Definition: Logger.h:188
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:66
static std::pair< int64_t, int32_t > reduceResults(const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
Definition: Execute.cpp:614
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:121
std::unique_ptr< QueryMemoryInitializer > query_buffers_
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:2518
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:979
CHECK(cgen_state)
#define INJECT_TIMER(DESC)
Definition: measure.h:91
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:985
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:987
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:117
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
int64_t getAggInitValForIndex(const size_t index) const
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
Definition: Execute.h:993
const std::shared_ptr< Analyzer::Estimator > estimator
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:980
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2218
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:335
Definition: sqldefs.h:76
unsigned gridSize() const
Definition: Execute.cpp:2605
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:32
void resetInterrupt()
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:61
#define DEBUG_TIMER(name)
Definition: Logger.h:313
unsigned blockSize() const
Definition: Execute.cpp:2613
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t ** out
std::unique_ptr< ResultSet > estimator_result_set_
Definition: sqldefs.h:72
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:104
const QueryMemoryDescriptor query_mem_desc_
bool interrupted_
Definition: Execute.h:941


ResultSetPtr Executor::executeTableFunction ( const TableFunctionExecutionUnit  exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat 
)
private

Compiles and dispatches a table function, i.e. a function that takes one or more columns as input and returns a ResultSet, which can then be consumed by subsequent execution steps.

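The returned ResultSet is consumed like any other intermediate result. Below is a minimal sketch of what a subsequent step might do with it; result is assumed to hold the ResultSetPtr produced by this member, and rowCount()/getNextRow() are assumed to be the usual ResultSet row accessors (this is an illustration, not code from the sources):

  // Hypothetical downstream consumption of a table function result.
  if (result) {
    const size_t num_rows = result->rowCount();
    for (size_t i = 0; i < num_rows; ++i) {
      // Each row is a vector of target values, one entry per output column.
      const auto crt_row = result->getNextRow(/*translate_strings=*/true,
                                              /*decimal_to_double=*/false);
      // ... hand crt_row to the next execution step ...
    }
  }
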
Definition at line 1401 of file Execute.cpp.

References CHECK_EQ, TableFunctionCompilationContext::compile(), CompilationOptions::device_type, TableFunctionExecutionContext::execute(), and INJECT_TIMER.

1406  {
1407  INJECT_TIMER(Exec_executeTableFunction);
1408  nukeOldState(false, table_infos, nullptr);
1409 
1410  ColumnCacheMap column_cache; // Note: if we add retries to the table function
1411  // framework, we may want to move this up a level
1412 
1413  ColumnFetcher column_fetcher(this, column_cache);
1414  TableFunctionCompilationContext compilation_context;
1415  compilation_context.compile(exe_unit, co, this);
1416 
1417  TableFunctionExecutionContext exe_context(getRowSetMemoryOwner());
1418  CHECK_EQ(table_infos.size(), size_t(1));
1419  return exe_context.execute(exe_unit,
1420  table_infos.front(),
1421  &compilation_context,
1422  column_fetcher,
1423  co.device_type,
1424  this);
1425 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:241
#define INJECT_TIMER(DESC)
Definition: measure.h:91
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:2533
ExecutorDeviceType device_type
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
void compile(const TableFunctionExecutionUnit &exe_unit, const CompilationOptions &co, Executor *executor)


void Executor::executeUpdate ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const UpdateLogForFragment::Callback cb,
const bool  is_agg 
)
private

Definition at line 60 of file ExecuteUpdate.cpp.

References CHECK(), CHECK_EQ, CHECK_GT, Executor::ExecutionDispatch::compile(), CompilationOptions::device_type, FragmentsPerTable::fragment_ids, Executor::ExecutionDispatch::getFragmentResults(), KernelPerFragment, and Executor::ExecutionDispatch::run().

67  {
68  CHECK(cb);
69  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in, co);
70  ColumnCacheMap column_cache;
71 
72  ColumnFetcher column_fetcher(this, column_cache);
73  CHECK_GT(ra_exe_unit.input_descs.size(), size_t(0));
74  const auto table_id = ra_exe_unit.input_descs[0].getTableId();
75  const auto& outer_fragments = table_infos.front().info.fragments;
76 
77  std::vector<FragmentsPerTable> fragments = {{0, {0}}};
78  for (size_t tab_idx = 1; tab_idx < ra_exe_unit.input_descs.size(); tab_idx++) {
79  int table_id = ra_exe_unit.input_descs[tab_idx].getTableId();
80  CHECK_EQ(table_infos[tab_idx].table_id, table_id);
81  const auto& fragmentsPerTable = table_infos[tab_idx].info.fragments;
82  FragmentsPerTable entry = {table_id, {}};
83  for (size_t innerFragId = 0; innerFragId < fragmentsPerTable.size(); innerFragId++) {
84  entry.fragment_ids.push_back(innerFragId);
85  }
86  fragments.push_back(entry);
87  }
88 
89  // There could be a benefit to multithreading this once we see where the
90  // bottlenecks really are
91  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
92  ++fragment_index) {
93  ExecutionDispatch current_fragment_execution_dispatch(
94  this, ra_exe_unit, table_infos, cat, row_set_mem_owner, nullptr);
95 
96  const int64_t crt_fragment_tuple_count =
97  outer_fragments[fragment_index].getNumTuples();
98  int64_t max_groups_buffer_entry_guess = crt_fragment_tuple_count;
99  if (is_agg) {
100  max_groups_buffer_entry_guess =
101  std::min(2 * max_groups_buffer_entry_guess, static_cast<int64_t>(100'000'000));
102  }
103 
104  const auto execution_descriptors = current_fragment_execution_dispatch.compile(
105  max_groups_buffer_entry_guess, 8, co, eo, column_fetcher, true);
106  // We may want to consider in the future allowing this to execute on devices other
107  // than CPU
108 
109  fragments[0] = {table_id, {fragment_index}};
110 
111  current_fragment_execution_dispatch.run(
112  co.device_type,
113  0,
114  eo,
115  column_fetcher,
116  *std::get<QueryCompilationDescriptorOwned>(execution_descriptors),
117  *std::get<QueryMemoryDescriptorOwned>(execution_descriptors),
118  fragments,
119  ExecutorDispatchMode::KernelPerFragment,
120  -1);
121  const auto& proj_fragment_results =
122  current_fragment_execution_dispatch.getFragmentResults();
123  if (proj_fragment_results.empty()) {
124  continue;
125  }
126  const auto& proj_fragment_result = proj_fragment_results[0];
127  const auto proj_result_set = proj_fragment_result.first;
128  CHECK(proj_result_set);
129  cb({outer_fragments[fragment_index], fragment_index, proj_result_set});
130  }
131 }
bool is_agg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const std::vector< InputDescriptor > input_descs
#define CHECK_GT(x, y)
Definition: Logger.h:209
CHECK(cgen_state)
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:2679
ExecutorDeviceType device_type
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
std::vector< size_t > fragment_ids


ResultSetPtr Executor::executeWorkUnit ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit &  ra_exe_unit_in,
const CompilationOptions &  co,
const ExecutionOptions &  options,
const Catalog_Namespace::Catalog &  cat,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
RenderInfo *  render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap &  column_cache 
)
private

Definition at line 1123 of file Execute.cpp.

References CompilationRetryNewScanLimit::new_scan_limit_, and anonymous_namespace{Execute.cpp}::replace_scan_limit().

1134  {
1135  try {
1136  return executeWorkUnitImpl(max_groups_buffer_entry_guess,
1137  is_agg,
1138  true,
1139  query_infos,
1140  ra_exe_unit_in,
1141  co,
1142  eo,
1143  cat,
1144  row_set_mem_owner,
1145  render_info,
1146  has_cardinality_estimation,
1147  column_cache);
1148  } catch (const CompilationRetryNewScanLimit& e) {
1149  return executeWorkUnitImpl(max_groups_buffer_entry_guess,
1150  is_agg,
1151  false,
1152  query_infos,
1153  replace_scan_limit(ra_exe_unit_in, e.new_scan_limit_),
1154  co,
1155  eo,
1156  cat,
1157  row_set_mem_owner,
1158  render_info,
1159  has_cardinality_estimation,
1160  column_cache);
1161  }
1162 }
bool is_agg(const Analyzer::Expr *expr)
RelAlgExecutionUnit replace_scan_limit(const RelAlgExecutionUnit &ra_exe_unit_in, const size_t new_scan_limit)
Definition: Execute.cpp:1104
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1164


ResultSetPtr Executor::executeWorkUnitImpl ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit &  ra_exe_unit_in,
const CompilationOptions &  co,
const ExecutionOptions &  options,
const Catalog_Namespace::Catalog &  cat,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
RenderInfo *  render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap &  column_cache 
)
private

Definition at line 1164 of file Execute.cpp.

References CompilationOptions::add_delete_column, CompilationOptions::allow_lazy_fetch, CHECK(), Executor::ExecutionDispatch::compile(), anonymous_namespace{Execute.cpp}::compute_buffer_entry_guess(), CPU, cpu_threads(), DEBUG_TIMER_NEW_THREAD, CompilationOptions::device_type, ExecutionOptions::executor_type, CompilationOptions::explain_type, Data_Namespace::DataMgr::freeAllBuffers(), g_use_tbb_pool, get_available_gpus(), get_context_count(), get_min_byte_width(), Catalog_Namespace::Catalog::getDataMgr(), QueryExecutionError::getErrorCode(), Data_Namespace::DataMgr::getMemoryInfo(), GPU, Data_Namespace::GPU_LEVEL, CompilationOptions::hoist_literals, INJECT_TIMER, interrupted_(), anonymous_namespace{RelAlgExecutor.cpp}::is_agg(), ExecutionOptions::just_explain, MAX_BYTE_WIDTH_SUPPORTED, Native, CompilationOptions::opt_level, Projection, query_mem_desc, CompilationOptions::register_intel_jit_listener, Executor::ExecutionDispatch::run(), logger::thread_id(), VLOG, and CompilationOptions::with_dynamic_watchdog.

1176  {
1177  INJECT_TIMER(Exec_executeWorkUnit);
1178  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in, co);
1179  const auto device_type = getDeviceTypeForTargets(ra_exe_unit, co.device_type);
1180  CHECK(!query_infos.empty());
1181  if (!max_groups_buffer_entry_guess) {
1182  // The query has failed the first execution attempt because of running out
1183  // of group by slots. Make the conservative choice: allocate fragment size
1184  // slots and run on the CPU.
1185  CHECK(device_type == ExecutorDeviceType::CPU);
1186  max_groups_buffer_entry_guess = compute_buffer_entry_guess(query_infos);
1187  }
1188 
1189  int8_t crt_min_byte_width{get_min_byte_width()};
1190  do {
1191  ExecutionDispatch execution_dispatch(
1192  this, ra_exe_unit, query_infos, cat, row_set_mem_owner, render_info);
1193  ColumnFetcher column_fetcher(this, column_cache);
1194  std::unique_ptr<QueryCompilationDescriptor> query_comp_desc_owned;
1195  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
1196  if (eo.executor_type == ExecutorType::Native) {
1197  try {
1198  INJECT_TIMER(execution_dispatch_comp);
1199  std::tie(query_comp_desc_owned, query_mem_desc_owned) =
1200  execution_dispatch.compile(max_groups_buffer_entry_guess,
1201  crt_min_byte_width,
1202  {device_type,
1203  co.hoist_literals,
1204  co.opt_level,
1205  co.with_dynamic_watchdog,
1206  co.allow_lazy_fetch,
1207  co.add_delete_column,
1208  co.explain_type,
1209  co.register_intel_jit_listener},
1210  eo,
1211  column_fetcher,
1212  has_cardinality_estimation);
1213  CHECK(query_comp_desc_owned);
1214  crt_min_byte_width = query_comp_desc_owned->getMinByteWidth();
1215  } catch (CompilationRetryNoCompaction&) {
1216  crt_min_byte_width = MAX_BYTE_WIDTH_SUPPORTED;
1217  continue;
1218  }
1219  } else {
1220  plan_state_.reset(new PlanState(false, this));
1221  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
1222  CHECK(!query_comp_desc_owned);
1223  query_comp_desc_owned.reset(new QueryCompilationDescriptor());
1224  CHECK(!query_mem_desc_owned);
1225  query_mem_desc_owned.reset(
1227  }
1228  if (eo.just_explain) {
1229  return executeExplain(*query_comp_desc_owned);
1230  }
1231 
1232  for (const auto target_expr : ra_exe_unit.target_exprs) {
1233  plan_state_->target_exprs_.push_back(target_expr);
1234  }
1235 
1236  auto dispatch = [&execution_dispatch,
1237  &column_fetcher,
1238  &eo,
1239  parent_thread_id = logger::thread_id()](
1240  const ExecutorDeviceType chosen_device_type,
1241  int chosen_device_id,
1242  const QueryCompilationDescriptor& query_comp_desc,
1243  const QueryMemoryDescriptor& query_mem_desc,
1244  const FragmentsList& frag_list,
1245  const ExecutorDispatchMode kernel_dispatch_mode,
1246  const int64_t rowid_lookup_key) {
1247  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
1248  INJECT_TIMER(execution_dispatch_run);
1249  execution_dispatch.run(chosen_device_type,
1250  chosen_device_id,
1251  eo,
1252  column_fetcher,
1253  query_comp_desc,
1254  query_mem_desc,
1255  frag_list,
1256  kernel_dispatch_mode,
1257  rowid_lookup_key);
1258  };
1259 
1260  QueryFragmentDescriptor fragment_descriptor(
1261  ra_exe_unit,
1262  query_infos,
1263  query_comp_desc_owned->getDeviceType() == ExecutorDeviceType::GPU
1264  ? cat.getDataMgr().getMemoryInfo(Data_Namespace::GPU_LEVEL)
1265  : std::vector<Data_Namespace::MemoryInfo>{},
1266  eo.gpu_input_mem_limit_percent,
1267  eo.outer_fragment_indices);
1268 
1269  if (!eo.just_validate) {
1270  int available_cpus = cpu_threads();
1271  auto available_gpus = get_available_gpus(cat);
1272 
1273  const auto context_count =
1274  get_context_count(device_type, available_cpus, available_gpus.size());
1275  try {
1276  if (g_use_tbb_pool) {
1277 #ifdef HAVE_TBB
1278  VLOG(1) << "Using TBB thread pool for kernel dispatch.";
1279  dispatchFragments<threadpool::TbbThreadPool<void>>(dispatch,
1280  execution_dispatch,
1281  query_infos,
1282  eo,
1283  is_agg,
1284  allow_single_frag_table_opt,
1285  context_count,
1286  *query_comp_desc_owned,
1287  *query_mem_desc_owned,
1288  fragment_descriptor,
1289  available_gpus,
1290  available_cpus);
1291 #else
1292  throw std::runtime_error(
1293  "This build is not TBB enabled. Restart the server with "
1294  "\"enable-modern-thread-pool\" disabled.");
1295 #endif
1296  } else {
1297  dispatchFragments<threadpool::FuturesThreadPool<void>>(
1298  dispatch,
1299  execution_dispatch,
1300  query_infos,
1301  eo,
1302  is_agg,
1303  allow_single_frag_table_opt,
1304  context_count,
1305  *query_comp_desc_owned,
1306  *query_mem_desc_owned,
1307  fragment_descriptor,
1308  available_gpus,
1309  available_cpus);
1310  }
1311  } catch (QueryExecutionError& e) {
1312  if (eo.with_dynamic_watchdog && interrupted_ &&
1313  e.getErrorCode() == ERR_OUT_OF_TIME) {
1314  resetInterrupt();
1315  throw QueryExecutionError(ERR_INTERRUPTED);
1316  }
1317  if (eo.allow_runtime_query_interrupt && interrupted_) {
1318  resetInterrupt();
1319  throw QueryExecutionError(ERR_INTERRUPTED);
1320  }
1321  cat.getDataMgr().freeAllBuffers();
1322  if (e.getErrorCode() == ERR_OVERFLOW_OR_UNDERFLOW &&
1323  static_cast<size_t>(crt_min_byte_width << 1) <= sizeof(int64_t)) {
1324  crt_min_byte_width <<= 1;
1325  continue;
1326  }
1327  throw;
1328  }
1329  }
1330  cat.getDataMgr().freeAllBuffers();
1331  if (is_agg) {
1332  try {
1333  return collectAllDeviceResults(execution_dispatch,
1334  ra_exe_unit.target_exprs,
1335  *query_mem_desc_owned,
1336  query_comp_desc_owned->getDeviceType(),
1337  row_set_mem_owner);
1338  } catch (ReductionRanOutOfSlots&) {
1339  throw QueryExecutionError(ERR_OUT_OF_SLOTS);
1340  } catch (OverflowOrUnderflow&) {
1341  crt_min_byte_width <<= 1;
1342  continue;
1343  }
1344  }
1345  return resultsUnion(execution_dispatch);
1346 
1347  } while (static_cast<size_t>(crt_min_byte_width) <= sizeof(int64_t));
1348 
1349  return std::make_shared<ResultSet>(std::vector<TargetInfo>{},
1352  nullptr,
1353  this);
1354 }
bool is_agg(const Analyzer::Expr *expr)
ResultSetPtr collectAllDeviceResults(ExecutionDispatch &execution_dispatch, const std::vector< Analyzer::Expr * > &target_exprs, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
Definition: Execute.cpp:1562
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:988
ExecutorDeviceType
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
std::unordered_set< int > get_available_gpus(const Catalog_Namespace::Catalog &cat)
Definition: Execute.cpp:960
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:315
ExecutorOptLevel opt_level
size_t compute_buffer_entry_guess(const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:984
std::vector< FragmentsPerTable > FragmentsList
int8_t get_min_byte_width()
ExecutorDispatchMode
bool g_use_tbb_pool
Definition: Execute.cpp:73
CHECK(cgen_state)
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:972
#define INJECT_TIMER(DESC)
Definition: measure.h:91
friend class QueryMemoryDescriptor
Definition: Execute.h:1001
ExecutorExplainType explain_type
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:985
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:987
friend class QueryCompilationDescriptor
Definition: Execute.h:1000
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:2679
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel)
Definition: DataMgr.cpp:237
ResultSetPtr resultsUnion(ExecutionDispatch &execution_dispatch)
Definition: Execute.cpp:824
ExecutorDeviceType device_type
ThreadId thread_id()
Definition: Logger.cpp:715
void resetInterrupt()
static const int32_t ERR_OUT_OF_SLOTS
Definition: Execute.h:981
constexpr int8_t MAX_BYTE_WIDTH_SUPPORTED
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Definition: Execute.cpp:1433
int cpu_threads()
Definition: thread_count.h:25
#define VLOG(n)
Definition: Logger.h:291
bool interrupted_
Definition: Execute.h:941
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
Definition: Execute.cpp:1427


void Executor::executeWorkUnitPerFragment ( const RelAlgExecutionUnit ra_exe_unit,
const InputTableInfo table_info,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
PerFragmentCallBack cb 
)
private

Compiles and dispatches a work unit per fragment, processing the results with the per-fragment callback. Currently used for computing metrics over fragments (metadata).

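As an illustration only, a callback compatible with how cb is invoked in the listing below (it receives the per-fragment ResultSet and the FragmentInfo it was computed over) might collect per-fragment row counts; the exact PerFragmentCallBack typedef should be checked in Execute.h, and the snippet assumes <map> and the usual Execute.h types are available:

  // Hypothetical per-fragment callback: record how many rows each fragment produced.
  std::map<int, size_t> rows_per_fragment;
  auto cb = [&rows_per_fragment](ResultSetPtr fragment_result,
                                 const Fragmenter_Namespace::FragmentInfo& fragment_info) {
    if (fragment_result) {
      rows_per_fragment[fragment_info.fragmentId] = fragment_result->rowCount();
    }
  };
  // executor->executeWorkUnitPerFragment(ra_exe_unit, table_info, co, eo, cat, cb);
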
Definition at line 1356 of file Execute.cpp.

References CHECK_EQ, Executor::ExecutionDispatch::compile(), CompilationOptions::device_type, Fragmenter_Namespace::TableInfo::fragments, InputTableInfo::info, and Executor::ExecutionDispatch::run().

1361  {
1362  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in, co);
1363  ColumnCacheMap column_cache;
1364 
1365  std::vector<InputTableInfo> table_infos{table_info};
1366  // TODO(adb): ensure this is under a try / catch
1367  ExecutionDispatch execution_dispatch(
1368  this, ra_exe_unit, table_infos, cat, row_set_mem_owner_, nullptr);
1369  ColumnFetcher column_fetcher(this, column_cache);
1370  std::unique_ptr<QueryCompilationDescriptor> query_comp_desc_owned;
1371  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
1372  std::tie(query_comp_desc_owned, query_mem_desc_owned) =
1373  execution_dispatch.compile(0, 8, co, eo, column_fetcher, false);
1374  CHECK_EQ(size_t(1), ra_exe_unit.input_descs.size());
1375  const auto table_id = ra_exe_unit.input_descs[0].getTableId();
1376  const auto& outer_fragments = table_info.info.fragments;
1377  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
1378  ++fragment_index) {
1379  // We may want to consider in the future allowing this to execute on devices other
1380  // than CPU
1381  execution_dispatch.run(co.device_type,
1382  0,
1383  eo,
1384  column_fetcher,
1385  *query_comp_desc_owned,
1386  *query_mem_desc_owned,
1387  {{table_id, {fragment_index}}},
1388  ExecutorDispatchMode::KernelPerFragment,
1389  -1);
1390  }
1391 
1392  const auto& all_fragment_results = execution_dispatch.getFragmentResults();
1393 
1394  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
1395  ++fragment_index) {
1396  const auto fragment_results = all_fragment_results[fragment_index];
1397  cb(fragment_results.first, outer_fragments[fragment_index]);
1398  }
1399 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:166
const std::vector< InputDescriptor > input_descs
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:933
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:2679
ExecutorDeviceType device_type
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap


FetchResult Executor::fetchChunks ( const ColumnFetcher &  column_fetcher,
const RelAlgExecutionUnit &  ra_exe_unit,
const int  device_id,
const Data_Namespace::MemoryLevel  memory_level,
const std::map< int, const TableFragments * > &  all_tables_fragments,
const FragmentsList &  selected_fragments,
const Catalog_Namespace::Catalog &  cat,
std::list< ChunkIter > &  chunk_iterators,
std::list< std::shared_ptr< Chunk_NS::Chunk >> &  chunks 
)
private

Definition at line 2063 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LT, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, g_enable_dynamic_watchdog, g_enable_runtime_query_interrupt, ColumnFetcher::getAllTableColumnFragments(), ColumnFetcher::getOneTableColumnFragment(), ColumnFetcher::getResultSetColumn(), INJECT_TIMER, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, interrupted_(), RESULT, and anonymous_namespace{Execute.cpp}::try_get_column_descriptor().

2072  {
2073  auto timer = DEBUG_TIMER(__func__);
2075  const auto& col_global_ids = ra_exe_unit.input_col_descs;
2076  std::vector<std::vector<size_t>> selected_fragments_crossjoin;
2077  std::vector<size_t> local_col_to_frag_pos;
2078  buildSelectedFragsMapping(selected_fragments_crossjoin,
2079  local_col_to_frag_pos,
2080  col_global_ids,
2081  selected_fragments,
2082  ra_exe_unit);
2083 
2084  CartesianProduct<std::vector<std::vector<size_t>>> frag_ids_crossjoin(
2085  selected_fragments_crossjoin);
2086 
2087  std::vector<std::vector<const int8_t*>> all_frag_col_buffers;
2088  std::vector<std::vector<int64_t>> all_num_rows;
2089  std::vector<std::vector<uint64_t>> all_frag_offsets;
2090 
2091  for (const auto& selected_frag_ids : frag_ids_crossjoin) {
2092  std::vector<const int8_t*> frag_col_buffers(
2093  plan_state_->global_to_local_col_ids_.size());
2094  for (const auto& col_id : col_global_ids) {
2095  // check whether the interrupt flag turns on (non kernel-time query interrupt)
2096  if ((g_enable_dynamic_watchdog || g_enable_runtime_query_interrupt) &&
2097  interrupted_) {
2098  resetInterrupt();
2099  throw QueryExecutionError(ERR_INTERRUPTED);
2100  }
2101  CHECK(col_id);
2102  const int table_id = col_id->getScanDesc().getTableId();
2103  const auto cd = try_get_column_descriptor(col_id.get(), cat);
2104  if (cd && cd->isVirtualCol) {
2105  CHECK_EQ("rowid", cd->columnName);
2106  continue;
2107  }
2108  const auto fragments_it = all_tables_fragments.find(table_id);
2109  CHECK(fragments_it != all_tables_fragments.end());
2110  const auto fragments = fragments_it->second;
2111  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
2112  CHECK(it != plan_state_->global_to_local_col_ids_.end());
2113  CHECK_LT(static_cast<size_t>(it->second),
2114  plan_state_->global_to_local_col_ids_.size());
2115  const size_t frag_id = selected_frag_ids[local_col_to_frag_pos[it->second]];
2116  if (!fragments->size()) {
2117  return {};
2118  }
2119  CHECK_LT(frag_id, fragments->size());
2120  auto memory_level_for_column = memory_level;
2121  if (plan_state_->columns_to_fetch_.find(
2122  std::make_pair(col_id->getScanDesc().getTableId(), col_id->getColId())) ==
2123  plan_state_->columns_to_fetch_.end()) {
2124  memory_level_for_column = Data_Namespace::CPU_LEVEL;
2125  }
2126  if (col_id->getScanDesc().getSourceType() == InputSourceType::RESULT) {
2127  frag_col_buffers[it->second] = column_fetcher.getResultSetColumn(
2128  col_id.get(), memory_level_for_column, device_id);
2129  } else {
2130  if (needFetchAllFragments(*col_id, ra_exe_unit, selected_fragments)) {
2131  frag_col_buffers[it->second] =
2132  column_fetcher.getAllTableColumnFragments(table_id,
2133  col_id->getColId(),
2134  all_tables_fragments,
2135  memory_level_for_column,
2136  device_id);
2137  } else {
2138  frag_col_buffers[it->second] =
2139  column_fetcher.getOneTableColumnFragment(table_id,
2140  frag_id,
2141  col_id->getColId(),
2142  all_tables_fragments,
2143  chunks,
2144  chunk_iterators,
2145  memory_level_for_column,
2146  device_id);
2147  }
2148  }
2149  }
2150  all_frag_col_buffers.push_back(frag_col_buffers);
2151  }
2152  std::tie(all_num_rows, all_frag_offsets) = getRowCountAndOffsetForAllFrags(
2153  ra_exe_unit, frag_ids_crossjoin, ra_exe_unit.input_descs, all_tables_fragments);
2154  return {all_frag_col_buffers, all_num_rows, all_frag_offsets};
2155 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const ColumnDescriptor * try_get_column_descriptor(const InputColDescriptor *col_desc, const Catalog_Namespace::Catalog &cat)
Definition: Execute.cpp:1969
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:988
FetchResult fetchChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &)
Definition: Execute.cpp:2063
const int8_t * getAllTableColumnFragments(const int table_id, const int col_id, const std::map< int, const TableFragments * > &all_tables_fragments, const Data_Namespace::MemoryLevel memory_level, const int device_id) const
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
const std::vector< InputDescriptor > input_descs
bool needFetchAllFragments(const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
Definition: Execute.cpp:2044
const int8_t * getOneTableColumnFragment(const int table_id, const int frag_id, const int col_id, const std::map< int, const TableFragments * > &all_tables_fragments, std::list< std::shared_ptr< Chunk_NS::Chunk >> &chunk_holder, std::list< ChunkIter > &chunk_iter_holder, const Data_Namespace::MemoryLevel memory_level, const int device_id) const
CHECK(cgen_state)
#define INJECT_TIMER(DESC)
Definition: measure.h:91
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932
#define CHECK_LT(x, y)
Definition: Logger.h:207
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags(const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
Definition: Execute.cpp:1997
void buildSelectedFragsMapping(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2171
void resetInterrupt()
#define DEBUG_TIMER(name)
Definition: Logger.h:313
const int8_t * getResultSetColumn(const InputColDescriptor *col_desc, const Data_Namespace::MemoryLevel memory_level, const int device_id) const
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:104
bool interrupted_
Definition: Execute.h:941


std::string Executor::generatePTX ( const std::string &  cuda_llir) const
private

Definition at line 888 of file NativeCodegen.cpp.

888  {
889  return CodeGenerator::generatePTX(
890  cuda_llir, nvptx_target_machine_.get(), cgen_state_.get());
891 }
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
Definition: Execute.h:946
static std::string generatePTX(const std::string &cuda_llir, llvm::TargetMachine *nvptx_target_machine, CgenState *cgen_state)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
const Catalog_Namespace::Catalog * Executor::getCatalog ( ) const

Definition at line 233 of file Execute.cpp.

References catalog_().

233  {
234  return catalog_;
235 }
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961


std::vector< std::pair< void *, void * > > Executor::getCodeFromCache ( const CodeCacheKey key,
const CodeCache cache 
)
private

Definition at line 163 of file NativeCodegen.cpp.

References LruCache< key_t, value_t, hash_t >::cend(), LruCache< key_t, value_t, hash_t >::find(), and GpuCompilationContext::module().

164  {
165  auto it = cache.find(key);
166  if (it != cache.cend()) {
167  delete cgen_state_->module_;
168  cgen_state_->module_ = it->second.second;
169  std::vector<std::pair<void*, void*>> native_functions;
170  for (auto& native_code : it->second.first) {
171  GpuCompilationContext* gpu_context = std::get<2>(native_code).get();
172  native_functions.emplace_back(std::get<0>(native_code),
173  gpu_context ? gpu_context->module() : nullptr);
174  }
175  return native_functions;
176  }
177  return {};
178 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:917
const_list_iterator_t cend() const
Definition: LruCache.hpp:55
const_list_iterator_t find(const key_t &key) const
Definition: LruCache.hpp:49


std::vector< ColumnLazyFetchInfo > Executor::getColLazyFetchInfo ( const std::vector< Analyzer::Expr * > &  target_exprs) const

Definition at line 287 of file Execute.cpp.

References catalog_(), CHECK(), ColumnDescriptor::columnType, get_column_descriptor(), IS_GEO, and kNULLT.

288  {
290  CHECK(catalog_);
291  std::vector<ColumnLazyFetchInfo> col_lazy_fetch_info;
292  for (const auto target_expr : target_exprs) {
293  if (!plan_state_->isLazyFetchColumn(target_expr)) {
294  col_lazy_fetch_info.emplace_back(
295  ColumnLazyFetchInfo{false, -1, SQLTypeInfo(kNULLT, false)});
296  } else {
297  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
298  CHECK(col_var);
299  auto col_id = col_var->get_column_id();
300  auto rte_idx = (col_var->get_rte_idx() == -1) ? 0 : col_var->get_rte_idx();
301  auto cd = (col_var->get_table_id() > 0)
302  ? get_column_descriptor(col_id, col_var->get_table_id(), *catalog_)
303  : nullptr;
304  if (cd && IS_GEO(cd->columnType.get_type())) {
305  // Geo coords cols will be processed in sequence. So we only need to track the
306  // first coords col in lazy fetch info.
307  {
308  auto cd0 =
309  get_column_descriptor(col_id + 1, col_var->get_table_id(), *catalog_);
310  auto col0_ti = cd0->columnType;
311  CHECK(!cd0->isVirtualCol);
312  auto col0_var = makeExpr<Analyzer::ColumnVar>(
313  col0_ti, col_var->get_table_id(), cd0->columnId, rte_idx);
314  auto local_col0_id = plan_state_->getLocalColumnId(col0_var.get(), false);
315  col_lazy_fetch_info.emplace_back(
316  ColumnLazyFetchInfo{true, local_col0_id, col0_ti});
317  }
318  } else {
319  auto local_col_id = plan_state_->getLocalColumnId(col_var, false);
320  const auto& col_ti = col_var->get_type_info();
321  col_lazy_fetch_info.emplace_back(ColumnLazyFetchInfo{true, local_col_id, col_ti});
322  }
323  }
324  }
325  return col_lazy_fetch_info;
326 }
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932
SQLTypeInfo columnType
#define IS_GEO(T)
Definition: sqltypes.h:163
const ColumnDescriptor * get_column_descriptor(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:142


ExpressionRange Executor::getColRange ( const PhysicalInput &  phys_input) const

Definition at line 257 of file Execute.cpp.

257  {
258  return agg_col_range_cache_.getColRange(phys_input);
259 }
ExpressionRange getColRange(const PhysicalInput &) const
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:965
const ColumnDescriptor * Executor::getColumnDescriptor ( const Analyzer::ColumnVar *  col_var) const

Definition at line 216 of file Execute.cpp.

References catalog_(), get_column_descriptor_maybe(), Analyzer::ColumnVar::get_column_id(), and Analyzer::ColumnVar::get_table_id().

217  {
218  return get_column_descriptor_maybe(
219  col_var->get_column_id(), col_var->get_table_id(), *catalog_);
220 }
int get_table_id() const
Definition: Analyzer.h:194
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:171
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
int get_column_id() const
Definition: Analyzer.h:195


std::string & Executor::getCurrentQuerySession ( )

Definition at line 2968 of file Execute.cpp.

2968  {
2969  return current_query_session_;
2970 }
static std::string current_query_session_
Definition: Execute.h:969
ExecutorDeviceType Executor::getDeviceTypeForTargets ( const RelAlgExecutionUnit &  ra_exe_unit,
const ExecutorDeviceType  requested_device_type 
)
private

Definition at line 1433 of file Execute.cpp.

References CPU, g_bigint_count, get_target_info(), RelAlgExecutionUnit::groupby_exprs, kAVG, kDOUBLE, kSUM, and RelAlgExecutionUnit::target_exprs.

1435  {
1436  for (const auto target_expr : ra_exe_unit.target_exprs) {
1437  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1438  if (!ra_exe_unit.groupby_exprs.empty() &&
1439  !isArchPascalOrLater(requested_device_type)) {
1440  if ((agg_info.agg_kind == kAVG || agg_info.agg_kind == kSUM) &&
1441  agg_info.agg_arg_type.get_type() == kDOUBLE) {
1442  return ExecutorDeviceType::CPU;
1443  }
1444  }
1445  if (dynamic_cast<const Analyzer::RegexpExpr*>(target_expr)) {
1446  return ExecutorDeviceType::CPU;
1447  }
1448  }
1449  return requested_device_type;
1450 }
std::vector< Analyzer::Expr * > target_exprs
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:66
bool isArchPascalOrLater(const ExecutorDeviceType dt) const
Definition: Execute.h:456
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool g_bigint_count
Definition: sqldefs.h:75
Definition: sqldefs.h:72

+ Here is the call graph for this function:
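The per-target checks above can be restated as a standalone predicate; the sketch below is illustrative only (it assumes TargetInfo from TargetInfo.h as referenced above, and the Pascal rationale in the comment is an inference, not stated in the source).

// Illustrative restatement of the CPU-fallback conditions checked per target expression.
bool forces_cpu_fallback(const TargetInfo& agg_info,
                         const bool has_groupby,
                         const bool pascal_or_later,
                         const bool is_regexp_target) {
  // Grouped AVG/SUM over DOUBLE is routed to CPU on pre-Pascal devices
  // (likely because native double-precision atomics arrived with Pascal).
  if (has_groupby && !pascal_or_later &&
      (agg_info.agg_kind == kAVG || agg_info.agg_kind == kSUM) &&
      agg_info.agg_arg_type.get_type() == kDOUBLE) {
    return true;
  }
  // REGEXP target expressions always force CPU execution.
  return is_regexp_target;
}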

std::shared_ptr< Executor > Executor::getExecutor ( const int  db_id,
const std::string &  debug_dir = "",
const std::string &  debug_file = "",
const MapDParameters  mapd_parameters = MapDParameters() 
)
static

Definition at line 130 of file Execute.cpp.

References CHECK(), MapDParameters::cuda_block_size, MapDParameters::cuda_grid_size, and INJECT_TIMER.

Referenced by Parser::InsertValuesStmt::execute(), MapDHandler::execute_rel_alg(), MapDHandler::execute_rel_alg_df(), Parser::LocalConnector::getOuterFragmentCount(), MapDHandler::interrupt(), migrations::MigrationMgr::migrateDateInDaysMetadata(), Parser::InsertIntoTableAsSelectStmt::populateData(), QueryRunner::anonymous_namespace{QueryRunner.cpp}::run_select_query_with_filter_push_down(), QueryRunner::QueryRunner::runSelectQuery(), MapDHandler::sql_execute_impl(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumns().

133  {
134  INJECT_TIMER(getExecutor);
135  const auto executor_key = db_id;
136  {
137  mapd_shared_lock<mapd_shared_mutex> read_lock(executors_cache_mutex_);
138  auto it = executors_.find(executor_key);
139  if (it != executors_.end()) {
140  return it->second;
141  }
142  }
143  {
144  mapd_unique_lock<mapd_shared_mutex> write_lock(executors_cache_mutex_);
145  auto it = executors_.find(executor_key);
146  if (it != executors_.end()) {
147  return it->second;
148  }
149  auto executor = std::make_shared<Executor>(db_id,
150  mapd_parameters.cuda_block_size,
151  mapd_parameters.cuda_grid_size,
152  debug_dir,
153  debug_file);
154  auto it_ok = executors_.insert(std::make_pair(executor_key, executor));
155  CHECK(it_ok.second);
156  return executor;
157  }
158 }
static std::shared_ptr< Executor > getExecutor(const int db_id, const std::string &debug_dir="", const std::string &debug_file="", const MapDParameters mapd_parameters=MapDParameters())
Definition: Execute.cpp:130
static mapd_shared_mutex executors_cache_mutex_
Definition: Execute.h:976
size_t cuda_block_size
CHECK(cgen_state)
#define INJECT_TIMER(DESC)
Definition: measure.h:91
size_t cuda_grid_size
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:973

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
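Because executors are cached per database id under executors_cache_mutex_, repeated lookups with the same id yield the same instance; a minimal usage sketch with a placeholder database id:

// Hypothetical caller: the second call hits the executors_ cache and returns
// the shared_ptr created by the first call.
const int db_id = 1;  // placeholder
auto executor_a = Executor::getExecutor(db_id);
auto executor_b = Executor::getExecutor(db_id);
CHECK(executor_a.get() == executor_b.get());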

std::vector< size_t > Executor::getFragmentCount ( const FragmentsList selected_fragments,
const size_t  scan_idx,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 2157 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs, and RelAlgExecutionUnit::join_quals.

2159  {
2160  if ((ra_exe_unit.input_descs.size() > size_t(2) || !ra_exe_unit.join_quals.empty()) &&
2161  scan_idx > 0 &&
2162  !plan_state_->join_info_.sharded_range_table_indices_.count(scan_idx) &&
2163  !selected_fragments[scan_idx].fragment_ids.empty()) {
2164  // Fetch all fragments
2165  return {size_t(0)};
2166  }
2167 
2168  return selected_fragments[scan_idx].fragment_ids;
2169 }
const std::vector< InputDescriptor > input_descs
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932
std::unordered_map< int, const Analyzer::BinOper * > Executor::getInnerTabIdToJoinCond ( ) const
private

Definition at line 1721 of file Execute.cpp.

References CHECK_EQ.

1722  {
1723  std::unordered_map<int, const Analyzer::BinOper*> id_to_cond;
1724  const auto& join_info = plan_state_->join_info_;
1725  CHECK_EQ(join_info.equi_join_tautologies_.size(), join_info.join_hash_tables_.size());
1726  for (size_t i = 0; i < join_info.join_hash_tables_.size(); ++i) {
1727  int inner_table_id = join_info.join_hash_tables_[i]->getInnerTableId();
1728  id_to_cond.insert(
1729  std::make_pair(inner_table_id, join_info.equi_join_tautologies_[i].get()));
1730  }
1731  return id_to_cond;
1732 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932
std::vector< int64_t > Executor::getJoinHashTablePtrs ( const ExecutorDeviceType  device_type,
const int  device_id 
)
private

Definition at line 2518 of file Execute.cpp.

References CHECK(), GPU, and join_hash_tables.

2519  {
2520  std::vector<int64_t> table_ptrs;
2521  const auto& join_hash_tables = plan_state_->join_info_.join_hash_tables_;
2522  for (auto hash_table : join_hash_tables) {
2523  if (!hash_table) {
2524  CHECK(table_ptrs.empty());
2525  return {};
2526  }
2527  table_ptrs.push_back(hash_table->getJoinHashBuffer(
2528  device_type, device_type == ExecutorDeviceType::GPU ? device_id : 0));
2529  }
2530  return table_ptrs;
2531 }
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t * join_hash_tables
CHECK(cgen_state)
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932

+ Here is the call graph for this function:

size_t Executor::getNumBytesForFetchedRow ( ) const

Definition at line 261 of file Execute.cpp.

References catalog_(), kENCODING_DICT, and kTEXT.

261  {
262  size_t num_bytes = 0;
263  if (!plan_state_) {
264  return 0;
265  }
266  for (const auto& fetched_col_pair : plan_state_->columns_to_fetch_) {
267  if (fetched_col_pair.first < 0) {
268  num_bytes += 8;
269  } else {
270  const auto cd =
271  catalog_->getMetadataForColumn(fetched_col_pair.first, fetched_col_pair.second);
272  const auto& ti = cd->columnType;
273  const auto sz = ti.get_type() == kTEXT && ti.get_compression() == kENCODING_DICT
274  ? 4
275  : ti.get_size();
276  if (sz < 0) {
277  // for varlen types, only account for the pointer/size for each row, for now
278  num_bytes += 16;
279  } else {
280  num_bytes += sz;
281  }
282  }
283  }
284  return num_bytes;
285 }
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:248
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
Definition: sqltypes.h:53
SQLTypeInfo columnType

+ Here is the call graph for this function:
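As a worked example of the accounting above (hypothetical plan state): a fetched rowid (negative column id) contributes 8 bytes, an INTEGER column 4 bytes, a dictionary-encoded TEXT column 4 bytes, and a none-encoded TEXT column, being variable-length (negative size), 16 bytes for its pointer and length, so getNumBytesForFetchedRow() reports 8 + 4 + 4 + 16 = 32 bytes per row.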

const ColumnDescriptor * Executor::getPhysicalColumnDescriptor ( const Analyzer::ColumnVar col_var,
int  n 
) const

Definition at line 222 of file Execute.cpp.

References catalog_(), get_column_descriptor_maybe(), Analyzer::ColumnVar::get_column_id(), and Analyzer::ColumnVar::get_table_id().

224  {
225  const auto cd = getColumnDescriptor(col_var);
226  if (!cd || n > cd->columnType.get_physical_cols()) {
227  return nullptr;
228  }
229  return get_column_descriptor_maybe(
230  col_var->get_column_id() + n, col_var->get_table_id(), *catalog_);
231 }
int get_table_id() const
Definition: Analyzer.h:194
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:171
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
const ColumnDescriptor * getColumnDescriptor(const Analyzer::ColumnVar *) const
Definition: Execute.cpp:216
int get_column_id() const
Definition: Analyzer.h:195

+ Here is the call graph for this function:
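A usage sketch, assuming a geo column whose physical coords columns immediately follow the logical column id (consistent with the col_id + 1 access in getColLazyFetchInfo above):

// Hypothetical: n = 0 resolves the logical geo column itself, n = 1 the first
// physical (coords) column; n beyond columnType.get_physical_cols() yields nullptr.
const auto* coords_cd = executor->getPhysicalColumnDescriptor(geo_col_var, 1);
if (coords_cd) {
  // coords_cd->columnType describes the packed coordinates column.
}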

std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > Executor::getRowCountAndOffsetForAllFrags ( const RelAlgExecutionUnit ra_exe_unit,
const CartesianProduct< std::vector< std::vector< size_t >>> &  frag_ids_crossjoin,
const std::vector< InputDescriptor > &  input_descs,
const std::map< int, const TableFragments * > &  all_tables_fragments 
)
private

Definition at line 1997 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LT, get_table_id_to_frag_offsets(), RelAlgExecutionUnit::join_quals, and num_rows.

2001  {
2002  std::vector<std::vector<int64_t>> all_num_rows;
2003  std::vector<std::vector<uint64_t>> all_frag_offsets;
2004  const auto tab_id_to_frag_offsets =
2005  get_table_id_to_frag_offsets(input_descs, all_tables_fragments);
2006  std::unordered_map<size_t, size_t> outer_id_to_num_row_idx;
2007  for (const auto& selected_frag_ids : frag_ids_crossjoin) {
2008  std::vector<int64_t> num_rows;
2009  std::vector<uint64_t> frag_offsets;
2010  CHECK_EQ(selected_frag_ids.size(), input_descs.size());
2011  for (size_t tab_idx = 0; tab_idx < input_descs.size(); ++tab_idx) {
2012  const auto frag_id = selected_frag_ids[tab_idx];
2013  const auto fragments_it =
2014  all_tables_fragments.find(input_descs[tab_idx].getTableId());
2015  CHECK(fragments_it != all_tables_fragments.end());
2016  const auto& fragments = *fragments_it->second;
2017  if (ra_exe_unit.join_quals.empty() || tab_idx == 0 ||
2018  plan_state_->join_info_.sharded_range_table_indices_.count(tab_idx)) {
2019  const auto& fragment = fragments[frag_id];
2020  num_rows.push_back(fragment.getNumTuples());
2021  } else {
2022  size_t total_row_count{0};
2023  for (const auto& fragment : fragments) {
2024  total_row_count += fragment.getNumTuples();
2025  }
2026  num_rows.push_back(total_row_count);
2027  }
2028  const auto frag_offsets_it =
2029  tab_id_to_frag_offsets.find(input_descs[tab_idx].getTableId());
2030  CHECK(frag_offsets_it != tab_id_to_frag_offsets.end());
2031  const auto& offsets = frag_offsets_it->second;
2032  CHECK_LT(frag_id, offsets.size());
2033  frag_offsets.push_back(offsets[frag_id]);
2034  }
2035  all_num_rows.push_back(num_rows);
2036  // Fragment offsets of outer table should be ONLY used by rowid for now.
2037  all_frag_offsets.push_back(frag_offsets);
2038  }
2039  return {all_num_rows, all_frag_offsets};
2040 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const int8_t const int64_t * num_rows
CHECK(cgen_state)
const JoinQualsPerNestingLevel join_quals
std::map< size_t, std::vector< uint64_t > > get_table_id_to_frag_offsets(const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
Definition: Execute.cpp:1978
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:932
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the call graph for this function:

const std::shared_ptr< RowSetMemoryOwner > Executor::getRowSetMemoryOwner ( ) const

Definition at line 241 of file Execute.cpp.

241  {
242  return row_set_mem_owner_;
243 }
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:933
StringDictionaryProxy * Executor::getStringDictionaryProxy ( const int  dictId,
const std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const bool  with_generation 
) const

Definition at line 185 of file Execute.cpp.

References catalog_(), CHECK(), CHECK_EQ, CHECK_LE, g_cache_string_hash, and REGULAR_DICT.

188  {
189  const int dict_id{dict_id_in < 0 ? REGULAR_DICT(dict_id_in) : dict_id_in};
190  CHECK(catalog_);
191  const auto dd = catalog_->getMetadataForDict(dict_id);
192  std::lock_guard<std::mutex> lock(str_dict_mutex_);
193  if (dd) {
194  CHECK(dd->stringDict);
195  CHECK_LE(dd->dictNBits, 32);
196  CHECK(row_set_mem_owner);
197  const auto generation = with_generation
198  ? string_dictionary_generations_.getGeneration(dict_id)
199  : ssize_t(-1);
200  return row_set_mem_owner->addStringDict(dd->stringDict, dict_id, generation);
201  }
202  CHECK_EQ(0, dict_id);
203  if (!lit_str_dict_proxy_) {
204  std::shared_ptr<StringDictionary> tsd =
205  std::make_shared<StringDictionary>("", false, true, g_cache_string_hash);
206  lit_str_dict_proxy_.reset(new StringDictionaryProxy(tsd, 0));
207  }
208  return lit_str_dict_proxy_.get();
209 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::shared_ptr< StringDictionaryProxy > lit_str_dict_proxy_
Definition: Execute.h:943
CHECK(cgen_state)
std::mutex str_dict_mutex_
Definition: Execute.h:944
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:961
ssize_t getGeneration(const uint32_t id) const
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1444
#define REGULAR_DICT(TRANSIENTID)
Definition: sqltypes.h:189
#define CHECK_LE(x, y)
Definition: Logger.h:208
StringDictionaryGenerations string_dictionary_generations_
Definition: Execute.h:966
bool g_cache_string_hash
Definition: Execute.cpp:89

+ Here is the call graph for this function:
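A minimal usage sketch, assuming dict_id was taken from a dictionary-encoded column's type info; the generation recorded in string_dictionary_generations_ is forwarded to addStringDict when with_generation is true.

// Hypothetical caller: resolve the proxy for a dictionary-encoded column.
auto* proxy = executor->getStringDictionaryProxy(
    dict_id, executor->getRowSetMemoryOwner(), /*with_generation=*/true);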

std::vector< size_t > Executor::getTableFragmentIndices ( const RelAlgExecutionUnit ra_exe_unit,
const ExecutorDeviceType  device_type,
const size_t  table_idx,
const size_t  outer_frag_idx,
std::map< int, const TableFragments * > &  selected_tables_fragments,
const std::unordered_map< int, const Analyzer::BinOper * > &  inner_table_id_to_join_condition 
)
private

Definition at line 1865 of file Execute.cpp.

References CHECK(), CHECK_LT, and RelAlgExecutionUnit::input_descs.

1872  {
1873  const int table_id = ra_exe_unit.input_descs[table_idx].getTableId();
1874  auto table_frags_it = selected_tables_fragments.find(table_id);
1875  CHECK(table_frags_it != selected_tables_fragments.end());
1876  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
1877  const auto outer_table_fragments_it =
1878  selected_tables_fragments.find(outer_input_desc.getTableId());
1879  const auto outer_table_fragments = outer_table_fragments_it->second;
1880  CHECK(outer_table_fragments_it != selected_tables_fragments.end());
1881  CHECK_LT(outer_frag_idx, outer_table_fragments->size());
1882  if (!table_idx) {
1883  return {outer_frag_idx};
1884  }
1885  const auto& outer_fragment_info = (*outer_table_fragments)[outer_frag_idx];
1886  auto& inner_frags = table_frags_it->second;
1887  CHECK_LT(size_t(1), ra_exe_unit.input_descs.size());
1888  std::vector<size_t> all_frag_ids;
1889  for (size_t inner_frag_idx = 0; inner_frag_idx < inner_frags->size();
1890  ++inner_frag_idx) {
1891  const auto& inner_frag_info = (*inner_frags)[inner_frag_idx];
1892  if (skipFragmentPair(outer_fragment_info,
1893  inner_frag_info,
1894  table_idx,
1895  inner_table_id_to_join_condition,
1896  ra_exe_unit,
1897  device_type)) {
1898  continue;
1899  }
1900  all_frag_ids.push_back(inner_frag_idx);
1901  }
1902  return all_frag_ids;
1903 }
const std::vector< InputDescriptor > input_descs
CHECK(cgen_state)
#define CHECK_LT(x, y)
Definition: Logger.h:207
bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
Definition: Execute.cpp:1907

+ Here is the call graph for this function:

const TableGeneration & Executor::getTableGeneration ( const int  table_id) const

Definition at line 253 of file Execute.cpp.

253  {
254  return table_generations_.getGeneration(table_id);
255 }
const TableGeneration & getGeneration(const uint32_t id) const
TableGenerations table_generations_
Definition: Execute.h:967
Fragmenter_Namespace::TableInfo Executor::getTableInfo ( const int  table_id) const

Definition at line 249 of file Execute.cpp.

References input_table_info_cache_().

249  {
250  return input_table_info_cache_.getTableInfo(table_id);
251 }
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id)
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:964

+ Here is the call graph for this function:

const TemporaryTables * Executor::getTemporaryTables ( ) const

Definition at line 245 of file Execute.cpp.

References temporary_tables_().

245  {
246  return temporary_tables_;
247 }
const TemporaryTables * temporary_tables_
Definition: Execute.h:962

+ Here is the call graph for this function:

unsigned Executor::gridSize ( ) const

Definition at line 2605 of file Execute.cpp.

References catalog_(), CHECK(), and grid_size_x_().

2605  {
2606  CHECK(catalog_);
2607  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
2608  CHECK(cuda_mgr);
2609  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
2610  return grid_size_x_ ? grid_size_x_ : 2 * dev_props.front().numMPs;
2611 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:149
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
CHECK(cgen_state)