OmniSciDB  1dac507f6e
Executor Class Reference

#include <Execute.h>


Classes

struct  CompilationResult
 
class  ExecutionDispatch
 
class  FetchCacheAnchor
 
struct  FetchResult
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

typedef std::tuple< std::string, const Analyzer::Expr *, int64_t, const size_t > AggInfo
 

Public Member Functions

 Executor (const int db_id, const size_t block_size_x, const size_t grid_size_x, const std::string &debug_dir, const std::string &debug_file, ::QueryRenderer::QueryRenderManager *render_manager)
 
std::shared_ptr< ResultSet > execute (const Planner::RootPlan *root_plan, const Catalog_Namespace::SessionInfo &session, const bool hoist_literals, const ExecutorDeviceType device_type, const ExecutorOptLevel, const bool allow_multifrag, const bool allow_loop_joins, RenderInfo *render_query_data=nullptr)
 
std::shared_ptr< ResultSet > renderPointsNonInSitu (const std::string &queryStr, const ExecutionResult &results, const Catalog_Namespace::SessionInfo &session, const int render_widget_id, const ::QueryRenderer::JSONLocation *data_loc, RenderInfo *render_query_data)
 
std::shared_ptr< ResultSet > renderPointsInSitu (RenderInfo *render_query_data)
 
std::shared_ptr< ResultSet > renderPolygonsNonInSitu (const std::string &queryStr, const ExecutionResult &results, const Catalog_Namespace::SessionInfo &session, const int render_widget_id, const ::QueryRenderer::JSONLocation *data_loc, RenderInfo *render_query_data, const std::string &poly_table_name)
 
std::shared_ptr< ResultSet > renderLinesNonInSitu (const std::string &queryStr, const ExecutionResult &results, const Catalog_Namespace::SessionInfo &session, const int render_widget_id, const ::QueryRenderer::JSONLocation *data_loc, RenderInfo *render_query_data)
 
std::vector< int32_t > getStringIds (const std::string &col_name, const std::vector< std::string > &col_vals, const ::QueryRenderer::QueryDataLayout *query_data_layout, const ResultSet *results, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner, const bool warn=false) const
 
std::vector< std::string > getStringsFromIds (const std::string &col_name, const std::vector< int32_t > &ids, const ::QueryRenderer::QueryDataLayout *query_data_layout, const ResultSet *results, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner) const
 
StringDictionaryProxy * getStringDictionaryProxy (const int dictId, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
const Catalog_Namespace::Catalog * getCatalog () const
 
void setCatalog (const Catalog_Namespace::Catalog *catalog)
 
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner () const
 
const TemporaryTables * getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const int table_id) const
 
const TableGeneration & getTableGeneration (const int table_id) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow () const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void registerActiveModule (void *module, const int device_id) const
 
void unregisterActiveModule (void *module, const int device_id) const
 
void interrupt ()
 
void resetInterrupt ()
 
int8_t warpSize () const
 
unsigned gridSize () const
 
unsigned blockSize () const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
 

Static Public Member Functions

static std::shared_ptr< Executor > getExecutor (const int db_id, const std::string &debug_dir="", const std::string &debug_file="", const MapDParameters mapd_parameters=MapDParameters(), ::QueryRenderer::QueryRenderManager *render_manager=nullptr)
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static std::pair< int64_t, int32_t > reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void addCodeToCache (const CodeCacheKey &, std::vector< std::tuple< void *, ExecutionEngineWrapper >>, llvm::Module *, CodeCache &)
 

Static Public Attributes

static const size_t high_scan_limit
 

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCount (const ExecutorDeviceType) const
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenWindowFunctionAggregate (const CompilationOptions &co)
 
llvm::BasicBlock * codegenWindowResetStateControlFlow ()
 
void codegenWindowFunctionStateInit (llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
void codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
 
llvm::Value * codegenAggregateWindowState ()
 
llvm::Value * aggregateWindowStatePtr ()
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
void executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg=false)
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb)
 Compiles and dispatches a work unit per fragment, processing each fragment's results with the per-fragment callback. Currently used for computing metrics over fragments (metadata).
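
A minimal sketch of the per-fragment callback pattern, using stand-in types in place of ResultSetPtr and Fragmenter_Namespace::FragmentInfo (the real types live in the OmniSciDB headers; nothing below is the engine's own code):

#include <functional>
#include <iostream>
#include <memory>

// Stand-ins for ResultSetPtr and Fragmenter_Namespace::FragmentInfo.
struct StubResultSet { size_t row_count; };
using StubResultSetPtr = std::shared_ptr<StubResultSet>;
struct StubFragmentInfo { int fragment_id; };

using PerFragmentCallBack =
    std::function<void(StubResultSetPtr, const StubFragmentInfo&)>;

int main() {
  size_t total_rows = 0;
  // The callback sees one result set per fragment, e.g. to recompute
  // per-fragment metadata as the description above suggests.
  PerFragmentCallBack cb = [&total_rows](StubResultSetPtr rs,
                                         const StubFragmentInfo& frag) {
    total_rows += rs->row_count;
    std::cout << "fragment " << frag.fragment_id << ": " << rs->row_count
              << " rows\n";
  };
  for (int i = 0; i < 3; ++i) {
    cb(std::make_shared<StubResultSet>(StubResultSet{100}), StubFragmentInfo{i});
  }
  std::cout << "total: " << total_rows << "\n";  // total: 300
}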
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
 Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps.
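
The contract in miniature: columns in, a result table out, where the output row count need not match the input's. The function below is purely illustrative, not one of the engine's table functions:

#include <iostream>
#include <vector>

// A toy column-in/column-out "table function": repeats the input column
// `multiplier` times, so the output cardinality differs from the input's.
std::vector<double> repeat_rows(const std::vector<double>& input_col,
                                const int multiplier) {
  std::vector<double> out;
  out.reserve(input_col.size() * multiplier);
  for (int i = 0; i < multiplier; ++i) {
    out.insert(out.end(), input_col.begin(), input_col.end());
  }
  return out;
}

int main() {
  const auto out = repeat_rows({1.0, 2.0, 3.0}, 2);
  std::cout << out.size() << " output rows\n";  // 6 output rows
}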
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (ExecutionDispatch &execution_dispatch, const std::vector< Analyzer::Expr * > &target_exprs, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (ExecutionDispatch &execution_dispatch) const
 
std::unordered_map< int, const Analyzer::BinOper * > getInnerTabIdToJoinCond () const
 
void dispatchFragments (const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)> dispatch, const ExecutionDispatch &execution_dispatch, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, QueryFragmentDescriptor &fragment_descriptor, std::unordered_set< int > &available_gpus, int &available_cpus)
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &)
 
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
 
ResultSetPtr resultsUnion (ExecutionDispatch &execution_dispatch)
 
std::vector< int64_t > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
void executeSimpleInsert (const Planner::RootPlan *root_plan)
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
std::tuple< Executor::CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoopbuildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< JoinHashTableInterface > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, ExecutorDeviceType device_type)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinHashTableInterface::HashType preferred_hash_type, ColumnCacheMap &column_cache)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit *ra_exe_unit)
 
std::vector< std::pair< void *, void * > > optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::vector< std::pair< void *, void * > > optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, GroupByAndAggregate::DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *val)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
RelAlgExecutionUnit addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit)
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (std::unordered_set< int > phys_table_ids)
 
std::vector< std::pair< void *, void * > > getCodeFromCache (const CodeCacheKey &, const CodeCache &)
 
void addCodeToCache (const CodeCacheKey &, const std::vector< std::tuple< void *, GpuCompilationContext * >> &, llvm::Module *, CodeCache &)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

std::unique_ptr< CgenState > cgen_state_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::mutex gpu_active_modules_mutex_
 
uint32_t gpu_active_modules_device_mask_
 
void * gpu_active_modules_ [max_gpu_count]
 
bool interrupted_
 
std::shared_ptr< StringDictionaryProxy > lit_str_dict_proxy_
 
std::mutex str_dict_mutex_
 
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
 
CodeCache cpu_code_cache_
 
CodeCache gpu_code_cache_
 
::QueryRenderer::QueryRenderManager * render_manager_
 
const unsigned block_size_x_
 
const unsigned grid_size_x_
 
const std::string debug_dir_
 
const std::string debug_file_
 
const int db_id_
 
const Catalog_Namespace::Catalog * catalog_
 
const TemporaryTables * temporary_tables_
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
StringDictionaryGenerations string_dictionary_generations_
 
TableGenerations table_generations_
 

Static Private Attributes

static const int max_gpu_count {16}
 
static const size_t baseline_threshold
 
static const size_t code_cache_size {10000}
 
static std::map< std::pair< int, ::QueryRenderer::QueryRenderManager * >, std::shared_ptr< Executor > > executors_
 
static std::mutex execute_mutex_
 
static mapd_shared_mutex executors_cache_mutex_
 
static const int32_t ERR_DIV_BY_ZERO {1}
 
static const int32_t ERR_OUT_OF_GPU_MEM {2}
 
static const int32_t ERR_OUT_OF_SLOTS {3}
 
static const int32_t ERR_UNSUPPORTED_SELF_JOIN {4}
 
static const int32_t ERR_OUT_OF_RENDER_MEM {5}
 
static const int32_t ERR_OUT_OF_CPU_MEM {6}
 
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW {7}
 
static const int32_t ERR_SPECULATIVE_TOP_OOM {8}
 
static const int32_t ERR_OUT_OF_TIME {9}
 
static const int32_t ERR_INTERRUPTED {10}
 
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11}
 
static const int32_t ERR_TOO_MANY_LITERALS {12}
 
static const int32_t ERR_STRING_CONST_IN_RESULTSET {13}
 
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14}
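 
For reference, a minimal sketch mapping these return codes back to their names. The code values come from the list above; the helper itself is illustrative, not part of the Executor API:

#include <cstdint>
#include <iostream>
#include <string>

// Mirrors the ERR_* constants listed above.
std::string executor_error_name(const int32_t code) {
  switch (code) {
    case 1:  return "ERR_DIV_BY_ZERO";
    case 2:  return "ERR_OUT_OF_GPU_MEM";
    case 3:  return "ERR_OUT_OF_SLOTS";
    case 4:  return "ERR_UNSUPPORTED_SELF_JOIN";
    case 5:  return "ERR_OUT_OF_RENDER_MEM";
    case 6:  return "ERR_OUT_OF_CPU_MEM";
    case 7:  return "ERR_OVERFLOW_OR_UNDERFLOW";
    case 8:  return "ERR_SPECULATIVE_TOP_OOM";
    case 9:  return "ERR_OUT_OF_TIME";
    case 10: return "ERR_INTERRUPTED";
    case 11: return "ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED";
    case 12: return "ERR_TOO_MANY_LITERALS";
    case 13: return "ERR_STRING_CONST_IN_RESULTSET";
    case 14: return "ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY";
    default: return "unknown executor error";
  }
}

int main() {
  std::cout << executor_error_name(7) << "\n";  // ERR_OVERFLOW_OR_UNDERFLOW
}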
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
class OverlapsJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class JoinHashTable
 
class LeafAggregator
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
template<typename META_TYPE_CLASS >
class AggregateReductionEgress
 

Detailed Description

Definition at line 330 of file Execute.h.

Member Typedef Documentation

typedef std::tuple<std::string, const Analyzer::Expr*, int64_t, const size_t> Executor::AggInfo

Definition at line 360 of file Execute.h.

using Executor::PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
private

Definition at line 626 of file Execute.h.

Constructor & Destructor Documentation

Executor::Executor ( const int  db_id,
const size_t  block_size_x,
const size_t  grid_size_x,
const std::string &  debug_dir,
const std::string &  debug_file,
::QueryRenderer::QueryRenderManager *  render_manager 
)

Definition at line 106 of file Execute.cpp.

112  : cgen_state_(new CgenState({}, false))

Member Function Documentation

static void Executor::addCodeToCache ( const CodeCacheKey & ,
std::vector< std::tuple< void *, ExecutionEngineWrapper >>  ,
llvm::Module *  ,
CodeCache & 
)
static

Referenced by StubGenerator::generateStub().


void Executor::addCodeToCache ( const CodeCacheKey & ,
const std::vector< std::tuple< void *, GpuCompilationContext * >> &  ,
llvm::Module *  ,
CodeCache & 
)
private
RelAlgExecutionUnit Executor::addDeletedColumn ( const RelAlgExecutionUnit &  ra_exe_unit)
private

Definition at line 3034 of file Execute.cpp.

References catalog_(), CHECK(), and TABLE.

3034  {
3035  auto ra_exe_unit_with_deleted = ra_exe_unit;
3036  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
3037  if (input_table.getSourceType() != InputSourceType::TABLE) {
3038  continue;
3039  }
3040  const auto td = catalog_->getMetadataForTable(input_table.getTableId());
3041  CHECK(td);
3042  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
3043  if (!deleted_cd) {
3044  continue;
3045  }
3046  CHECK(deleted_cd->columnType.is_boolean());
3047  // check deleted column is not already present
3048  bool found = false;
3049  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
3050  if (input_col.get()->getColId() == deleted_cd->columnId &&
3051  input_col.get()->getScanDesc().getTableId() == deleted_cd->tableId &&
3052  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
3053  found = true;
3054  }
3055  }
3056  if (!found) {
3057  // add deleted column
3058  ra_exe_unit_with_deleted.input_col_descs.emplace_back(new InputColDescriptor(
3059  deleted_cd->columnId, deleted_cd->tableId, input_table.getNestLevel()));
3060  }
3061  }
3062  return ra_exe_unit_with_deleted;
3063 }

llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 442 of file IRCodegen.cpp.

References CHECK().

443  {
444  // Iterators are added for loop-outer joins when the head of the loop is generated,
445  // then once again when the body is generated. Allow this instead of special handling
446  // of call sites.
447  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
448  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
449  return it->second;
450  }
451  CHECK(!prev_iters.empty());
452  llvm::Value* matching_row_index = prev_iters.back();
453  const auto it_ok =
454  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
455  CHECK(it_ok.second);
456  return matching_row_index;
457 }

llvm::Value * Executor::aggregateWindowStatePtr ( )
private

Definition at line 122 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.

122  {
123  const auto window_func_context =
125  const auto window_func = window_func_context->getWindowFunction();
126  const auto arg_ti = get_adjusted_window_type_info(window_func);
127  llvm::Type* aggregate_state_type =
128  arg_ti.get_type() == kFLOAT
129  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
130  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
131  const auto aggregate_state_i64 = cgen_state_->llInt(
132  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
133  return cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_i64,
134  aggregate_state_type);
135 }

static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inlinestaticprivate

Definition at line 965 of file Execute.h.

965  {
966  size_t off = off_in;
967  if (off % alignment != 0) {
968  off += (alignment - off % alignment);
969  }
970  return off;
971  }
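
The helper rounds an offset up to the next multiple of the alignment. A self-contained check of the same arithmetic:

#include <cassert>
#include <cstddef>

// Same logic as Executor::align above.
size_t align(const size_t off_in, const size_t alignment) {
  size_t off = off_in;
  if (off % alignment != 0) {
    off += (alignment - off % alignment);
  }
  return off;
}

int main() {
  assert(align(0, 8) == 0);    // already aligned
  assert(align(1, 8) == 8);    // rounds up
  assert(align(8, 8) == 8);    // exact multiple unchanged
  assert(align(13, 4) == 16);  // next multiple of 4
}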
unsigned Executor::blockSize ( ) const

Definition at line 2968 of file Execute.cpp.

References block_size_x_(), catalog_(), and CHECK().

2968  {
2969  CHECK(catalog_);
2970  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
2971  CHECK(cuda_mgr);
2972  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
2973  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
2974 }

std::shared_ptr< JoinHashTableInterface > Executor::buildCurrentLevelHashTable ( const JoinCondition &  current_level_join_conditions,
RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 396 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type_, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, INNER, IS_EQUIVALENCE, JoinHashTableInterface::OneToOne, JoinCondition::quals, and JoinCondition::type.

402  {
403  if (current_level_join_conditions.type != JoinType::INNER &&
404  current_level_join_conditions.quals.size() > 1) {
405  fail_reasons.emplace_back("No equijoin expression found for outer join");
406  return nullptr;
407  }
408  std::shared_ptr<JoinHashTableInterface> current_level_hash_table;
409  for (const auto& join_qual : current_level_join_conditions.quals) {
410  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
411  if (!qual_bin_oper || !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
412  fail_reasons.emplace_back("No equijoin expression found");
413  if (current_level_join_conditions.type == JoinType::INNER) {
414  add_qualifier_to_execution_unit(ra_exe_unit, join_qual);
415  }
416  continue;
417  }
418  JoinHashTableOrError hash_table_or_error;
419  if (!current_level_hash_table) {
420  hash_table_or_error = buildHashTableForQualifier(
421  qual_bin_oper,
422  query_infos,
423  co.device_type_ == ExecutorDeviceType::GPU ? Data_Namespace::GPU_LEVEL
424  : Data_Namespace::CPU_LEVEL,
425  JoinHashTableInterface::HashType::OneToOne,
426  column_cache);
427  current_level_hash_table = hash_table_or_error.hash_table;
428  }
429  if (hash_table_or_error.hash_table) {
430  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
431  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
432  } else {
433  fail_reasons.push_back(hash_table_or_error.fail_reason);
434  if (current_level_join_conditions.type == JoinType::INNER) {
435  add_qualifier_to_execution_unit(ra_exe_unit, qual_bin_oper);
436  }
437  }
438  }
439  return current_level_hash_table;
440 }

Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinHashTableInterface::HashType  preferred_hash_type,
ColumnCacheMap &  column_cache 
)
private

Definition at line 2894 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_GT, coalesce_singleton_equi_join(), g_enable_overlaps_hashjoin, OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), and JoinHashTable::getInstance().

2899  {
2900  std::shared_ptr<JoinHashTableInterface> join_hash_table;
2901  const int device_count = deviceCountForMemoryLevel(memory_level);
2902  CHECK_GT(device_count, 0);
2903  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
2904  return {nullptr, "Overlaps hash join disabled, attempting to fall back to loop join"};
2905  }
2906  try {
2907  if (qual_bin_oper->is_overlaps_oper()) {
2908  join_hash_table = OverlapsJoinHashTable::getInstance(
2909  qual_bin_oper, query_infos, memory_level, device_count, column_cache, this);
2910  } else if (dynamic_cast<const Analyzer::ExpressionTuple*>(
2911  qual_bin_oper->get_left_operand())) {
2912  join_hash_table = BaselineJoinHashTable::getInstance(qual_bin_oper,
2913  query_infos,
2914  memory_level,
2915  preferred_hash_type,
2916  device_count,
2917  column_cache,
2918  this);
2919  } else {
2920  try {
2921  join_hash_table = JoinHashTable::getInstance(qual_bin_oper,
2922  query_infos,
2923  memory_level,
2924  preferred_hash_type,
2925  device_count,
2926  column_cache,
2927  this);
2928  } catch (TooManyHashEntries&) {
2929  const auto join_quals = coalesce_singleton_equi_join(qual_bin_oper);
2930  CHECK_EQ(join_quals.size(), size_t(1));
2931  const auto join_qual =
2932  std::dynamic_pointer_cast<Analyzer::BinOper>(join_quals.front());
2933  join_hash_table = BaselineJoinHashTable::getInstance(join_qual,
2934  query_infos,
2935  memory_level,
2936  preferred_hash_type,
2937  device_count,
2938  column_cache,
2939  this);
2940  }
2941  }
2942  CHECK(join_hash_table);
2943  return {join_hash_table, ""};
2944  } catch (const HashJoinFail& e) {
2945  return {nullptr, e.what()};
2946  }
2947  CHECK(false);
2948  return {nullptr, ""};
2949 }
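
The body above encodes a fallback chain: try the perfect-hash JoinHashTable first, fall back to BaselineJoinHashTable when TooManyHashEntries is thrown, and return a {nullptr, reason} result on HashJoinFail so the caller can fall back to a loop join. A minimal sketch of that chain with stand-in types (none of these stubs are the real classes):

#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>

struct StubHashTable { std::string kind; };
struct TooManyHashEntries : std::runtime_error { using std::runtime_error::runtime_error; };
struct HashJoinFail : std::runtime_error { using std::runtime_error::runtime_error; };

std::shared_ptr<StubHashTable> build_perfect_hash(bool key_range_too_large) {
  if (key_range_too_large) throw TooManyHashEntries("too many hash entries");
  return std::make_shared<StubHashTable>(StubHashTable{"perfect"});
}

// Perfect hash first; baseline hash on TooManyHashEntries; {nullptr, reason}
// on HashJoinFail, which the caller turns into a loop join.
std::pair<std::shared_ptr<StubHashTable>, std::string> build_for_qualifier(
    bool key_range_too_large) {
  try {
    try {
      return {build_perfect_hash(key_range_too_large), ""};
    } catch (TooManyHashEntries&) {
      return {std::make_shared<StubHashTable>(StubHashTable{"baseline"}), ""};
    }
  } catch (const HashJoinFail& e) {
    return {nullptr, e.what()};
  }
}

int main() {
  std::cout << build_for_qualifier(true).first->kind << "\n";  // baseline
}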

std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  level_idx,
const CompilationOptions &  co 
)
private

Definition at line 340 of file IRCodegen.cpp.

References catalog_(), CHECK(), CHECK_LT, CodeGenerator::codegen(), RelAlgExecutionUnit::input_descs, TABLE, and CodeGenerator::toBool().

342  {
343  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
344  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
345  if (input_desc.getSourceType() != InputSourceType::TABLE) {
346  return nullptr;
347  }
348  const auto td = catalog_->getMetadataForTable(input_desc.getTableId());
349  CHECK(td);
350  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
351  if (!deleted_cd) {
352  return nullptr;
353  }
354  CHECK(deleted_cd->columnType.is_boolean());
355  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
356  input_desc.getTableId(),
357  deleted_cd->columnId,
358  input_desc.getNestLevel());
359  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
360  llvm::Value* have_more_inner_rows) {
361  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
362  // Avoid fetching the deleted column from a position which is not valid.
363  // An invalid position can be returned by a one to one hash lookup (negative)
364  // or at the end of iteration over a set of matching values.
365  llvm::Value* is_valid_it{nullptr};
366  if (have_more_inner_rows) {
367  is_valid_it = have_more_inner_rows;
368  } else {
369  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
370  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
371  }
372  const auto it_valid_bb = llvm::BasicBlock::Create(
373  cgen_state_->context_, "it_valid", cgen_state_->row_func_);
374  const auto it_not_valid_bb = llvm::BasicBlock::Create(
375  cgen_state_->context_, "it_not_valid", cgen_state_->row_func_);
376  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
377  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
378  cgen_state_->context_, "row_is_deleted", cgen_state_->row_func_);
379  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
380  CodeGenerator code_generator(this);
381  const auto row_is_deleted = code_generator.toBool(
382  code_generator.codegen(deleted_expr.get(), true, co).front());
383  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
384  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
385  const auto row_is_deleted_default = cgen_state_->llBool(false);
386  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
387  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
388  auto row_is_deleted_or_default =
389  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
390  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
391  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
392  return row_is_deleted_or_default;
393  };
394 }

std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const ExecutionOptions &  eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache 
)
private

Definition at line 218 of file IRCodegen.cpp.

References CodeGenerator::cgen_state_, CHECK(), CHECK_LT, INJECT_TIMER, RelAlgExecutionUnit::join_quals, LEFT, JoinHashTableInterface::OneToOne, CgenState::outer_join_match_found_per_level_, Set, Singleton, JoinLoopDomain::slot_lookup_result, and JoinLoopDomain::values_buffer.

223  {
224  INJECT_TIMER(buildJoinLoops);
225  std::vector<JoinLoop> join_loops;
226  for (size_t level_idx = 0, current_hash_table_idx = 0;
227  level_idx < ra_exe_unit.join_quals.size();
228  ++level_idx) {
229  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
230  std::vector<std::string> fail_reasons;
231  const auto current_level_hash_table =
232  buildCurrentLevelHashTable(current_level_join_conditions,
233  ra_exe_unit,
234  co,
235  query_infos,
236  column_cache,
237  fail_reasons);
238  const auto found_outer_join_matches_cb =
239  [this, level_idx](llvm::Value* found_outer_join_matches) {
240  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
241  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
242  cgen_state_->outer_join_match_found_per_level_[level_idx] =
243  found_outer_join_matches;
244  };
245  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
246  if (current_level_hash_table) {
247  if (current_level_hash_table->getHashType() == JoinHashTable::HashType::OneToOne) {
248  join_loops.emplace_back(
249  JoinLoopKind::Singleton,
250  current_level_join_conditions.type,
251  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
252  const std::vector<llvm::Value*>& prev_iters) {
253  addJoinLoopIterator(prev_iters, level_idx);
254  JoinLoopDomain domain{{0}};
255  domain.slot_lookup_result =
256  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
257  return domain;
258  },
259  nullptr,
260  current_level_join_conditions.type == JoinType::LEFT
261  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
262  : nullptr,
263  is_deleted_cb);
264  } else {
265  join_loops.emplace_back(
266  JoinLoopKind::Set,
267  current_level_join_conditions.type,
268  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
269  const std::vector<llvm::Value*>& prev_iters) {
270  addJoinLoopIterator(prev_iters, level_idx);
271  JoinLoopDomain domain{{0}};
272  const auto matching_set = current_level_hash_table->codegenMatchingSet(
273  co, current_hash_table_idx);
274  domain.values_buffer = matching_set.elements;
275  domain.element_count = matching_set.count;
276  return domain;
277  },
278  nullptr,
279  current_level_join_conditions.type == JoinType::LEFT
280  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
281  : nullptr,
282  is_deleted_cb);
283  }
284  ++current_hash_table_idx;
285  } else {
286  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
287  ? "No equijoin expression found"
288  : boost::algorithm::join(fail_reasons, " | ");
289  check_if_loop_join_is_allowed(
290  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
291  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
292  // condition.
293  VLOG(1) << "Unable to build hash table, falling back to loop join: "
294  << fail_reasons_str;
295  const auto outer_join_condition_cb =
296  [this, level_idx, &co, &current_level_join_conditions](
297  const std::vector<llvm::Value*>& prev_iters) {
298  // The values generated for the match path don't dominate all uses
299  // since on the non-match path nulls are generated. Reset the cache
300  // once the condition is generated to avoid incorrect reuse.
301  FetchCacheAnchor anchor(cgen_state_.get());
302  addJoinLoopIterator(prev_iters, level_idx + 1);
303  llvm::Value* left_join_cond = cgen_state_->llBool(true);
304  CodeGenerator code_generator(this);
305  for (auto expr : current_level_join_conditions.quals) {
306  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
307  left_join_cond,
308  code_generator.toBool(
309  code_generator.codegen(expr.get(), true, co).front()));
310  }
311  return left_join_cond;
312  };
313  join_loops.emplace_back(
314  JoinLoopKind::UpperBound,
315  current_level_join_conditions.type,
316  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
317  addJoinLoopIterator(prev_iters, level_idx);
318  JoinLoopDomain domain{{0}};
319  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
320  get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan"),
321  cgen_state_->llInt(int32_t(level_idx + 1)));
322  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(rows_per_scan_ptr,
323  "num_rows_per_scan");
324  return domain;
325  },
326  current_level_join_conditions.type == JoinType::LEFT
327  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
328  outer_join_condition_cb)
329  : nullptr,
330  current_level_join_conditions.type == JoinType::LEFT
331  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
332  : nullptr,
333  is_deleted_cb);
334  }
335  }
336  return join_loops;
337 }

void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList &  selected_fragments,
const RelAlgExecutionUnit &  ra_exe_unit 
)
private

Definition at line 2116 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LT, and RelAlgExecutionUnit::input_descs.

2121  {
2122  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
2123  size_t frag_pos{0};
2124  const auto& input_descs = ra_exe_unit.input_descs;
2125  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
2126  const int table_id = input_descs[scan_idx].getTableId();
2127  CHECK_EQ(selected_fragments[scan_idx].table_id, table_id);
2128  selected_fragments_crossjoin.push_back(
2129  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
2130  for (const auto& col_id : col_global_ids) {
2131  CHECK(col_id);
2132  const auto& input_desc = col_id->getScanDesc();
2133  if (input_desc.getTableId() != table_id ||
2134  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
2135  continue;
2136  }
2137  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
2138  CHECK(it != plan_state_->global_to_local_col_ids_.end());
2139  CHECK_LT(static_cast<size_t>(it->second),
2140  plan_state_->global_to_local_col_ids_.size());
2141  local_col_to_frag_pos[it->second] = frag_pos;
2142  }
2143  ++frag_pos;
2144  }
2145 }

llvm::Value * Executor::castToFP ( llvm::Value *  val)
private

Definition at line 2984 of file Execute.cpp.

References CHECK().

2984  {
2985  if (!val->getType()->isIntegerTy()) {
2986  return val;
2987  }
2988 
2989  auto val_width = static_cast<llvm::IntegerType*>(val->getType())->getBitWidth();
2990  llvm::Type* dest_ty{nullptr};
2991  switch (val_width) {
2992  case 32:
2993  dest_ty = llvm::Type::getFloatTy(cgen_state_->context_);
2994  break;
2995  case 64:
2996  dest_ty = llvm::Type::getDoubleTy(cgen_state_->context_);
2997  break;
2998  default:
2999  CHECK(false);
3000  }
3001  return cgen_state_->ir_builder_.CreateSIToFP(val, dest_ty);
3002 }

llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 3004 of file Execute.cpp.

References CHECK(), CHECK_LT, and get_int_type().

3004  {
3005  CHECK(val->getType()->isPointerTy());
3006 
3007  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
3008  const auto val_type = val_ptr_type->getElementType();
3009  size_t val_width = 0;
3010  if (val_type->isIntegerTy()) {
3011  val_width = val_type->getIntegerBitWidth();
3012  } else {
3013  if (val_type->isFloatTy()) {
3014  val_width = 32;
3015  } else {
3016  CHECK(val_type->isDoubleTy());
3017  val_width = 64;
3018  }
3019  }
3020  CHECK_LT(size_t(0), val_width);
3021  if (bitWidth == val_width) {
3022  return val;
3023  }
3024  return cgen_state_->ir_builder_.CreateBitCast(
3025  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
3026 }

void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 160 of file Execute.cpp.

References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by MapDHandler::clear_cpu_memory(), MapDHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

160  {
161  switch (memory_level) {
162  case Data_Namespace::MemoryLevel::CPU_LEVEL:
163  case Data_Namespace::MemoryLevel::GPU_LEVEL: {
164  std::lock_guard<std::mutex> flush_lock(
165  execute_mutex_); // Don't flush memory while queries are running
166 
167  Catalog_Namespace::SysCatalog::instance().getDataMgr().clearMemory(memory_level);
168  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
169  // The hash table cache uses CPU memory not managed by the buffer manager. In the
170  // future, we should manage these allocations with the buffer manager directly.
171  // For now, assume the user wants to purge the hash table cache when they clear
172  // CPU memory (currently used in ExecuteTest to lower memory pressure)
173  JoinHashTableCacheInvalidator::invalidateCaches();
174  }
175  break;
176  }
177  default: {
178  throw std::runtime_error(
179  "Clearing memory levels other than the CPU level or GPU level is not "
180  "supported.");
181  }
182  }
183 }
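
Callers such as MapDHandler::clear_cpu_memory() (listed above) invoke it as a static. A usage sketch, assuming the OmniSciDB headers are available:

// Flush all CPU-level buffers; per the comment above, this also purges the
// hash table cache (used e.g. in ExecuteTest to lower memory pressure).
Executor::clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);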

void Executor::clearMetaInfoCache ( )
private

Definition at line 328 of file Execute.cpp.

References input_table_info_cache_().

328  {
329  input_table_info_cache_.clear();
330  agg_col_range_cache_.clear();
331  string_dictionary_generations_.clear();
332  table_generations_.clear();
333 }

llvm::Value * Executor::codegenAggregateWindowState ( )
private

Definition at line 318 of file WindowFunctionIR.cpp.

References AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.

318  {
319  const auto pi32_type =
320  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
321  const auto pi64_type =
322  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
323  const auto window_func_context =
324  WindowProjectNodeContext::getActiveWindowFunctionContext();
325  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
326  const auto window_func_ti = get_adjusted_window_type_info(window_func);
327  const auto aggregate_state_type =
328  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
329  auto aggregate_state = aggregateWindowStatePtr();
330  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
331  const auto aggregate_state_count_i64 = cgen_state_->llInt(
332  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
333  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
334  aggregate_state_count_i64, aggregate_state_type);
335  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
336  switch (window_func_ti.get_type()) {
337  case kFLOAT: {
338  return cgen_state_->emitCall(
339  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
340  }
341  case kDOUBLE: {
342  return cgen_state_->emitCall(
343  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
344  }
345  case kDECIMAL: {
346  return cgen_state_->emitCall(
347  "load_avg_decimal",
348  {aggregate_state,
349  aggregate_state_count,
350  double_null_lv,
351  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
352  }
353  default: {
354  return cgen_state_->emitCall(
355  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
356  }
357  }
358  }
359  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
360  return cgen_state_->ir_builder_.CreateLoad(aggregate_state);
361  }
362  switch (window_func_ti.get_type()) {
363  case kFLOAT: {
364  return cgen_state_->emitCall("load_float", {aggregate_state});
365  }
366  case kDOUBLE: {
367  return cgen_state_->emitCall("load_double", {aggregate_state});
368  }
369  default: {
370  return cgen_state_->ir_builder_.CreateLoad(aggregate_state);
371  }
372  }
373 }

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit &  ra_exe_unit,
GroupByAndAggregate &  group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const ExecutionOptions &  eo 
)
private

Definition at line 459 of file IRCodegen.cpp.

References JoinLoop::codegen(), CompilationOptions::device_type_, CodeGenerator::posArg(), and ExecutionOptions::with_dynamic_watchdog.

466  {
467  const auto exit_bb =
468  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->row_func_);
469  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
470  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
471  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
472  CodeGenerator code_generator(this);
473  const auto loops_entry_bb = JoinLoop::codegen(
474  join_loops,
475  [this,
476  query_func,
477  &query_mem_desc,
478  &co,
479  &eo,
480  &group_by_and_aggregate,
481  &join_loops,
482  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
483  addJoinLoopIterator(prev_iters, join_loops.size());
484  auto& builder = cgen_state_->ir_builder_;
485  const auto loop_body_bb = llvm::BasicBlock::Create(
486  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
487  builder.SetInsertPoint(loop_body_bb);
488  const bool can_return_error =
489  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
490  if (can_return_error || cgen_state_->needs_error_check_ ||
491  eo.with_dynamic_watchdog) {
492  createErrorCheckControlFlow(
493  query_func, eo.with_dynamic_watchdog, co.device_type_);
494  }
495  return loop_body_bb;
496  },
497  code_generator.posArg(nullptr),
498  exit_bb,
499  cgen_state_->ir_builder_);
500  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
501  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
502 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const bool with_dynamic_watchdog
ExecutorDeviceType device_type_
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, ExecutorDeviceType device_type)
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, llvm::IRBuilder<> &builder)
Definition: JoinLoop.cpp:45
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:442

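JoinLoop::codegen receives a body-codegen callback and wraps one loop per join level around it; only the innermost level emits the row-function body. The same control structure can be modeled on the host without LLVM (hypothetical names, a sketch of the shape only):

    #include <functional>
    #include <iostream>
    #include <vector>

    // Hypothetical analogue of JoinLoop::codegen: one loop per join level,
    // with the innermost level invoking the body callback (the analogue of
    // the generated loop_body basic block).
    void run_join_loops(const std::vector<std::vector<int>>& levels,
                        std::vector<int>& prev_iters,
                        const std::function<void(const std::vector<int>&)>& body,
                        size_t level = 0) {
      if (level == levels.size()) {
        body(prev_iters);
        return;
      }
      for (int v : levels[level]) {
        prev_iters.push_back(v);
        run_join_loops(levels, prev_iters, body, level + 1);
        prev_iters.pop_back();
      }
    }

    int main() {
      std::vector<int> iters;
      run_join_loops({{1, 2}, {10, 20}}, iters, [](const std::vector<int>& it) {
        std::cout << it[0] << ',' << it[1] << '\n';  // one line per joined pair
      });
    }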

llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 1814 of file NativeCodegen.cpp.

1816  {
1817  CHECK(!ra_exe_unit.input_descs.empty());
1818  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
1819  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
1820  return nullptr;
1821  }
1822  const auto td = catalog_->getMetadataForTable(outer_input_desc.getTableId());
1823  CHECK(td);
1824  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
1825  if (!deleted_cd) {
1826  return nullptr;
1827  }
1828  CHECK(deleted_cd->columnType.is_boolean());
1829  const auto deleted_expr =
1830  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
1831  outer_input_desc.getTableId(),
1832  deleted_cd->columnId,
1833  outer_input_desc.getNestLevel());
1834  CodeGenerator code_generator(this);
1835  const auto is_deleted =
1836  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
1837  const auto is_deleted_bb = llvm::BasicBlock::Create(
1838  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
1839  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
1840  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
1841  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
1842  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
1843  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1844  cgen_state_->ir_builder_.SetInsertPoint(bb);
1845  return bb;
1846 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
const std::vector< InputDescriptor > input_descs
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1019
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:2204
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
void Executor::codegenWindowAvgEpilogue ( llvm::Value *  crt_val,
llvm::Value *  window_func_null_val,
llvm::Value *  multiplicity_lv 
)
private

Definition at line 282 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

284  {
285  const auto window_func_context =
286  WindowProjectNodeContext::getActiveWindowFunctionContext();
287  const auto window_func = window_func_context->getWindowFunction();
288  const auto window_func_ti = get_adjusted_window_type_info(window_func);
289  const auto pi32_type =
290  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
291  const auto pi64_type =
292  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
293  const auto aggregate_state_type =
294  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
295  const auto aggregate_state_count_i64 = cgen_state_->llInt(
296  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
297  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
298  aggregate_state_count_i64, aggregate_state_type);
299  std::string agg_count_func_name = "agg_count";
300  switch (window_func_ti.get_type()) {
301  case kFLOAT: {
302  agg_count_func_name += "_float";
303  break;
304  }
305  case kDOUBLE: {
306  agg_count_func_name += "_double";
307  break;
308  }
309  default: {
310  break;
311  }
312  }
313  agg_count_func_name += "_skip_val";
314  cgen_state_->emitCall(agg_count_func_name,
315  {aggregate_state_count, crt_val, window_func_null_val});
316 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

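The epilogue picks the counter function by the window type and always appends the _skip_val suffix, so NULL inputs never bump the AVG denominator. A stand-alone sketch of that name composition, with a reduced stand-in enum:

    #include <iostream>
    #include <string>

    enum class WindowType { kFLOAT, kDOUBLE, kBIGINT };  // reduced stand-in

    std::string agg_count_func_name(WindowType t) {
      std::string name = "agg_count";
      if (t == WindowType::kFLOAT) {
        name += "_float";
      } else if (t == WindowType::kDOUBLE) {
        name += "_double";
      }
      return name + "_skip_val";  // always skip NULLs for the AVG denominator
    }

    int main() {
      std::cout << agg_count_func_name(WindowType::kDOUBLE) << '\n';
      // prints agg_count_double_skip_val
    }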

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 21 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AVG, CHECK(), CHECK_EQ, CodeGenerator::codegen(), COUNT, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAST_VALUE, LEAD, LOG, MAX, MIN, NTILE, PERCENT_RANK, CodeGenerator::posArg(), RANK, ROW_NUMBER, and SUM.

22  {
23  CodeGenerator code_generator(this);
24  const auto window_func_context =
25  WindowProjectNodeContext::get()->activateWindowFunctionContext(target_index);
26  const auto window_func = window_func_context->getWindowFunction();
27  switch (window_func->getKind()) {
28  case SqlWindowFunctionKind::ROW_NUMBER:
29  case SqlWindowFunctionKind::RANK:
30  case SqlWindowFunctionKind::DENSE_RANK:
31  case SqlWindowFunctionKind::NTILE: {
32  return cgen_state_->emitCall("row_number_window_func",
33  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
34  window_func_context->output())),
35  code_generator.posArg(nullptr)});
36  }
37  case SqlWindowFunctionKind::PERCENT_RANK:
38  case SqlWindowFunctionKind::CUME_DIST: {
39  return cgen_state_->emitCall("percent_window_func",
40  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
41  window_func_context->output())),
42  code_generator.posArg(nullptr)});
43  }
44  case SqlWindowFunctionKind::LAG:
45  case SqlWindowFunctionKind::LEAD:
46  case SqlWindowFunctionKind::FIRST_VALUE:
47  case SqlWindowFunctionKind::LAST_VALUE: {
48  CHECK(WindowProjectNodeContext::get());
49  const auto& args = window_func->getArgs();
50  CHECK(!args.empty());
51  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
52  CHECK_EQ(arg_lvs.size(), size_t(1));
53  return arg_lvs.front();
54  }
55  case SqlWindowFunctionKind::AVG:
56  case SqlWindowFunctionKind::MIN:
57  case SqlWindowFunctionKind::MAX:
58  case SqlWindowFunctionKind::SUM:
59  case SqlWindowFunctionKind::COUNT: {
60  return codegenWindowFunctionAggregate(co);
61  }
62  default: {
63  LOG(FATAL) << "Invalid window function kind";
64  }
65  }
66  return nullptr;
67 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
#define LOG(tag)
Definition: Logger.h:185
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
const WindowFunctionContext * activateWindowFunctionContext(const size_t target_index) const
CHECK(cgen_state)
static const WindowProjectNodeContext * get()
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
const Analyzer::WindowFunction * getWindowFunction() const


llvm::Value * Executor::codegenWindowFunctionAggregate ( const CompilationOptions co)
private

Definition at line 137 of file WindowFunctionIR.cpp.

References AVG, CHECK(), WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().

137  {
138  const auto reset_state_false_bb = codegenWindowResetStateControlFlow();
139  auto aggregate_state = aggregateWindowStatePtr();
140  llvm::Value* aggregate_state_count = nullptr;
141  const auto window_func_context =
142  WindowProjectNodeContext::getActiveWindowFunctionContext();
143  const auto window_func = window_func_context->getWindowFunction();
144  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
145  const auto aggregate_state_count_i64 = cgen_state_->llInt(
146  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
147  const auto pi64_type =
148  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
149  aggregate_state_count =
150  cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_count_i64, pi64_type);
151  }
152  codegenWindowFunctionStateInit(aggregate_state);
153  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
154  const auto count_zero = cgen_state_->llInt(int64_t(0));
155  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
156  }
157  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
158  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
159  CHECK(WindowProjectNodeContext::get());
160  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
161 }
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
CHECK(cgen_state)
static const WindowProjectNodeContext * get()
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
static WindowFunctionContext * getActiveWindowFunctionContext()
llvm::BasicBlock * codegenWindowResetStateControlFlow()

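AVG is the one aggregate that needs a second state slot: alongside the running sum, a count is zero-initialized with agg_id and bumped per non-NULL row, and the final value is the sum divided by the count. A plain C++ model of that two-slot state (sketch only):

    #include <cstdint>
    #include <iostream>

    int main() {
      double aggregate_state = 0.0;       // sum slot, filled by the agg_* calls
      int64_t aggregate_state_count = 0;  // extra count slot, agg_id(..., 0)
      for (double v : {2.0, 4.0, 6.0}) {  // three non-NULL input rows
        aggregate_state += v;
        ++aggregate_state_count;
      }
      std::cout << aggregate_state / aggregate_state_count << '\n';  // prints 4
    }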

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 240 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AVG, CHECK(), CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kFLOAT, and SUM.

241  {
242  const auto window_func_context =
243  WindowProjectNodeContext::getActiveWindowFunctionContext();
244  const auto window_func = window_func_context->getWindowFunction();
245  const auto window_func_ti = get_adjusted_window_type_info(window_func);
246  const auto window_func_null_val =
247  window_func_ti.is_fp()
248  ? cgen_state_->inlineFpNull(window_func_ti)
249  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
250  const auto& args = window_func->getArgs();
251  llvm::Value* crt_val;
252  if (args.empty()) {
253  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
254  crt_val = cgen_state_->llInt(int64_t(1));
255  } else {
256  CodeGenerator code_generator(this);
257  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
258  CHECK_EQ(arg_lvs.size(), size_t(1));
259  if (window_func->getKind() == SqlWindowFunctionKind::SUM && !window_func_ti.is_fp()) {
260  crt_val = code_generator.codegenCastBetweenIntTypes(
261  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
262  } else {
263  crt_val = window_func_ti.get_type() == kFLOAT
264  ? arg_lvs.front()
265  : cgen_state_->castToTypeIn(arg_lvs.front(), 64);
266  }
267  }
268  const auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
269  llvm::Value* multiplicity_lv = nullptr;
270  if (args.empty()) {
271  cgen_state_->emitCall(agg_name, {aggregate_state, crt_val});
272  } else {
273  cgen_state_->emitCall(agg_name + "_skip_val",
274  {aggregate_state, crt_val, window_func_null_val});
275  }
276  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
277  codegenWindowAvgEpilogue(crt_val, window_func_null_val, multiplicity_lv);
278  }
279  return codegenAggregateWindowState();
280 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
std::string get_window_agg_name(const SqlWindowFunctionKind kind, const SQLTypeInfo &window_func_ti)
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
CHECK(cgen_state)
llvm::Value * codegenAggregateWindowState()
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)


void Executor::codegenWindowFunctionStateInit ( llvm::Value *  aggregate_state)
private

Definition at line 191 of file WindowFunctionIR.cpp.

References COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

191  {
192  const auto window_func_context =
193  WindowProjectNodeContext::getActiveWindowFunctionContext();
194  const auto window_func = window_func_context->getWindowFunction();
195  const auto window_func_ti = get_adjusted_window_type_info(window_func);
196  const auto window_func_null_val =
197  window_func_ti.is_fp()
198  ? cgen_state_->inlineFpNull(window_func_ti)
199  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
200  llvm::Value* window_func_init_val;
201  if (window_func_context->getWindowFunction()->getKind() ==
202  SqlWindowFunctionKind::COUNT) {
203  switch (window_func_ti.get_type()) {
204  case kFLOAT: {
205  window_func_init_val = cgen_state_->llFp(float(0));
206  break;
207  }
208  case kDOUBLE: {
209  window_func_init_val = cgen_state_->llFp(double(0));
210  break;
211  }
212  default: {
213  window_func_init_val = cgen_state_->llInt(int64_t(0));
214  break;
215  }
216  }
217  } else {
218  window_func_init_val = window_func_null_val;
219  }
220  const auto pi32_type =
221  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
222  switch (window_func_ti.get_type()) {
223  case kDOUBLE: {
224  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
225  break;
226  }
227  case kFLOAT: {
228  aggregate_state =
229  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
230  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
231  break;
232  }
233  default: {
234  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
235  break;
236  }
237  }
238 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext()
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

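The init value is the only behavioral difference between COUNT and the other aggregates here: COUNT starts from zero, the rest start from the type's NULL sentinel so the first real row overwrites it. A minimal model, assuming the common convention that the bigint NULL sentinel is the minimum int64 value:

    #include <cstdint>
    #include <iostream>
    #include <limits>

    int main() {
      const bool is_count = false;
      // Assumed NULL sentinel for a 64-bit integer column.
      const int64_t null_val = std::numeric_limits<int64_t>::min();
      const int64_t window_func_init_val = is_count ? 0 : null_val;
      std::cout << window_func_init_val << '\n';
    }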

llvm::BasicBlock * Executor::codegenWindowResetStateControlFlow ( )
private

Definition at line 163 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::getActiveWindowFunctionContext(), CodeGenerator::posArg(), and CodeGenerator::toBool().

163  {
164  const auto window_func_context =
165  WindowProjectNodeContext::getActiveWindowFunctionContext();
166  const auto bitset = cgen_state_->llInt(
167  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
168  const auto min_val = cgen_state_->llInt(int64_t(0));
169  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
170  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
171  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
172  CodeGenerator code_generator(this);
173  const auto reset_state =
174  code_generator.toBool(cgen_state_->emitCall("bit_is_set",
175  {bitset,
176  code_generator.posArg(nullptr),
177  min_val,
178  max_val,
179  null_val,
180  null_bool_val}));
181  const auto reset_state_true_bb = llvm::BasicBlock::Create(
182  cgen_state_->context_, "reset_state.true", cgen_state_->row_func_);
183  const auto reset_state_false_bb = llvm::BasicBlock::Create(
184  cgen_state_->context_, "reset_state.false", cgen_state_->row_func_);
185  cgen_state_->ir_builder_.CreateCondBr(
186  reset_state, reset_state_true_bb, reset_state_false_bb);
187  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
188  return reset_state_false_bb;
189 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
static WindowFunctionContext * getActiveWindowFunctionContext()

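bit_is_set consults a partition-start bitset with one bit per row: a set bit at the current position means a new partition begins there, so the aggregate state must be reset. A host-side model of the predicate (hypothetical helper, plain byte array for the bitset, NULL-handling arguments omitted):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Hypothetical stand-in for the "bit_is_set" runtime call.
    bool bit_is_set(const std::vector<uint8_t>& bitset, int64_t pos) {
      return (bitset[pos >> 3] >> (pos & 7)) & 1;
    }

    int main() {
      std::vector<uint8_t> partition_start(1, 0);
      partition_start[0] |= 1 << 0;  // row 0 opens the first partition
      partition_start[0] |= 1 << 5;  // row 5 opens the second
      for (int64_t pos = 0; pos < 8; ++pos) {
        std::cout << "pos " << pos
                  << (bit_is_set(partition_start, pos) ? ": reset state\n"
                                                       : ": keep state\n");
      }
    }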

ResultSetPtr Executor::collectAllDeviceResults ( ExecutionDispatch execution_dispatch,
const std::vector< Analyzer::Expr * > &  target_exprs,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
)
private

Definition at line 1507 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), catalog_(), DEBUG_TIMER, Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, GroupByAndAggregate::shard_count_for_top_groups(), and use_speculative_top_n().

1512  {
1513  auto timer = DEBUG_TIMER(__func__);
1514  auto& result_per_device = execution_dispatch.getFragmentResults();
1515  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
1516  QueryDescriptionType::NonGroupedAggregate) {
1517  return build_row_for_empty_input(target_exprs, query_mem_desc, device_type);
1518  }
1519  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1520  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
1521  return reduceSpeculativeTopN(
1522  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
1523  }
1524  const auto shard_count =
1525  device_type == ExecutorDeviceType::GPU
1526  ? GroupByAndAggregate::shard_count_for_top_groups(ra_exe_unit, *catalog_)
1527  : 0;
1528 
1529  if (shard_count && !result_per_device.empty()) {
1530  return collectAllDeviceShardedTopResults(execution_dispatch);
1531  }
1532  return reduceMultiDeviceResults(
1533  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
1534 }
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:920
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1019
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:837
QueryDescriptionType getQueryDescriptionType() const
ResultSetPtr collectAllDeviceShardedTopResults(ExecutionDispatch &execution_dispatch) const
Definition: Execute.cpp:1618
ResultSetPtr build_row_for_empty_input(const std::vector< Analyzer::Expr * > &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
Definition: Execute.cpp:1470
#define DEBUG_TIMER(name)
Definition: Logger.h:296
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)


ResultSetPtr Executor::collectAllDeviceShardedTopResults ( ExecutionDispatch execution_dispatch) const
private

Definition at line 1618 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LE, Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragmentResults(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), and run_benchmark_import::result.

1619  {
1620  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1621  auto& result_per_device = execution_dispatch.getFragmentResults();
1622  const auto first_result_set = result_per_device.front().first;
1623  CHECK(first_result_set);
1624  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
1625  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
1626  const auto top_n = ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
1627  top_query_mem_desc.setEntryCount(0);
1628  for (auto& result : result_per_device) {
1629  const auto result_set = result.first;
1630  CHECK(result_set);
1631  result_set->sort(ra_exe_unit.sort_info.order_entries, top_n);
1632  size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
1633  top_query_mem_desc.setEntryCount(new_entry_cnt);
1634  }
1635  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
1636  first_result_set->getDeviceType(),
1637  top_query_mem_desc,
1638  first_result_set->getRowSetMemOwner(),
1639  this);
1640  auto top_storage = top_result_set->allocateStorage();
1641  size_t top_output_row_idx{0};
1642  for (auto& result : result_per_device) {
1643  const auto result_set = result.first;
1644  CHECK(result_set);
1645  const auto& top_permutation = result_set->getPermutationBuffer();
1646  CHECK_LE(top_permutation.size(), top_n);
1647  if (top_query_mem_desc.didOutputColumnar()) {
1648  top_output_row_idx = permute_storage_columnar(result_set->getStorage(),
1649  result_set->getQueryMemDesc(),
1650  top_storage,
1651  top_output_row_idx,
1652  top_query_mem_desc,
1653  top_permutation);
1654  } else {
1655  top_output_row_idx = permute_storage_row_wise(result_set->getStorage(),
1656  top_storage,
1657  top_output_row_idx,
1658  top_query_mem_desc,
1659  top_permutation);
1660  }
1661  }
1662  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
1663  return top_result_set;
1664 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
size_t permute_storage_row_wise(const ResultSetStorage *input_storage, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:1597
CHECK(cgen_state)
#define CHECK_LE(x, y)
Definition: Logger.h:201
size_t permute_storage_columnar(const ResultSetStorage *input_storage, const QueryMemoryDescriptor &input_query_mem_desc, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:1547

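Because shards partition the group keys, each device result can be sorted and truncated independently and the permuted rows simply appended in shard order; no global re-sort is needed. A toy analogue with vectors standing in for the ResultSets and the permute_storage_* helpers:

    #include <algorithm>
    #include <functional>
    #include <iostream>
    #include <vector>

    int main() {
      // Two device results, each a stand-in for a per-shard ResultSet.
      std::vector<std::vector<int>> result_per_device{{5, 1, 9}, {7, 3}};
      const size_t top_n = 2;
      std::vector<int> top_storage;
      for (auto& rs : result_per_device) {
        std::sort(rs.begin(), rs.end(), std::greater<int>());  // per-shard sort
        rs.resize(std::min(rs.size(), top_n));                 // keep at most top_n
        top_storage.insert(top_storage.end(), rs.begin(), rs.end());  // append
      }
      for (int v : top_storage) {
        std::cout << v << ' ';  // 9 5 7 3
      }
      std::cout << '\n';
    }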

bool Executor::compileBody ( const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co 
)
private

Definition at line 1848 of file NativeCodegen.cpp.

1851  {
1852  // generate the code for the filter
1853  std::vector<Analyzer::Expr*> primary_quals;
1854  std::vector<Analyzer::Expr*> deferred_quals;
1855  bool short_circuited =
1856  CodeGenerator::prioritizeQuals(ra_exe_unit, primary_quals, deferred_quals);
1857  if (short_circuited) {
1858  VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
1859  << "short-circuited and deferred " << std::to_string(deferred_quals.size())
1860  << " quals";
1861  }
1862  llvm::Value* filter_lv = cgen_state_->llBool(true);
1863  CodeGenerator code_generator(this);
1864  for (auto expr : primary_quals) {
1865  // Generate the filter for primary quals
1866  auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
1867  filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
1868  }
1869  CHECK(filter_lv->getType()->isIntegerTy(1));
1870  llvm::BasicBlock* sc_false{nullptr};
1871  if (!deferred_quals.empty()) {
1872  auto sc_true = llvm::BasicBlock::Create(
1873  cgen_state_->context_, "sc_true", cgen_state_->row_func_);
1874  sc_false = llvm::BasicBlock::Create(
1875  cgen_state_->context_, "sc_false", cgen_state_->row_func_);
1876  cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
1877  cgen_state_->ir_builder_.SetInsertPoint(sc_false);
1878  if (ra_exe_unit.join_quals.empty()) {
1879  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
1880  }
1881  cgen_state_->ir_builder_.SetInsertPoint(sc_true);
1882  filter_lv = cgen_state_->llBool(true);
1883  }
1884  for (auto expr : deferred_quals) {
1885  filter_lv = cgen_state_->ir_builder_.CreateAnd(
1886  filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
1887  }
1888 
1889  CHECK(filter_lv->getType()->isIntegerTy(1));
1890  return group_by_and_aggregate.codegen(filter_lv, sc_false, query_mem_desc, co);
1891 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr * > &primary_quals, std::vector< Analyzer::Expr * > &deferred_quals)
Definition: LogicalIR.cpp:157
std::string to_string(char const *&&v)
CHECK(cgen_state)
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const JoinQualsPerNestingLevel join_quals
#define VLOG(n)
Definition: Logger.h:280
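The generated filter is therefore evaluated in two phases: the prioritized quals are ANDed first, and the sc_true/sc_false branch guarantees the deferred quals only execute when the cheap conjunction already passed. A host-side model of that evaluation order (a sketch, not the generated IR):

    #include <functional>
    #include <iostream>
    #include <vector>

    bool eval_row(const std::vector<std::function<bool()>>& primary_quals,
                  const std::vector<std::function<bool()>>& deferred_quals) {
      bool filter = true;
      for (const auto& qual : primary_quals) {
        filter = filter && qual();
      }
      if (!filter) {
        return false;  // sc_false: deferred quals are never evaluated
      }
      for (const auto& qual : deferred_quals) {  // sc_true path
        filter = filter && qual();
      }
      return filter;
    }

    int main() {
      auto cheap = [] { return false; };
      auto expensive = [] {
        std::cout << "expensive qual ran\n";
        return true;
      };
      std::cout << eval_row({cheap}, {expensive}) << '\n';  // 0, no side output
    }
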
std::tuple< Executor::CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache,
RenderInfo render_info = nullptr 
)
private

Definition at line 1568 of file NativeCodegen.cpp.

1579  {
1580  auto timer = DEBUG_TIMER(__func__);
1581  nukeOldState(allow_lazy_fetch, query_infos, &ra_exe_unit);
1582 
1583  GroupByAndAggregate group_by_and_aggregate(
1584  this, co.device_type_, ra_exe_unit, query_infos, row_set_mem_owner);
1585  auto query_mem_desc =
1586  group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
1587  max_groups_buffer_entry_guess,
1588  crt_min_byte_width,
1589  render_info,
1590  eo.output_columnar_hint);
1591 
1592  if (query_mem_desc->getQueryDescriptionType() ==
1593  QueryDescriptionType::GroupByBaselineHash &&
1594  !has_cardinality_estimation &&
1595  (!render_info || !render_info->isPotentialInSituRender()) && !eo.just_explain) {
1596  throw CardinalityEstimationRequired();
1597  }
1598 
1599  const bool output_columnar = query_mem_desc->didOutputColumnar();
1600 
1601  if (co.device_type_ == ExecutorDeviceType::GPU) {
1602  const size_t num_count_distinct_descs =
1603  query_mem_desc->getCountDistinctDescriptorsSize();
1604  for (size_t i = 0; i < num_count_distinct_descs; i++) {
1605  const auto& count_distinct_descriptor =
1606  query_mem_desc->getCountDistinctDescriptor(i);
1607  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::StdSet ||
1608  (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
1609  !co.hoist_literals_)) {
1610  throw QueryMustRunOnCpu();
1611  }
1612  }
1613  }
1614 
1615  // Read the module template and target either CPU or GPU
1616  // by binding the stream position functions to the right implementation:
1617  // stride access for GPU, contiguous for CPU
1618  auto rt_module_copy = llvm::CloneModule(
1619 #if LLVM_VERSION_MAJOR >= 7
1620  *g_rt_module.get(),
1621 #else
1622  g_rt_module.get(),
1623 #endif
1624  cgen_state_->vmap_,
1625  [](const llvm::GlobalValue* gv) {
1626  auto func = llvm::dyn_cast<llvm::Function>(gv);
1627  if (!func) {
1628  return true;
1629  }
1630  return (func->getLinkage() == llvm::GlobalValue::LinkageTypes::PrivateLinkage ||
1631  func->getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage ||
1632  CodeGenerator::alwaysCloneRuntimeFunction(func));
1633  });
1634 
1635  if (co.device_type_ == ExecutorDeviceType::CPU) {
1636  if (is_udf_module_present(true)) {
1637  CodeGenerator::link_udf_module(udf_cpu_module, *rt_module_copy, cgen_state_.get());
1638  }
1639  if (is_rt_udf_module_present(true)) {
1640  CodeGenerator::link_udf_module(
1641  rt_udf_cpu_module, *rt_module_copy, cgen_state_.get());
1642  }
1643  } else {
1644  rt_module_copy->setDataLayout(get_gpu_data_layout());
1645  rt_module_copy->setTargetTriple(get_gpu_target_triple_string());
1646 
1647  if (is_udf_module_present()) {
1648  llvm::Triple gpu_triple(udf_gpu_module->getTargetTriple());
1649 
1650  if (!gpu_triple.isNVPTX()) {
1651  throw QueryMustRunOnCpu();
1652  }
1653 
1654  CodeGenerator::link_udf_module(udf_gpu_module, *rt_module_copy, cgen_state_.get());
1655  }
1656  if (is_rt_udf_module_present()) {
1657  CodeGenerator::link_udf_module(
1658  rt_udf_gpu_module, *rt_module_copy, cgen_state_.get());
1659  }
1660  }
1661 
1662  cgen_state_->module_ = rt_module_copy.release();
1663 
1664  auto agg_fnames =
1665  get_agg_fnames(ra_exe_unit.target_exprs, !ra_exe_unit.groupby_exprs.empty());
1666 
1667  const auto agg_slot_count = ra_exe_unit.estimator ? size_t(1) : agg_fnames.size();
1668 
1669  const bool is_group_by{query_mem_desc->isGroupBy()};
1670  auto query_func = is_group_by ? query_group_by_template(cgen_state_->module_,
1671  co.hoist_literals_,
1672  *query_mem_desc,
1673  co.device_type_,
1674  ra_exe_unit.scan_limit)
1675  : query_template(cgen_state_->module_,
1676  agg_slot_count,
1677  co.hoist_literals_,
1678  !!ra_exe_unit.estimator);
1679  bind_pos_placeholders("pos_start", true, query_func, cgen_state_->module_);
1680  bind_pos_placeholders("group_buff_idx", false, query_func, cgen_state_->module_);
1681  bind_pos_placeholders("pos_step", false, query_func, cgen_state_->module_);
1682 
1683  cgen_state_->query_func_ = query_func;
1684  cgen_state_->query_func_entry_ir_builder_.SetInsertPoint(
1685  &query_func->getEntryBlock().front());
1686 
1687  std::vector<llvm::Value*> col_heads;
1688  std::tie(cgen_state_->row_func_, col_heads) =
1689  create_row_function(ra_exe_unit.input_col_descs.size(),
1690  is_group_by ? 0 : agg_slot_count,
1691  co.hoist_literals_,
1692  query_func,
1693  cgen_state_->module_,
1694  cgen_state_->context_);
1695  CHECK(cgen_state_->row_func_);
1696  // make sure it's in-lined, we don't want register spills in the inner loop
1697  mark_function_always_inline(cgen_state_->row_func_);
1698  auto bb =
1699  llvm::BasicBlock::Create(cgen_state_->context_, "entry", cgen_state_->row_func_);
1700  cgen_state_->ir_builder_.SetInsertPoint(bb);
1701  preloadFragOffsets(ra_exe_unit.input_descs, query_infos);
1702  RelAlgExecutionUnit body_execution_unit = ra_exe_unit;
1703  const auto join_loops =
1704  buildJoinLoops(body_execution_unit, co, eo, query_infos, column_cache);
1705  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
1706  const auto is_not_deleted_bb = codegenSkipDeletedOuterTableRow(ra_exe_unit, co);
1707  if (is_not_deleted_bb) {
1708  bb = is_not_deleted_bb;
1709  }
1710  if (!join_loops.empty()) {
1711  codegenJoinLoops(join_loops,
1712  body_execution_unit,
1713  group_by_and_aggregate,
1714  query_func,
1715  bb,
1716  *(query_mem_desc.get()),
1717  co,
1718  eo);
1719  } else {
1720  const bool can_return_error =
1721  compileBody(ra_exe_unit, group_by_and_aggregate, *query_mem_desc, co);
1722  if (can_return_error || cgen_state_->needs_error_check_ || eo.with_dynamic_watchdog) {
1723  createErrorCheckControlFlow(query_func, eo.with_dynamic_watchdog, co.device_type_);
1724  }
1725  }
1726  std::vector<llvm::Value*> hoisted_literals;
1727 
1728  if (co.hoist_literals_) {
1729  VLOG(1) << "number of hoisted literals: "
1730  << cgen_state_->query_func_literal_loads_.size()
1731  << " / literal buffer usage: " << cgen_state_->getLiteralBufferUsage(0)
1732  << " bytes";
1733  }
1734 
1735  if (co.hoist_literals_ && !cgen_state_->query_func_literal_loads_.empty()) {
1736  // we have some hoisted literals...
1737  hoisted_literals = inlineHoistedLiterals();
1738  }
1739  // iterate through all the instruction in the query template function and
1740  // replace the call to the filter placeholder with the call to the actual filter
1741  for (auto it = llvm::inst_begin(query_func), e = llvm::inst_end(query_func); it != e;
1742  ++it) {
1743  if (!llvm::isa<llvm::CallInst>(*it)) {
1744  continue;
1745  }
1746  auto& filter_call = llvm::cast<llvm::CallInst>(*it);
1747  if (std::string(filter_call.getCalledFunction()->getName()) == "row_process") {
1748  std::vector<llvm::Value*> args;
1749  for (size_t i = 0; i < filter_call.getNumArgOperands(); ++i) {
1750  args.push_back(filter_call.getArgOperand(i));
1751  }
1752  args.insert(args.end(), col_heads.begin(), col_heads.end());
1753  args.push_back(get_arg_by_name(query_func, "join_hash_tables"));
1754  // push hoisted literals arguments, if any
1755  args.insert(args.end(), hoisted_literals.begin(), hoisted_literals.end());
1756 
1757  llvm::ReplaceInstWithInst(&filter_call,
1758  llvm::CallInst::Create(cgen_state_->row_func_, args, ""));
1759  break;
1760  }
1761  }
1762  plan_state_->init_agg_vals_ =
1763  init_agg_val_vec(ra_exe_unit.target_exprs, ra_exe_unit.quals, *query_mem_desc);
1764 
1765  auto multifrag_query_func = cgen_state_->module_->getFunction(
1766  "multifrag_query" + std::string(co.hoist_literals_ ? "_hoisted_literals" : ""));
1767  CHECK(multifrag_query_func);
1768 
1769  bind_query(query_func,
1770  "query_stub" + std::string(co.hoist_literals_ ? "_hoisted_literals" : ""),
1771  multifrag_query_func,
1772  cgen_state_->module_);
1773 
1774  auto live_funcs =
1775  CodeGenerator::markDeadRuntimeFuncs(*cgen_state_->module_,
1776  {query_func, cgen_state_->row_func_},
1777  {multifrag_query_func});
1778 
1779  std::string llvm_ir;
1780  if (eo.just_explain) {
1781  if (co.explain_type_ == ExecutorExplainType::Optimized) {
1782 #ifdef WITH_JIT_DEBUG
1783  throw std::runtime_error(
1784  "Explain optimized not available when JIT runtime debug symbols are enabled");
1785 #else
1786  optimize_ir(query_func, cgen_state_->module_, live_funcs, co);
1787 #endif // WITH_JIT_DEBUG
1788  }
1789  llvm_ir =
1790  serialize_llvm_object(query_func) + serialize_llvm_object(cgen_state_->row_func_);
1791  }
1792  verify_function_ir(cgen_state_->row_func_);
1793 
1794  LOG(IR) << query_mem_desc->toString() << "\nGenerated IR\n"
1795  << serialize_llvm_object(query_func)
1796  << serialize_llvm_object(cgen_state_->row_func_) << "\nEnd of IR";
1797 
1798  return std::make_tuple(
1799  CompilationResult{
1800  co.device_type_ == ExecutorDeviceType::CPU
1801  ? optimizeAndCodegenCPU(query_func, multifrag_query_func, live_funcs, co)
1802  : optimizeAndCodegenGPU(query_func,
1803  multifrag_query_func,
1804  live_funcs,
1805  is_group_by || ra_exe_unit.estimator,
1806  cuda_mgr,
1807  co),
1808  cgen_state_->getLiterals(),
1809  output_columnar,
1810  llvm_ir},
1811  std::move(query_mem_desc));
1812 }
std::vector< Analyzer::Expr * > target_exprs
std::unique_ptr< llvm::Module > rt_udf_cpu_module
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
Definition: IRCodegen.cpp:459
std::unique_ptr< llvm::Module > udf_gpu_module
#define LOG(tag)
Definition: Logger.h:185
std::unique_ptr< llvm::Module > rt_udf_gpu_module
void mark_function_always_inline(llvm::Function *func)
llvm::StringRef get_gpu_data_layout()
bool is_udf_module_present(bool cpu_only=false)
std::vector< int64_t > init_agg_val_vec(const std::vector< TargetInfo > &targets, const QueryMemoryDescriptor &query_mem_desc)
llvm::Function * query_template(llvm::Module *module, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
const std::vector< InputDescriptor > input_descs
std::vector< std::string > get_agg_fnames(const std::vector< Analyzer::Expr * > &target_exprs, const bool is_group_by)
std::vector< std::pair< void *, void * > > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:2874
llvm::StringRef get_gpu_target_triple_string()
void verify_function_ir(const llvm::Function *func)
const bool allow_multifrag
CHECK(cgen_state)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:117
static std::unordered_set< llvm::Function * > markDeadRuntimeFuncs(llvm::Module &module, const std::vector< llvm::Function * > &roots, const std::vector< llvm::Function * > &leaves)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
const bool with_dynamic_watchdog
std::unique_ptr< llvm::Module > g_rt_module
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:988
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:2860
const bool output_columnar_hint
llvm::Function * query_group_by_template(llvm::Module *module, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit)
static void link_udf_module(const std::unique_ptr< llvm::Module > &udf_module, llvm::Module &module, CgenState *cgen_state, llvm::Linker::Flags flags=llvm::Linker::Flags::None)
const std::shared_ptr< Analyzer::Estimator > estimator
ExecutorDeviceType device_type_
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, ExecutorDeviceType device_type)
const ExecutorExplainType explain_type_
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
std::string serialize_llvm_object(const T *llvm_obj)
static bool alwaysCloneRuntimeFunction(const llvm::Function *func)
std::unique_ptr< llvm::Module > udf_cpu_module
void bind_pos_placeholders(const std::string &pos_fn_name, const bool use_resume_param, llvm::Function *query_func, llvm::Module *module)
std::list< std::shared_ptr< Analyzer::Expr > > quals
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:61
#define DEBUG_TIMER(name)
Definition: Logger.h:296
std::vector< std::pair< void *, void * > > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
std::vector< llvm::Value * > inlineHoistedLiterals()
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:218
void optimize_ir(llvm::Function *query_func, llvm::Module *module, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
cgen_state module_
bool is_rt_udf_module_present(bool cpu_only=false)
#define VLOG(n)
Definition: Logger.h:280
std::pair< llvm::Function *, std::vector< llvm::Value * > > create_row_function(const size_t in_col_count, const size_t agg_col_count, const bool hoist_literals, llvm::Function *query_func, llvm::Module *module, llvm::LLVMContext &context)
void bind_query(llvm::Function *query_func, const std::string &query_fname, llvm::Function *multifrag_query_func, llvm::Module *module)
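The placeholder substitution near the end of the listing is worth modeling: compileWorkUnit scans the query template for the row_process call, appends the column heads, the join hash tables pointer, and any hoisted literals to its arguments, then splices in the real row function. A toy string-based analogue (hypothetical, no LLVM):

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> query_template{"pos_start", "row_process(a,b)", "ret"};
      const std::vector<std::string> col_heads{"col0", "col1"};
      for (auto& inst : query_template) {
        if (inst.rfind("row_process(", 0) == 0) {  // found the placeholder call
          std::string args = inst.substr(12, inst.size() - 13);  // strip wrapper
          for (const auto& col : col_heads) {
            args += "," + col;                     // append col_heads
          }
          args += ",join_hash_tables";             // and the hash tables argument
          inst = "row_func(" + args + ")";         // the ReplaceInstWithInst step
          break;
        }
      }
      for (const auto& inst : query_template) {
        std::cout << inst << '\n';
      }
    }
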
AggregatedColRange Executor::computeColRangesCache ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 3244 of file Execute.cpp.

References catalog_(), CHECK(), getLeafColumnRange(), AggregatedColRange::setColRange(), and ExpressionRange::typeSupportsRange().

3245  {
3246  AggregatedColRange agg_col_range_cache;
3247  CHECK(catalog_);
3248  std::unordered_set<int> phys_table_ids;
3249  for (const auto& phys_input : phys_inputs) {
3250  phys_table_ids.insert(phys_input.table_id);
3251  }
3252  std::vector<InputTableInfo> query_infos;
3253  for (const int table_id : phys_table_ids) {
3254  query_infos.emplace_back(InputTableInfo{table_id, getTableInfo(table_id)});
3255  }
3256  for (const auto& phys_input : phys_inputs) {
3257  const auto cd =
3258  catalog_->getMetadataForColumn(phys_input.table_id, phys_input.col_id);
3259  CHECK(cd);
3260  if (ExpressionRange::typeSupportsRange(cd->columnType)) {
3261  const auto col_var = boost::make_unique<Analyzer::ColumnVar>(
3262  cd->columnType, phys_input.table_id, phys_input.col_id, 0);
3263  const auto col_range = getLeafColumnRange(col_var.get(), query_infos, this, false);
3264  agg_col_range_cache.setColRange(phys_input, col_range);
3265  }
3266  }
3267  return agg_col_range_cache;
3268 }
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const
Definition: Execute.cpp:249
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1019
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
ExpressionRange getLeafColumnRange(const Analyzer::ColumnVar *col_expr, const std::vector< InputTableInfo > &query_infos, const Executor *executor, const bool is_outer_join_proj)
void setColRange(const PhysicalInput &, const ExpressionRange &)
static bool typeSupportsRange(const SQLTypeInfo &ti)


StringDictionaryGenerations Executor::computeStringDictionaryGenerations ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 3270 of file Execute.cpp.

References catalog_(), CHECK(), kENCODING_DICT, and StringDictionaryGenerations::setGeneration().

3271  {
3272  StringDictionaryGenerations string_dictionary_generations;
3273  CHECK(catalog_);
3274  for (const auto& phys_input : phys_inputs) {
3275  const auto cd =
3276  catalog_->getMetadataForColumn(phys_input.table_id, phys_input.col_id);
3277  CHECK(cd);
3278  const auto& col_ti =
3279  cd->columnType.is_array() ? cd->columnType.get_elem_type() : cd->columnType;
3280  if (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) {
3281  const int dict_id = col_ti.get_comp_param();
3282  const auto dd = catalog_->getMetadataForDict(dict_id);
3283  CHECK(dd && dd->stringDict);
3284  string_dictionary_generations.setGeneration(dict_id,
3285  dd->stringDict->storageEntryCount());
3286  }
3287  }
3288  return string_dictionary_generations;
3289 }
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1019
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
void setGeneration(const uint32_t id, const size_t generation)
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1350

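A generation is nothing more than the dictionary's entry count captured at snapshot time; strings appended afterwards stay invisible to a query pinned to the older generation. Minimal model (sketch, with a vector standing in for the StringDictionary):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    int main() {
      std::vector<std::string> dict{"red", "green"};  // stand-in StringDictionary
      std::unordered_map<int, size_t> generations;
      const int dict_id = 7;                          // hypothetical dictionary id
      generations[dict_id] = dict.size();             // setGeneration(7, 2)
      dict.push_back("blue");                         // append after the snapshot
      std::cout << "visible entries: " << generations[dict_id]   // still 2
                << ", actual entries: " << dict.size() << '\n';  // now 3
    }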

TableGenerations Executor::computeTableGenerations ( std::unordered_set< int >  phys_table_ids)
private

Definition at line 3291 of file Execute.cpp.

References TableGenerations::setGeneration().

3292  {
3293  TableGenerations table_generations;
3294  for (const int table_id : phys_table_ids) {
3295  const auto table_info = getTableInfo(table_id);
3296  table_generations.setGeneration(
3297  table_id, TableGeneration{table_info.getPhysicalNumTuples(), 0});
3298  }
3299  return table_generations;
3300 }
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const
Definition: Execute.cpp:249
void setGeneration(const uint32_t id, const TableGeneration &generation)


bool Executor::containsLeftDeepOuterJoin ( ) const
inline

Definition at line 436 of file Execute.h.

References cgen_state_.

436  {
437  return cgen_state_->contains_left_deep_outer_join_;
438  }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
void Executor::createErrorCheckControlFlow ( llvm::Function *  query_func,
bool  run_with_dynamic_watchdog,
ExecutorDeviceType  device_type 
)
private

Definition at line 1343 of file NativeCodegen.cpp.

1345  {
1346  // check whether the row processing was successful; currently, it can
1347  // fail by running out of group by buffer slots
1348 
1349  llvm::Value* row_count = nullptr;
1350  if (run_with_dynamic_watchdog && device_type == ExecutorDeviceType::GPU) {
1351  row_count =
1352  find_variable_in_basic_block<llvm::LoadInst>(query_func, ".entry", "row_count");
1353  }
1354 
1355  bool done_splitting = false;
1356  for (auto bb_it = query_func->begin(); bb_it != query_func->end() && !done_splitting;
1357  ++bb_it) {
1358  llvm::Value* pos = nullptr;
1359  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); ++inst_it) {
1360  if (run_with_dynamic_watchdog && llvm::isa<llvm::PHINode>(*inst_it)) {
1361  if (inst_it->getName() == "pos") {
1362  pos = &*inst_it;
1363  }
1364  continue;
1365  }
1366  if (!llvm::isa<llvm::CallInst>(*inst_it)) {
1367  continue;
1368  }
1369  auto& filter_call = llvm::cast<llvm::CallInst>(*inst_it);
1370  if (std::string(filter_call.getCalledFunction()->getName()) == "row_process") {
1371  auto next_inst_it = inst_it;
1372  ++next_inst_it;
1373  auto new_bb = bb_it->splitBasicBlock(next_inst_it);
1374  auto& br_instr = bb_it->back();
1375  llvm::IRBuilder<> ir_builder(&br_instr);
1376  llvm::Value* err_lv = &*inst_it;
1377  if (run_with_dynamic_watchdog) {
1378  CHECK(pos);
1379  llvm::Value* call_watchdog_lv = nullptr;
1380  if (device_type == ExecutorDeviceType::GPU) {
1381  // In order to make sure all threads within a block see the same barrier,
1382  // only those blocks none of whose threads have experienced the critical
1383  // edge will go through the dynamic watchdog computation
1384  CHECK(row_count);
1385  auto crit_edge_rem =
1386  (blockSize() & (blockSize() - 1))
1387  ? ir_builder.CreateSRem(
1388  row_count,
1389  cgen_state_->llInt(static_cast<int64_t>(blockSize())))
1390  : ir_builder.CreateAnd(
1391  row_count,
1392  cgen_state_->llInt(static_cast<int64_t>(blockSize() - 1)));
1393  auto crit_edge_threshold = ir_builder.CreateSub(row_count, crit_edge_rem);
1394  crit_edge_threshold->setName("crit_edge_threshold");
1395 
1396  // only those threads where pos < crit_edge_threshold go through dynamic
1397  // watchdog call
1398  call_watchdog_lv =
1399  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, pos, crit_edge_threshold);
1400  } else {
1401  // CPU path: run watchdog for every 64th row
1402  auto dw_predicate = ir_builder.CreateAnd(pos, uint64_t(0x3f));
1403  call_watchdog_lv = ir_builder.CreateICmp(
1404  llvm::ICmpInst::ICMP_EQ, dw_predicate, cgen_state_->llInt(int64_t(0LL)));
1405  }
1406  CHECK(call_watchdog_lv);
1407  auto error_check_bb = bb_it->splitBasicBlock(
1408  llvm::BasicBlock::iterator(br_instr), ".error_check");
1409  auto& watchdog_br_instr = bb_it->back();
1410 
1411  auto watchdog_check_bb = llvm::BasicBlock::Create(
1412  cgen_state_->context_, ".watchdog_check", query_func, error_check_bb);
1413  llvm::IRBuilder<> watchdog_ir_builder(watchdog_check_bb);
1414  auto detected_timeout = watchdog_ir_builder.CreateCall(
1415  cgen_state_->module_->getFunction("dynamic_watchdog"), {});
1416  auto timeout_err_lv = watchdog_ir_builder.CreateSelect(
1417  detected_timeout, cgen_state_->llInt(Executor::ERR_OUT_OF_TIME), err_lv);
1418  watchdog_ir_builder.CreateBr(error_check_bb);
1419 
1420  llvm::ReplaceInstWithInst(
1421  &watchdog_br_instr,
1422  llvm::BranchInst::Create(
1423  watchdog_check_bb, error_check_bb, call_watchdog_lv));
1424  ir_builder.SetInsertPoint(&br_instr);
1425  auto unified_err_lv = ir_builder.CreatePHI(err_lv->getType(), 2);
1426 
1427  unified_err_lv->addIncoming(timeout_err_lv, watchdog_check_bb);
1428  unified_err_lv->addIncoming(err_lv, &*bb_it);
1429  err_lv = unified_err_lv;
1430  }
1431  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
1432  err_lv =
1433  ir_builder.CreateCall(cgen_state_->module_->getFunction("record_error_code"),
1434  std::vector<llvm::Value*>{err_lv, error_code_arg});
1435  if (device_type == ExecutorDeviceType::GPU) {
1436  // let kernel execution finish as expected, regardless of the observed error,
1437  // unless it is from the dynamic watchdog where all threads within that block
1438  // return together.
1439  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
1440  err_lv,
1441  cgen_state_->llInt(Executor::ERR_OUT_OF_TIME));
1442  } else {
1443  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_NE,
1444  err_lv,
1445  cgen_state_->llInt(static_cast<int32_t>(0)));
1446  }
1447  auto error_bb = llvm::BasicBlock::Create(
1448  cgen_state_->context_, ".error_exit", query_func, new_bb);
1449  llvm::ReturnInst::Create(cgen_state_->context_, error_bb);
1450  llvm::ReplaceInstWithInst(&br_instr,
1451  llvm::BranchInst::Create(error_bb, new_bb, err_lv));
1452  done_splitting = true;
1453  break;
1454  }
1455  }
1456  }
1457  CHECK(done_splitting);
1458 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:973
CHECK(cgen_state)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:117
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1041
unsigned blockSize() const
Definition: Execute.cpp:2968
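The two watchdog predicates above differ by device: on CPU every 64th row (pos & 0x3f == 0) polls the watchdog, while on GPU only positions below crit_edge_threshold (the last whole-block boundary) do, so all threads of a block reach the barrier together. A worked example of both predicates (stand-alone sketch):

    #include <cstdint>
    #include <iostream>

    int main() {
      const int64_t block_size = 256;  // power of two, so the CreateAnd form applies
      const int64_t row_count = 1000;
      const int64_t crit_edge_rem = row_count & (block_size - 1);     // 1000 % 256 = 232
      const int64_t crit_edge_threshold = row_count - crit_edge_rem;  // 768
      for (int64_t pos : {0, 63, 64, 767, 768}) {
        const bool cpu_checks = (pos & 0x3f) == 0;          // every 64th row
        const bool gpu_checks = pos < crit_edge_threshold;  // whole blocks only
        std::cout << "pos=" << pos << " cpu=" << cpu_checks
                  << " gpu=" << gpu_checks << '\n';
      }
    }
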
int Executor::deviceCount ( const ExecutorDeviceType  device_type) const
private

Definition at line 597 of file Execute.cpp.

References catalog_(), CHECK(), and GPU.

597  {
598  if (device_type == ExecutorDeviceType::GPU) {
599  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
600  CHECK(cuda_mgr);
601  return cuda_mgr->getDeviceCount();
602  } else {
603  return 1;
604  }
605 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:117
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1019


int Executor::deviceCountForMemoryLevel ( const Data_Namespace::MemoryLevel  memory_level) const
private

Definition at line 607 of file Execute.cpp.

References CPU, GPU, and Data_Namespace::GPU_LEVEL.

608  {
609  return memory_level == GPU_LEVEL ? deviceCount(ExecutorDeviceType::GPU)
610  : deviceCount(ExecutorDeviceType::CPU);
611 }
ExecutorDeviceType
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:597
int64_t Executor::deviceCycles ( int  milliseconds) const
private

Definition at line 2976 of file Execute.cpp.

References catalog_(), and CHECK().

2976  {
2977  CHECK(catalog_);
2978  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
2979  CHECK(cuda_mgr);
2980  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
2981  return static_cast<int64_t>(dev_props.front().clockKhz) * milliseconds;
2982 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:117
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1019

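Note the unit arithmetic behind the return value: clockKhz is 10^3 cycles per second and a millisecond is 10^-3 seconds, so the factors of 1000 cancel and clockKhz * milliseconds is already a cycle count for the first device.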

void Executor::dispatchFragments ( const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)>  dispatch,
const ExecutionDispatch execution_dispatch,
const std::vector< InputTableInfo > &  table_infos,
const ExecutionOptions eo,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const size_t  context_count,
const QueryCompilationDescriptor query_comp_desc,
const QueryMemoryDescriptor query_mem_desc,
QueryFragmentDescriptor fragment_descriptor,
std::unordered_set< int > &  available_gpus,
int &  available_cpus 
)
private

Definition at line 1692 of file Execute.cpp.

References ExecutionOptions::allow_multifrag, QueryFragmentDescriptor::assignFragsToKernelDispatch(), QueryFragmentDescriptor::assignFragsToMultiDispatch(), QueryFragmentDescriptor::buildFragmentKernelMap(), catalog_(), CHECK(), CHECK_GE, CHECK_GT, anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), g_inner_join_fragment_skipping, QueryCompilationDescriptor::getDeviceType(), QueryMemoryDescriptor::getEntryCount(), Executor::ExecutionDispatch::getExecutionUnit(), Executor::ExecutionDispatch::getFragOffsets(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, anonymous_namespace{Execute.cpp}::has_lazy_fetched_columns(), logger::INFO, KernelPerFragment, LOG, MultifragmentKernel, Projection, query_mem_desc, QueryFragmentDescriptor::shouldCheckWorkUnitWatchdog(), QueryMemoryDescriptor::toString(), VLOG, and ExecutionOptions::with_watchdog.

1710  {
1711  std::vector<std::future<void>> query_threads;
1712  const auto& ra_exe_unit = execution_dispatch.getExecutionUnit();
1713  CHECK(!ra_exe_unit.input_descs.empty());
1714 
1715  const auto device_type = query_comp_desc.getDeviceType();
1716  const bool uses_lazy_fetch =
1717  plan_state_->allow_lazy_fetch_ &&
1718  has_lazy_fetched_columns(getColLazyFetchInfo(ra_exe_unit.target_exprs));
1719  const bool use_multifrag_kernel = (device_type == ExecutorDeviceType::GPU) &&
1720  eo.allow_multifrag && (!uses_lazy_fetch || is_agg);
1721 
1722  const auto device_count = deviceCount(device_type);
1723  CHECK_GT(device_count, 0);
1724 
1725  fragment_descriptor.buildFragmentKernelMap(ra_exe_unit,
1726  execution_dispatch.getFragOffsets(),
1727  device_count,
1728  device_type,
1729  use_multifrag_kernel,
1730  g_inner_join_fragment_skipping,
1731  this);
1732  if (eo.with_watchdog && fragment_descriptor.shouldCheckWorkUnitWatchdog()) {
1733  checkWorkUnitWatchdog(ra_exe_unit, table_infos, *catalog_, device_type, device_count);
1734  }
1735 
1736  if (use_multifrag_kernel) {
1737  VLOG(1) << "Dispatching multifrag kernels";
1738  VLOG(1) << query_mem_desc.toString();
1739 
1740  // NB: We should never be on this path when the query is retried because of running
1741  // out of group by slots; also, for scan only queries on CPU we want the
1742  // high-granularity, fragment by fragment execution instead. For scan only queries on
1743  // GPU, we want the multifrag kernel path to save the overhead of allocating an output
1744  // buffer per fragment.
1745  auto multifrag_kernel_dispatch =
1746  [&query_threads, &dispatch, query_comp_desc, query_mem_desc](
1747  const int device_id,
1748  const FragmentsList& frag_list,
1749  const int64_t rowid_lookup_key) {
1750  query_threads.push_back(std::async(std::launch::async,
1751  dispatch,
1752  ExecutorDeviceType::GPU,
1753  device_id,
1754  query_comp_desc,
1755  query_mem_desc,
1756  frag_list,
1757  ExecutorDispatchMode::MultifragmentKernel,
1758  rowid_lookup_key));
1759  };
1760  fragment_descriptor.assignFragsToMultiDispatch(multifrag_kernel_dispatch);
1761  } else {
1762  VLOG(1) << "Dispatching kernel per fragment";
1763  VLOG(1) << query_mem_desc.toString();
1764 
1765  if (!ra_exe_unit.use_bump_allocator && allow_single_frag_table_opt &&
1766  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1767  table_infos.size() == 1 && table_infos.front().table_id > 0) {
1768  const auto max_frag_size =
1769  table_infos.front().info.getFragmentNumTuplesUpperBound();
1770  if (max_frag_size < query_mem_desc.getEntryCount()) {
1771  LOG(INFO) << "Lowering scan limit from " << query_mem_desc.getEntryCount()
1772  << " to match max fragment size " << max_frag_size
1773  << " for kernel per fragment execution path.";
1774  throw CompilationRetryNewScanLimit(max_frag_size);
1775  }
1776  }
1777 
1778  size_t frag_list_idx{0};
1779  auto fragment_per_kernel_dispatch = [&query_threads,
1780  &dispatch,
1781  &frag_list_idx,
1782  &device_type,
1783  query_comp_desc,
1784  query_mem_desc](const int device_id,
1785  const FragmentsList& frag_list,
1786  const int64_t rowid_lookup_key) {
1787  if (!frag_list.size()) {
1788  return;
1789  }
1790  CHECK_GE(device_id, 0);
1791 
1792  query_threads.push_back(std::async(std::launch::async,
1793  dispatch,
1794  device_type,
1795  device_id,
1796  query_comp_desc,
1797  query_mem_desc,
1798  frag_list,
1799  ExecutorDispatchMode::KernelPerFragment,
1800  rowid_lookup_key));
1801 
1802  ++frag_list_idx;
1803  };
1804 
1805  fragment_descriptor.assignFragsToKernelDispatch(fragment_per_kernel_dispatch,
1806  ra_exe_unit);
1807  }
1808  for (auto& child : query_threads) {
1809  child.wait();
1810  }
1811  for (auto& child : query_threads) {
1812  child.get();
1813  }
1814 }
bool is_agg(const Analyzer::Expr *expr)
ExecutorDeviceType getDeviceType() const
std::string toString() const
#define LOG(tag)
Definition: Logger.h:185
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:287
#define CHECK_GE(x, y)
Definition: Logger.h:203
void assignFragsToKernelDispatch(DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:597
#define CHECK_GT(x, y)
Definition: Logger.h:202
std::vector< FragmentsPerTable > FragmentsList
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:78
const bool allow_multifrag
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1019
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:988
void checkWorkUnitWatchdog(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const Catalog_Namespace::Catalog &cat, const ExecutorDeviceType device_type, const int device_count)
Definition: Execute.cpp:1021
QueryDescriptionType getQueryDescriptionType() const
void assignFragsToMultiDispatch(DISPATCH_FCN f) const
void buildFragmentKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
bool has_lazy_fetched_columns(const std::vector< ColumnLazyFetchInfo > &fetched_cols)
Definition: Execute.cpp:1681
#define VLOG(n)
Definition: Logger.h:280
const bool with_watchdog

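Each dispatch lambda pushes a std::async task, and the function then wait()s on every future before calling get(), so all kernels finish before any stored exception is rethrown. The same pattern in isolation (sketch):

    #include <future>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<std::future<void>> query_threads;
      for (int frag = 0; frag < 4; ++frag) {
        query_threads.push_back(std::async(std::launch::async, [frag] {
          std::cout << "kernel for fragment list " << frag << '\n';  // may interleave
        }));
      }
      for (auto& child : query_threads) {
        child.wait();  // let every kernel finish first
      }
      for (auto& child : query_threads) {
        child.get();   // then surface any stored exception
      }
    }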

std::shared_ptr< ResultSet > Executor::execute ( const Planner::RootPlan root_plan,
const Catalog_Namespace::SessionInfo session,
const bool  hoist_literals,
const ExecutorDeviceType  device_type,
const ExecutorOptLevel  opt_level,
const bool  allow_multifrag,
const bool  allow_loop_joins,
RenderInfo render_query_data = nullptr 
)

Definition at line 32 of file LegacyExecute.cpp.

References catalog_(), CHECK(), CPU, g_enable_dynamic_watchdog, Catalog_Namespace::SessionInfo::get_currentUser(), Planner::RootPlan::get_plan_dest(), Planner::RootPlan::get_result_table_id(), Planner::RootPlan::get_stmt_type(), Catalog_Namespace::SessionInfo::getCatalog(), Planner::RootPlan::getCatalog(), AccessPrivileges::INSERT_INTO_TABLE, Catalog_Namespace::SysCatalog::instance(), Planner::RootPlan::kEXPLAIN, kINSERT, kSELECT, DBObject::loadKey(), DBObject::setPrivileges(), TableDBObjectType, timer_start(), and timer_stop().

40  {
41  catalog_ = &root_plan->getCatalog();
42  const auto stmt_type = root_plan->get_stmt_type();
 43  // capture the lock acquisition time
44  auto clock_begin = timer_start();
45  std::lock_guard<std::mutex> lock(execute_mutex_);
 46  if (g_enable_dynamic_watchdog) {
 47  resetInterrupt();
 48  }
49  ScopeGuard restore_metainfo_cache = [this] { clearMetaInfoCache(); };
50  int64_t queue_time_ms = timer_stop(clock_begin);
51  ScopeGuard row_set_holder = [this] { row_set_mem_owner_ = nullptr; };
52  switch (stmt_type) {
53  case kSELECT: {
54  throw std::runtime_error("The legacy SELECT path has been fully deprecated.");
55  }
56  case kINSERT: {
57  if (root_plan->get_plan_dest() == Planner::RootPlan::kEXPLAIN) {
58  auto explanation_rs = std::make_shared<ResultSet>("No explanation available.");
59  explanation_rs->setQueueTime(queue_time_ms);
60  return explanation_rs;
61  }
62  auto& cat = session.getCatalog();
 63  auto& sys_cat = Catalog_Namespace::SysCatalog::instance();
 64  auto user_metadata = session.get_currentUser();
65  const int table_id = root_plan->get_result_table_id();
66  auto td = cat.getMetadataForTable(table_id);
67  DBObject dbObject(td->tableName, TableDBObjectType);
68  dbObject.loadKey(cat);
69  dbObject.setPrivileges(AccessPrivileges::INSERT_INTO_TABLE);
70  std::vector<DBObject> privObjects;
71  privObjects.push_back(dbObject);
72  if (!sys_cat.checkPrivileges(user_metadata, privObjects)) {
73  throw std::runtime_error(
74  "Violation of access privileges: user " + user_metadata.userName +
75  " has no insert privileges for table " + td->tableName + ".");
76  break;
77  }
78  executeSimpleInsert(root_plan);
79  auto empty_rs = std::make_shared<ResultSet>(std::vector<TargetInfo>{},
 80  ExecutorDeviceType::CPU,
 81  QueryMemoryDescriptor(),
 82  nullptr,
83  this);
84  empty_rs->setQueueTime(queue_time_ms);
85  return empty_rs;
86  }
87  default:
88  CHECK(false);
89  }
90  CHECK(false);
91  return nullptr;
92 }
int get_result_table_id() const
Definition: Planner.h:291
SQLStmtType get_stmt_type() const
Definition: Planner.h:290
static const AccessPrivileges INSERT_INTO_TABLE
Definition: DBObject.h:154
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
const Catalog_Namespace::Catalog & getCatalog() const
Definition: Planner.h:293
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
void executeSimpleInsert(const Planner::RootPlan *root_plan)
Definition: Execute.cpp:2582
static SysCatalog & instance()
Definition: SysCatalog.h:242
CHECK(cgen_state)
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1019
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:989
friend class QueryMemoryDescriptor
Definition: Execute.h:1053
Catalog & getCatalog() const
Definition: SessionInfo.h:90
Dest get_plan_dest() const
Definition: Planner.h:299
void resetInterrupt()
static std::mutex execute_mutex_
Definition: Execute.h:1030
const UserMetadata & get_currentUser() const
Definition: SessionInfo.h:93
Type timer_start()
Definition: measure.h:40
void clearMetaInfoCache()
Definition: Execute.cpp:328
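
The ScopeGuard locals above are what make the cleanup exception-safe: clearMetaInfoCache() and the release of row_set_mem_owner_ run on every exit path, including the throws in the kSELECT and privilege-check branches. A minimal sketch of the idiom, assuming a simple RAII wrapper (the project's actual ScopeGuard may differ in detail; requires C++17 class template argument deduction):

#include <iostream>
#include <utility>

template <typename F>
class ScopeGuard {
 public:
  explicit ScopeGuard(F f) : f_(std::move(f)) {}
  ~ScopeGuard() { f_(); }  // runs on any scope exit: return or throw
  ScopeGuard(const ScopeGuard&) = delete;
  ScopeGuard& operator=(const ScopeGuard&) = delete;

 private:
  F f_;
};

int main() {
  ScopeGuard restore_cache([] { std::cout << "clearMetaInfoCache()\n"; });
  std::cout << "executing statement\n";
  return 0;
}  // prints "executing statement", then "clearMetaInfoCache()"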


ResultSetPtr Executor::executeExplain ( const QueryCompilationDescriptor query_comp_desc)
private

Definition at line 1372 of file Execute.cpp.

References QueryCompilationDescriptor::getIR().

1372  {
1373  return std::make_shared<ResultSet>(query_comp_desc.getIR());
1374 }


int32_t Executor::executePlanWithGroupBy ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationResult compilation_result,
const bool  hoist_literals,
ResultSetPtr results,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< size_t >  outer_tab_frag_ids,
QueryExecutionContext query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr data_mgr,
const int  device_id,
const int64_t  limit,
const uint32_t  start_rowid,
const uint32_t  num_tables,
RenderInfo render_info 
)
private

Definition at line 2402 of file Execute.cpp.

References CHECK(), CHECK_NE, anonymous_namespace{Execute.cpp}::check_rows_less_than_needed(), CPU, DEBUG_TIMER, ERR_DIV_BY_ZERO, ERR_INTERRUPTED, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, error_code, logger::FATAL, g_enable_dynamic_watchdog, QueryExecutionContext::getRowSet(), GPU, RelAlgExecutionUnit::groupby_exprs, INJECT_TIMER, interrupted_(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), Executor::CompilationResult::literal_values, LOG, Executor::CompilationResult::native_functions, num_rows, QueryExecutionContext::query_mem_desc_, RenderInfo::render_allocator_map_ptr, and RenderInfo::useCudaBuffers().

2418  {
2419  auto timer = DEBUG_TIMER(__func__);
 2420  INJECT_TIMER(executePlanWithGroupBy);
 2421  CHECK(!results);
2422  if (col_buffers.empty()) {
2423  return 0;
2424  }
2425  CHECK_NE(ra_exe_unit.groupby_exprs.size(), size_t(0));
2426  // TODO(alex):
2427  // 1. Optimize size (make keys more compact).
2428  // 2. Resize on overflow.
2429  // 3. Optimize runtime.
2430  auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
2431  int32_t error_code = device_type == ExecutorDeviceType::GPU ? 0 : start_rowid;
2432  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
 2433  if (g_enable_dynamic_watchdog && interrupted_) {
 2434  return ERR_INTERRUPTED;
2435  }
2436 
2437  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
2438  if (render_info && render_info->useCudaBuffers()) {
2439  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
2440  }
2441 
2442  if (device_type == ExecutorDeviceType::CPU) {
2443  query_exe_context->launchCpuCode(ra_exe_unit,
2444  compilation_result.native_functions,
2445  hoist_literals,
2446  hoist_buf,
2447  col_buffers,
2448  num_rows,
2449  frag_offsets,
2450  scan_limit,
2451  &error_code,
2452  num_tables,
2453  join_hash_table_ptrs);
2454  } else {
2455  try {
2456  query_exe_context->launchGpuCode(ra_exe_unit,
2457  compilation_result.native_functions,
2458  hoist_literals,
2459  hoist_buf,
2460  col_buffers,
2461  num_rows,
2462  frag_offsets,
2463  scan_limit,
2464  data_mgr,
2465  blockSize(),
2466  gridSize(),
2467  device_id,
2468  &error_code,
2469  num_tables,
2470  join_hash_table_ptrs,
2471  render_allocator_map_ptr);
2472  } catch (const OutOfMemory&) {
2473  return ERR_OUT_OF_GPU_MEM;
2474  } catch (const OutOfRenderMemory&) {
2475  return ERR_OUT_OF_RENDER_MEM;
2476  } catch (const StreamingTopNNotSupportedInRenderQuery&) {
 2477  return ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY;
 2478  } catch (const std::bad_alloc&) {
2479  return ERR_SPECULATIVE_TOP_OOM;
2480  } catch (const std::exception& e) {
2481  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
2482  }
2483  }
2484 
2485  if (error_code == Executor::ERR_OVERFLOW_OR_UNDERFLOW ||
2486  error_code == Executor::ERR_DIV_BY_ZERO ||
2487  error_code == Executor::ERR_OUT_OF_TIME ||
2488  error_code == Executor::ERR_INTERRUPTED) {
2489  return error_code;
2490  }
2491 
2492  if (error_code != Executor::ERR_OVERFLOW_OR_UNDERFLOW &&
2493  error_code != Executor::ERR_DIV_BY_ZERO && !render_allocator_map_ptr) {
2494  results =
2495  query_exe_context->getRowSet(ra_exe_unit, query_exe_context->query_mem_desc_);
2496  CHECK(results);
2497  results->holdLiterals(hoist_buf);
2498  }
2499  if (error_code < 0 && render_allocator_map_ptr) {
2500  // More rows passed the filter than available slots. We don't have a count to check,
2501  // so assume we met the limit if a scan limit is set
2502  if (scan_limit != 0) {
2503  return 0;
2504  } else {
2505  return error_code;
2506  }
2507  }
2508  if (error_code && (!scan_limit || check_rows_less_than_needed(results, scan_limit))) {
2509  return error_code; // unlucky, not enough results and we ran out of slots
2510  }
2511 
2512  return 0;
2513 }
bool useCudaBuffers() const
Definition: RenderInfo.cpp:66
const int8_t const int64_t * num_rows
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1042
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables)
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &cu_functions, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables, RenderAllocatorMap *render_allocator_map)
#define LOG(tag)
Definition: Logger.h:185
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:2515
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY
Definition: Execute.h:1046
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:1033
ResultSetPtr getRowSet(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc) const
CHECK(cgen_state)
#define INJECT_TIMER(DESC)
Definition: measure.h:91
#define CHECK_NE(x, y)
Definition: Logger.h:199
static const int32_t ERR_OUT_OF_RENDER_MEM
Definition: Execute.h:1037
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:1039
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1041
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:1034
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:335
unsigned gridSize() const
Definition: Execute.cpp:2960
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:32
bool check_rows_less_than_needed(const ResultSetPtr &results, const size_t scan_limit)
Definition: Execute.cpp:2395
static const int32_t ERR_SPECULATIVE_TOP_OOM
Definition: Execute.h:1040
#define DEBUG_TIMER(name)
Definition: Logger.h:296
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2402
unsigned blockSize() const
Definition: Execute.cpp:2968
const QueryMemoryDescriptor query_mem_desc_
bool interrupted_
Definition: Execute.h:997
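
The int32_t return value follows a small protocol: a handful of recognized ERR_* codes are returned to the caller unchanged, while a negative code from a render query with a scan limit set is interpreted as "limit reached" and reported as success. A standalone sketch of that decision, using stand-in codes rather than the values defined in Execute.h:

#include <cstdint>

constexpr int32_t ERR_DIV_BY_ZERO = 10;  // stand-in value
constexpr int32_t ERR_OUT_OF_TIME = 11;  // stand-in value

int32_t interpret_kernel_error(int32_t error_code,
                               bool render_query,
                               int64_t scan_limit) {
  if (error_code == ERR_DIV_BY_ZERO || error_code == ERR_OUT_OF_TIME) {
    return error_code;  // hard failure: surface unchanged
  }
  if (error_code < 0 && render_query) {
    // More rows passed the filter than there were output slots; with a
    // scan limit set we assume the limit was met and report success.
    return scan_limit != 0 ? 0 : error_code;
  }
  return 0;
}

int main() {
  return interpret_kernel_error(-1, true, 100) == 0 ? 0 : 1;
}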


int32_t Executor::executePlanWithoutGroupBy ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationResult compilation_result,
const bool  hoist_literals,
ResultSetPtr results,
const std::vector< Analyzer::Expr * > &  target_exprs,
const ExecutorDeviceType  device_type,
std::vector< std::vector< const int8_t * >> &  col_buffers,
QueryExecutionContext query_exe_context,
const std::vector< std::vector< int64_t >> &  num_rows,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
Data_Namespace::DataMgr data_mgr,
const int  device_id,
const uint32_t  start_rowid,
const uint32_t  num_tables,
RenderInfo render_info 
)
private

Definition at line 2259 of file Execute.cpp.

References CHECK(), CHECK_EQ, CPU, DEBUG_TIMER, ERR_DIV_BY_ZERO, ERR_INTERRUPTED, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, error_code, RelAlgExecutionUnit::estimator, QueryExecutionContext::estimator_result_set_, logger::FATAL, g_bigint_count, g_enable_dynamic_watchdog, get_target_info(), Experimental::MetaTypeClassFactory< CLASSIFICATIONS_PACK >::getMetaTypeClass(), GPU, INJECT_TIMER, interrupted_(), RenderInfo::isPotentialInSituRender(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), Executor::CompilationResult::literal_values, LOG, Executor::CompilationResult::native_functions, num_rows, out, QueryExecutionContext::query_buffers_, RenderInfo::render_allocator_map_ptr, and RenderInfo::useCudaBuffers().

2274  {
 2275  INJECT_TIMER(executePlanWithoutGroupBy);
 2276  auto timer = DEBUG_TIMER(__func__);
2277  CHECK(!results);
2278  if (col_buffers.empty()) {
2279  return 0;
2280  }
2281 
2282  RenderAllocatorMap* render_allocator_map_ptr = nullptr;
2283  if (render_info) {
2284  // TODO(adb): make sure that we either never get here in the CPU case, or if we do get
2285  // here, we are in non-insitu mode.
2286  CHECK(render_info->useCudaBuffers() || !render_info->isPotentialInSituRender())
2287  << "CUDA disabled rendering in the executePlanWithoutGroupBy query path is "
2288  "currently unsupported.";
2289  render_allocator_map_ptr = render_info->render_allocator_map_ptr.get();
2290  }
2291 
2292  int32_t error_code = device_type == ExecutorDeviceType::GPU ? 0 : start_rowid;
2293  std::vector<int64_t*> out_vec;
2294  const auto hoist_buf = serializeLiterals(compilation_result.literal_values, device_id);
2295  const auto join_hash_table_ptrs = getJoinHashTablePtrs(device_type, device_id);
2296  std::unique_ptr<OutVecOwner> output_memory_scope;
 2297  if (g_enable_dynamic_watchdog && interrupted_) {
 2298  return ERR_INTERRUPTED;
2299  }
2300  if (device_type == ExecutorDeviceType::CPU) {
2301  out_vec = query_exe_context->launchCpuCode(ra_exe_unit,
2302  compilation_result.native_functions,
2303  hoist_literals,
2304  hoist_buf,
2305  col_buffers,
2306  num_rows,
2307  frag_offsets,
2308  0,
2309  &error_code,
2310  num_tables,
2311  join_hash_table_ptrs);
2312  output_memory_scope.reset(new OutVecOwner(out_vec));
2313  } else {
2314  try {
2315  out_vec = query_exe_context->launchGpuCode(ra_exe_unit,
2316  compilation_result.native_functions,
2317  hoist_literals,
2318  hoist_buf,
2319  col_buffers,
2320  num_rows,
2321  frag_offsets,
2322  0,
2323  data_mgr,
2324  blockSize(),
2325  gridSize(),
2326  device_id,
2327  &error_code,
2328  num_tables,
2329  join_hash_table_ptrs,
2330  render_allocator_map_ptr);
2331  output_memory_scope.reset(new OutVecOwner(out_vec));
2332  } catch (const OutOfMemory&) {
2333  return ERR_OUT_OF_GPU_MEM;
2334  } catch (const std::exception& e) {
2335  LOG(FATAL) << "Error launching the GPU kernel: " << e.what();
2336  }
2337  }
2338  if (error_code == Executor::ERR_OVERFLOW_OR_UNDERFLOW ||
2339  error_code == Executor::ERR_DIV_BY_ZERO ||
2340  error_code == Executor::ERR_OUT_OF_TIME ||
2341  error_code == Executor::ERR_INTERRUPTED) {
2342  return error_code;
2343  }
2344  if (ra_exe_unit.estimator) {
2345  CHECK(!error_code);
2346  results =
2347  std::shared_ptr<ResultSet>(query_exe_context->estimator_result_set_.release());
2348  return 0;
2349  }
2350  std::vector<int64_t> reduced_outs;
2351  const auto num_frags = col_buffers.size();
2352  const size_t entry_count = device_type == ExecutorDeviceType::GPU
2353  ? num_frags * blockSize() * gridSize()
2354  : num_frags;
2355  if (size_t(1) == entry_count) {
2356  for (auto out : out_vec) {
2357  CHECK(out);
2358  reduced_outs.push_back(*out);
2359  }
2360  } else {
2361  size_t out_vec_idx = 0;
2362 
2363  for (const auto target_expr : target_exprs) {
2364  const auto agg_info = get_target_info(target_expr, g_bigint_count);
2365  CHECK(agg_info.is_agg);
2366 
2367  auto meta_class(
2369  auto agg_reduction_impl =
2371  agg_reduction_impl(meta_class,
2372  entry_count,
2373  error_code,
2374  agg_info,
2375  out_vec_idx,
2376  out_vec,
2377  reduced_outs,
2378  query_exe_context);
2379  if (error_code) {
2380  break;
2381  }
2382  }
2383  }
2384 
2385  CHECK_EQ(size_t(1), query_exe_context->query_buffers_->result_sets_.size());
2386  auto rows_ptr = std::shared_ptr<ResultSet>(
2387  query_exe_context->query_buffers_->result_sets_[0].release());
2388  rows_ptr->fillOneEntry(reduced_outs);
2389  results = std::move(rows_ptr);
2390  return error_code;
2391 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
bool useCudaBuffers() const
Definition: RenderInfo.cpp:66
static auto getMetaTypeClass(SQL_TYPE_INFO const &sql_type_info) -> MetaTypeClassContainer
const int8_t const int64_t * num_rows
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1042
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables)
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< std::pair< void *, void * >> &cu_functions, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, int32_t *error_code, const uint32_t num_tables, const std::vector< int64_t > &join_hash_tables, RenderAllocatorMap *render_allocator_map)
#define LOG(tag)
Definition: Logger.h:185
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:65
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
std::unique_ptr< QueryMemoryInitializer > query_buffers_
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:2515
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:1033
CHECK(cgen_state)
#define INJECT_TIMER(DESC)
Definition: measure.h:91
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:1039
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1041
bool g_bigint_count
const std::shared_ptr< Analyzer::Estimator > estimator
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:1034
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2259
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:335
unsigned gridSize() const
Definition: Execute.cpp:2960
std::unique_ptr< RenderAllocatorMap > render_allocator_map_ptr
Definition: RenderInfo.h:32
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:61
#define DEBUG_TIMER(name)
Definition: Logger.h:296
MetaTypeClassHandler< SPECIALIZED_HANDLER, Geometry > GeoVsNonGeoClassHandler
unsigned blockSize() const
Definition: Execute.cpp:2968
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t ** out
std::unique_ptr< ResultSet > estimator_result_set_
bool interrupted_
Definition: Execute.h:997
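
The entry_count arithmetic above explains the reduction step: on GPU every thread writes its own partial aggregate, so each fragment contributes blockSize() * gridSize() slots that must be collapsed into one value per target; on CPU there is a single slot per fragment. A sketch under the assumption of a simple SUM (the real code dispatches per aggregate kind and type class):

#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>

// Host-side reduction of per-thread partial sums into one value.
int64_t reduce_sum(const std::vector<int64_t>& partials) {
  return std::accumulate(partials.begin(), partials.end(), int64_t{0});
}

int main() {
  const size_t num_frags = 2, block_size = 4, grid_size = 2;
  const size_t entry_count = num_frags * block_size * grid_size;  // 16 slots
  std::vector<int64_t> partials(entry_count, 1);  // each thread contributed 1
  return reduce_sum(partials) == 16 ? 0 : 1;
}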


void Executor::executeSimpleInsert ( const Planner::RootPlan root_plan)
private

Definition at line 2582 of file Execute.cpp.

References Importer_NS::appendDatum(), DataBlockPtr::arraysPtr, CHECK(), CHECK_EQ, checked_malloc(), Fragmenter_Namespace::InsertData::columnIds, Fragmenter_Namespace::InsertData::data, Fragmenter_Namespace::InsertData::databaseId, get_column_descriptor(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), Planner::RootPlan::get_plan(), Planner::RootPlan::get_result_col_list(), Planner::RootPlan::get_result_table_id(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), Planner::RootPlan::getCatalog(), Catalog_Namespace::Catalog::getMetadataForTable(), inline_fixed_encoding_null_val(), anonymous_namespace{Execute.cpp}::insert_one_dict_str(), anonymous_namespace{TypedDataAccessors.h}::is_null(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), kARRAY, kBIGINT, kBOOLEAN, kCAST, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kENCODING_NONE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, Fragmenter_Namespace::InsertData::numRows, anonymous_namespace{TypedDataAccessors.h}::put_null(), anonymous_namespace{TypedDataAccessors.h}::put_null_array(), SHARD_FOR_KEY, DataBlockPtr::stringsPtr, Fragmenter_Namespace::InsertData::tableId, and to_string().

2582  {
2583  const auto plan = root_plan->get_plan();
2584  CHECK(plan);
2585  const auto values_plan = dynamic_cast<const Planner::ValuesScan*>(plan);
2586  if (!values_plan) {
2587  throw std::runtime_error(
2588  "Only simple INSERT of immediate tuples is currently supported");
2589  }
2590  row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>();
2591  const auto& targets = values_plan->get_targetlist();
2592  const int table_id = root_plan->get_result_table_id();
2593  const auto& col_id_list = root_plan->get_result_col_list();
2594  std::vector<const ColumnDescriptor*> col_descriptors;
2595  std::vector<int> col_ids;
2596  std::unordered_map<int, std::unique_ptr<uint8_t[]>> col_buffers;
2597  std::unordered_map<int, std::vector<std::string>> str_col_buffers;
2598  std::unordered_map<int, std::vector<ArrayDatum>> arr_col_buffers;
2599  auto& cat = root_plan->getCatalog();
2600  const auto table_descriptor = cat.getMetadataForTable(table_id);
2601  const auto shard_tables = cat.getPhysicalTablesDescriptors(table_descriptor);
2602  const TableDescriptor* shard{nullptr};
2603  for (const int col_id : col_id_list) {
2604  const auto cd = get_column_descriptor(col_id, table_id, cat);
2605  const auto col_enc = cd->columnType.get_compression();
2606  if (cd->columnType.is_string()) {
2607  switch (col_enc) {
2608  case kENCODING_NONE: {
2609  auto it_ok =
2610  str_col_buffers.insert(std::make_pair(col_id, std::vector<std::string>{}));
2611  CHECK(it_ok.second);
2612  break;
2613  }
2614  case kENCODING_DICT: {
2615  const auto dd = cat.getMetadataForDict(cd->columnType.get_comp_param());
2616  CHECK(dd);
2617  const auto it_ok = col_buffers.emplace(
2618  col_id, std::unique_ptr<uint8_t[]>(new uint8_t[cd->columnType.get_size()]));
2619  CHECK(it_ok.second);
2620  break;
2621  }
2622  default:
2623  CHECK(false);
2624  }
2625  } else if (cd->columnType.is_geometry()) {
2626  auto it_ok =
2627  str_col_buffers.insert(std::make_pair(col_id, std::vector<std::string>{}));
2628  CHECK(it_ok.second);
2629  } else if (cd->columnType.is_array()) {
2630  auto it_ok =
2631  arr_col_buffers.insert(std::make_pair(col_id, std::vector<ArrayDatum>{}));
2632  CHECK(it_ok.second);
2633  } else {
2634  const auto it_ok = col_buffers.emplace(
2635  col_id,
2636  std::unique_ptr<uint8_t[]>(
2637  new uint8_t[cd->columnType.get_logical_size()]())); // changed to zero-init
2638  // the buffer
2639  CHECK(it_ok.second);
2640  }
2641  col_descriptors.push_back(cd);
2642  col_ids.push_back(col_id);
2643  }
2644  size_t col_idx = 0;
 2645  Fragmenter_Namespace::InsertData insert_data;
 2646  insert_data.databaseId = cat.getCurrentDB().dbId;
2647  insert_data.tableId = table_id;
2648  int64_t int_col_val{0};
2649  for (auto target_entry : targets) {
2650  auto col_cv = dynamic_cast<const Analyzer::Constant*>(target_entry->get_expr());
2651  if (!col_cv) {
2652  auto col_cast = dynamic_cast<const Analyzer::UOper*>(target_entry->get_expr());
2653  CHECK(col_cast);
2654  CHECK_EQ(kCAST, col_cast->get_optype());
2655  col_cv = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
2656  }
2657  CHECK(col_cv);
2658  const auto cd = col_descriptors[col_idx];
2659  auto col_datum = col_cv->get_constval();
2660  auto col_type = cd->columnType.get_type();
2661  uint8_t* col_data_bytes{nullptr};
2662  if (!cd->columnType.is_array() && !cd->columnType.is_geometry() &&
2663  (!cd->columnType.is_string() ||
2664  cd->columnType.get_compression() == kENCODING_DICT)) {
2665  const auto col_data_bytes_it = col_buffers.find(col_ids[col_idx]);
2666  CHECK(col_data_bytes_it != col_buffers.end());
2667  col_data_bytes = col_data_bytes_it->second.get();
2668  }
2669  switch (col_type) {
2670  case kBOOLEAN: {
2671  auto col_data = col_data_bytes;
2672  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2673  : (col_datum.boolval ? 1 : 0);
2674  break;
2675  }
2676  case kTINYINT: {
2677  auto col_data = reinterpret_cast<int8_t*>(col_data_bytes);
2678  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2679  : col_datum.tinyintval;
2680  int_col_val = col_datum.tinyintval;
2681  break;
2682  }
2683  case kSMALLINT: {
2684  auto col_data = reinterpret_cast<int16_t*>(col_data_bytes);
2685  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2686  : col_datum.smallintval;
2687  int_col_val = col_datum.smallintval;
2688  break;
2689  }
2690  case kINT: {
2691  auto col_data = reinterpret_cast<int32_t*>(col_data_bytes);
2692  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2693  : col_datum.intval;
2694  int_col_val = col_datum.intval;
2695  break;
2696  }
2697  case kBIGINT:
2698  case kDECIMAL:
2699  case kNUMERIC: {
2700  auto col_data = reinterpret_cast<int64_t*>(col_data_bytes);
2701  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2702  : col_datum.bigintval;
2703  int_col_val = col_datum.bigintval;
2704  break;
2705  }
2706  case kFLOAT: {
2707  auto col_data = reinterpret_cast<float*>(col_data_bytes);
2708  *col_data = col_datum.floatval;
2709  break;
2710  }
2711  case kDOUBLE: {
2712  auto col_data = reinterpret_cast<double*>(col_data_bytes);
2713  *col_data = col_datum.doubleval;
2714  break;
2715  }
2716  case kTEXT:
2717  case kVARCHAR:
2718  case kCHAR: {
2719  switch (cd->columnType.get_compression()) {
2720  case kENCODING_NONE:
2721  str_col_buffers[col_ids[col_idx]].push_back(
2722  col_datum.stringval ? *col_datum.stringval : "");
2723  break;
2724  case kENCODING_DICT: {
2725  switch (cd->columnType.get_size()) {
2726  case 1:
2727  int_col_val = insert_one_dict_str(
2728  reinterpret_cast<uint8_t*>(col_data_bytes), cd, col_cv, cat);
2729  break;
2730  case 2:
2731  int_col_val = insert_one_dict_str(
2732  reinterpret_cast<uint16_t*>(col_data_bytes), cd, col_cv, cat);
2733  break;
2734  case 4:
2735  int_col_val = insert_one_dict_str(
2736  reinterpret_cast<int32_t*>(col_data_bytes), cd, col_cv, cat);
2737  break;
2738  default:
2739  CHECK(false);
2740  }
2741  break;
2742  }
2743  default:
2744  CHECK(false);
2745  }
2746  break;
2747  }
2748  case kTIME:
2749  case kTIMESTAMP:
2750  case kDATE: {
2751  auto col_data = reinterpret_cast<int64_t*>(col_data_bytes);
2752  *col_data = col_cv->get_is_null() ? inline_fixed_encoding_null_val(cd->columnType)
2753  : col_datum.bigintval;
2754  break;
2755  }
2756  case kARRAY: {
2757  const auto is_null = col_cv->get_is_null();
2758  const auto size = cd->columnType.get_size();
2759  const SQLTypeInfo elem_ti = cd->columnType.get_elem_type();
2760  if (is_null) {
2761  if (size > 0) {
2762  // NULL fixlen array: NULL_ARRAY sentinel followed by NULL sentinels
2763  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
2764  throw std::runtime_error("Column " + cd->columnName +
2765  " doesn't accept NULL values");
2766  }
2767  int8_t* buf = (int8_t*)checked_malloc(size);
2768  put_null_array(static_cast<void*>(buf), elem_ti, "");
2769  for (int8_t* p = buf + elem_ti.get_size(); (p - buf) < size;
2770  p += elem_ti.get_size()) {
2771  put_null(static_cast<void*>(p), elem_ti, "");
2772  }
2773  arr_col_buffers[col_ids[col_idx]].emplace_back(size, buf, is_null);
2774  } else {
2775  arr_col_buffers[col_ids[col_idx]].emplace_back(0, nullptr, is_null);
2776  }
2777  break;
2778  }
2779  const auto l = col_cv->get_value_list();
2780  size_t len = l.size() * elem_ti.get_size();
2781  if (size > 0 && static_cast<size_t>(size) != len) {
2782  throw std::runtime_error("Array column " + cd->columnName + " expects " +
2783  std::to_string(size / elem_ti.get_size()) +
2784  " values, " + "received " + std::to_string(l.size()));
2785  }
2786  if (elem_ti.is_string()) {
2787  CHECK(kENCODING_DICT == elem_ti.get_compression());
2788  CHECK(4 == elem_ti.get_size());
2789 
2790  int8_t* buf = (int8_t*)checked_malloc(len);
2791  int32_t* p = reinterpret_cast<int32_t*>(buf);
2792 
2793  int elemIndex = 0;
2794  for (auto& e : l) {
2795  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2796  CHECK(c);
2797 
2798  int_col_val =
2799  insert_one_dict_str(&p[elemIndex], cd->columnName, elem_ti, c.get(), cat);
2800 
2801  elemIndex++;
2802  }
2803  arr_col_buffers[col_ids[col_idx]].push_back(ArrayDatum(len, buf, is_null));
2804 
2805  } else {
2806  int8_t* buf = (int8_t*)checked_malloc(len);
2807  int8_t* p = buf;
2808  for (auto& e : l) {
2809  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2810  CHECK(c);
2811  p = Importer_NS::appendDatum(p, c->get_constval(), elem_ti);
2812  }
2813  arr_col_buffers[col_ids[col_idx]].push_back(ArrayDatum(len, buf, is_null));
2814  }
2815  break;
2816  }
2817  case kPOINT:
2818  case kLINESTRING:
2819  case kPOLYGON:
2820  case kMULTIPOLYGON:
2821  str_col_buffers[col_ids[col_idx]].push_back(
2822  col_datum.stringval ? *col_datum.stringval : "");
2823  break;
2824  default:
2825  CHECK(false);
2826  }
2827  ++col_idx;
2828  if (col_idx == static_cast<size_t>(table_descriptor->shardedColumnId)) {
2829  const auto shard_count = shard_tables.size();
2830  const size_t shard_idx = SHARD_FOR_KEY(int_col_val, shard_count);
2831  shard = shard_tables[shard_idx];
2832  }
2833  }
2834  for (const auto& kv : col_buffers) {
2835  insert_data.columnIds.push_back(kv.first);
2836  DataBlockPtr p;
2837  p.numbersPtr = reinterpret_cast<int8_t*>(kv.second.get());
2838  insert_data.data.push_back(p);
2839  }
2840  for (auto& kv : str_col_buffers) {
2841  insert_data.columnIds.push_back(kv.first);
2842  DataBlockPtr p;
2843  p.stringsPtr = &kv.second;
2844  insert_data.data.push_back(p);
2845  }
2846  for (auto& kv : arr_col_buffers) {
2847  insert_data.columnIds.push_back(kv.first);
2848  DataBlockPtr p;
2849  p.arraysPtr = &kv.second;
2850  insert_data.data.push_back(p);
2851  }
2852  insert_data.numRows = 1;
2853  if (shard) {
2854  shard->fragmenter->insertData(insert_data);
2855  } else {
2856  table_descriptor->fragmenter->insertData(insert_data);
2857  }
2858 }
const Plan * get_plan() const
Definition: Planner.h:289
#define CHECK_EQ(x, y)
Definition: Logger.h:198
int get_result_table_id() const
Definition: Planner.h:291
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:334
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:141
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:142
HOST DEVICE int get_size() const
Definition: sqltypes.h:336
Definition: sqldefs.h:49
const Catalog_Namespace::Catalog & getCatalog() const
Definition: Planner.h:293
std::string to_string(char const *&&v)
int tableId
identifies the table into which the data is being inserted
Definition: Fragmenter.h:61
size_t numRows
the number of rows being inserted
Definition: Fragmenter.h:63
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: Importer.cpp:232
CHECK(cgen_state)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
int64_t insert_one_dict_str(T *col_data, const std::string &columnName, const SQLTypeInfo &columnType, const Analyzer::Constant *col_cv, const Catalog_Namespace::Catalog &catalog)
Definition: Execute.cpp:2533
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:989
void put_null_array(void *ndptr, const SQLTypeInfo &ntype, const std::string col_name)
const std::list< int > & get_result_col_list() const
Definition: Planner.h:292
void put_null(void *ndptr, const SQLTypeInfo &ntype, const std::string col_name)
Definition: sqltypes.h:55
Definition: sqltypes.h:56
bool is_null(const T &v, const SQLTypeInfo &t)
std::vector< DataBlockPtr > data
the data being inserted
Definition: Fragmenter.h:64
Definition: sqltypes.h:44
bool is_string() const
Definition: sqltypes.h:477
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
Definition: sqltypes.h:48
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:659
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
specifies the content in-memory of a row in the table metadata table
int8_t * numbersPtr
Definition: sqltypes.h:140
std::vector< int > columnIds
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:62
#define SHARD_FOR_KEY(key, num_shards)
Definition: shard_key.h:20
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:122
const ColumnDescriptor * get_column_descriptor(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:142
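
The tail of the per-target loop routes the row to a physical shard: once the sharding column has been processed, its value picks the shard table that receives the insert. A sketch of the routing with a simple modulo stand-in for SHARD_FOR_KEY (the real macro in shard_key.h may map keys differently):

#include <cstddef>
#include <cstdint>

size_t shard_for_key(int64_t key, size_t num_shards) {
  // Illustrative stand-in: non-negative key modulo shard count.
  return static_cast<size_t>(key < 0 ? -key : key) % num_shards;
}

int main() {
  const size_t shard_count = 4;
  // A row whose sharding column holds 42 lands in shard 42 % 4 == 2.
  return shard_for_key(42, shard_count) == 2 ? 0 : 1;
}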


ResultSetPtr Executor::executeTableFunction ( const TableFunctionExecutionUnit  exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat 
)
private

Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be consumed by subsequent execution steps.

Definition at line 1346 of file Execute.cpp.

References CHECK_EQ, TableFunctionCompilationContext::compile(), CompilationOptions::device_type_, TableFunctionExecutionContext::execute(), and INJECT_TIMER.

1351  {
1352  INJECT_TIMER(Exec_executeTableFunction);
1353  nukeOldState(false, table_infos, nullptr);
1354 
1355  ColumnCacheMap column_cache; // Note: if we add retries to the table function
1356  // framework, we may want to move this up a level
1357 
1358  ColumnFetcher column_fetcher(this, column_cache);
1359  TableFunctionCompilationContext compilation_context;
1360  compilation_context.compile(exe_unit, co, this);
1361 
 1362  TableFunctionExecutionContext exe_context(getRowSetMemoryOwner());
 1363  CHECK_EQ(table_infos.size(), size_t(1));
1364  return exe_context.execute(exe_unit,
1365  table_infos.front(),
1366  &compilation_context,
1367  column_fetcher,
1368  co.device_type_,
1369  this);
1370 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:241
#define INJECT_TIMER(DESC)
Definition: measure.h:91
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:2860
ExecutorDeviceType device_type_
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
void compile(const TableFunctionExecutionUnit &exe_unit, const CompilationOptions &co, Executor *executor)
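
Conceptually, a table function is just a function from columns to columns whose output size need not match its input. A runnable toy model of that contract (the function below and its semantics are invented for illustration; the framework above compiles such functions to native code and wraps their output in a ResultSet):

#include <cstdint>
#include <vector>

// Toy table function: emit the input column `copies` times over.
std::vector<int64_t> repeat_column(const std::vector<int64_t>& col, int copies) {
  std::vector<int64_t> out;
  out.reserve(col.size() * copies);
  for (int c = 0; c < copies; ++c) {
    out.insert(out.end(), col.begin(), col.end());
  }
  return out;
}

int main() {
  return repeat_column({1, 2, 3}, 2).size() == 6 ? 0 : 1;
}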


void Executor::executeUpdate ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  table_infos,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const UpdateLogForFragment::Callback cb,
const bool  is_agg = false 
)
private

Definition at line 60 of file ExecuteUpdate.cpp.

References CHECK(), CHECK_EQ, CHECK_GT, Executor::ExecutionDispatch::compile(), create_count_all_execution_unit(), CompilationOptions::device_type_, FragmentsPerTable::fragment_ids, g_bigint_count, Executor::ExecutionDispatch::getFragmentResults(), kBIGINT, kCOUNT, KernelPerFragment, kINT, and Executor::ExecutionDispatch::run().

67  {
68  CHECK(cb);
69  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in);
70  ColumnCacheMap column_cache;
71 
72  const auto count =
73  makeExpr<Analyzer::AggExpr>(SQLTypeInfo(g_bigint_count ? kBIGINT : kINT, false),
74  kCOUNT,
75  nullptr,
76  false,
77  nullptr);
78  const auto count_all_exe_unit = create_count_all_execution_unit(ra_exe_unit, count);
79 
80  ColumnFetcher column_fetcher(this, column_cache);
81  CHECK_GT(ra_exe_unit.input_descs.size(), size_t(0));
82  const auto table_id = ra_exe_unit.input_descs[0].getTableId();
83  const auto& outer_fragments = table_infos.front().info.fragments;
84 
85  std::vector<FragmentsPerTable> fragments = {{0, {0}}};
86  for (size_t tab_idx = 1; tab_idx < ra_exe_unit.input_descs.size(); tab_idx++) {
87  int table_id = ra_exe_unit.input_descs[tab_idx].getTableId();
88  CHECK_EQ(table_infos[tab_idx].table_id, table_id);
89  const auto& fragmentsPerTable = table_infos[tab_idx].info.fragments;
90  FragmentsPerTable entry = {table_id, {}};
91  for (size_t innerFragId = 0; innerFragId < fragmentsPerTable.size(); innerFragId++) {
92  entry.fragment_ids.push_back(innerFragId);
93  }
94  fragments.push_back(entry);
95  }
96 
 97  // There could be benefit to multithreading this once we see where the bottlenecks
 98  // really are
99  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
100  ++fragment_index) {
101  ExecutionDispatch current_fragment_execution_dispatch(
102  this, ra_exe_unit, table_infos, cat, row_set_mem_owner, nullptr);
103 
104  const int64_t crt_fragment_tuple_count =
105  outer_fragments[fragment_index].getNumTuples();
106  int64_t max_groups_buffer_entry_guess = crt_fragment_tuple_count;
107  if (is_agg) {
108  max_groups_buffer_entry_guess =
109  std::min(2 * max_groups_buffer_entry_guess, static_cast<int64_t>(100'000'000));
110  }
111 
112  const auto execution_descriptors = current_fragment_execution_dispatch.compile(
113  max_groups_buffer_entry_guess, 8, co, eo, column_fetcher, true);
114  // We may want to consider in the future allowing this to execute on devices other
115  // than CPU
116 
117  fragments[0] = {table_id, {fragment_index}};
118 
119  current_fragment_execution_dispatch.run(
120  co.device_type_,
121  0,
122  eo,
123  column_fetcher,
124  *std::get<QueryCompilationDescriptorOwned>(execution_descriptors),
125  *std::get<QueryMemoryDescriptorOwned>(execution_descriptors),
126  fragments,
 127  ExecutorDispatchMode::KernelPerFragment,
 128  -1);
129  const auto& proj_fragment_results =
130  current_fragment_execution_dispatch.getFragmentResults();
131  if (proj_fragment_results.empty()) {
132  continue;
133  }
134  const auto& proj_fragment_result = proj_fragment_results[0];
135  const auto proj_result_set = proj_fragment_result.first;
136  CHECK(proj_result_set);
137  cb({outer_fragments[fragment_index], fragment_index, proj_result_set});
138  }
139 }
bool is_agg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:198
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3034
const std::vector< InputDescriptor > input_descs
#define CHECK_GT(x, y)
Definition: Logger.h:202
RelAlgExecutionUnit create_count_all_execution_unit(const RelAlgExecutionUnit &ra_exe_unit, std::shared_ptr< Analyzer::Expr > replacement_target)
CHECK(cgen_state)
bool g_bigint_count
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:852
ExecutorDeviceType device_type_
Definition: sqldefs.h:71
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
Definition: sqltypes.h:48
std::vector< size_t > fragment_ids
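
The fragment list built above has a fixed shape: entry 0 names the outer table and is rewritten to hold exactly one fragment per loop iteration, while every inner table keeps its complete fragment list. A standalone sketch of that construction (types are simplified stand-ins for the real FragmentsPerTable, and the table ids are illustrative):

#include <cstddef>
#include <vector>

struct FragmentsPerTable {
  int table_id;
  std::vector<size_t> fragment_ids;
};

int main() {
  const int outer_table_id = 100, inner_table_id = 200;  // illustrative ids
  const size_t outer_frag_count = 3, inner_frag_count = 2;

  // Placeholder entry 0 for the outer table; inner tables keep all fragments.
  std::vector<FragmentsPerTable> fragments = {{0, {0}}};
  FragmentsPerTable inner{inner_table_id, {}};
  for (size_t i = 0; i < inner_frag_count; ++i) {
    inner.fragment_ids.push_back(i);
  }
  fragments.push_back(inner);

  // One kernel per outer fragment: only entry 0 changes between runs.
  for (size_t fragment_index = 0; fragment_index < outer_frag_count; ++fragment_index) {
    fragments[0] = {outer_table_id, {fragment_index}};
  }
  return 0;
}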


ResultSetPtr Executor::executeWorkUnit ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit_in,
const CompilationOptions co,
const ExecutionOptions options,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
RenderInfo render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache 
)
private

Definition at line 1109 of file Execute.cpp.

References CompilationRetryNewScanLimit::new_scan_limit_, and anonymous_namespace{Execute.cpp}::replace_scan_limit().

1120  {
1121  try {
1122  return executeWorkUnitImpl(max_groups_buffer_entry_guess,
1123  is_agg,
1124  true,
1125  query_infos,
1126  ra_exe_unit_in,
1127  co,
1128  eo,
1129  cat,
1130  row_set_mem_owner,
1131  render_info,
1132  has_cardinality_estimation,
1133  column_cache);
1134  } catch (const CompilationRetryNewScanLimit& e) {
1135  return executeWorkUnitImpl(max_groups_buffer_entry_guess,
1136  is_agg,
1137  false,
1138  query_infos,
1139  replace_scan_limit(ra_exe_unit_in, e.new_scan_limit_),
1140  co,
1141  eo,
1142  cat,
1143  row_set_mem_owner,
1144  render_info,
1145  has_cardinality_estimation,
1146  column_cache);
1147  }
1148 }
bool is_agg(const Analyzer::Expr *expr)
RelAlgExecutionUnit replace_scan_limit(const RelAlgExecutionUnit &ra_exe_unit_in, const size_t new_scan_limit)
Definition: Execute.cpp:1091
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1150
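
The try/catch above implements a one-shot retry: the first compilation may throw CompilationRetryNewScanLimit carrying a corrected limit, and the work unit is re-run with that limit substituted into the execution unit. A standalone sketch of the idiom, with invented names:

#include <cstddef>

struct RetryNewScanLimit {
  size_t new_scan_limit;  // corrected value discovered during the first attempt
};

size_t run(size_t scan_limit, bool first_attempt) {
  if (first_attempt && scan_limit == 0) {
    throw RetryNewScanLimit{1000};  // simulate the compiler requesting a new limit
  }
  return scan_limit;
}

size_t run_with_retry(size_t scan_limit) {
  try {
    return run(scan_limit, /*first_attempt=*/true);
  } catch (const RetryNewScanLimit& e) {
    return run(e.new_scan_limit, /*first_attempt=*/false);  // no further retry
  }
}

int main() {
  return run_with_retry(0) == 1000 ? 0 : 1;
}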


ResultSetPtr Executor::executeWorkUnitImpl ( size_t &  max_groups_buffer_entry_guess,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const std::vector< InputTableInfo > &  query_infos,
const RelAlgExecutionUnit ra_exe_unit_in,
const CompilationOptions co,
const ExecutionOptions options,
const Catalog_Namespace::Catalog cat,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
RenderInfo render_info,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache 
)
private

Definition at line 1150 of file Execute.cpp.

References CHECK(), Executor::ExecutionDispatch::compile(), anonymous_namespace{Execute.cpp}::compute_buffer_entry_guess(), CPU, cpu_threads(), DEBUG_TIMER_NEW_THREAD, CompilationOptions::device_type_, CompilationOptions::explain_type_, Data_Namespace::DataMgr::freeAllBuffers(), get_available_gpus(), get_context_count(), get_min_byte_width(), Catalog_Namespace::Catalog::getDataMgr(), QueryExecutionError::getErrorCode(), Data_Namespace::DataMgr::getMemoryInfo(), GPU, Data_Namespace::GPU_LEVEL, CompilationOptions::hoist_literals_, INJECT_TIMER, interrupted_(), ExecutionOptions::just_explain, MAX_BYTE_WIDTH_SUPPORTED, CompilationOptions::opt_level_, query_mem_desc, CompilationOptions::register_intel_jit_listener_, Executor::ExecutionDispatch::run(), and CompilationOptions::with_dynamic_watchdog_.

1162  {
1163  INJECT_TIMER(Exec_executeWorkUnit);
1164  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in);
1165  const auto device_type = getDeviceTypeForTargets(ra_exe_unit, co.device_type_);
1166  CHECK(!query_infos.empty());
1167  if (!max_groups_buffer_entry_guess) {
1168  // The query has failed the first execution attempt because of running out
1169  // of group by slots. Make the conservative choice: allocate fragment size
1170  // slots and run on the CPU.
1171  CHECK(device_type == ExecutorDeviceType::CPU);
1172  max_groups_buffer_entry_guess = compute_buffer_entry_guess(query_infos);
1173  }
1174 
1175  int8_t crt_min_byte_width{get_min_byte_width()};
1176  do {
1177  ExecutionDispatch execution_dispatch(
1178  this, ra_exe_unit, query_infos, cat, row_set_mem_owner, render_info);
1179  ColumnFetcher column_fetcher(this, column_cache);
1180  std::unique_ptr<QueryCompilationDescriptor> query_comp_desc_owned;
1181  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
1182  try {
1183  INJECT_TIMER(execution_dispatch_comp);
1184  std::tie(query_comp_desc_owned, query_mem_desc_owned) =
1185  execution_dispatch.compile(max_groups_buffer_entry_guess,
1186  crt_min_byte_width,
1187  {device_type,
1188  co.hoist_literals_,
1189  co.opt_level_,
 1190  co.with_dynamic_watchdog_,
 1191  co.explain_type_,
 1192  co.register_intel_jit_listener_},
1193  eo,
1194  column_fetcher,
1195  has_cardinality_estimation);
1196  CHECK(query_comp_desc_owned);
1197  crt_min_byte_width = query_comp_desc_owned->getMinByteWidth();
1198  } catch (CompilationRetryNoCompaction&) {
1199  crt_min_byte_width = MAX_BYTE_WIDTH_SUPPORTED;
1200  continue;
1201  }
1202  if (eo.just_explain) {
1203  return executeExplain(*query_comp_desc_owned);
1204  }
1205 
1206  for (const auto target_expr : ra_exe_unit.target_exprs) {
1207  plan_state_->target_exprs_.push_back(target_expr);
1208  }
1209 
1210  auto dispatch = [&execution_dispatch,
1211  &column_fetcher,
1212  &eo,
1213  parent_thread_id = std::this_thread::get_id()](
1214  const ExecutorDeviceType chosen_device_type,
1215  int chosen_device_id,
1216  const QueryCompilationDescriptor& query_comp_desc,
1218  const FragmentsList& frag_list,
1219  const ExecutorDispatchMode kernel_dispatch_mode,
1220  const int64_t rowid_lookup_key) {
1221  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
1222  INJECT_TIMER(execution_dispatch_run);
1223  execution_dispatch.run(chosen_device_type,
1224  chosen_device_id,
1225  eo,
1226  column_fetcher,
1227  query_comp_desc,
1228  query_mem_desc,
1229  frag_list,
1230  kernel_dispatch_mode,
1231  rowid_lookup_key);
1232  };
1233 
1234  QueryFragmentDescriptor fragment_descriptor(
1235  ra_exe_unit,
1236  query_infos,
1237  query_comp_desc_owned->getDeviceType() == ExecutorDeviceType::GPU
 1238  ? cat.getDataMgr().getMemoryInfo(Data_Namespace::GPU_LEVEL)
 1239  : std::vector<Data_Namespace::MemoryInfo>{},
1240  eo.gpu_input_mem_limit_percent);
1241 
1242  if (!eo.just_validate) {
1243  int available_cpus = cpu_threads();
1244  auto available_gpus = get_available_gpus(cat);
1245 
1246  const auto context_count =
1247  get_context_count(device_type, available_cpus, available_gpus.size());
1248  try {
1249  dispatchFragments(dispatch,
1250  execution_dispatch,
1251  query_infos,
1252  eo,
1253  is_agg,
1254  allow_single_frag_table_opt,
1255  context_count,
1256  *query_comp_desc_owned,
1257  *query_mem_desc_owned,
1258  fragment_descriptor,
1259  available_gpus,
1260  available_cpus);
1261  } catch (QueryExecutionError& e) {
1262  if (eo.with_dynamic_watchdog && interrupted_ &&
1263  e.getErrorCode() == ERR_OUT_OF_TIME) {
 1264  throw QueryExecutionError(ERR_INTERRUPTED);
 1265  }
1266  cat.getDataMgr().freeAllBuffers();
 1267  if (e.getErrorCode() == ERR_OVERFLOW_OR_UNDERFLOW &&
 1268  static_cast<size_t>(crt_min_byte_width << 1) <= sizeof(int64_t)) {
1269  crt_min_byte_width <<= 1;
1270  continue;
1271  }
1272  throw;
1273  }
1274  }
1275  cat.getDataMgr().freeAllBuffers();
1276  if (is_agg) {
1277  try {
1278  return collectAllDeviceResults(execution_dispatch,
1279  ra_exe_unit.target_exprs,
1280  *query_mem_desc_owned,
1281  query_comp_desc_owned->getDeviceType(),
1282  row_set_mem_owner);
1283  } catch (ReductionRanOutOfSlots&) {
 1284  throw QueryExecutionError(ERR_OUT_OF_SLOTS);
 1285  } catch (OverflowOrUnderflow&) {
1286  crt_min_byte_width <<= 1;
1287  continue;
1288  }
1289  }
1290  return resultsUnion(execution_dispatch);
1291 
1292  } while (static_cast<size_t>(crt_min_byte_width) <= sizeof(int64_t));
1293 
1294  return std::make_shared<ResultSet>(std::vector<TargetInfo>{},
 1295  ExecutorDeviceType::CPU,
 1296  QueryMemoryDescriptor(),
 1297  nullptr,
1298  this);
1299 }
bool is_agg(const Analyzer::Expr *expr)
ResultSetPtr collectAllDeviceResults(ExecutionDispatch &execution_dispatch, const std::vector< Analyzer::Expr * > &target_exprs, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
Definition: Execute.cpp:1507
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1042
void dispatchFragments(const std::function< void(const ExecutorDeviceType chosen_device_type, int chosen_device_id, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, const int64_t rowid_lookup_key)> dispatch, const ExecutionDispatch &execution_dispatch, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, QueryFragmentDescriptor &fragment_descriptor, std::unordered_set< int > &available_gpus, int &available_cpus)
Definition: Execute.cpp:1692
ExecutorDeviceType
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
std::unordered_set< int > get_available_gpus(const Catalog_Namespace::Catalog &cat)
Definition: Execute.cpp:947
const ExecutorOptLevel opt_level_
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3034
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:298
size_t compute_buffer_entry_guess(const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:971
std::vector< FragmentsPerTable > FragmentsList
int8_t get_min_byte_width()
ExecutorDispatchMode
CHECK(cgen_state)
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:959
#define INJECT_TIMER(DESC)
Definition: measure.h:91
friend class QueryMemoryDescriptor
Definition: Execute.h:1053
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:1039
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:988
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1041
const bool register_intel_jit_listener_
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel)
Definition: DataMgr.cpp:176
ResultSetPtr resultsUnion(ExecutionDispatch &execution_dispatch)
Definition: Execute.cpp:811
ExecutorDeviceType device_type_
const ExecutorExplainType explain_type_
static const int32_t ERR_OUT_OF_SLOTS
Definition: Execute.h:1035
constexpr int8_t MAX_BYTE_WIDTH_SUPPORTED
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Definition: Execute.cpp:1378
const bool with_dynamic_watchdog_
int cpu_threads()
Definition: thread_count.h:25
bool interrupted_
Definition: Execute.h:997
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
Definition: Execute.cpp:1372
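
The do/while drives a widening retry: when aggregation overflows or reduction runs out of slots, crt_min_byte_width doubles and the query recompiles, stopping once 8-byte (int64_t) slots have been tried. A sketch of just that loop, with a toy stand-in for compile-and-run:

#include <cstddef>
#include <cstdint>

int execute_with_widening(int8_t min_byte_width, bool (*attempt)(int8_t)) {
  do {
    if (attempt(min_byte_width)) {
      return min_byte_width;  // succeeded at this width
    }
    min_byte_width <<= 1;  // overflow: double the width and recompile
  } while (static_cast<size_t>(min_byte_width) <= sizeof(int64_t));
  return -1;  // even 8-byte aggregates overflowed
}

int main() {
  // The toy attempt succeeds once the width reaches 4 bytes: 1 -> 2 -> 4.
  return execute_with_widening(1, [](int8_t w) { return w >= 4; }) == 4 ? 0 : 1;
}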


void Executor::executeWorkUnitPerFragment ( const RelAlgExecutionUnit ra_exe_unit,
const InputTableInfo table_info,
const CompilationOptions co,
const ExecutionOptions eo,
const Catalog_Namespace::Catalog cat,
PerFragmentCallBack cb 
)
private

Compiles and dispatches a work unit once per fragment, processing each fragment's results with the per-fragment callback. Currently used for computing metrics over fragments (metadata).

Definition at line 1301 of file Execute.cpp.

References CHECK_EQ, Executor::ExecutionDispatch::compile(), CompilationOptions::device_type_, Fragmenter_Namespace::TableInfo::fragments, InputTableInfo::info, and Executor::ExecutionDispatch::run().

1306  {
1307  const auto ra_exe_unit = addDeletedColumn(ra_exe_unit_in);
1308  ColumnCacheMap column_cache;
1309 
1310  std::vector<InputTableInfo> table_infos{table_info};
1311  // TODO(adb): ensure this is under a try / catch
1312  ExecutionDispatch execution_dispatch(
1313  this, ra_exe_unit, table_infos, cat, row_set_mem_owner_, nullptr);
1314  ColumnFetcher column_fetcher(this, column_cache);
1315  std::unique_ptr<QueryCompilationDescriptor> query_comp_desc_owned;
1316  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc_owned;
1317  std::tie(query_comp_desc_owned, query_mem_desc_owned) =
1318  execution_dispatch.compile(0, 8, co, eo, column_fetcher, false);
1319  CHECK_EQ(size_t(1), ra_exe_unit.input_descs.size());
1320  const auto table_id = ra_exe_unit.input_descs[0].getTableId();
1321  const auto& outer_fragments = table_info.info.fragments;
1322  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
1323  ++fragment_index) {
1324  // We may want to consider in the future allowing this to execute on devices other
1325  // than CPU
1326  execution_dispatch.run(co.device_type_,
1327  0,
1328  eo,
1329  column_fetcher,
1330  *query_comp_desc_owned,
1331  *query_mem_desc_owned,
1332  {{table_id, {fragment_index}}},
 1333  ExecutorDispatchMode::KernelPerFragment,
 1334  -1);
1335  }
1336 
1337  const auto& all_fragment_results = execution_dispatch.getFragmentResults();
1338 
1339  for (size_t fragment_index = 0; fragment_index < outer_fragments.size();
1340  ++fragment_index) {
1341  const auto fragment_results = all_fragment_results[fragment_index];
1342  cb(fragment_results.first, outer_fragments[fragment_index]);
1343  }
1344 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3034
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:167
const std::vector< InputDescriptor > input_descs
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:989
ExecutorDeviceType device_type_
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
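
The callback receives each fragment's ResultSet together with that fragment's metadata, which is what lets callers recompute per-fragment statistics without materializing a combined result. A sketch of the shape of that interaction, with stand-in types (the real PerFragmentCallBack signature is the one documented above):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

using FragmentResult = std::vector<int64_t>;  // stand-in for the real ResultSet
using PerFragmentCallback =
    std::function<void(const FragmentResult&, size_t fragment_index)>;

void run_per_fragment(const std::vector<FragmentResult>& fragment_results,
                      PerFragmentCallback cb) {
  // One work-unit execution per fragment, each reported to the callback.
  for (size_t fragment_index = 0; fragment_index < fragment_results.size();
       ++fragment_index) {
    cb(fragment_results[fragment_index], fragment_index);
  }
}

int main() {
  run_per_fragment({{1, 2}, {3}}, [](const FragmentResult& rows, size_t idx) {
    std::cout << "fragment " << idx << ": " << rows.size() << " rows\n";
  });
  return 0;
}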


Executor::FetchResult Executor::fetchChunks ( const ColumnFetcher column_fetcher,
const RelAlgExecutionUnit ra_exe_unit,
const int  device_id,
const Data_Namespace::MemoryLevel  memory_level,
const std::map< int, const TableFragments * > &  all_tables_fragments,
const FragmentsList selected_fragments,
const Catalog_Namespace::Catalog cat,
std::list< ChunkIter > &  chunk_iterators,
std::list< std::shared_ptr< Chunk_NS::Chunk >> &  chunks 
)
private

Definition at line 2014 of file Execute.cpp.

References CHECK(), CHECK_EQ, CHECK_LT, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, ColumnFetcher::getAllTableColumnFragments(), ColumnFetcher::getOneTableColumnFragment(), ColumnFetcher::getResultSetColumn(), INJECT_TIMER, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, RESULT, and anonymous_namespace{Execute.cpp}::try_get_column_descriptor().

2023  {
2024  auto timer = DEBUG_TIMER(__func__);
2026  const auto& col_global_ids = ra_exe_unit.input_col_descs;
2027  std::vector<std::vector<size_t>> selected_fragments_crossjoin;
2028  std::vector<size_t> local_col_to_frag_pos;
2029  buildSelectedFragsMapping(selected_fragments_crossjoin,
2030  local_col_to_frag_pos,
2031  col_global_ids,
2032  selected_fragments,
2033  ra_exe_unit);
2034 
 2035  CartesianProduct<std::vector<std::vector<size_t>>> frag_ids_crossjoin(
 2036  selected_fragments_crossjoin);
2037 
2038  std::vector<std::vector<const int8_t*>> all_frag_col_buffers;
2039  std::vector<std::vector<int64_t>> all_num_rows;
2040  std::vector<std::vector<uint64_t>> all_frag_offsets;
2041 
2042  for (const auto& selected_frag_ids : frag_ids_crossjoin) {
2043  std::vector<const int8_t*> frag_col_buffers(
2044  plan_state_->global_to_local_col_ids_.size());
2045  for (const auto& col_id : col_global_ids) {
2046  CHECK(col_id);
2047  const int table_id = col_id->getScanDesc().getTableId();
2048  const auto cd = try_get_column_descriptor(col_id.get(), cat);
2049  if (cd && cd->isVirtualCol) {
2050  CHECK_EQ("rowid", cd->columnName);
2051  continue;
2052  }
2053  const auto fragments_it = all_tables_fragments.find(table_id);
2054  CHECK(fragments_it != all_tables_fragments.end());
2055  const auto fragments = fragments_it->second;
2056  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
2057  CHECK(it != plan_state_->global_to_local_col_ids_.end());
2058  CHECK_LT(static_cast<size_t>(it->second),
2059  plan_state_->global_to_local_col_ids_.size());
2060  const size_t frag_id = selected_frag_ids[local_col_to_frag_pos[it->second]];
2061  if (!fragments->size()) {
2062  return {};
2063  }
2064  CHECK_LT(frag_id, fragments->size());
2065  auto memory_level_for_column = memory_level;
2066  if (plan_state_->columns_to_fetch_.find(
2067  std::make_pair(col_id->getScanDesc().getTableId(), col_id->getColId())) ==
2068  plan_state_->columns_to_fetch_.end()) {
2069  memory_level_for_column = Data_Namespace::CPU_LEVEL;
2070  }
2071  if (col_id->getScanDesc().getSourceType() == InputSourceType::RESULT) {
2072  frag_col_buffers[it->second] = column_fetcher.getResultSetColumn(
2073  col_id.get(), memory_level_for_column, device_id);
2074  } else {
2075  if (needFetchAllFragments(*col_id, ra_exe_unit, selected_fragments)) {
2076  frag_col_buffers[it->second] =
2077  column_fetcher.getAllTableColumnFragments(table_id,
2078  col_id->getColId(),
2079  all_tables_fragments,
2080  memory_level_for_column,
2081  device_id);
2082  } else {
2083  frag_col_buffers[it->second] =
2084  column_fetcher.getOneTableColumnFragment(table_id,
2085  frag_id,
2086  col_id->getColId(),
2087  all_tables_fragments,
2088  chunks,
2089  chunk_iterators,
2090  memory_level_for_column,
2091  device_id);
2092  }
2093  }
2094  }
2095  all_frag_col_buffers.push_back(frag_col_buffers);
2096  }
2097  std::tie(all_num_rows, all_frag_offsets) = getRowCountAndOffsetForAllFrags(
2098  ra_exe_unit, frag_ids_crossjoin, ra_exe_unit.input_descs, all_tables_fragments);
2099  return {all_frag_col_buffers, all_num_rows, all_frag_offsets};
2100 }
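
The selected fragment ids are expanded as a Cartesian product across input descriptors, so each iteration of the outer loop above sees one concrete combination of fragment ids (one id per table). A self-contained toy expansion of what the CartesianProduct utility produces (the fragment ids are made up):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Expands {{0, 1}, {0}} into the combinations {0, 0} and {1, 0}.
    int main() {
      const std::vector<std::vector<size_t>> selected_fragments_crossjoin{{0, 1}, {0}};
      std::vector<std::vector<size_t>> combos{{}};
      for (const auto& dim : selected_fragments_crossjoin) {
        std::vector<std::vector<size_t>> next;
        for (const auto& prefix : combos) {
          for (const auto frag_id : dim) {
            auto extended = prefix;
            extended.push_back(frag_id);
            next.push_back(extended);
          }
        }
        combos = std::move(next);
      }
      for (const auto& selected_frag_ids : combos) {
        for (const auto id : selected_frag_ids) {
          std::cout << id << ' ';  // each combination drives one buffer fetch
        }
        std::cout << '\n';
      }
      return 0;
    }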

std::string Executor::generatePTX ( const std::string &  cuda_llir) const
private

Definition at line 899 of file NativeCodegen.cpp.

899  {
900  return CodeGenerator::generatePTX(
901  cuda_llir, nvptx_target_machine_.get(), cgen_state_.get());
902 }
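
generatePTX() is a thin forwarding shim: it binds the executor-owned NVPTX target machine and codegen state to the static helper. A hedged sketch of where the call sits in GPU code generation (serialize_llvm_module is a hypothetical stand-in for the IR-serialization step, not a function documented here):

    // Sketch: lower the kernel module to textual LLVM IR, then to PTX.
    const std::string cuda_llir = serialize_llvm_module(*cgen_state_->module_);
    const std::string ptx = generatePTX(cuda_llir);  // uses nvptx_target_machine_
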
const Catalog_Namespace::Catalog * Executor::getCatalog ( ) const

Definition at line 233 of file Execute.cpp.

References catalog_().

233  {
234  return catalog_;
235 }

std::vector< std::pair< void *, void * > > Executor::getCodeFromCache ( const CodeCacheKey key,
const CodeCache cache 
)
private

Definition at line 171 of file NativeCodegen.cpp.

References LruCache< key_t, value_t, hash_t >::cend(), LruCache< key_t, value_t, hash_t >::find(), and GpuCompilationContext::module().

172  {
173  auto it = cache.find(key);
174  if (it != cache.cend()) {
175  delete cgen_state_->module_;
176  cgen_state_->module_ = it->second.second;
177  std::vector<std::pair<void*, void*>> native_functions;
178  for (auto& native_code : it->second.first) {
179  GpuCompilationContext* gpu_context = std::get<2>(native_code).get();
180  native_functions.emplace_back(std::get<0>(native_code),
181  gpu_context ? gpu_context->module() : nullptr);
182  }
183  return native_functions;
184  }
185  return {};
186 }
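
A cache hit swaps the cached LLVM module into cgen_state_ and returns the (CPU entry point, GPU context module) pairs; a miss returns an empty vector. A sketch of the surrounding compile path, assuming the executor holds a CodeCache member such as cpu_code_cache_, with compileAndStore as a hypothetical helper for the miss case:

    // Sketch: consult the code cache before running LLVM compilation.
    auto native_functions = getCodeFromCache(key, cpu_code_cache_);
    if (native_functions.empty()) {
      // Cache miss: do full codegen, then insert under the same key so the
      // next structurally identical query skips compilation.
      native_functions = compileAndStore(key);  // hypothetical helper
    }
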

std::vector< ColumnLazyFetchInfo > Executor::getColLazyFetchInfo ( const std::vector< Analyzer::Expr * > &  target_exprs) const

Definition at line 287 of file Execute.cpp.

References catalog_(), CHECK(), ColumnDescriptor::columnType, get_column_descriptor(), IS_GEO, and kNULLT.

288  {
290  CHECK(catalog_);
291  std::vector<ColumnLazyFetchInfo> col_lazy_fetch_info;
292  for (const auto target_expr : target_exprs) {
293  if (!plan_state_->isLazyFetchColumn(target_expr)) {
294  col_lazy_fetch_info.emplace_back(
295  ColumnLazyFetchInfo{false, -1, SQLTypeInfo(kNULLT, false)});
296  } else {
297  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
298  CHECK(col_var);
299  auto col_id = col_var->get_column_id();
300  auto rte_idx = (col_var->get_rte_idx() == -1) ? 0 : col_var->get_rte_idx();
301  auto cd = (col_var->get_table_id() > 0)
302  ? get_column_descriptor(col_id, col_var->get_table_id(), *catalog_)
303  : nullptr;
304  if (cd && IS_GEO(cd->columnType.get_type())) {
305  // Geo coords cols will be processed in sequence. So we only need to track the
306  // first coords col in lazy fetch info.
307  {
308  auto cd0 =
309  get_column_descriptor(col_id + 1, col_var->get_table_id(), *catalog_);
310  auto col0_ti = cd0->columnType;
311  CHECK(!cd0->isVirtualCol);
312  auto col0_var = makeExpr<Analyzer::ColumnVar>(
313  col0_ti, col_var->get_table_id(), cd0->columnId, rte_idx);
314  auto local_col0_id = plan_state_->getLocalColumnId(col0_var.get(), false);
315  col_lazy_fetch_info.emplace_back(
316  ColumnLazyFetchInfo{true, local_col0_id, col0_ti});
317  }
318  } else {
319  auto local_col_id = plan_state_->getLocalColumnId(col_var, false);
320  const auto& col_ti = col_var->get_type_info();
321  col_lazy_fetch_info.emplace_back(ColumnLazyFetchInfo{true, local_col_id, col_ti});
322  }
323  }
324  }
325  return col_lazy_fetch_info;
326 }
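
Each returned entry either marks the target as already materialized or carries the local column id and type needed to decode it on demand. A consumer sketch, assuming ColumnLazyFetchInfo exposes the three fields initialized above as is_lazily_fetched, local_col_id, and type:

    // Sketch: count targets that will be decoded lazily from fragment buffers.
    const auto lazy_fetch_info = executor->getColLazyFetchInfo(target_exprs);
    size_t num_lazy = 0;
    for (const auto& fetch_info : lazy_fetch_info) {
      if (fetch_info.is_lazily_fetched) {
        ++num_lazy;  // fetch_info.local_col_id names the buffer to decode
      }
    }
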

ExpressionRange Executor::getColRange ( const PhysicalInput phys_input) const

Definition at line 257 of file Execute.cpp.

257  {
258  return agg_col_range_cache_.getColRange(phys_input);
259 }
const ColumnDescriptor * Executor::getColumnDescriptor ( const Analyzer::ColumnVar col_var) const

Definition at line 216 of file Execute.cpp.

References catalog_(), get_column_descriptor_maybe(), Analyzer::ColumnVar::get_column_id(), and Analyzer::ColumnVar::get_table_id().

217  {
219  col_var->get_column_id(), col_var->get_table_id(), *catalog_);
220 }

ExecutorDeviceType Executor::getDeviceTypeForTargets ( const RelAlgExecutionUnit ra_exe_unit,
const ExecutorDeviceType  requested_device_type 
)
private

Definition at line 1378 of file Execute.cpp.

References CPU, g_bigint_count, get_target_info(), RelAlgExecutionUnit::groupby_exprs, kAVG, kDOUBLE, kSUM, and RelAlgExecutionUnit::target_exprs.

1380  {
1381  for (const auto target_expr : ra_exe_unit.target_exprs) {
1382  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1383  if (!ra_exe_unit.groupby_exprs.empty() &&
1384  !isArchPascalOrLater(requested_device_type)) {
1385  if ((agg_info.agg_kind == kAVG || agg_info.agg_kind == kSUM) &&
1386  agg_info.agg_arg_type.get_type() == kDOUBLE) {
1387  return ExecutorDeviceType::CPU;
1388  }
1389  }
1390  if (dynamic_cast<const Analyzer::RegexpExpr*>(target_expr)) {
1391  return ExecutorDeviceType::CPU;
1392  }
1393  }
1394  return requested_device_type;
1395 }
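
Two conditions downgrade the requested device type to CPU: a grouped AVG or SUM over DOUBLE on a device older than Pascal (which lacks fast 64-bit floating-point atomics), and any REGEXP expression among the targets. A self-contained restatement of the rule (toy types, not the engine's API):

    enum class DeviceType { CPU, GPU };

    // Toy version of the fallback predicate above.
    DeviceType device_for_targets(bool has_groupby,
                                  bool grouped_avg_or_sum_on_double,
                                  bool has_regexp_target,
                                  bool arch_pascal_or_later,
                                  DeviceType requested) {
      if (has_groupby && grouped_avg_or_sum_on_double && !arch_pascal_or_later) {
        return DeviceType::CPU;  // no fast double atomics before Pascal
      }
      if (has_regexp_target) {
        return DeviceType::CPU;  // REGEXP targets are CPU-only
      }
      return requested;
    }

For example, SELECT x, AVG(d) FROM t GROUP BY x with d of type DOUBLE falls back to CPU on a Maxwell card but keeps the GPU device type on Pascal or later.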

std::shared_ptr< Executor > Executor::getExecutor ( const int  db_id,
const std::string &  debug_dir = "",
const std::string &  debug_file = "",
const MapDParameters  mapd_parameters = MapDParameters(),
::QueryRenderer::QueryRenderManager *  render_manager = nullptr 
)
static

Definition at line 127 of file Execute.cpp.

References CHECK(), MapDParameters::cuda_block_size, MapDParameters::cuda_grid_size, and INJECT_TIMER.

Referenced by Catalog_Namespace::Catalog::checkDateInDaysColumnMigration(), MapDHandler::execute_rel_alg(), MapDHandler::execute_rel_alg_df(), MapDHandler::execute_root_plan(), Parser::getResultSet(), MapDHandler::interrupt(), Parser::InsertIntoTableAsSelectStmt::populateData(), QueryRunner::anonymous_namespace{QueryRunner.cpp}::run_select_query_with_filter_push_down(), QueryRunner::QueryRunner::runSelectQuery(), QueryRunner::QueryRunner::runSQL(), MapDHandler::sql_execute_impl(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumns().

132  {
133  INJECT_TIMER(getExecutor);
134  const auto executor_key = std::make_pair(db_id, render_manager);
135  {
136  mapd_shared_lock<mapd_shared_mutex> read_lock(executors_cache_mutex_);
137  auto it = executors_.find(executor_key);
138  if (it != executors_.end()) {
139  return it->second;
140  }
141  }
142  {
143  mapd_unique_lock<mapd_shared_mutex> write_lock(executors_cache_mutex_);
144  auto it = executors_.find(executor_key);
145  if (it != executors_.end()) {
146  return it->second;
147  }
148  auto executor = std::make_shared<Executor>(db_id,
149  mapd_parameters.cuda_block_size,
150  mapd_parameters.cuda_grid_size,
151  debug_dir,
152  debug_file,
153  render_manager);
154  auto it_ok = executors_.insert(std::make_pair(executor_key, executor));
155  CHECK(it_ok.second);
156  return executor;
157  }
158 }
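
Lookups use double-checked locking: a shared lock covers the common hit path, and only a miss takes the unique lock, which re-checks the map before constructing, so concurrent callers never build two executors for the same key. Typical usage (the db_id value is illustrative; the remaining parameters default):

    #include <cassert>

    const int db_id = 1;  // illustrative
    auto executor = Executor::getExecutor(db_id);
    auto again = Executor::getExecutor(db_id);
    assert(executor.get() == again.get());  // same cached instance
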

std::vector< size_t > Executor::getFragmentCount ( const FragmentsList selected_fragments,
const size_t  scan_idx,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 2102 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs, and RelAlgExecutionUnit::join_quals.

2104  {
2105  if ((ra_exe_unit.input_descs.size() > size_t(2) || !ra_exe_unit.join_quals.empty()) &&
2106  scan_idx > 0 &&
2107  !plan_state_->join_info_.sharded_range_table_indices_.count(scan_idx) &&
2108  !selected_fragments[scan_idx].fragment_ids.empty()) {
2109  // Fetch all fragments
2110  return {size_t(0)};
2111  }
2112 
2113  return selected_fragments[scan_idx].fragment_ids;
2114 }
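
For joins, every inner scan (scan_idx > 0) over an unsharded table must see all of its fragments in one kernel, which is signaled by the single marker id {0}; only the outer scan keeps its per-kernel fragment list. Illustrative call, assuming selected_fragments and ra_exe_unit describe a two-table hash join:

    // Sketch: outer scan keeps its fragments, inner scan collapses to {0}.
    const auto outer_frags = getFragmentCount(selected_fragments, 0, ra_exe_unit);
    const auto inner_frags = getFragmentCount(selected_fragments, 1, ra_exe_unit);
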
std::unordered_map< int, const Analyzer::BinOper * > Executor::getInnerTabIdToJoinCond ( ) const
private

Definition at line 1666 of file Execute.cpp.

References CHECK_EQ.

1667  {
1668  std::unordered_map<int, const Analyzer::BinOper*> id_to_cond;
1669  const auto& join_info = plan_state_->join_info_;
1670  CHECK_EQ(join_info.equi_join_tautologies_.size(), join_info.join_hash_tables_.size());
1671  for (size_t i = 0; i < join_info.join_hash_tables_.size(); ++i) {
1672  int inner_table_id = join_info.join_hash_tables_[i]->getInnerTableId();
1673  id_to_cond.insert(
1674  std::make_pair(inner_table_id, join_info.equi_join_tautologies_[i].get()));
1675  }
1676  return id_to_cond;
1677 }
std::vector< int64_t > Executor::getJoinHashTablePtrs ( const ExecutorDeviceType  device_type,
const int  device_id 
)
private

Definition at line 2515 of file Execute.cpp.

References CHECK(), GPU, and join_hash_tables.

2516  {
2517  std::vector<int64_t> table_ptrs;
2518  const auto& join_hash_tables = plan_state_->join_info_.join_hash_tables_;
2519  for (auto hash_table : join_hash_tables) {
2520  if (!hash_table) {
2521  CHECK(table_ptrs.empty());
2522  return {};
2523  }
2524  table_ptrs.push_back(hash_table->getJoinHashBuffer(
2525  device_type, device_type == ExecutorDeviceType::GPU ? device_id : 0));
2526  }
2527  return table_ptrs;
2528 }
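
The returned buffer addresses are handed to the generated kernel as its join_hash_tables argument; an empty vector means at least one nesting level has no usable hash table. Call sketch (the device id value is illustrative):

    // Sketch: per-device hash table buffers before kernel launch. The GPU
    // variant selects the buffer for the given device; CPU always uses 0.
    const auto join_hash_ptrs =
        getJoinHashTablePtrs(ExecutorDeviceType::GPU, /*device_id=*/0);
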

size_t Executor::getNumBytesForFetchedRow ( ) const

Definition at line 261 of file Execute.cpp.

References catalog_(), kENCODING_DICT, and kTEXT.

261  {
262  size_t num_bytes = 0;
263  if (!plan_state_) {
264  return 0;
265  }
266  for (const auto& fetched_col_pair : plan_state_->columns_to_fetch_) {
267  if (fetched_col_pair.first < 0) {
268  num_bytes += 8;
269  } else {
270  const auto cd =
271  catalog_->getMetadataForColumn(fetched_col_pair.first, fetched_col_pair.second);
272  const auto& ti = cd->columnType;
273  const auto sz = ti.get_type() == kTEXT && ti.get_compression() == kENCODING_DICT
274  ? 4
275  : ti.get_size();
276  if (sz < 0) {
277  // for varlen types, only account for the pointer/size for each row, for now
278  num_bytes += 16;
279  } else {
280  num_bytes += sz;
281  }
282  }
283  }
284  return num_bytes;
285 }
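
Per fetched column the accounting is: 8 bytes for synthetic columns (negative table id), 4 bytes for dictionary-encoded TEXT, the fixed type size when it is known, and 16 bytes (pointer plus length) for other variable-length types. A worked example (values only, no API calls):

    // One BIGINT, one dictionary-encoded TEXT, one none-encoded TEXT:
    const size_t bigint_bytes = 8;      // fixed-size: ti.get_size()
    const size_t dict_text_bytes = 4;   // kTEXT with kENCODING_DICT
    const size_t none_text_bytes = 16;  // varlen: pointer + length
    const size_t row_bytes =
        bigint_bytes + dict_text_bytes + none_text_bytes;  // 28 bytes per row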