OmniSciDB
bf83d84833
|
#include <Execute.h>
Classes | |
class | FetchCacheAnchor |
struct | GroupColLLVMValue |
struct | JoinHashTableOrError |
Public Types | |
using | ExecutorId = size_t |
using | CachedCardinality = std::pair< bool, size_t > |
Public Member Functions | |
Executor (const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file) | |
const TemporaryTables * | getTemporaryTables () |
StringDictionaryProxy * | getStringDictionaryProxy (const int dict_id, const bool with_generation) const |
StringDictionaryProxy * | getStringDictionaryProxy (const int dictId, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const |
bool | isCPUOnly () const |
bool | isArchMaxwell (const ExecutorDeviceType dt) const |
bool | containsLeftDeepOuterJoin () const |
const ColumnDescriptor * | getColumnDescriptor (const Analyzer::ColumnVar *) const |
const ColumnDescriptor * | getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const |
const Catalog_Namespace::Catalog * | getCatalog () const |
void | setCatalog (const Catalog_Namespace::Catalog *catalog) |
const std::shared_ptr < RowSetMemoryOwner > | getRowSetMemoryOwner () const |
const TemporaryTables * | getTemporaryTables () const |
Fragmenter_Namespace::TableInfo | getTableInfo (const int table_id) const |
const TableGeneration & | getTableGeneration (const int table_id) const |
ExpressionRange | getColRange (const PhysicalInput &) const |
size_t | getNumBytesForFetchedRow (const std::set< int > &table_ids_to_fetch) const |
std::vector< ColumnLazyFetchInfo > | getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const |
void | registerActiveModule (void *module, const int device_id) const |
void | unregisterActiveModule (void *module, const int device_id) const |
void | interrupt (const std::string &query_session="", const std::string &interrupt_session="") |
void | resetInterrupt () |
void | enableRuntimeQueryInterrupt (const double runtime_query_check_freq, const unsigned pending_query_check_freq) const |
int8_t | warpSize () const |
unsigned | gridSize () const |
unsigned | numBlocksPerMP () const |
unsigned | blockSize () const |
size_t | maxGpuSlabSize () const |
ResultSetPtr | executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache) |
void | executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg) |
void | setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids) |
void | setColRangeCache (const AggregatedColRange &aggregated_col_range) |
QuerySessionId & | getCurrentQuerySession (mapd_shared_lock< mapd_shared_mutex > &read_lock) |
size_t | getRunningExecutorId (mapd_shared_lock< mapd_shared_mutex > &read_lock) |
bool | checkCurrentQuerySession (const std::string &candidate_query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock) |
void | invalidateRunningQuerySession (mapd_unique_lock< mapd_shared_mutex > &write_lock) |
bool | addToQuerySessionList (const QuerySessionId &query_session, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock) |
bool | removeFromQuerySessionList (const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, mapd_unique_lock< mapd_shared_mutex > &write_lock) |
void | setQuerySessionAsInterrupted (const QuerySessionId &query_session, mapd_unique_lock< mapd_shared_mutex > &write_lock) |
bool | checkIsQuerySessionInterrupted (const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock) |
bool | checkIsQuerySessionEnrolled (const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock) |
bool | updateQuerySessionStatusWithLock (const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, const QuerySessionStatus::QueryStatus updated_query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock) |
bool | updateQuerySessionExecutorAssignment (const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, const size_t executor_id, mapd_unique_lock< mapd_shared_mutex > &write_lock) |
std::vector< QuerySessionStatus > | getQuerySessionInfo (const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock) |
mapd_shared_mutex & | getSessionLock () |
CurrentQueryStatus | attachExecutorToQuerySession (std::shared_ptr< const query_state::QueryState > &query_state) |
void | checkPendingQueryStatus (const QuerySessionId &query_session) |
void | clearQuerySessionStatus (const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, bool acquire_spin_lock) |
void | updateQuerySessionStatus (std::shared_ptr< const query_state::QueryState > &query_state, const QuerySessionStatus::QueryStatus new_query_status) |
void | updateQuerySessionStatus (const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, const QuerySessionStatus::QueryStatus new_query_status) |
void | enrollQuerySession (const QuerySessionId &query_session, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status) |
void | addToCardinalityCache (const std::string &cache_key, const size_t cache_value) |
CachedCardinality | getCachedCardinality (const std::string &cache_key) |
template<typename THREAD_POOL > | |
void | launchKernels (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels) |
Static Public Member Functions | |
static std::shared_ptr< Executor > | getExecutor (const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters system_parameters=SystemParameters()) |
static void | nukeCacheOfExecutors () |
static void | clearMemory (const Data_Namespace::MemoryLevel memory_level) |
static size_t | getArenaBlockSize () |
static std::pair< int64_t, int32_t > | reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input) |
static void | addCodeToCache (const CodeCacheKey &, std::shared_ptr< CompilationContext >, llvm::Module *, CodeCache &) |
Static Public Attributes | |
static const ExecutorId | UNITARY_EXECUTOR_ID = 0 |
static const size_t | high_scan_limit |
static const int32_t | ERR_DIV_BY_ZERO {1} |
static const int32_t | ERR_OUT_OF_GPU_MEM {2} |
static const int32_t | ERR_OUT_OF_SLOTS {3} |
static const int32_t | ERR_UNSUPPORTED_SELF_JOIN {4} |
static const int32_t | ERR_OUT_OF_RENDER_MEM {5} |
static const int32_t | ERR_OUT_OF_CPU_MEM {6} |
static const int32_t | ERR_OVERFLOW_OR_UNDERFLOW {7} |
static const int32_t | ERR_OUT_OF_TIME {9} |
static const int32_t | ERR_INTERRUPTED {10} |
static const int32_t | ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11} |
static const int32_t | ERR_TOO_MANY_LITERALS {12} |
static const int32_t | ERR_STRING_CONST_IN_RESULTSET {13} |
static const int32_t | ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14} |
static const int32_t | ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES {15} |
static const int32_t | ERR_GEOS {16} |
static std::mutex | compilation_mutex_ |
static std::mutex | kernel_mutex_ |
Private Types | |
using | PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> |
Private Member Functions | |
void | clearMetaInfoCache () |
int | deviceCount (const ExecutorDeviceType) const |
int | deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const |
llvm::Value * | codegenWindowFunction (const size_t target_index, const CompilationOptions &co) |
llvm::Value * | codegenWindowFunctionAggregate (const CompilationOptions &co) |
llvm::BasicBlock * | codegenWindowResetStateControlFlow () |
void | codegenWindowFunctionStateInit (llvm::Value *aggregate_state) |
llvm::Value * | codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co) |
void | codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv) |
llvm::Value * | codegenAggregateWindowState () |
llvm::Value * | aggregateWindowStatePtr () |
bool | isArchPascalOrLater (const ExecutorDeviceType dt) const |
bool | needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const |
void | executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb) |
Compiles and dispatches a work unit per fragment, processing results with the per-fragment callback. Currently used for computing metrics over fragments (metadata). More... | |
ResultSetPtr | executeExplain (const QueryCompilationDescriptor &) |
ResultSetPtr | executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat) |
Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps. More... | |
ExecutorDeviceType | getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type) |
ResultSetPtr | collectAllDeviceResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) |
ResultSetPtr | collectAllDeviceShardedTopResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const |
std::unordered_map< int, const Analyzer::BinOper * > | getInnerTabIdToJoinCond () const |
std::vector< std::unique_ptr < ExecutionKernel > > | createKernels (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus) |
template<typename THREAD_POOL > | |
void | launchKernels (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels) |
std::vector< size_t > | getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition) |
bool | skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type) |
FetchResult | fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator) |
FetchResult | fetchUnionChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator) |
std::pair< std::vector < std::vector< int64_t > >, std::vector< std::vector < uint64_t > > > | getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments) |
void | buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit) |
void | buildSelectedFragsMappingForUnion (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit) |
std::vector< size_t > | getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit) |
int32_t | executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int outer_table_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info) |
int32_t | executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info) |
ResultSetPtr | resultsUnion (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) |
std::vector< int64_t > | getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id) |
ResultSetPtr | reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const |
ResultSetPtr | reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const |
ResultSetPtr | reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const |
ResultSetPtr | executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache) |
std::vector< llvm::Value * > | inlineHoistedLiterals () |
std::tuple< CompilationResult, std::unique_ptr < QueryMemoryDescriptor > > | compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr) |
llvm::BasicBlock * | codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co) |
std::vector< JoinLoop > | buildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache) |
std::function< llvm::Value *(const std::vector < llvm::Value * > &, llvm::Value *)> | buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co) |
std::shared_ptr< HashJoin > | buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons) |
void | redeclareFilterFunction () |
llvm::Value * | addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx) |
void | codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo) |
bool | compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={}) |
void | createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos) |
void | insertErrorCodeChecker (llvm::Function *query_func, bool hoist_literals) |
void | preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos) |
JoinHashTableOrError | buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const HashType preferred_hash_type, ColumnCacheMap &column_cache) |
void | nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit) |
std::shared_ptr < CompilationContext > | optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &) |
std::shared_ptr < CompilationContext > | optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &) |
std::string | generatePTX (const std::string &) const |
void | initializeNVPTXBackend () const |
int64_t | deviceCycles (int milliseconds) const |
GroupColLLVMValue | groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, GroupByAndAggregate::DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared) |
llvm::Value * | castToFP (llvm::Value *val) |
llvm::Value * | castToIntPtrTyIn (llvm::Value *val, const size_t bit_width) |
std::tuple < RelAlgExecutionUnit, PlanState::DeletedColumnsMap > | addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co) |
std::pair< bool, int64_t > | skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx) |
std::pair< bool, int64_t > | skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx) |
AggregatedColRange | computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs) |
StringDictionaryGenerations | computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs) |
TableGenerations | computeTableGenerations (std::unordered_set< int > phys_table_ids) |
std::shared_ptr < CompilationContext > | getCodeFromCache (const CodeCacheKey &, const CodeCache &) |
std::vector< int8_t > | serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id) |
llvm::Value * | spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty) |
Static Private Member Functions | |
static size_t | align (const size_t off_in, const size_t alignment) |
Private Attributes | |
std::unique_ptr< CgenState > | cgen_state_ |
std::unique_ptr< PlanState > | plan_state_ |
std::shared_ptr < RowSetMemoryOwner > | row_set_mem_owner_ |
std::mutex | gpu_exec_mutex_ [max_gpu_count] |
std::mutex | str_dict_mutex_ |
std::unique_ptr < llvm::TargetMachine > | nvptx_target_machine_ |
CodeCache | cpu_code_cache_ |
CodeCache | gpu_code_cache_ |
const unsigned | block_size_x_ |
const unsigned | grid_size_x_ |
const size_t | max_gpu_slab_size_ |
const std::string | debug_dir_ |
const std::string | debug_file_ |
const ExecutorId | executor_id_ |
const Catalog_Namespace::Catalog * | catalog_ |
const TemporaryTables * | temporary_tables_ |
int64_t | kernel_queue_time_ms_ = 0 |
int64_t | compilation_queue_time_ms_ = 0 |
std::unique_ptr < WindowProjectNodeContext > | window_project_node_context_owned_ |
WindowFunctionContext * | active_window_function_ {nullptr} |
InputTableInfoCache | input_table_info_cache_ |
AggregatedColRange | agg_col_range_cache_ |
TableGenerations | table_generations_ |
Static Private Attributes | |
static const int | max_gpu_count {16} |
static std::mutex | gpu_active_modules_mutex_ |
static uint32_t | gpu_active_modules_device_mask_ {0x0} |
static void * | gpu_active_modules_ [max_gpu_count] |
static std::atomic< bool > | interrupted_ {false} |
static const size_t | baseline_threshold |
static const size_t | code_cache_size {1000} |
static mapd_shared_mutex | executor_session_mutex_ |
static QuerySessionId | current_query_session_ {""} |
static size_t | running_query_executor_id_ {0} |
static InterruptFlagMap | queries_interrupt_flag_ |
static QuerySessionMap | queries_session_map_ |
static std::map< int, std::shared_ptr< Executor > > | executors_ |
static std::atomic_flag | execute_spin_lock_ = ATOMIC_FLAG_INIT |
static mapd_shared_mutex | execute_mutex_ |
static mapd_shared_mutex | executors_cache_mutex_ |
static mapd_shared_mutex | recycler_mutex_ |
static std::unordered_map < std::string, size_t > | cardinality_cache_ |
Friends | |
class | BaselineJoinHashTable |
class | CodeGenerator |
class | ColumnFetcher |
class | ExecutionKernel |
class | HashJoin |
class | OverlapsJoinHashTable |
class | GroupByAndAggregate |
class | QueryCompilationDescriptor |
class | QueryMemoryDescriptor |
class | QueryMemoryInitializer |
class | QueryFragmentDescriptor |
class | QueryExecutionContext |
class | ResultSet |
class | InValuesBitmap |
class | LeafAggregator |
class | PerfectJoinHashTable |
class | QueryRewriter |
class | PendingExecutionClosure |
class | RelAlgExecutor |
class | TableOptimizer |
class | TableFunctionCompilationContext |
class | TableFunctionExecutionContext |
struct | TargetExprCodegenBuilder |
struct | TargetExprCodegen |
class | WindowProjectNodeContext |
using Executor::CachedCardinality = std::pair<bool, size_t> |
using Executor::ExecutorId = size_t |
|
private |
Executor::Executor | ( | const ExecutorId | id, |
const size_t | block_size_x, | ||
const size_t | grid_size_x, | ||
const size_t | max_gpu_slab_size, | ||
const std::string & | debug_dir, | ||
const std::string & | debug_file | ||
) |
Definition at line 137 of file Execute.cpp.
|
static |
Definition at line 385 of file NativeCodegen.cpp.
References LruCache< key_t, value_t, hash_t >::put().
Referenced by StubGenerator::generateStub().
|
private |
Definition at line 3247 of file Execute.cpp.
References catalog_(), CHECK, CHECK_EQ, CompilationOptions::filter_on_deleted_column, and TABLE.
|
private |
Definition at line 635 of file IRCodegen.cpp.
References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, and CgenState::scan_idx_to_hash_pos_.
void Executor::addToCardinalityCache | ( | const std::string & | cache_key, |
const size_t | cache_value | ||
) |
Definition at line 3910 of file Execute.cpp.
References g_use_estimator_result_cache, and VLOG.
bool Executor::addToQuerySessionList | ( | const QuerySessionId & | query_session, |
const std::string & | query_str, | ||
const std::chrono::time_point< std::chrono::system_clock > | submitted, | ||
const size_t | executor_id, | ||
const QuerySessionStatus::QueryStatus | query_status, | ||
mapd_unique_lock< mapd_shared_mutex > & | write_lock | ||
) |
Definition at line 3749 of file Execute.cpp.
References toString().
|
private |
Definition at line 124 of file WindowFunctionIR.cpp.
References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.
|
inline static private |
CurrentQueryStatus Executor::attachExecutorToQuerySession | ( | std::shared_ptr< const query_state::QueryState > & | query_state | ) |
Definition at line 3617 of file Execute.cpp.
References executor_id_().
unsigned Executor::blockSize | ( | ) | const |
Definition at line 3173 of file Execute.cpp.
References block_size_x_(), catalog_(), and CHECK.
|
private |
Definition at line 490 of file IRCodegen.cpp.
References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, Data_Namespace::CPU_LEVEL, CompilationOptions::device_type, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, INNER, IS_EQUIVALENCE, PlanState::join_info_, OneToOne, CodeGenerator::plan_state_, JoinCondition::quals, and JoinCondition::type.
|
private |
Definition at line 3118 of file Execute.cpp.
References g_enable_dynamic_watchdog, g_enable_overlaps_hashjoin, g_enable_runtime_query_interrupt, and HashJoin::getInstance().
|
private |
Definition at line 431 of file IRCodegen.cpp.
References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), CgenState::context_, CgenState::current_func_, CompilationOptions::filter_on_deleted_column, PlanState::getDeletedColForTable(), RelAlgExecutionUnit::input_descs, CgenState::ir_builder_, CgenState::llBool(), CgenState::llInt(), CodeGenerator::plan_state_, TABLE, and CodeGenerator::toBool().
|
private |
Definition at line 260 of file IRCodegen.cpp.
References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), INJECT_TIMER, CgenState::ir_builder_, RelAlgExecutionUnit::join_quals, LEFT, CgenState::llBool(), OneToOne, CgenState::outer_join_match_found_per_level_, Set, Singleton, JoinLoopDomain::slot_lookup_result, CodeGenerator::toBool(), JoinCondition::type, and JoinLoopDomain::values_buffer.
|
private |
Definition at line 2613 of file Execute.cpp.
References CHECK, CHECK_EQ, CHECK_LT, and RelAlgExecutionUnit::input_descs.
|
private |
Definition at line 2644 of file Execute.cpp.
References CHECK, CHECK_LT, and RelAlgExecutionUnit::input_descs.
|
private |
Definition at line 3195 of file Execute.cpp.
References AUTOMATIC_IR_METADATA, logger::FATAL, LOG, and to_string().
|
private |
Definition at line 3216 of file Execute.cpp.
References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, and get_int_type().
bool Executor::checkCurrentQuerySession | ( | const std::string & | candidate_query_session, |
mapd_shared_lock< mapd_shared_mutex > & | read_lock | ||
) |
Definition at line 3603 of file Execute.cpp.
bool Executor::checkIsQuerySessionEnrolled | ( | const QuerySessionId & | query_session, |
mapd_shared_lock< mapd_shared_mutex > & | read_lock | ||
) |
Definition at line 3889 of file Execute.cpp.
bool Executor::checkIsQuerySessionInterrupted | ( | const QuerySessionId & | query_session, |
mapd_shared_lock< mapd_shared_mutex > & | read_lock | ||
) |
Definition at line 3881 of file Execute.cpp.
void Executor::checkPendingQueryStatus | ( | const QuerySessionId & | query_session | ) |
Definition at line 3644 of file Execute.cpp.
References ERR_INTERRUPTED, and VLOG.
|
static |
Definition at line 178 of file Execute.cpp.
References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().
Referenced by DBHandler::clear_cpu_memory(), DBHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().
|
private |
Definition at line 373 of file Execute.cpp.
References input_table_info_cache_().
void Executor::clearQuerySessionStatus | ( | const QuerySessionId & | query_session, |
const std::chrono::time_point< std::chrono::system_clock > | submitted, | ||
bool | acquire_spin_lock | ||
) |
Definition at line 3670 of file Execute.cpp.
References executor_id_().
|
private |
Definition at line 326 of file WindowFunctionIR.cpp.
References AUTOMATIC_IR_METADATA, AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.
|
private |
Definition at line 653 of file IRCodegen.cpp.
References ExecutionOptions::allow_runtime_query_interrupt, AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, JoinLoop::codegen(), CgenState::context_, CgenState::current_func_, CompilationOptions::device_type, CgenState::ir_builder_, CgenState::llInt(), CgenState::needs_error_check_, CodeGenerator::posArg(), GroupByAndAggregate::query_infos_, and ExecutionOptions::with_dynamic_watchdog.
|
private |
Definition at line 2909 of file NativeCodegen.cpp.
|
private |
Definition at line 289 of file WindowFunctionIR.cpp.
References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.
|
private |
Definition at line 21 of file WindowFunctionIR.cpp.
References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, COUNT, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAST_VALUE, LEAD, LOG, MAX, MIN, NTILE, PERCENT_RANK, RANK, ROW_NUMBER, and SUM.
|
private |
Definition at line 140 of file WindowFunctionIR.cpp.
References AUTOMATIC_IR_METADATA, AVG, CHECK, WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().
|
private |
Definition at line 246 of file WindowFunctionIR.cpp.
References run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kFLOAT, and SUM.
|
private |
Definition at line 196 of file WindowFunctionIR.cpp.
References AUTOMATIC_IR_METADATA, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.
|
private |
Definition at line 167 of file WindowFunctionIR.cpp.
References AUTOMATIC_IR_METADATA, WindowProjectNodeContext::getActiveWindowFunctionContext(), CodeGenerator::posArg(), and CodeGenerator::toBool().
|
private |
Definition at line 1797 of file Execute.cpp.
References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), catalog_(), DEBUG_TIMER, SharedKernelContext::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, GroupByAndAggregate::shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and use_speculative_top_n().
|
private |
Definition at line 1912 of file Execute.cpp.
References catalog_(), CHECK, CHECK_EQ, CHECK_LE, SharedKernelContext::getFragmentResults(), SortInfo::limit, SortInfo::offset, SortInfo::order_entries, anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), run_benchmark_import::result, and RelAlgExecutionUnit::sort_info.
|
private |
Definition at line 2946 of file NativeCodegen.cpp.
|
private |
Definition at line 2447 of file NativeCodegen.cpp.
|
private |
Definition at line 3521 of file Execute.cpp.
References catalog_(), CHECK, getLeafColumnRange(), AggregatedColRange::setColRange(), and ExpressionRange::typeSupportsRange().
|
private |
Definition at line 3547 of file Execute.cpp.
References catalog_(), CHECK, kENCODING_DICT, and StringDictionaryGenerations::setGeneration().
|
private |
Definition at line 3568 of file Execute.cpp.
References TableGenerations::setGeneration().
|
inline |
|
private |
Definition at line 1801 of file NativeCodegen.cpp.
|
private |
Determines execution dispatch mode and required fragments for a given query step, then creates kernels to execute the query and returns them for launch.
Definition at line 1988 of file Execute.cpp.
References ExecutionOptions::allow_multifrag, catalog_(), CHECK, CHECK_GE, CHECK_GT, anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), g_inner_join_fragment_skipping, QueryCompilationDescriptor::getDeviceType(), QueryMemoryDescriptor::getEntryCount(), SharedKernelContext::getFragOffsets(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, ExecutionOptions::gpu_input_mem_limit_percent, Data_Namespace::GPU_LEVEL, anonymous_namespace{Execute.cpp}::has_lazy_fetched_columns(), logger::INFO, RelAlgExecutionUnit::input_descs, KernelPerFragment, LOG, MultifragmentKernel, ExecutionOptions::outer_fragment_indices, Projection, query_mem_desc, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::toString(), RelAlgExecutionUnit::use_bump_allocator, VLOG, and ExecutionOptions::with_watchdog.
|
private |
Definition at line 641 of file Execute.cpp.
References catalog_(), CHECK, and GPU.
|
private |
Definition at line 651 of file Execute.cpp.
References CPU, GPU, and Data_Namespace::GPU_LEVEL.
|
private |
Definition at line 3187 of file Execute.cpp.
References catalog_() and CHECK.
void Executor::enableRuntimeQueryInterrupt(
    const double runtime_query_check_freq,
    const unsigned pending_query_check_freq) const
Definition at line 3895 of file Execute.cpp.
References g_enable_runtime_query_interrupt, g_pending_query_interrupt_freq, and g_running_query_interrupt_freq.
void Executor::enrollQuerySession(
    const QuerySessionId& query_session,
    const std::string& query_str,
    const std::chrono::time_point<std::chrono::system_clock> submitted,
    const size_t executor_id,
    const QuerySessionStatus::QueryStatus query_session_status)
Definition at line 3730 of file Execute.cpp.
|
private |
Definition at line 1661 of file Execute.cpp.
References QueryCompilationDescriptor::getIR().
|
private |
Definition at line 2897 of file Execute.cpp.
References CHECK, CHECK_NE, anonymous_namespace{Execute.cpp}::check_rows_less_than_needed(), CPU, DEBUG_TIMER, ERR_DIV_BY_ZERO, ERR_GEOS, ERR_INTERRUPTED, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES, logger::FATAL, g_enable_dynamic_watchdog, g_enable_runtime_query_interrupt, CompilationResult::generated_code, QueryMemoryDescriptor::getEntryCount(), QueryExecutionContext::getRowSet(), GpuSharedMemoryContext::getSharedMemorySize(), GPU, CompilationResult::gpu_smem_context, RelAlgExecutionUnit::groupby_exprs, INJECT_TIMER, RelAlgExecutionUnit::input_col_descs, RelAlgExecutionUnit::input_descs, QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), CompilationResult::literal_values, LOG, shared::printContainer(), QueryExecutionContext::query_buffers_, QueryExecutionContext::query_mem_desc_, RenderInfo::render_allocator_map_ptr, RelAlgExecutionUnit::scan_limit, QueryMemoryDescriptor::setEntryCount(), RelAlgExecutionUnit::union_all, RenderInfo::useCudaBuffers(), and VLOG.
|
private |
Definition at line 2697 of file Execute.cpp.
References CHECK, CHECK_EQ, CPU, DEBUG_TIMER, ERR_DIV_BY_ZERO, ERR_GEOS, ERR_INTERRUPTED, ERR_OUT_OF_TIME, ERR_OVERFLOW_OR_UNDERFLOW, ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES, RelAlgExecutionUnit::estimator, QueryExecutionContext::estimator_result_set_, logger::FATAL, g_bigint_count, g_enable_dynamic_watchdog, g_enable_runtime_query_interrupt, CompilationResult::generated_code, get_target_info(), QueryExecutionContext::getAggInitValForIndex(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), GpuSharedMemoryContext::getSharedMemorySize(), GPU, CompilationResult::gpu_smem_context, INJECT_TIMER, is_distinct_target(), RenderInfo::isPotentialInSituRender(), GpuSharedMemoryContext::isSharedMemoryUsed(), kAPPROX_COUNT_DISTINCT, kAVG, kCOUNT, kSAMPLE, QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), CompilationResult::literal_values, LOG, QueryExecutionContext::query_buffers_, QueryExecutionContext::query_mem_desc_, reduceResults(), RenderInfo::render_allocator_map_ptr, takes_float_argument(), and RenderInfo::useCudaBuffers().
|
private |
Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps.
Definition at line 1640 of file Execute.cpp.
References TableFunctionCompilationContext::compile(), CompilationOptions::device_type, TableFunctionExecutionContext::execute(), and INJECT_TIMER.
void Executor::executeUpdate(
    const RelAlgExecutionUnit& ra_exe_unit,
    const std::vector<InputTableInfo>& table_infos,
    const CompilationOptions& co,
    const ExecutionOptions& eo,
    const Catalog_Namespace::Catalog& cat,
    std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
    const UpdateLogForFragment::Callback& cb,
    const bool is_agg)
Definition at line 62 of file ExecuteUpdate.cpp.
References CHECK, CHECK_EQ, CHECK_GT, CPU, executor_id_(), FragmentsPerTable::fragment_ids, SharedKernelContext::getFragmentResults(), KernelPerFragment, query_mem_desc, ExecutionKernel::run(), timer_start(), timer_stop(), and VLOG.
ResultSetPtr Executor::executeWorkUnit(
    size_t& max_groups_buffer_entry_guess,
    const bool is_agg,
    const std::vector<InputTableInfo>& query_infos,
    const RelAlgExecutionUnit& ra_exe_unit_in,
    const CompilationOptions& co,
    const ExecutionOptions& options,
    const Catalog_Namespace::Catalog& cat,
    RenderInfo* render_info,
    const bool has_cardinality_estimation,
    ColumnCacheMap& column_cache)
Definition at line 1345 of file Execute.cpp.
References executor_id_(), ExecutionOptions::just_validate, CompilationRetryNewScanLimit::new_scan_limit_, anonymous_namespace{Execute.cpp}::replace_scan_limit(), run_benchmark_import::result, and VLOG.
|
private |
Definition at line 1412 of file Execute.cpp.
References CompilationOptions::allow_lazy_fetch, ExecutionOptions::allow_runtime_query_interrupt, catalog_(), CHECK, anonymous_namespace{Execute.cpp}::compute_buffer_entry_guess(), CPU, cpu_threads(), CompilationOptions::device_type, ExecutionOptions::executor_type, CompilationOptions::explain_type, CompilationOptions::filter_on_deleted_column, g_use_tbb_pool, get_available_gpus(), get_context_count(), get_min_byte_width(), QueryExecutionError::getErrorCode(), CompilationOptions::hoist_literals, INJECT_TIMER, ExecutionOptions::just_explain, ExecutionOptions::just_validate, MAX_BYTE_WIDTH_SUPPORTED, Native, CompilationOptions::opt_level, Projection, CompilationOptions::register_intel_jit_listener, timer_start(), timer_stop(), VLOG, CompilationOptions::with_dynamic_watchdog, and ExecutionOptions::with_dynamic_watchdog.