OmniSciDB  085a039ca4
Executor Class Reference

#include <Execute.h>


Classes

class  CgenStateManager
 
struct  ExecutorMutexHolder
 
class  FetchCacheAnchor
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

enum  ExtModuleKinds {
  ExtModuleKinds::template_module, ExtModuleKinds::udf_cpu_module, ExtModuleKinds::udf_gpu_module, ExtModuleKinds::rt_udf_cpu_module,
  ExtModuleKinds::rt_udf_gpu_module, ExtModuleKinds::rt_geos_module, ExtModuleKinds::rt_libdevice_module
}
 
using ExecutorId = size_t
 
using CachedCardinality = std::pair< bool, size_t >
 

Public Member Functions

 Executor (const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
 
void clearCaches (bool runtime_only=false)
 
std::string dumpCache () const
 
void reset (bool discard_runtime_modules_only=false)
 
const std::unique_ptr< llvm::Module > & get_rt_module () const
 
const std::unique_ptr< llvm::Module > & get_udf_module (bool is_gpu=false) const
 
const std::unique_ptr< llvm::Module > & get_rt_udf_module (bool is_gpu=false) const
 
const std::unique_ptr< llvm::Module > & get_geos_module () const
 
const std::unique_ptr< llvm::Module > & get_libdevice_module () const
 
bool has_rt_module () const
 
bool has_udf_module (bool is_gpu=false) const
 
bool has_rt_udf_module (bool is_gpu=false) const
 
bool has_geos_module () const
 
bool has_libdevice_module () const
 
const TemporaryTables * getTemporaryTables ()
 
StringDictionaryProxy * getStringDictionaryProxy (const int dict_id, const bool with_generation) const
 
StringDictionaryProxy * getStringDictionaryProxy (const int dictId, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const StringDictionaryProxy::IdMap * getStringProxyTranslationMap (const int source_dict_id, const int dest_dict_id, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const StringDictionaryProxy::IdMap * getJoinIntersectionStringProxyTranslationMap (const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
const Catalog_Namespace::Catalog * getCatalog () const
 
void setCatalog (const Catalog_Namespace::Catalog *catalog)
 
Data_Namespace::DataMgr * getDataMgr () const
 
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner () const
 
const TemporaryTables * getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const int table_id) const
 
const TableGeneration & getTableGeneration (const int table_id) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow (const std::set< int > &table_ids_to_fetch) const
 
bool hasLazyFetchColumns (const std::vector< Analyzer::Expr * > &target_exprs) const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void interrupt (const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
 
void resetInterrupt ()
 
void enableRuntimeQueryInterrupt (const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
 
int8_t warpSize () const
 
unsigned gridSize () const
 
unsigned numBlocksPerMP () const
 
unsigned blockSize () const
 
size_t maxGpuSlabSize () const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
TableUpdateMetadata executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
 
void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
 
int deviceCount (const ExecutorDeviceType) const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
 
void setColRangeCache (const AggregatedColRange &aggregated_col_range)
 
ExecutorId getExecutorId () const
 
QuerySessionId & getCurrentQuerySession (mapd_shared_lock< mapd_shared_mutex > &read_lock)
 
QuerySessionStatus::QueryStatus getQuerySessionStatus (const QuerySessionId &candidate_query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
 
bool checkCurrentQuerySession (const std::string &candidate_query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
 
void invalidateRunningQuerySession (mapd_unique_lock< mapd_shared_mutex > &write_lock)
 
bool addToQuerySessionList (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock)
 
bool removeFromQuerySessionList (const QuerySessionId &query_session, const std::string &submitted_time_str, mapd_unique_lock< mapd_shared_mutex > &write_lock)
 
void setQuerySessionAsInterrupted (const QuerySessionId &query_session, mapd_unique_lock< mapd_shared_mutex > &write_lock)
 
bool checkIsQuerySessionInterrupted (const std::string &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
 
bool checkIsQuerySessionEnrolled (const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
 
bool updateQuerySessionStatusWithLock (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock)
 
bool updateQuerySessionExecutorAssignment (const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, mapd_unique_lock< mapd_shared_mutex > &write_lock)
 
std::vector< QuerySessionStatus > getQuerySessionInfo (const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
 
mapd_shared_mutex & getSessionLock ()
 
CurrentQueryStatus attachExecutorToQuerySession (const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
 
void checkPendingQueryStatus (const QuerySessionId &query_session)
 
void clearQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str)
 
void updateQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
 
void enrollQuerySession (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
 
size_t getNumCurentSessionsEnrolled () const
 
const std::vector< size_t > getExecutorIdsRunningQuery (const QuerySessionId &interrupt_session) const
 
bool checkNonKernelTimeInterrupted () const
 
void registerExtractedQueryPlanDag (const QueryPlanDAG &query_plan_dag)
 
const QueryPlanDAG getLatestQueryPlanDagExtracted () const
 
void addToCardinalityCache (const std::string &cache_key, const size_t cache_value)
 
CachedCardinality getCachedCardinality (const std::string &cache_key)
 
mapd_shared_mutex & getDataRecyclerLock ()
 
QueryPlanDagCache & getQueryPlanDagCache ()
 
ResultSetRecyclerHolder & getRecultSetRecyclerHolder ()
 
CgenState * getCgenStatePtr () const
 
PlanState * getPlanStatePtr () const
 
llvm::LLVMContext & getContext ()
 
void update_extension_modules (bool update_runtime_modules_only=false)
 

Static Public Member Functions

static void clearExternalCaches (bool for_update, const TableDescriptor *td, const int current_db_id)
 
template<typename F >
static void registerExtensionFunctions (F register_extension_functions)
 
static std::shared_ptr< Executor > getExecutor (const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static size_t getArenaBlockSize ()
 
static void addUdfIrToModule (const std::string &udf_ir_filename, const bool is_cuda_ir)
 
static void initialize_extension_module_sources ()
 
static void registerActiveModule (void *module, const int device_id)
 
static void unregisterActiveModule (const int device_id)
 
static std::pair< int64_t, int32_t > reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void update_after_registration (bool update_runtime_modules_only=false)
 

Public Attributes

std::mutex compilation_mutex_
 
const logger::ThreadId thread_id_
 

Static Public Attributes

static const ExecutorId UNITARY_EXECUTOR_ID = 0
 
static const ExecutorId INVALID_EXECUTOR_ID = SIZE_MAX
 
static std::map< ExtModuleKinds, std::string > extension_module_sources
 
static const size_t high_scan_limit
 
static CodeCacheAccessor< CpuCompilationContext > s_stubs_accessor
 
static CodeCacheAccessor< CpuCompilationContext > s_code_accessor
 
static CodeCacheAccessor< CpuCompilationContext > cpu_code_accessor
 
static CodeCacheAccessor< GpuCompilationContext > gpu_code_accessor
 
static CodeCacheAccessor< CompilationContext > tf_code_accessor
 
static const int32_t ERR_DIV_BY_ZERO {1}
 
static const int32_t ERR_OUT_OF_GPU_MEM {2}
 
static const int32_t ERR_OUT_OF_SLOTS {3}
 
static const int32_t ERR_UNSUPPORTED_SELF_JOIN {4}
 
static const int32_t ERR_OUT_OF_RENDER_MEM {5}
 
static const int32_t ERR_OUT_OF_CPU_MEM {6}
 
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW {7}
 
static const int32_t ERR_OUT_OF_TIME {9}
 
static const int32_t ERR_INTERRUPTED {10}
 
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11}
 
static const int32_t ERR_TOO_MANY_LITERALS {12}
 
static const int32_t ERR_STRING_CONST_IN_RESULTSET {13}
 
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14}
 
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES {15}
 
static const int32_t ERR_GEOS {16}
 
static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT {17}
 
static std::mutex register_runtime_extension_functions_mutex_
 
static std::mutex kernel_mutex_
 

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenWindowFunctionAggregate (const CompilationOptions &co)
 
llvm::BasicBlock * codegenWindowResetStateControlFlow ()
 
void codegenWindowFunctionStateInit (llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
void codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
 
llvm::Value * codegenAggregateWindowState ()
 
llvm::Value * aggregateWindowStatePtr ()
 
CudaMgr_Namespace::CudaMgr * cudaMgr () const
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
bool needLinearizeAllFragments (const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
 Compiles and dispatches a work unit per fragment, processing results with the per-fragment callback. Currently used for computing metrics over fragments (metadata).
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
 Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps.
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
 
std::unordered_map< int, const Analyzer::BinOper * > getInnerTabIdToJoinCond () const
 
std::vector< std::unique_ptr< ExecutionKernel > > createKernels (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
 
void launchKernels (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
FetchResult fetchUnionChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
void buildSelectedFragsMappingForUnion (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int outer_table_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const int64_t rows_to_process=-1)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const int64_t rows_to_process=-1)
 
ResultSetPtr resultsUnion (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< int8_t * > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
void AutoTrackBuffersInRuntimeIR ()
 
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoop > buildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const int inner_table_id, const CompilationOptions &co)
 
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< HashJoin > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
void redeclareFilterFunction ()
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
 
void insertErrorCodeChecker (llvm::Function *query_func, bool hoist_literals, bool allow_runtime_query_interrupt)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
 
std::shared_ptr< CompilationContext > optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::shared_ptr< CompilationContext > optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
bool isFragmentFullyDeleted (const int table_id, const Fragmenter_Namespace::FragmentInfo &fragment)
 
FragmentSkipStatus canSkipFragmentForFpQual (const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (std::unordered_set< int > phys_table_ids)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
const std::unique_ptr< llvm::Module > & get_extension_module (ExtModuleKinds kind) const
 
bool has_extension_module (ExtModuleKinds kind) const
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 
ExecutorMutexHolder acquireExecuteMutex ()
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

const ExecutorId executor_id_
 
std::unique_ptr< llvm::LLVMContext > context_
 
std::unique_ptr< CgenState > cgen_state_
 
std::map< ExtModuleKinds, std::unique_ptr< llvm::Module > > extension_modules_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::atomic< bool > interrupted_
 
std::mutex str_dict_mutex_
 
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
 
const unsigned block_size_x_
 
const unsigned grid_size_x_
 
const size_t max_gpu_slab_size_
 
const std::string debug_dir_
 
const std::string debug_file_
 
const Catalog_Namespace::Catalog * catalog_
 
Data_Namespace::DataMgr * data_mgr_
 
const TemporaryTables * temporary_tables_
 
TableIdToNodeMap table_id_to_node_map_
 
int64_t kernel_queue_time_ms_ = 0
 
int64_t compilation_queue_time_ms_ = 0
 
std::unique_ptr< WindowProjectNodeContext > window_project_node_context_owned_
 
WindowFunctionContext * active_window_function_ {nullptr}
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
TableGenerations table_generations_
 
QuerySessionId current_query_session_
 

Static Private Attributes

static const int max_gpu_count {16}
 
static std::mutex gpu_active_modules_mutex_
 
static uint32_t gpu_active_modules_device_mask_ {0x0}
 
static void * gpu_active_modules_ [max_gpu_count]
 
static const size_t baseline_threshold
 
static const size_t code_cache_size {1000}
 
static mapd_shared_mutex executor_session_mutex_
 
static InterruptFlagMap queries_interrupt_flag_
 
static QuerySessionMap queries_session_map_
 
static std::map< int, std::shared_ptr< Executor > > executors_
 
static mapd_shared_mutex execute_mutex_
 
static mapd_shared_mutex executors_cache_mutex_
 
static QueryPlanDagCache query_plan_dag_cache_
 
static mapd_shared_mutex recycler_mutex_
 
static std::unordered_map< std::string, size_t > cardinality_cache_
 
static ResultSetRecyclerHolder resultset_recycler_holder_
 
static QueryPlanDAG latest_query_plan_extracted_ {EMPTY_QUERY_PLAN}
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
struct DiamondCodegen
 
class ExecutionKernel
 
class KernelSubtask
 
class HashJoin
 
class OverlapsJoinHashTable
 
class RangeJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class StringDictionaryTranslationMgr
 
class LeafAggregator
 
class PerfectJoinHashTable
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
class WindowProjectNodeContext
 

Detailed Description

Definition at line 368 of file Execute.h.

Member Typedef Documentation

using Executor::CachedCardinality = std::pair<bool, size_t>

Definition at line 1163 of file Execute.h.

using Executor::ExecutorId = size_t

Definition at line 375 of file Execute.h.

using Executor::PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>

Definition at line 697 of file Execute.h.

Member Enumeration Documentation

enum Executor::ExtModuleKinds

Enumerator
template_module 
udf_cpu_module 
udf_gpu_module 
rt_udf_cpu_module 
rt_udf_gpu_module 
rt_geos_module 
rt_libdevice_module 

Definition at line 469 of file Execute.h.

469  {
470  template_module, // RuntimeFunctions.bc
471  udf_cpu_module, // Load-time UDFs for CPU execution
472  udf_gpu_module, // Load-time UDFs for GPU execution
473  rt_udf_cpu_module, // Run-time UDF/UDTFs for CPU execution
474  rt_udf_gpu_module, // Run-time UDF/UDTFs for GPU execution
475  rt_geos_module, // geos functions
476  rt_libdevice_module // math library functions for GPU execution
477  };
std::unique_ptr< llvm::Module > udf_gpu_module
std::unique_ptr< llvm::Module > udf_cpu_module
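
These module kinds are queried through the accessors listed above (has_udf_module(), get_udf_module(), and the related has_/get_ pairs). A minimal sketch of selecting the load-time UDF module for the target device; the Executor reference `ex` and the `co.device_type` in scope are illustrative assumptions, not taken from this page:

  // Sketch only: pick the CPU or GPU load-time UDF module before linking.
  const bool is_gpu = (co.device_type == ExecutorDeviceType::GPU);
  if (ex.has_udf_module(is_gpu)) {
    const std::unique_ptr<llvm::Module>& udf_module = ex.get_udf_module(is_gpu);
    // ... link *udf_module into the query module being generated ...
  }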

Constructor & Destructor Documentation

Executor::Executor ( const ExecutorId  id,
Data_Namespace::DataMgr data_mgr,
const size_t  block_size_x,
const size_t  grid_size_x,
const size_t  max_gpu_slab_size,
const std::string &  debug_dir,
const std::string &  debug_file 
)

Definition at line 260 of file Execute.cpp.

267  : executor_id_(executor_id)
268  , context_(new llvm::LLVMContext())
269  , cgen_state_(new CgenState({}, false, this))
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
const ExecutorId executor_id_
Definition: Execute.h:1196
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1197

Member Function Documentation

ExecutorMutexHolder Executor::acquireExecuteMutex ( )
inlineprivate

Definition at line 1320 of file Execute.h.

References execute_mutex_, executor_id_, Executor::ExecutorMutexHolder::shared_lock, Executor::ExecutorMutexHolder::unique_lock, and UNITARY_EXECUTOR_ID.

1320  {
1321  ExecutorMutexHolder ret;
1322  if (executor_id_ == Executor::UNITARY_EXECUTOR_ID) {
1323  // Only one unitary executor can run at a time
1324  ret.unique_lock = mapd_unique_lock<mapd_shared_mutex>(execute_mutex_);
1325  } else {
1326  ret.shared_lock = mapd_shared_lock<mapd_shared_mutex>(execute_mutex_);
1327  }
1328  return ret;
1329  }
static mapd_shared_mutex execute_mutex_
Definition: Execute.h:1314
const ExecutorId executor_id_
Definition: Execute.h:1196
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:376
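
ExecutorMutexHolder acts as an RAII guard: the unitary executor takes execute_mutex_ exclusively, while every other executor takes it shared, so ordinary executors may run concurrently. A hedged sketch of the implied call-site pattern (illustrative, not quoted from the source):

  {
    auto mutex_holder = acquireExecuteMutex();  // unique lock for the unitary
                                                // executor, shared otherwise
    // ... execute the work unit while the holder is alive ...
  }  // whichever lock was taken is released here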
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > Executor::addDeletedColumn ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3954 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::add_deleted_col_to_map(), catalog_, CHECK, CompilationOptions::filter_on_deleted_column, Catalog_Namespace::Catalog::getDeletedColumnIfRowsDeleted(), Catalog_Namespace::Catalog::getMetadataForTable(), and TABLE.

Referenced by executeWorkUnitImpl(), and executeWorkUnitPerFragment().

3956  {
3957  if (!co.filter_on_deleted_column) {
3958  return std::make_tuple(ra_exe_unit, PlanState::DeletedColumnsMap{});
3959  }
3960  auto ra_exe_unit_with_deleted = ra_exe_unit;
3961  PlanState::DeletedColumnsMap deleted_cols_map;
3962  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
3963  if (input_table.getSourceType() != InputSourceType::TABLE) {
3964  continue;
3965  }
3966  const auto td = catalog_->getMetadataForTable(input_table.getTableId());
3967  CHECK(td);
3968  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
3969  if (!deleted_cd) {
3970  continue;
3971  }
3972  CHECK(deleted_cd->columnType.is_boolean());
3973  // check deleted column is not already present
3974  bool found = false;
3975  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
3976  if (input_col.get()->getColId() == deleted_cd->columnId &&
3977  input_col.get()->getScanDesc().getTableId() == deleted_cd->tableId &&
3978  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
3979  found = true;
3980  add_deleted_col_to_map(deleted_cols_map, deleted_cd);
3981  break;
3982  }
3983  }
3984  if (!found) {
3985  // add deleted column
3986  ra_exe_unit_with_deleted.input_col_descs.emplace_back(new InputColDescriptor(
3987  deleted_cd->columnId, deleted_cd->tableId, input_table.getNestLevel()));
3988  add_deleted_col_to_map(deleted_cols_map, deleted_cd);
3989  }
3990  }
3991  return std::make_tuple(ra_exe_unit_with_deleted, deleted_cols_map);
3992 }
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1286
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:3336
std::unordered_map< TableId, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
#define CHECK(condition)
Definition: Logger.h:223
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
void add_deleted_col_to_map(PlanState::DeletedColumnsMap &deleted_cols_map, const ColumnDescriptor *deleted_cd)
Definition: Execute.cpp:3942


llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 1094 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

1095  {
1096  AUTOMATIC_IR_METADATA(cgen_state_.get());
1097  // Iterators are added for loop-outer joins when the head of the loop is generated,
1098  // then once again when the body is generated. Allow this instead of special handling
1099  // of call sites.
1100  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
1101  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
1102  return it->second;
1103  }
1104  CHECK(!prev_iters.empty());
1105  llvm::Value* matching_row_index = prev_iters.back();
1106  const auto it_ok =
1107  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
1108  CHECK(it_ok.second);
1109  return matching_row_index;
1110 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:223
void Executor::addToCardinalityCache ( const std::string &  cache_key,
const size_t  cache_value 
)

Definition at line 4747 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, recycler_mutex_, and VLOG.

4748  {
4749  if (g_use_estimator_result_cache) {
4750  mapd_unique_lock<mapd_shared_mutex> lock(recycler_mutex_);
4751  cardinality_cache_[cache_key] = cache_value;
4752  VLOG(1) << "Put estimated cardinality to the cache";
4753  }
4754 }
static std::unordered_map< std::string, size_t > cardinality_cache_
Definition: Execute.h:1335
bool g_use_estimator_result_cache
Definition: Execute.cpp:127
static mapd_shared_mutex recycler_mutex_
Definition: Execute.h:1334
#define VLOG(n)
Definition: Logger.h:317
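
This setter pairs with getCachedCardinality(), whose CachedCardinality result (std::pair<bool, size_t>, documented above) flags a hit in its first element. A sketch of the intended round trip; run_estimator() is a hypothetical stand-in for the real estimator path:

  auto cached = executor->getCachedCardinality(cache_key);
  size_t cardinality = 0;
  if (cached.first) {
    cardinality = cached.second;    // cache hit
  } else {
    cardinality = run_estimator();  // hypothetical estimator call
    executor->addToCardinalityCache(cache_key, cardinality);
  }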
bool Executor::addToQuerySessionList ( const QuerySessionId query_session,
const std::string &  query_str,
const std::string &  submitted,
const size_t  executor_id,
const QuerySessionStatus::QueryStatus  query_status,
mapd_unique_lock< mapd_shared_mutex > &  write_lock 
)

Definition at line 4579 of file Execute.cpp.

References queries_interrupt_flag_, and queries_session_map_.

Referenced by enrollQuerySession().

4584  {
4585  // an internal API that enrolls the query session into the Executor's session map
4586  if (queries_session_map_.count(query_session)) {
4587  if (queries_session_map_.at(query_session).count(submitted_time_str)) {
4588  queries_session_map_.at(query_session).erase(submitted_time_str);
4589  queries_session_map_.at(query_session)
4590  .emplace(submitted_time_str,
4591  QuerySessionStatus(query_session,
4592  executor_id,
4593  query_str,
4594  submitted_time_str,
4595  query_status));
4596  } else {
4597  queries_session_map_.at(query_session)
4598  .emplace(submitted_time_str,
4599  QuerySessionStatus(query_session,
4600  executor_id,
4601  query_str,
4602  submitted_time_str,
4603  query_status));
4604  }
4605  } else {
4606  std::map<std::string, QuerySessionStatus> executor_per_query_map;
4607  executor_per_query_map.emplace(
4608  submitted_time_str,
4609  QuerySessionStatus(
4610  query_session, executor_id, query_str, submitted_time_str, query_status));
4611  queries_session_map_.emplace(query_session, executor_per_query_map);
4612  }
4613  return queries_interrupt_flag_.emplace(query_session, false).second;
4614 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1309
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1307
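
As the comment in the body notes, this is an internal API; sessions normally enter through the public enrollQuerySession(), which takes the session write lock and funnels into this function. A hedged sketch of that entry point, with illustrative argument values (the QueryStatus chosen here is only an example):

  executor->enrollQuerySession(query_session_id,
                               query_str,
                               submitted_time_str,
                               executor->getExecutorId(),
                               QuerySessionStatus::QueryStatus::PENDING_EXECUTOR);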


void Executor::addTransientStringLiterals ( const RelAlgExecutionUnit ra_exe_unit,
const std::shared_ptr< RowSetMemoryOwner > &  row_set_mem_owner 
)

Definition at line 2143 of file Execute.cpp.

References CHECK, getStringDictionaryProxy(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, kSINGLE_VALUE, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_union, and ScalarExprVisitor< T >::visit().

2145  {
2146  TransientDictIdVisitor dict_id_visitor;
2147 
2148  auto visit_expr =
2149  [this, &dict_id_visitor, &row_set_mem_owner](const Analyzer::Expr* expr) {
2150  if (!expr) {
2151  return;
2152  }
2153  const auto dict_id = dict_id_visitor.visit(expr);
2154  if (dict_id >= 0) {
2155  auto sdp = getStringDictionaryProxy(dict_id, row_set_mem_owner, true);
2156  CHECK(sdp);
2157  TransientStringLiteralsVisitor visitor(sdp, this);
2158  visitor.visit(expr);
2159  }
2160  };
2161 
2162  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2163  visit_expr(group_expr.get());
2164  }
2165 
2166  for (const auto& group_expr : ra_exe_unit.quals) {
2167  visit_expr(group_expr.get());
2168  }
2169 
2170  for (const auto& group_expr : ra_exe_unit.simple_quals) {
2171  visit_expr(group_expr.get());
2172  }
2173 
2174  const auto visit_target_expr = [&](const Analyzer::Expr* target_expr) {
2175  const auto& target_type = target_expr->get_type_info();
2176  if (!target_type.is_string() || target_type.get_compression() == kENCODING_DICT) {
2177  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2178  if (agg_expr) {
2179  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
2180  agg_expr->get_aggtype() == kSAMPLE) {
2181  visit_expr(agg_expr->get_arg());
2182  }
2183  } else {
2184  visit_expr(target_expr);
2185  }
2186  }
2187  };
2188  const auto& target_exprs = ra_exe_unit.target_exprs;
2189  std::for_each(target_exprs.begin(), target_exprs.end(), visit_target_expr);
2190  const auto& target_exprs_union = ra_exe_unit.target_exprs_union;
2191  std::for_each(target_exprs_union.begin(), target_exprs_union.end(), visit_target_expr);
2192 }
std::vector< Analyzer::Expr * > target_exprs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
T visit(const Analyzer::Expr *expr) const
std::vector< Analyzer::Expr * > target_exprs_union
StringDictionaryProxy * getStringDictionaryProxy(const int dict_id, const bool with_generation) const
Definition: Execute.h:529
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:223
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
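
The transient entries make string literals comparable against dictionary-encoded columns at runtime: each literal must resolve to an id in the column's StringDictionaryProxy before codegen. A sketch of the situation this handles (the SQL is illustrative):

  // For a dictionary-encoded column c, a qual such as
  //   SELECT COUNT(*) FROM t WHERE c = 'foo';
  // needs 'foo' present in c's proxy, so the visitor above inserts it:
  executor->addTransientStringLiterals(ra_exe_unit, row_set_mem_owner);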


void Executor::addUdfIrToModule ( const std::string &  udf_ir_filename,
const bool  is_cuda_ir 
)
static

Definition at line 1846 of file NativeCodegen.cpp.

Referenced by DBHandler::initialize().

1847  {
1848  Executor::extension_module_sources[is_cuda_ir
1849  ? ExtModuleKinds::udf_gpu_module
1850  : ExtModuleKinds::udf_cpu_module] =
1851  udf_ir_filename;
1852 }
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:479
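
Note that this only records the IR filename in extension_module_sources; the module itself is read when the extension modules are (re)initialized. A hypothetical registration at server startup (paths are placeholders; DBHandler::initialize() is the documented caller):

  Executor::addUdfIrToModule("my_udfs_cpu.bc", /*is_cuda_ir=*/false);
  Executor::addUdfIrToModule("my_udfs_gpu.bc", /*is_cuda_ir=*/true);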


llvm::Value * Executor::aggregateWindowStatePtr ( )
private

Definition at line 124 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.

124  {
125  AUTOMATIC_IR_METADATA(cgen_state_.get());
126  const auto window_func_context =
127  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
128  const auto window_func = window_func_context->getWindowFunction();
129  const auto arg_ti = get_adjusted_window_type_info(window_func);
130  llvm::Type* aggregate_state_type =
131  arg_ti.get_type() == kFLOAT
132  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
133  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
134  const auto aggregate_state_i64 = cgen_state_->llInt(
135  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
136  return cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_i64,
137  aggregate_state_type);
138 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1197
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)


static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inlinestaticprivate

Definition at line 1188 of file Execute.h.

Referenced by serializeLiterals().

1188  {
1189  size_t off = off_in;
1190  if (off % alignment != 0) {
1191  off += (alignment - off % alignment);
1192  }
1193  return off;
1194  }
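
A worked example (not from the source): align(13, 8) returns 16, since 13 % 8 == 5 and 13 + (8 - 5) == 16, while align(16, 8) returns 16 unchanged. serializeLiterals() relies on this to place each literal at an offset that is a multiple of its alignment.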


CurrentQueryStatus Executor::attachExecutorToQuerySession ( const QuerySessionId query_session_id,
const std::string &  query_str,
const std::string &  query_submitted_time 
)

Definition at line 4477 of file Execute.cpp.

References executor_id_, executor_session_mutex_, updateQuerySessionExecutorAssignment(), and updateQuerySessionStatusWithLock().

4480  {
4481  if (!query_session_id.empty()) {
4482  // if session is valid, do update 1) the exact executor id and 2) query status
4483  mapd_unique_lock<mapd_shared_mutex> write_lock(executor_session_mutex_);
4484  updateQuerySessionExecutorAssignment(
4485  query_session_id, query_submitted_time, executor_id_, write_lock);
4486  updateQuerySessionStatusWithLock(query_session_id,
4487  query_submitted_time,
4488  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR,
4489  write_lock);
4490  }
4491  return {query_session_id, query_str};
4492 }
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:4616
static mapd_shared_mutex executor_session_mutex_
Definition: Execute.h:1303
const ExecutorId executor_id_
Definition: Execute.h:1196
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:4642
mapd_unique_lock< mapd_shared_mutex > write_lock


void Executor::AutoTrackBuffersInRuntimeIR ( )
private

Definition at line 2188 of file NativeCodegen.cpp.

2188  {
2189  llvm::Module* M = cgen_state_->module_;
2190  if (M->getFunction("allocate_varlen_buffer") == nullptr)
2191  return;
2192 
2193  // read metadata
2194  bool should_track = false;
2195  auto* flag = M->getModuleFlag("manage_memory_buffer");
2196  if (auto* cnt = llvm::mdconst::extract_or_null<llvm::ConstantInt>(flag)) {
2197  if (cnt->getZExtValue() == 1) {
2198  should_track = true;
2199  }
2200  }
2201 
2202  if (!should_track) {
2203  // metadata is not present
2204  return;
2205  }
2206 
2207  LOG(INFO) << "Found 'manage_memory_buffer' metadata.";
2208  llvm::SmallVector<llvm::CallInst*, 4> calls_to_analyze;
2209 
2210  for (llvm::Function& F : *M) {
2211  for (llvm::BasicBlock& BB : F) {
2212  for (llvm::Instruction& I : BB) {
2213  if (llvm::CallInst* CI = llvm::dyn_cast<llvm::CallInst>(&I)) {
2214  // Keep track of calls to "allocate_varlen_buffer" for later processing
2215  llvm::Function* called = CI->getCalledFunction();
2216  if (called) {
2217  if (called->getName() == "allocate_varlen_buffer") {
2218  calls_to_analyze.push_back(CI);
2219  }
2220  }
2221  }
2222  }
2223  }
2224  }
2225 
2226  // for each call to "allocate_varlen_buffer", check if there's a corresponding
2227  // call to "register_buffer_with_executor_rsm". If not, add a call to it
2228  llvm::IRBuilder<> Builder(cgen_state_->context_);
2229  auto i64 = get_int_type(64, cgen_state_->context_);
2230  auto i8p = get_int_ptr_type(8, cgen_state_->context_);
2231  auto void_ = llvm::Type::getVoidTy(cgen_state_->context_);
2232  llvm::FunctionType* fnty = llvm::FunctionType::get(void_, {i64, i8p}, false);
2233  llvm::FunctionCallee register_buffer_fn =
2234  M->getOrInsertFunction("register_buffer_with_executor_rsm", fnty, {});
2235 
2236  int64_t executor_addr = reinterpret_cast<int64_t>(this);
2237  for (llvm::CallInst* CI : calls_to_analyze) {
2238  bool found = false;
2239  // for each user of the function, check if its a callinst
2240  // and if the callinst is calling "register_buffer_with_executor_rsm"
2241  // if no such instruction exist, add one registering the buffer
2242  for (llvm::User* U : CI->users()) {
2243  if (llvm::CallInst* call = llvm::dyn_cast<llvm::CallInst>(U)) {
2244  if (call->getCalledFunction() and
2245  call->getCalledFunction()->getName() == "register_buffer_with_executor_rsm") {
2246  found = true;
2247  break;
2248  }
2249  }
2250  }
2251  if (!found) {
2252  Builder.SetInsertPoint(CI->getNextNode());
2253  Builder.CreateCall(register_buffer_fn,
2254  {ll_int(executor_addr, cgen_state_->context_), CI});
2255  }
2256  }
2257 }
#define LOG(tag)
Definition: Logger.h:217
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Type * get_int_ptr_type(const int width, llvm::LLVMContext &context)
unsigned Executor::blockSize ( ) const

Definition at line 3862 of file Execute.cpp.

References block_size_x_, CHECK, data_mgr_, CudaMgr_Namespace::CudaMgr::getAllDeviceProperties(), and Data_Namespace::DataMgr::getCudaMgr().

Referenced by collectAllDeviceShardedTopResults(), executePlanWithGroupBy(), executePlanWithoutGroupBy(), executeTableFunction(), executeWorkUnitImpl(), reduceMultiDeviceResults(), reduceMultiDeviceResultSets(), and resultsUnion().

3862  {
3863  CHECK(data_mgr_);
3864  const auto cuda_mgr = data_mgr_->getCudaMgr();
3865  if (!cuda_mgr) {
3866  return 0;
3867  }
3868  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
3869  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
3870 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:222
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1287
const unsigned block_size_x_
Definition: Execute.h:1280
#define CHECK(condition)
Definition: Logger.h:223
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:127
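
blockSize() and gridSize() together describe the GPU launch shape; a return of 0 means no CUDA manager is available (a CPU-only build or configuration). A hedged sketch of reading the launch configuration, assuming a live Executor pointer:

  const unsigned threads_per_block = executor->blockSize();  // 0 => CPU-only
  const unsigned num_blocks = executor->gridSize();
  if (threads_per_block != 0) {
    // conceptually a <<<num_blocks, threads_per_block>>> launch shape
  }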


std::shared_ptr< HashJoin > Executor::buildCurrentLevelHashTable ( const JoinCondition current_level_join_conditions,
size_t  level_idx,
RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 935 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), AUTOMATIC_IR_METADATA, anonymous_namespace{IRCodegen.cpp}::check_valid_join_qual(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, RelAlgExecutionUnit::hash_table_build_plan_dag, IS_EQUIVALENCE, LEFT, OneToOne, JoinCondition::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::table_id_to_node_map, JoinCondition::type, and VLOG.

942  {
943  AUTOMATIC_IR_METADATA(cgen_state_.get());
944  std::shared_ptr<HashJoin> current_level_hash_table;
945  auto handleNonHashtableQual = [&ra_exe_unit, &level_idx, this](
946  JoinType join_type,
947  std::shared_ptr<Analyzer::Expr> qual) {
948  if (join_type == JoinType::LEFT) {
949  plan_state_->addNonHashtableQualForLeftJoin(level_idx, qual);
950  } else {
951  add_qualifier_to_execution_unit(ra_exe_unit, qual);
952  }
953  };
954  for (const auto& join_qual : current_level_join_conditions.quals) {
955  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
956  if (current_level_hash_table || !qual_bin_oper ||
957  !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
958  handleNonHashtableQual(current_level_join_conditions.type, join_qual);
959  if (!current_level_hash_table) {
960  fail_reasons.emplace_back("No equijoin expression found");
961  }
962  continue;
963  }
964  check_valid_join_qual(qual_bin_oper);
965  JoinHashTableOrError hash_table_or_error;
966  if (!current_level_hash_table) {
967  hash_table_or_error = buildHashTableForQualifier(
968  qual_bin_oper,
969  query_infos,
970  co.device_type == ExecutorDeviceType::GPU ? Data_Namespace::GPU_LEVEL
971  : Data_Namespace::CPU_LEVEL,
972  current_level_join_conditions.type,
973  HashType::OneToOne,
974  column_cache,
975  ra_exe_unit.hash_table_build_plan_dag,
976  ra_exe_unit.query_hint,
977  ra_exe_unit.table_id_to_node_map);
978  current_level_hash_table = hash_table_or_error.hash_table;
979  }
980  if (hash_table_or_error.hash_table) {
981  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
982  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
983  } else {
984  fail_reasons.push_back(hash_table_or_error.fail_reason);
985  if (!current_level_hash_table) {
986  VLOG(2) << "Building a hashtable based on a qual " << qual_bin_oper->toString()
987  << " fails: " << hash_table_or_error.fail_reason;
988  }
989  handleNonHashtableQual(current_level_join_conditions.type, qual_bin_oper);
990  }
991  }
992  return current_level_hash_table;
993 }
JoinType
Definition: sqldefs.h:136
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:69
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
TableIdToNodeMap table_id_to_node_map
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1252
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
Definition: IRCodegen.cpp:474
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:3806
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define VLOG(n)
Definition: Logger.h:317
HashTableBuildDagMap hash_table_build_plan_dag
void check_valid_join_qual(std::shared_ptr< Analyzer::BinOper > &bin_oper)
Definition: IRCodegen.cpp:504


Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
ColumnCacheMap column_cache,
const HashTableBuildDagMap hashtable_build_dag_map,
const RegisteredQueryHint query_hint,
const TableIdToNodeMap table_id_to_node_map 
)
private

Definition at line 3806 of file Execute.cpp.

References deviceCountForMemoryLevel(), ERR_INTERRUPTED, g_enable_dynamic_watchdog, g_enable_overlaps_hashjoin, HashJoin::getInstance(), and interrupted_.

3815  {
3816  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
3817  return {nullptr, "Overlaps hash join disabled, attempting to fall back to loop join"};
3818  }
3819  if (g_enable_dynamic_watchdog && interrupted_.load()) {
3820  throw QueryExecutionError(ERR_INTERRUPTED);
3821  }
3822  try {
3823  auto tbl = HashJoin::getInstance(qual_bin_oper,
3824  query_infos,
3825  memory_level,
3826  join_type,
3827  preferred_hash_type,
3828  deviceCountForMemoryLevel(memory_level),
3829  column_cache,
3830  this,
3831  hashtable_build_dag_map,
3832  query_hint,
3833  table_id_to_node_map);
3834  return {tbl, ""};
3835  } catch (const HashJoinFail& e) {
3836  return {nullptr, e.what()};
3837  }
3838 }
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1351
std::atomic< bool > interrupted_
Definition: Execute.h:1262
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:80
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:1046
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query&#39;s parse tree etc.
Definition: HashJoin.cpp:283
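
Failures are reported by value rather than by exception: HashJoinFail is caught and surfaced as a JoinHashTableOrError with a null hash_table and a populated fail_reason. A sketch of how a caller reacts, mirroring buildCurrentLevelHashTable() above (all arguments assumed to be in scope):

  auto result = buildHashTableForQualifier(qual_bin_oper, query_infos, memory_level,
                                           join_type, HashType::OneToOne, column_cache,
                                           hashtable_build_dag_map, query_hint,
                                           table_id_to_node_map);
  if (!result.hash_table) {
    VLOG(2) << "Hash join unavailable: " << result.fail_reason;
    // fall back to a loop join for this level
  }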


JoinLoop::HoistedFiltersCallback Executor::buildHoistLeftHandSideFiltersCb ( const RelAlgExecutionUnit ra_exe_unit,
const size_t  level_idx,
const int  inner_table_id,
const CompilationOptions co 
)
private

Definition at line 768 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CodeGenerator::codegen(), g_enable_left_join_filter_hoisting, RelAlgExecutionUnit::join_quals, LEFT, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, CodeGenerator::toBool(), and VLOG.

772  {
773  if (!g_enable_left_join_filter_hoisting) {
774  return nullptr;
775  }
776 
777  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
778  if (level_idx == 0 && current_level_join_conditions.type == JoinType::LEFT) {
779  const auto& condition = current_level_join_conditions.quals.front();
780  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(condition.get());
781  CHECK(bin_oper) << condition->toString();
782  const auto rhs =
783  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_right_operand());
784  const auto lhs =
785  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_left_operand());
786  if (lhs && rhs && lhs->get_table_id() != rhs->get_table_id()) {
787  const Analyzer::ColumnVar* selected_lhs{nullptr};
788  // grab the left hand side column -- this is somewhat similar to normalize column
789  // pair, and a better solution may be to hoist that function out of the join
790  // framework and normalize columns at the top of build join loops
791  if (lhs->get_table_id() == inner_table_id) {
792  selected_lhs = rhs;
793  } else if (rhs->get_table_id() == inner_table_id) {
794  selected_lhs = lhs;
795  }
796  if (selected_lhs) {
797  std::list<std::shared_ptr<Analyzer::Expr>> hoisted_quals;
798  // get all LHS-only filters
799  auto should_hoist_qual = [&hoisted_quals](const auto& qual, const int table_id) {
800  CHECK(qual);
801 
802  ExprTableIdVisitor visitor;
803  const auto table_ids = visitor.visit(qual.get());
804  if (table_ids.size() == 1 && table_ids.find(table_id) != table_ids.end()) {
805  hoisted_quals.push_back(qual);
806  }
807  };
808  for (const auto& qual : ra_exe_unit.simple_quals) {
809  should_hoist_qual(qual, selected_lhs->get_table_id());
810  }
811  for (const auto& qual : ra_exe_unit.quals) {
812  should_hoist_qual(qual, selected_lhs->get_table_id());
813  }
814 
815  // build the filters callback and return it
816  if (!hoisted_quals.empty()) {
817  return [this, hoisted_quals, co](llvm::BasicBlock* true_bb,
818  llvm::BasicBlock* exit_bb,
819  const std::string& loop_name,
820  llvm::Function* parent_func,
821  CgenState* cgen_state) -> llvm::BasicBlock* {
822  // make sure we have quals to hoist
823  bool has_quals_to_hoist = false;
824  for (const auto& qual : hoisted_quals) {
825  // check to see if the filter was previously hoisted. if all filters were
826  // previously hoisted, this callback becomes a noop
827  if (plan_state_->hoisted_filters_.count(qual) == 0) {
828  has_quals_to_hoist = true;
829  break;
830  }
831  }
832 
833  if (!has_quals_to_hoist) {
834  return nullptr;
835  }
836 
837  AUTOMATIC_IR_METADATA(cgen_state);
838 
839  llvm::IRBuilder<>& builder = cgen_state->ir_builder_;
840  auto& context = builder.getContext();
841 
842  const auto filter_bb =
843  llvm::BasicBlock::Create(context,
844  "hoisted_left_join_filters_" + loop_name,
845  parent_func,
846  /*insert_before=*/true_bb);
847  builder.SetInsertPoint(filter_bb);
848 
849  llvm::Value* filter_lv = cgen_state_->llBool(true);
850  CodeGenerator code_generator(this);
852  for (const auto& qual : hoisted_quals) {
853  if (plan_state_->hoisted_filters_.insert(qual).second) {
854  // qual was inserted into the hoisted filters map, which means we have not
855  // seen this qual before. Generate filter.
856  VLOG(1) << "Generating code for hoisted left hand side qualifier "
857  << qual->toString();
858  auto cond = code_generator.toBool(
859  code_generator.codegen(qual.get(), true, co).front());
860  filter_lv = builder.CreateAnd(filter_lv, cond);
861  }
862  }
863  CHECK(filter_lv->getType()->isIntegerTy(1));
864 
865  builder.CreateCondBr(filter_lv, true_bb, exit_bb);
866  return filter_bb;
867  };
868  }
869  }
870  }
871  }
872  return nullptr;
873 }
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:100
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1252
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:223
#define VLOG(n)
Definition: Logger.h:317
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

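To see when this callback fires, consider a query such as SELECT COUNT(*) FROM t1 LEFT JOIN t2 ON t1.x = t2.x WHERE t1.y > 10 (t1 and t2 are hypothetical tables). The qual t1.y > 10 touches only the left-hand side, so it is evaluated once per outer row, above the join loop. A scalar model of the emitted control flow (a sketch, not the generated IR):

  #include <vector>

  int count_rows_reaching_join(const std::vector<int>& t1_y) {
    int n = 0;
    for (int y : t1_y) {
      // hoisted_left_join_filters_* block: CondBr(filter_lv, true_bb, exit_bb)
      if (!(y > 10)) {
        continue;  // skip the hash probe / inner iteration entirely
      }
      ++n;  // join work for this outer row would happen here
    }
    return n;
  }
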
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit ra_exe_unit,
const size_t  level_idx,
const CompilationOptions co 
)
private

Definition at line 876 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, CodeGenerator::codegen(), CompilationOptions::filter_on_deleted_column, RelAlgExecutionUnit::input_descs, TABLE, and CodeGenerator::toBool().

878  {
879  AUTOMATIC_IR_METADATA(cgen_state_.get());
880  if (!co.filter_on_deleted_column) {
881  return nullptr;
882  }
883  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
884  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
885  if (input_desc.getSourceType() != InputSourceType::TABLE) {
886  return nullptr;
887  }
888 
889  const auto deleted_cd = plan_state_->getDeletedColForTable(input_desc.getTableId());
890  if (!deleted_cd) {
891  return nullptr;
892  }
893  CHECK(deleted_cd->columnType.is_boolean());
894  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
895  input_desc.getTableId(),
896  deleted_cd->columnId,
897  input_desc.getNestLevel());
898  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
899  llvm::Value* have_more_inner_rows) {
900  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
901  // Avoid fetching the deleted column from a position which is not valid.
902  // An invalid position can be returned by a one to one hash lookup (negative)
903  // or at the end of iteration over a set of matching values.
904  llvm::Value* is_valid_it{nullptr};
905  if (have_more_inner_rows) {
906  is_valid_it = have_more_inner_rows;
907  } else {
908  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
909  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
910  }
911  const auto it_valid_bb = llvm::BasicBlock::Create(
912  cgen_state_->context_, "it_valid", cgen_state_->current_func_);
913  const auto it_not_valid_bb = llvm::BasicBlock::Create(
914  cgen_state_->context_, "it_not_valid", cgen_state_->current_func_);
915  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
916  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
917  cgen_state_->context_, "row_is_deleted", cgen_state_->current_func_);
918  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
919  CodeGenerator code_generator(this);
920  const auto row_is_deleted = code_generator.toBool(
921  code_generator.codegen(deleted_expr.get(), true, co).front());
922  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
923  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
924  const auto row_is_deleted_default = cgen_state_->llBool(false);
925  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
926  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
927  auto row_is_deleted_or_default =
928  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
929  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
930  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
931  return row_is_deleted_or_default;
932  };
933 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1252
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:233
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1094
#define CHECK(condition)
Definition: Logger.h:223

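A scalar model (a sketch, not the source) of the control flow the returned callback emits: the PHI node merges the fetched $deleted flag from the it_valid block with a false default from the it_not_valid block. One simplification: in the real IR, have_more_inner_rows replaces the index comparison when present rather than being OR-ed with it.

  #include <cstdint>
  #include <functional>

  bool row_is_deleted_or_default(
      int64_t matching_row_index,
      bool have_more_inner_rows,
      const std::function<bool(int64_t)>& fetch_deleted_flag) {
    const bool is_valid_it =
        have_more_inner_rows || matching_row_index >= 0;  // ICMP_SGE 0
    if (is_valid_it) {                 // it_valid_bb
      return fetch_deleted_flag(matching_row_index);
    }
    return false;                      // it_not_valid_bb: llBool(false)
  }
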
std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache 
)
private

Definition at line 523 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), INJECT_TIMER, CgenState::ir_builder_, RelAlgExecutionUnit::join_quals, LEFT, PlanState::left_join_non_hashtable_quals_, CgenState::llBool(), MultiSet, OneToOne, CgenState::outer_join_match_found_per_level_, CodeGenerator::plan_state_, Set, Singleton, JoinLoopDomain::slot_lookup_result, CodeGenerator::toBool(), and JoinLoopDomain::values_buffer.

528  {
529  AUTOMATIC_IR_METADATA(cgen_state_.get());
530  INJECT_TIMER(buildJoinLoops);
531  std::vector<JoinLoop> join_loops;
532  for (size_t level_idx = 0, current_hash_table_idx = 0;
533  level_idx < ra_exe_unit.join_quals.size();
534  ++level_idx) {
535  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
536  std::vector<std::string> fail_reasons;
537  const auto current_level_hash_table =
538  buildCurrentLevelHashTable(current_level_join_conditions,
539  level_idx,
540  ra_exe_unit,
541  co,
542  query_infos,
543  column_cache,
544  fail_reasons);
545  const auto found_outer_join_matches_cb =
546  [this, level_idx](llvm::Value* found_outer_join_matches) {
547  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
548  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
549  cgen_state_->outer_join_match_found_per_level_[level_idx] =
550  found_outer_join_matches;
551  };
552  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
553  auto rem_left_join_quals_it =
554  plan_state_->left_join_non_hashtable_quals_.find(level_idx);
555  bool has_remaining_left_join_quals =
556  rem_left_join_quals_it != plan_state_->left_join_non_hashtable_quals_.end() &&
557  !rem_left_join_quals_it->second.empty();
558  const auto outer_join_condition_remaining_quals_cb =
559  [this, level_idx, &co](const std::vector<llvm::Value*>& prev_iters) {
560  // when we have multiple quals for the left join in the current join level
561  // we first try to build a hashtable by using one of the possible qual,
562  // and deal with remaining quals as extra join conditions
563  FetchCacheAnchor anchor(cgen_state_.get());
564  addJoinLoopIterator(prev_iters, level_idx + 1);
565  llvm::Value* left_join_cond = cgen_state_->llBool(true);
566  CodeGenerator code_generator(this);
567  auto it = plan_state_->left_join_non_hashtable_quals_.find(level_idx);
568  if (it != plan_state_->left_join_non_hashtable_quals_.end()) {
569  for (auto expr : it->second) {
570  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
571  left_join_cond,
572  code_generator.toBool(
573  code_generator.codegen(expr.get(), true, co).front()));
574  }
575  }
576  return left_join_cond;
577  };
578  if (current_level_hash_table) {
579  const auto hoisted_filters_cb = buildHoistLeftHandSideFiltersCb(
580  ra_exe_unit, level_idx, current_level_hash_table->getInnerTableId(), co);
581  if (current_level_hash_table->getHashType() == HashType::OneToOne) {
582  join_loops.emplace_back(
583  /*kind=*/JoinLoopKind::Singleton,
584  /*type=*/current_level_join_conditions.type,
585  /*iteration_domain_codegen=*/
586  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
587  const std::vector<llvm::Value*>& prev_iters) {
588  addJoinLoopIterator(prev_iters, level_idx);
589  JoinLoopDomain domain{{0}};
590  domain.slot_lookup_result =
591  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
592  return domain;
593  },
594  /*outer_condition_match=*/
595  current_level_join_conditions.type == JoinType::LEFT &&
596  has_remaining_left_join_quals
597  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
598  outer_join_condition_remaining_quals_cb)
599  : nullptr,
600  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
601  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
602  : nullptr,
603  /*hoisted_filters=*/hoisted_filters_cb,
604  /*is_deleted=*/is_deleted_cb,
605  /*nested_loop_join=*/false);
606  } else if (auto range_join_table =
607  dynamic_cast<RangeJoinHashTable*>(current_level_hash_table.get())) {
608  join_loops.emplace_back(
609  /* kind= */ JoinLoopKind::MultiSet,
610  /* type= */ current_level_join_conditions.type,
611  /* iteration_domain_codegen= */
612  [this,
613  range_join_table,
614  current_hash_table_idx,
615  level_idx,
616  current_level_hash_table,
617  &co](const std::vector<llvm::Value*>& prev_iters) {
618  addJoinLoopIterator(prev_iters, level_idx);
619  JoinLoopDomain domain{{0}};
620  CHECK(!prev_iters.empty());
621  const auto matching_set = range_join_table->codegenMatchingSetWithOffset(
622  co, current_hash_table_idx, prev_iters.back());
623  domain.values_buffer = matching_set.elements;
624  domain.element_count = matching_set.count;
625  return domain;
626  },
627  /* outer_condition_match= */
628  current_level_join_conditions.type == JoinType::LEFT
629  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
630  outer_join_condition_remaining_quals_cb)
631  : nullptr,
632  /* found_outer_matches= */
633  current_level_join_conditions.type == JoinType::LEFT
634  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
635  : nullptr,
636  /* hoisted_filters= */ nullptr, // <<! TODO
637  /* is_deleted= */ is_deleted_cb,
638  /*nested_loop_join=*/false);
639  } else {
640  join_loops.emplace_back(
641  /*kind=*/JoinLoopKind::Set,
642  /*type=*/current_level_join_conditions.type,
643  /*iteration_domain_codegen=*/
644  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
645  const std::vector<llvm::Value*>& prev_iters) {
646  addJoinLoopIterator(prev_iters, level_idx);
647  JoinLoopDomain domain{{0}};
648  const auto matching_set = current_level_hash_table->codegenMatchingSet(
649  co, current_hash_table_idx);
650  domain.values_buffer = matching_set.elements;
651  domain.element_count = matching_set.count;
652  return domain;
653  },
654  /*outer_condition_match=*/
655  current_level_join_conditions.type == JoinType::LEFT
656  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
657  outer_join_condition_remaining_quals_cb)
658  : nullptr,
659  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
660  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
661  : nullptr,
662  /*hoisted_filters=*/hoisted_filters_cb,
663  /*is_deleted=*/is_deleted_cb,
664  /*nested_loop_join=*/false);
665  }
666  ++current_hash_table_idx;
667  } else {
668  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
669  ? "No equijoin expression found"
670  : boost::algorithm::join(fail_reasons, " | ");
671  check_if_loop_join_is_allowed(
672  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
673  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
674  // condition.
675  VLOG(1) << "Unable to build hash table, falling back to loop join: "
676  << fail_reasons_str;
677  const auto outer_join_condition_cb =
678  [this, level_idx, &co, &current_level_join_conditions](
679  const std::vector<llvm::Value*>& prev_iters) {
680  // The values generated for the match path don't dominate all uses
681  // since on the non-match path nulls are generated. Reset the cache
682  // once the condition is generated to avoid incorrect reuse.
683  FetchCacheAnchor anchor(cgen_state_.get());
684  addJoinLoopIterator(prev_iters, level_idx + 1);
685  llvm::Value* left_join_cond = cgen_state_->llBool(true);
686  CodeGenerator code_generator(this);
687  for (auto expr : current_level_join_conditions.quals) {
688  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
689  left_join_cond,
690  code_generator.toBool(
691  code_generator.codegen(expr.get(), true, co).front()));
692  }
693  return left_join_cond;
694  };
695  join_loops.emplace_back(
696  /*kind=*/JoinLoopKind::UpperBound,
697  /*type=*/current_level_join_conditions.type,
698  /*iteration_domain_codegen=*/
699  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
700  addJoinLoopIterator(prev_iters, level_idx);
701  JoinLoopDomain domain{{0}};
702  auto* arg = get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan");
703  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
704  arg->getType()->getScalarType()->getPointerElementType(),
705  arg,
706  cgen_state_->llInt(int32_t(level_idx + 1)));
707  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(
708  rows_per_scan_ptr->getType()->getPointerElementType(),
709  rows_per_scan_ptr,
710  "num_rows_per_scan");
711  return domain;
712  },
713  /*outer_condition_match=*/
714  current_level_join_conditions.type == JoinType::LEFT
715  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
716  outer_join_condition_cb)
717  : nullptr,
718  /*found_outer_matches=*/
719  current_level_join_conditions.type == JoinType::LEFT
720  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
721  : nullptr,
722  /*hoisted_filters=*/nullptr,
723  /*is_deleted=*/is_deleted_cb,
724  /*nested_loop_join=*/true);
725  }
726  }
727  return join_loops;
728 }
llvm::Value * values_buffer
Definition: JoinLoop.h:49
std::string join(T const &container, std::string const &delim)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
#define INJECT_TIMER(DESC)
Definition: measure.h:93
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1252
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * slot_lookup_result
Definition: JoinLoop.h:47
#define CHECK_LT(x, y)
Definition: Logger.h:233
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:935
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1094
#define CHECK(condition)
Definition: Logger.h:223
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
Definition: IRCodegen.cpp:484
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const int inner_table_id, const CompilationOptions &co)
Definition: IRCodegen.cpp:768
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:523
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:876
#define VLOG(n)
Definition: Logger.h:317

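The per-level dispatch reduces to a small decision table. A condensed sketch (names are illustrative, not the source):

  enum class JoinLoopKindSketch { Singleton, MultiSet, Set, UpperBound };

  JoinLoopKindSketch pick_kind(bool hash_table_built,
                               bool is_one_to_one,
                               bool is_range_join_table) {
    if (!hash_table_built) {
      return JoinLoopKindSketch::UpperBound;  // loop join over num_rows_per_scan
    }
    if (is_one_to_one) {
      return JoinLoopKindSketch::Singleton;   // one slot lookup per probe row
    }
    if (is_range_join_table) {
      return JoinLoopKindSketch::MultiSet;    // matching set with offset probing
    }
    return JoinLoopKindSketch::Set;           // iterate the matching set
  }
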
void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3259 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LT, getFragmentCount(), RelAlgExecutionUnit::input_descs, and plan_state_.

Referenced by fetchChunks().

3264  {
3265  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
3266  size_t frag_pos{0};
3267  const auto& input_descs = ra_exe_unit.input_descs;
3268  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3269  const int table_id = input_descs[scan_idx].getTableId();
3270  CHECK_EQ(selected_fragments[scan_idx].table_id, table_id);
3271  selected_fragments_crossjoin.push_back(
3272  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
3273  for (const auto& col_id : col_global_ids) {
3274  CHECK(col_id);
3275  const auto& input_desc = col_id->getScanDesc();
3276  if (input_desc.getTableId() != table_id ||
3277  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
3278  continue;
3279  }
3280  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3281  CHECK(it != plan_state_->global_to_local_col_ids_.end());
3282  CHECK_LT(static_cast<size_t>(it->second),
3283  plan_state_->global_to_local_col_ids_.size());
3284  local_col_to_frag_pos[it->second] = frag_pos;
3285  }
3286  ++frag_pos;
3287  }
3288 }
#define CHECK_EQ(x, y)
Definition: Logger.h:231
std::vector< InputDescriptor > input_descs
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1252
#define CHECK_LT(x, y)
Definition: Logger.h:233
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3245
#define CHECK(condition)
Definition: Logger.h:223

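A worked example with hypothetical inputs: given input_descs = [t10 at nest level 0, t20 at nest level 1], the mapping pairs each local column id with the position of its table's fragment list in the cross join:

  // selected_fragments_crossjoin = { fragments(t10), fragments(t20) }
  // local_col_to_frag_pos[local_id(t10.col)] = 0   // frag_pos of scan 0
  // local_col_to_frag_pos[local_id(t20.col)] = 1   // frag_pos of scan 1
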
void Executor::buildSelectedFragsMappingForUnion ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3290 of file Execute.cpp.

References CHECK, CHECK_LT, RelAlgExecutionUnit::input_descs, and plan_state_.

Referenced by fetchUnionChunks().

3295  {
3296  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
3297  size_t frag_pos{0};
3298  const auto& input_descs = ra_exe_unit.input_descs;
3299  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3300  const int table_id = input_descs[scan_idx].getTableId();
3301  // selected_fragments here is from assignFragsToKernelDispatch
3302  // execution_kernel.fragments
3303  if (selected_fragments[0].table_id != table_id) { // TODO 0
3304  continue;
3305  }
3306  // CHECK_EQ(selected_fragments[scan_idx].table_id, table_id);
3307  selected_fragments_crossjoin.push_back(
3308  // getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
3309  {size_t(1)}); // TODO
3310  for (const auto& col_id : col_global_ids) {
3311  CHECK(col_id);
3312  const auto& input_desc = col_id->getScanDesc();
3313  if (input_desc.getTableId() != table_id ||
3314  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
3315  continue;
3316  }
3317  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3318  CHECK(it != plan_state_->global_to_local_col_ids_.end());
3319  CHECK_LT(static_cast<size_t>(it->second),
3320  plan_state_->global_to_local_col_ids_.size());
3321  local_col_to_frag_pos[it->second] = frag_pos;
3322  }
3323  ++frag_pos;
3324  }
3325 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1252
#define CHECK_LT(x, y)
Definition: Logger.h:233
#define CHECK(condition)
Definition: Logger.h:223

FragmentSkipStatus Executor::canSkipFragmentForFpQual ( const Analyzer::BinOper comp_expr,
const Analyzer::ColumnVar lhs_col,
const Fragmenter_Namespace::FragmentInfo fragment,
const Analyzer::Constant rhs_const 
) const
private

Definition at line 4066 of file Execute.cpp.

References CHECK, extract_max_stat_fp_type(), extract_min_stat_fp_type(), Analyzer::ColumnVar::get_column_id(), Analyzer::Constant::get_constval(), Analyzer::BinOper::get_optype(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), INVALID, kDOUBLE, kEQ, kFLOAT, kGE, kGT, kLE, kLT, NOT_SKIPPABLE, and SKIPPABLE.

Referenced by skipFragment().

4070  {
4071  const int col_id = lhs_col->get_column_id();
4072  auto chunk_meta_it = fragment.getChunkMetadataMap().find(col_id);
4073  if (chunk_meta_it == fragment.getChunkMetadataMap().end()) {
4074  return FragmentSkipStatus::INVALID;
4075  }
4076  double chunk_min{0.};
4077  double chunk_max{0.};
4078  const auto& chunk_type = lhs_col->get_type_info();
4079  chunk_min = extract_min_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4080  chunk_max = extract_max_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4081  if (chunk_min > chunk_max) {
4082  return FragmentSkipStatus::INVALID;
4083  }
4084 
4085  const auto datum_fp = rhs_const->get_constval();
4086  const auto rhs_type = rhs_const->get_type_info().get_type();
4087  CHECK(rhs_type == kFLOAT || rhs_type == kDOUBLE);
4088 
4089  // Do we need to codegen the constant like the integer path does?
4090  const auto rhs_val = rhs_type == kFLOAT ? datum_fp.floatval : datum_fp.doubleval;
4091 
4092  // Todo: dedup the following comparison code with the integer/timestamp path, it is
4093  // slightly tricky to do cleanly as we do not have rowid on this path
4094  switch (comp_expr->get_optype()) {
4095  case kGE:
4096  if (chunk_max < rhs_val) {
4097  return FragmentSkipStatus::SKIPPABLE;
4098  }
4099  break;
4100  case kGT:
4101  if (chunk_max <= rhs_val) {
4102  return FragmentSkipStatus::SKIPPABLE;
4103  }
4104  break;
4105  case kLE:
4106  if (chunk_min > rhs_val) {
4107  return FragmentSkipStatus::SKIPPABLE;
4108  }
4109  break;
4110  case kLT:
4111  if (chunk_min >= rhs_val) {
4112  return FragmentSkipStatus::SKIPPABLE;
4113  }
4114  break;
4115  case kEQ:
4116  if (chunk_min > rhs_val || chunk_max < rhs_val) {
4117  return FragmentSkipStatus::SKIPPABLE;
4118  }
4119  break;
4120  default:
4121  break;
4122  }
4123  return FragmentSkipStatus::NOT_SKIPPABLE;
4124 }
double extract_max_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
Definition: sqldefs.h:35
Definition: sqldefs.h:36
Definition: sqldefs.h:30
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
SQLOps get_optype() const
Definition: Analyzer.h:446
double extract_min_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
const ChunkMetadataMap & getChunkMetadataMap() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:81
Definition: sqldefs.h:34
Datum get_constval() const
Definition: Analyzer.h:342
#define CHECK(condition)
Definition: Logger.h:223
Definition: sqldefs.h:33
int get_column_id() const
Definition: Analyzer.h:201

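A worked example with hypothetical fragment stats: suppose the chunk metadata for a float column records chunk_min = 1.5 and chunk_max = 9.75. Then:

  // col >= 10.0  ->  chunk_max (9.75) <  10.0  ->  SKIPPABLE
  // col >   9.75 ->  chunk_max (9.75) <= 9.75  ->  SKIPPABLE
  // col <   1.5  ->  chunk_min (1.5)  >= 1.5   ->  SKIPPABLE
  // col =   5.0  ->  1.5 <= 5.0 <= 9.75        ->  NOT_SKIPPABLE
  // missing metadata, or chunk_min > chunk_max ->  INVALID
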
llvm::Value * Executor::castToFP ( llvm::Value *  value,
SQLTypeInfo const &  from_ti,
SQLTypeInfo const &  to_ti 
)
private

Definition at line 3881 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, exp_to_scale(), logger::FATAL, SQLTypeInfo::get_scale(), SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_number(), and LOG.

3883  {
3884  AUTOMATIC_IR_METADATA(cgen_state_.get());
3885  if (value->getType()->isIntegerTy() && from_ti.is_number() && to_ti.is_fp() &&
3886  (!from_ti.is_fp() || from_ti.get_size() != to_ti.get_size())) {
3887  llvm::Type* fp_type{nullptr};
3888  switch (to_ti.get_size()) {
3889  case 4:
3890  fp_type = llvm::Type::getFloatTy(cgen_state_->context_);
3891  break;
3892  case 8:
3893  fp_type = llvm::Type::getDoubleTy(cgen_state_->context_);
3894  break;
3895  default:
3896  LOG(FATAL) << "Unsupported FP size: " << to_ti.get_size();
3897  }
3898  value = cgen_state_->ir_builder_.CreateSIToFP(value, fp_type);
3899  if (from_ti.get_scale()) {
3900  value = cgen_state_->ir_builder_.CreateFDiv(
3901  value,
3902  llvm::ConstantFP::get(value->getType(), exp_to_scale(from_ti.get_scale())));
3903  }
3904  }
3905  return value;
3906 }
#define LOG(tag)
Definition: Logger.h:217
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
#define AUTOMATIC_IR_METADATA(CGENSTATE)
uint64_t exp_to_scale(const unsigned exp)

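A worked example: a DECIMAL(10,2) value stored as the scaled integer 12345 is converted by SIToFP (12345 -> 12345.0) followed by an FDiv by exp_to_scale(2) == 100, yielding 123.45. A scalar equivalent (a sketch of the semantics, not the codegen):

  #include <cstdint>

  double decimal_to_fp(int64_t storage_val, unsigned scale) {
    double divisor = 1.0;
    for (unsigned i = 0; i < scale; ++i) {
      divisor *= 10.0;  // exp_to_scale(scale)
    }
    return static_cast<double>(storage_val) / divisor;  // SIToFP + FDiv
  }
  // decimal_to_fp(12345, 2) == 123.45
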
llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 3908 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, CHECK, CHECK_LT, and get_int_type().

3908  {
3909  AUTOMATIC_IR_METADATA(cgen_state_.get());
3910  CHECK(val->getType()->isPointerTy());
3911 
3912  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
3913  const auto val_type = val_ptr_type->getElementType();
3914  size_t val_width = 0;
3915  if (val_type->isIntegerTy()) {
3916  val_width = val_type->getIntegerBitWidth();
3917  } else {
3918  if (val_type->isFloatTy()) {
3919  val_width = 32;
3920  } else {
3921  CHECK(val_type->isDoubleTy());
3922  val_width = 64;
3923  }
3924  }
3925  CHECK_LT(size_t(0), val_width);
3926  if (bitWidth == val_width) {
3927  return val;
3928  }
3929  return cgen_state_->ir_builder_.CreateBitCast(
3930  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
3931 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:233
#define CHECK(condition)
Definition: Logger.h:223

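Behavior summary (a sketch): the element width of val is derived from its pointee type (iN -> N, float -> 32, double -> 64). If it already equals bit_width the pointer is returned untouched, even for FP pointers; otherwise it is bitcast to a pointer to an integer of the requested width:

  // i8*     with bit_width 64  ->  bitcast to i64*
  // float*  with bit_width 64  ->  bitcast to i64*
  // double* with bit_width 64  ->  returned as-is (widths already match)
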
bool Executor::checkCurrentQuerySession ( const std::string &  candidate_query_session,
mapd_shared_lock< mapd_shared_mutex > &  read_lock 
)

Definition at line 4451 of file Execute.cpp.

References current_query_session_.

4452  {
4453  // a candidate query session matches the current query session only if it
4454  // is non-empty and equal to current_query_session_
4455  return !candidate_query_session.empty() &&
4456  (current_query_session_ == candidate_query_session);
4457 }
QuerySessionId current_query_session_
Definition: Execute.h:1305
bool Executor::checkIsQuerySessionEnrolled ( const QuerySessionId query_session,
mapd_shared_lock< mapd_shared_mutex > &  read_lock 
)

Definition at line 4723 of file Execute.cpp.

References queries_session_map_.

Referenced by executeWorkUnitImpl().

4725  {
4726  if (query_session.empty()) {
4727  return false;
4728  }
4729  return !query_session.empty() && queries_session_map_.count(query_session);
4730 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1309

bool Executor::checkIsQuerySessionInterrupted ( const std::string &  query_session,
mapd_shared_lock< mapd_shared_mutex > &  read_lock 
)

Definition at line 4712 of file Execute.cpp.

References queries_interrupt_flag_.

Referenced by executePlanWithGroupBy(), executePlanWithoutGroupBy(), fetchChunks(), and fetchUnionChunks().

4714  {
4715  if (query_session.empty()) {
4716  return false;
4717  }
4718  auto flag_it = queries_interrupt_flag_.find(query_session);
4719  return !query_session.empty() && flag_it != queries_interrupt_flag_.end() &&
4720  flag_it->second;
4721 }
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1307

bool Executor::checkNonKernelTimeInterrupted ( ) const

Definition at line 4800 of file Execute.cpp.

References current_query_session_, executor_id_, executor_session_mutex_, queries_interrupt_flag_, and UNITARY_EXECUTOR_ID.

4800  {
4801  // this function should be called within an executor which is assigned
4802  // to a specific query thread (which indicates the session is already enrolled)
4803  // check whether this is called from a non-unitary executor
4804  if (executor_id_ == UNITARY_EXECUTOR_ID) {
4805  return false;
4806  };
4807  mapd_shared_lock<mapd_shared_mutex> session_read_lock(executor_session_mutex_);
4808  auto flag_it = queries_interrupt_flag_.find(current_query_session_);
4809  return !current_query_session_.empty() && flag_it != queries_interrupt_flag_.end() &&
4810  flag_it->second;
4811 }
static mapd_shared_mutex executor_session_mutex_
Definition: Execute.h:1303
QuerySessionId current_query_session_
Definition: Execute.h:1305
const ExecutorId executor_id_
Definition: Execute.h:1196
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1307
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:376
void Executor::checkPendingQueryStatus ( const QuerySessionId query_session)

Definition at line 4494 of file Execute.cpp.

References ERR_INTERRUPTED, executor_session_mutex_, queries_interrupt_flag_, queries_session_map_, and VLOG.

4494  {
4495  // check whether we are okay to execute the "pending" query
4496  // i.e., before running the query check if this query session is "ALREADY" interrupted
4497  mapd_shared_lock<mapd_shared_mutex> session_read_lock(executor_session_mutex_);
4498  if (query_session.empty()) {
4499  return;
4500  }
4501  if (queries_interrupt_flag_.find(query_session) == queries_interrupt_flag_.end()) {
4502  // something went wrong, since we assume this is the caller's responsibility
4503  // (call this function only for an enrolled query session)
4504  if (!queries_session_map_.count(query_session)) {
4505  VLOG(1) << "Interrupting pending query is not available since the query session is "
4506  "not enrolled";
4507  } else {
4508  // here the query session is enrolled but the interrupt flag is not registered
4509  VLOG(1)
4510  << "Interrupting pending query is not available since its interrupt flag is "
4511  "not registered";
4512  }
4513  return;
4514  }
4515  if (queries_interrupt_flag_[query_session]) {
4516  throw QueryExecutionError(ERR_INTERRUPTED);
4517  }
4518 }
4518 }
static mapd_shared_mutex executor_session_mutex_
Definition: Execute.h:1303
static QuerySessionMap queries_session_map_
Definition: Execute.h:1309
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1351
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1307
#define VLOG(n)
Definition: Logger.h:317
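A minimal model (not the source) of the pending-query check: a session that is enrolled in the interrupt-flag map with its flag already set fails fast before any kernels run, while an unenrolled session only produces a VLOG.

  #include <map>
  #include <stdexcept>
  #include <string>

  void check_pending(const std::map<std::string, bool>& interrupt_flags,
                     const std::string& session) {
    auto it = interrupt_flags.find(session);
    if (it == interrupt_flags.end()) {
      return;  // not registered: log and carry on, as above
    }
    if (it->second) {
      // the real code throws QueryExecutionError(ERR_INTERRUPTED)
      throw std::runtime_error("ERR_INTERRUPTED");
    }
  }
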
void Executor::clearCaches ( bool  runtime_only = false)
static void Executor::clearExternalCaches ( bool  for_update,
const TableDescriptor td,
const int  current_db_id 
)
inlinestatic

Definition at line 391 of file Execute.h.

References TableDescriptor::getTableChunkKey(), CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable().

Referenced by Parser::InsertIntoTableAsSelectStmt::execute(), Parser::DropTableStmt::execute(), Parser::TruncateTableStmt::execute(), Parser::DropColumnStmt::execute(), Parser::CopyTableStmt::execute(), RelAlgExecutor::executeDelete(), and RelAlgExecutor::executeUpdate().

393  {
394  bool clearEntireCache = true;
395  if (td) {
396  const auto& table_chunk_key_prefix = td->getTableChunkKey(current_db_id);
397  if (!table_chunk_key_prefix.empty()) {
398  auto table_key = boost::hash_value(table_chunk_key_prefix);
399  ResultSetCacheInvalidator::invalidateCachesByTable(table_key);
400  if (for_update) {
401  UpdateTriggeredCacheInvalidator::invalidateCachesByTable(table_key);
402  } else {
403  DeleteTriggeredCacheInvalidator::invalidateCachesByTable(table_key);
404  }
405  clearEntireCache = false;
406  }
407  }
408  if (clearEntireCache) {
409  ResultSetCacheInvalidator::invalidateCaches();
410  if (for_update) {
411  UpdateTriggeredCacheInvalidator::invalidateCaches();
412  } else {
413  DeleteTriggeredCacheInvalidator::invalidateCaches();
414  }
415  }
416  }
static void invalidateCachesByTable(size_t table_key)
static void invalidateCaches()
std::vector< int > getTableChunkKey(const int getCurrentDBId) const

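A hedged usage sketch (argument values are illustrative): DDL that mutates a table invalidates just that table's entries when a TableDescriptor is at hand, and everything otherwise:

  // targeted invalidation after an update to one table
  Executor::clearExternalCaches(/*for_update=*/true, td, catalog.getDatabaseId());
  // no descriptor available: wipe the external caches entirely
  Executor::clearExternalCaches(/*for_update=*/false, nullptr, db_id);
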
void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 507 of file Execute.cpp.

References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, execute_mutex_, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by DBHandler::clear_cpu_memory(), DBHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

507  {
508  switch (memory_level) {
509  case Data_Namespace::MemoryLevel::CPU_LEVEL:
510  case Data_Namespace::MemoryLevel::GPU_LEVEL: {
511  mapd_unique_lock<mapd_shared_mutex> flush_lock(
512  execute_mutex_); // Don't flush memory while queries are running
513 
514  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
515  // The hash table cache uses CPU memory not managed by the buffer manager. In the
516  // future, we should manage these allocations with the buffer manager directly.
517  // For now, assume the user wants to purge the hash table cache when they clear
518  // CPU memory (currently used in ExecuteTest to lower memory pressure)
519  JoinHashTableCacheInvalidator::invalidateCaches();
520  }
521  Catalog_Namespace::SysCatalog::instance().getDataMgr().clearMemory(
522  memory_level);
523  break;
524  }
525  default: {
526  throw std::runtime_error(
527  "Clearing memory levels other than the CPU level or GPU level is not "
528  "supported.");
529  }
530  }
531 }
static mapd_shared_mutex execute_mutex_
Definition: Execute.h:1314
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:437
static void invalidateCaches()
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:228
static SysCatalog & instance()
Definition: SysCatalog.h:337

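Usage sketch, mirroring the DBHandler/QueryRunner callers listed above; the call blocks on execute_mutex_ until in-flight queries finish, and any level other than CPU_LEVEL or GPU_LEVEL throws:

  Executor::clearMemory(Data_Namespace::MemoryLevel::CPU_LEVEL);  // also purges the hash table cache
  Executor::clearMemory(Data_Namespace::MemoryLevel::GPU_LEVEL);
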
void Executor::clearMetaInfoCache ( )
private

Definition at line 770 of file Execute.cpp.

References agg_col_range_cache_, TableGenerations::clear(), AggregatedColRange::clear(), InputTableInfoCache::clear(), input_table_info_cache_, and table_generations_.

770  {
771  input_table_info_cache_.clear();
772  agg_col_range_cache_.clear();
773  table_generations_.clear();
774 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1301
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1300
TableGenerations table_generations_
Definition: Execute.h:1302

void Executor::clearQuerySessionStatus ( const QuerySessionId query_session,
const std::string &  submitted_time_str 
)

Definition at line 4520 of file Execute.cpp.

References current_query_session_, executor_session_mutex_, invalidateRunningQuerySession(), removeFromQuerySessionList(), and resetInterrupt().

4521  {
4522  mapd_unique_lock<mapd_shared_mutex> session_write_lock(executor_session_mutex_);
4523  // clear the interrupt-related info for a finished query
4524  if (query_session.empty()) {
4525  return;
4526  }
4527  removeFromQuerySessionList(query_session, submitted_time_str, session_write_lock);
4528  if (query_session.compare(current_query_session_) == 0) {
4529  invalidateRunningQuerySession(session_write_lock);
4530  resetInterrupt();
4531  }
4532 }
static mapd_shared_mutex executor_session_mutex_
Definition: Execute.h:1303
void invalidateRunningQuerySession(mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:4472
QuerySessionId current_query_session_
Definition: Execute.h:1305
void resetInterrupt()
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:4667

llvm::Value * Executor::codegenAggregateWindowState ( )
private

Definition at line 326 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.

326  {
327  AUTOMATIC_IR_METADATA(cgen_state_.get());
328  const auto pi32_type =
329  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
330  const auto pi64_type =
331  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
332  const auto window_func_context =
333  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
334  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
335  const auto window_func_ti = get_adjusted_window_type_info(window_func);
336  const auto aggregate_state_type =
337  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
338  auto aggregate_state = aggregateWindowStatePtr();
339  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
340  const auto aggregate_state_count_i64 = cgen_state_->llInt(
341  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
342  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
343  aggregate_state_count_i64, aggregate_state_type);
344  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
345  switch (window_func_ti.get_type()) {
346  case kFLOAT: {
347  return cgen_state_->emitCall(
348  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
349  }
350  case kDOUBLE: {
351  return cgen_state_->emitCall(
352  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
353  }
354  case kDECIMAL: {
355  return cgen_state_->emitCall(
356  "load_avg_decimal",
357  {aggregate_state,
358  aggregate_state_count,
359  double_null_lv,
360  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
361  }
362  default: {
363  return cgen_state_->emitCall(
364  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
365  }
366  }
367  }
368  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
369  return cgen_state_->ir_builder_.CreateLoad(
370  aggregate_state->getType()->getPointerElementType(), aggregate_state);
371  }
372  switch (window_func_ti.get_type()) {
373  case kFLOAT: {
374  return cgen_state_->emitCall("load_float", {aggregate_state});
375  }
376  case kDOUBLE: {
377  return cgen_state_->emitCall("load_double", {aggregate_state});
378  }
379  default: {
380  return cgen_state_->ir_builder_.CreateLoad(
381  aggregate_state->getType()->getPointerElementType(), aggregate_state);
382  }
383  }
384 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2261
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const ExecutionOptions eo 
)
private

Definition at line 1112 of file IRCodegen.cpp.

References ExecutionOptions::allow_runtime_query_interrupt, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, JoinLoop::codegen(), CompilationOptions::device_type, JoinLoopDomain::element_count, get_int_array_type(), get_int_type(), INNER, MultiSet, CodeGenerator::posArg(), GroupByAndAggregate::query_infos_, query_mem_desc, Set, and ExecutionOptions::with_dynamic_watchdog.

1119  {
1120  AUTOMATIC_IR_METADATA(cgen_state_.get());
1121  const auto exit_bb =
1122  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->current_func_);
1123  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
1124  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1125  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1126  CodeGenerator code_generator(this);
1127 
1128  llvm::BasicBlock* loops_entry_bb{nullptr};
1129  auto has_range_join =
1130  std::any_of(join_loops.begin(), join_loops.end(), [](const auto& join_loop) {
1131  return join_loop.kind() == JoinLoopKind::MultiSet;
1132  });
1133  if (has_range_join) {
1134  CHECK_EQ(join_loops.size(), size_t(1));
1135  const auto element_count =
1136  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 9);
1137 
1138  auto compute_packed_offset = [](const int32_t x, const int32_t y) -> uint64_t {
1139  const uint64_t y_shifted = static_cast<uint64_t>(y) << 32;
1140  return y_shifted | static_cast<uint32_t>(x);
1141  };
1142 
1143  const auto values_arr = std::vector<llvm::Constant*>{
1144  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
1145  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1146  compute_packed_offset(0, 1)),
1147  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1148  compute_packed_offset(0, -1)),
1149  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1150  compute_packed_offset(1, 0)),
1151  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1152  compute_packed_offset(1, 1)),
1153  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1154  compute_packed_offset(1, -1)),
1155  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1156  compute_packed_offset(-1, 0)),
1157  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1158  compute_packed_offset(-1, 1)),
1159  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1160  compute_packed_offset(-1, -1))};
1161 
1162  const auto constant_values_array = llvm::ConstantArray::get(
1163  get_int_array_type(64, 9, cgen_state_->context_), values_arr);
1164  CHECK(cgen_state_->module_);
1165  const auto values =
1166  new llvm::GlobalVariable(*cgen_state_->module_,
1167  get_int_array_type(64, 9, cgen_state_->context_),
1168  true,
1169  llvm::GlobalValue::LinkageTypes::InternalLinkage,
1170  constant_values_array);
1171  JoinLoop join_loop(
1172  JoinLoopKind::Set,
1173  JoinType::INNER,
1174  [element_count, values](const std::vector<llvm::Value*>& v) {
1175  JoinLoopDomain domain{{0}};
1176  domain.element_count = element_count;
1177  domain.values_buffer = values;
1178  return domain;
1179  },
1180  nullptr,
1181  nullptr,
1182  nullptr,
1183  nullptr,
1184  "range_key_loop");
1185 
1186  loops_entry_bb = JoinLoop::codegen(
1187  {join_loop},
1188  [this,
1189  query_func,
1190  &query_mem_desc,
1191  &co,
1192  &eo,
1193  &group_by_and_aggregate,
1194  &join_loops,
1195  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1196  auto& builder = cgen_state_->ir_builder_;
1197 
1198  auto body_exit_bb =
1199  llvm::BasicBlock::Create(cgen_state_->context_,
1200  "range_key_inner_body_exit",
1201  builder.GetInsertBlock()->getParent());
1202 
1203  auto range_key_body_bb =
1204  llvm::BasicBlock::Create(cgen_state_->context_,
1205  "range_key_loop_body",
1206  builder.GetInsertBlock()->getParent());
1207  builder.SetInsertPoint(range_key_body_bb);
1208 
1209  const auto body_loops_entry_bb = JoinLoop::codegen(
1210  join_loops,
1211  [this,
1212  query_func,
1213  &query_mem_desc,
1214  &co,
1215  &eo,
1216  &group_by_and_aggregate,
1217  &join_loops,
1218  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1219  addJoinLoopIterator(prev_iters, join_loops.size());
1220  auto& builder = cgen_state_->ir_builder_;
1221  const auto loop_body_bb =
1222  llvm::BasicBlock::Create(builder.getContext(),
1223  "loop_body",
1224  builder.GetInsertBlock()->getParent());
1225  builder.SetInsertPoint(loop_body_bb);
1226  const bool can_return_error =
1227  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1228  if (can_return_error || cgen_state_->needs_error_check_ ||
1229  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1230  createErrorCheckControlFlow(query_func,
1231  eo.with_dynamic_watchdog,
1232  eo.allow_runtime_query_interrupt,
1233  join_loops,
1234  co.device_type,
1235  group_by_and_aggregate.query_infos_);
1236  }
1237  return loop_body_bb;
1238  },
1239  prev_iters.back(),
1240  body_exit_bb,
1241  cgen_state_.get());
1242 
1243  builder.SetInsertPoint(range_key_body_bb);
1244  cgen_state_->ir_builder_.CreateBr(body_loops_entry_bb);
1245 
1246  builder.SetInsertPoint(body_exit_bb);
1247  return range_key_body_bb;
1248  },
1249  code_generator.posArg(nullptr),
1250  exit_bb,
1251  cgen_state_.get());
1252  } else {
1253  loops_entry_bb = JoinLoop::codegen(
1254  join_loops,
1255  /*body_codegen=*/
1256  [this,
1257  query_func,
1258  &query_mem_desc,
1259  &co,
1260  &eo,
1261  &group_by_and_aggregate,
1262  &join_loops,
1263  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1264  AUTOMATIC_IR_METADATA(cgen_state_.get());
1265  addJoinLoopIterator(prev_iters, join_loops.size());
1266  auto& builder = cgen_state_->ir_builder_;
1267  const auto loop_body_bb = llvm::BasicBlock::Create(
1268  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
1269  builder.SetInsertPoint(loop_body_bb);
1270  const bool can_return_error =
1271  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1272  if (can_return_error || cgen_state_->needs_error_check_ ||
1273  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1274  createErrorCheckControlFlow(query_func,
1275  eo.with_dynamic_watchdog,
1276  eo.allow_runtime_query_interrupt,
1277  join_loops,
1278  co.device_type,
1279  group_by_and_aggregate.query_infos_);
1280  }
1281  return loop_body_bb;
1282  },
1283  /*outer_iter=*/code_generator.posArg(nullptr),
1284  exit_bb,
1285  cgen_state_.get());
1286  }
1287  CHECK(loops_entry_bb);
1288  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1289  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
1290 }
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
#define CHECK_EQ(x, y)
Definition: Logger.h:231
llvm::Value * element_count
Definition: JoinLoop.h:46
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, CgenState *cgen_state)
Definition: JoinLoop.cpp:50
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1094
#define CHECK(condition)
Definition: Logger.h:223
llvm::ArrayType * get_int_array_type(int const width, int count, llvm::LLVMContext &context)

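The nine constants materialized for the range-key loop are the neighbor offsets of a 2D bin, {-1, 0, 1} x {-1, 0, 1}, each packed into one i64 as (y << 32) | zext(x). A standalone copy of the packing lambda, with spot checks:

  #include <cassert>
  #include <cstdint>

  uint64_t compute_packed_offset(const int32_t x, const int32_t y) {
    const uint64_t y_shifted = static_cast<uint64_t>(y) << 32;
    return y_shifted | static_cast<uint32_t>(x);
  }

  int main() {
    assert(compute_packed_offset(0, 1) == 0x0000000100000000ULL);
    assert(compute_packed_offset(1, -1) == 0xffffffff00000001ULL);
    assert(compute_packed_offset(-1, 0) == 0x00000000ffffffffULL);
    return 0;
  }
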
llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3166 of file NativeCodegen.cpp.

3168  {
3169  AUTOMATIC_IR_METADATA(cgen_state_.get());
3170  if (!co.filter_on_deleted_column) {
3171  return nullptr;
3172  }
3173  CHECK(!ra_exe_unit.input_descs.empty());
3174  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
3175  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
3176  return nullptr;
3177  }
3178  const auto deleted_cd =
3179  plan_state_->getDeletedColForTable(outer_input_desc.getTableId());
3180  if (!deleted_cd) {
3181  return nullptr;
3182  }
3183  CHECK(deleted_cd->columnType.is_boolean());
3184  const auto deleted_expr =
3185  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
3186  outer_input_desc.getTableId(),
3187  deleted_cd->columnId,
3188  outer_input_desc.getNestLevel());
3189  CodeGenerator code_generator(this);
3190  const auto is_deleted =
3191  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
3192  const auto is_deleted_bb = llvm::BasicBlock::Create(
3193  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
3194  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
3195  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
3196  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
3197  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
3198  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3199  cgen_state_->ir_builder_.SetInsertPoint(bb);
3200  return bb;
3201 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1252
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:223
void Executor::codegenWindowAvgEpilogue ( llvm::Value *  crt_val,
llvm::Value *  window_func_null_val,
llvm::Value *  multiplicity_lv 
)
private

Definition at line 289 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

291  {
292  AUTOMATIC_IR_METADATA(cgen_state_.get());
293  const auto window_func_context =
294  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
295  const auto window_func = window_func_context->getWindowFunction();
296  const auto window_func_ti = get_adjusted_window_type_info(window_func);
297  const auto pi32_type =
298  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
299  const auto pi64_type =
300  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
301  const auto aggregate_state_type =
302  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
303  const auto aggregate_state_count_i64 = cgen_state_->llInt(
304  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
305  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
306  aggregate_state_count_i64, aggregate_state_type);
307  std::string agg_count_func_name = "agg_count";
308  switch (window_func_ti.get_type()) {
309  case kFLOAT: {
310  agg_count_func_name += "_float";
311  break;
312  }
313  case kDOUBLE: {
314  agg_count_func_name += "_double";
315  break;
316  }
317  default: {
318  break;
319  }
320  }
321  agg_count_func_name += "_skip_val";
322  cgen_state_->emitCall(agg_count_func_name,
323  {aggregate_state_count, crt_val, window_func_null_val});
324 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 21 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, COUNT, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAST_VALUE, LEAD, LOG, MAX, MIN, NTILE, PERCENT_RANK, RANK, ROW_NUMBER, and SUM.

22  {
23  AUTOMATIC_IR_METADATA(cgen_state_.get());
24  CodeGenerator code_generator(this);
25  const auto window_func_context =
26  WindowProjectNodeContext::get(this)->activateWindowFunctionContext(this,
27  target_index);
28  const auto window_func = window_func_context->getWindowFunction();
29  switch (window_func->getKind()) {
30  case SqlWindowFunctionKind::ROW_NUMBER:
31  case SqlWindowFunctionKind::RANK:
32  case SqlWindowFunctionKind::DENSE_RANK:
33  case SqlWindowFunctionKind::NTILE: {
34  return cgen_state_->emitCall("row_number_window_func",
35  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
36  window_func_context->output())),
37  code_generator.posArg(nullptr)});
38  }
39  case SqlWindowFunctionKind::PERCENT_RANK:
40  case SqlWindowFunctionKind::CUME_DIST: {
41  return cgen_state_->emitCall("percent_window_func",
42  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
43  window_func_context->output())),
44  code_generator.posArg(nullptr)});
45  }
46  case SqlWindowFunctionKind::LAG:
47  case SqlWindowFunctionKind::LEAD:
48  case SqlWindowFunctionKind::FIRST_VALUE:
49  case SqlWindowFunctionKind::LAST_VALUE: {
51  const auto& args = window_func->getArgs();
52  CHECK(!args.empty());
53  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
54  CHECK_EQ(arg_lvs.size(), size_t(1));
55  return arg_lvs.front();
56  }
57  case SqlWindowFunctionKind::AVG:
58  case SqlWindowFunctionKind::MIN:
59  case SqlWindowFunctionKind::MAX:
60  case SqlWindowFunctionKind::SUM:
61  case SqlWindowFunctionKind::COUNT: {
62  return codegenWindowFunctionAggregate(co);
63  }
64  default: {
65  LOG(FATAL) << "Invalid window function kind";
66  }
67  }
68  return nullptr;
69 }
#define CHECK_EQ(x, y)
Definition: Logger.h:231
#define LOG(tag)
Definition: Logger.h:217
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
static const WindowProjectNodeContext * get(Executor *executor)
const WindowFunctionContext * activateWindowFunctionContext(Executor *executor, const size_t target_index) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:223
const Analyzer::WindowFunction * getWindowFunction() const

llvm::Value * Executor::codegenWindowFunctionAggregate ( const CompilationOptions co)
private

Definition at line 140 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, CHECK, WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().

140  {
141  AUTOMATIC_IR_METADATA(cgen_state_.get());
142  const auto reset_state_false_bb = codegenWindowResetStateControlFlow();
143  auto aggregate_state = aggregateWindowStatePtr();
144  llvm::Value* aggregate_state_count = nullptr;
145  const auto window_func_context =
146  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
147  const auto window_func = window_func_context->getWindowFunction();
148  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
149  const auto aggregate_state_count_i64 = cgen_state_->llInt(
150  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
151  const auto pi64_type =
152  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
153  aggregate_state_count =
154  cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_count_i64, pi64_type);
155  }
156  codegenWindowFunctionStateInit(aggregate_state);
157  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
158  const auto count_zero = cgen_state_->llInt(int64_t(0));
159  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
160  }
161  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
162  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
163  CHECK(WindowProjectNodeContext::get(this));
164  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
165 }

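Per row, the generated control flow amounts to: reset the aggregate state (and, for AVG, a separate count state) whenever the partition-start bitset marks the current position, then run the aggregate call. The sketch below models that behavior in plain C++ so the block structure is easier to follow; bit_is_set, the buffers, and the data are all stand-ins invented for the example.

#include <cstdint>
#include <iostream>
#include <vector>

// Models the bit_is_set runtime helper: one bit per row, set at partition starts.
bool bit_is_set(const std::vector<uint8_t>& bitset, int64_t pos) {
  return (bitset[pos >> 3] >> (pos & 7)) & 1;
}

int main() {
  // Two partitions: rows 0-2 and rows 3-4 (bits 0 and 3 are set).
  const std::vector<uint8_t> partition_start = {0b00001001};
  const std::vector<int64_t> column = {1, 2, 3, 10, 20};
  int64_t sum_state = 0;    // models the aggregate state buffer
  int64_t count_state = 0;  // models the extra AVG count state
  for (int64_t pos = 0; pos < 5; ++pos) {
    if (bit_is_set(partition_start, pos)) {  // "reset_state.true" block
      sum_state = 0;
      count_state = 0;
    }
    // "reset_state.false" block: the aggregate calls proper.
    sum_state += column[pos];
    count_state += 1;
    std::cout << "pos=" << pos
              << " running AVG=" << double(sum_state) / count_state << '\n';
  }
}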
llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions &  co 
)
private

Definition at line 246 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kFLOAT, and SUM.

llvm::Value* Executor::codegenWindowFunctionAggregateCalls(llvm::Value* aggregate_state,
                                                           const CompilationOptions& co) {
  AUTOMATIC_IR_METADATA(cgen_state_.get());
  const auto window_func_context =
      WindowProjectNodeContext::getActiveWindowFunctionContext(this);
  const auto window_func = window_func_context->getWindowFunction();
  const auto window_func_ti = get_adjusted_window_type_info(window_func);
  const auto window_func_null_val =
      window_func_ti.is_fp()
          ? cgen_state_->inlineFpNull(window_func_ti)
          : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
  const auto& args = window_func->getArgs();
  llvm::Value* crt_val;
  if (args.empty()) {
    CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
    crt_val = cgen_state_->llInt(int64_t(1));
  } else {
    CodeGenerator code_generator(this);
    const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
    CHECK_EQ(arg_lvs.size(), size_t(1));
    if (window_func->getKind() == SqlWindowFunctionKind::SUM && !window_func_ti.is_fp()) {
      crt_val = code_generator.codegenCastBetweenIntTypes(
          arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
    } else {
      crt_val = window_func_ti.get_type() == kFLOAT
                    ? arg_lvs.front()
                    : cgen_state_->castToTypeIn(arg_lvs.front(), 64);
    }
  }
  const auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
  llvm::Value* multiplicity_lv = nullptr;
  if (args.empty()) {
    cgen_state_->emitCall(agg_name, {aggregate_state, crt_val});
  } else {
    cgen_state_->emitCall(agg_name + "_skip_val",
                          {aggregate_state, crt_val, window_func_null_val});
  }
  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
    codegenWindowAvgEpilogue(crt_val, window_func_null_val, multiplicity_lv);
  }
  return codegenAggregateWindowState();
}

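The helper name is assembled from the aggregate kind, an optional floating-point type suffix, and a "_skip_val" suffix when a null sentinel must be skipped (visible in the emitCall above). The sketch below restates that convention; it is an illustration inferred from this listing, not the actual get_window_agg_name() implementation.

#include <iostream>
#include <string>

std::string window_agg_helper_name(const std::string& base,  // e.g. "agg_sum"
                                   bool is_fp,
                                   bool is_float,
                                   bool null_aware) {
  std::string name = base;
  if (is_fp) {
    name += is_float ? "_float" : "_double";  // type-suffixed variants
  }
  if (null_aware) {
    name += "_skip_val";  // takes an extra trailing null-value argument
  }
  return name;
}

int main() {
  std::cout << window_agg_helper_name("agg_count", false, false, false) << '\n';
  std::cout << window_agg_helper_name("agg_sum", true, false, true) << '\n';
}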
void Executor::codegenWindowFunctionStateInit ( llvm::Value *  aggregate_state)
private

Definition at line 196 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

void Executor::codegenWindowFunctionStateInit(llvm::Value* aggregate_state) {
  AUTOMATIC_IR_METADATA(cgen_state_.get());
  const auto window_func_context =
      WindowProjectNodeContext::getActiveWindowFunctionContext(this);
  const auto window_func = window_func_context->getWindowFunction();
  const auto window_func_ti = get_adjusted_window_type_info(window_func);
  const auto window_func_null_val =
      window_func_ti.is_fp()
          ? cgen_state_->inlineFpNull(window_func_ti)
          : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
  llvm::Value* window_func_init_val;
  if (window_func_context->getWindowFunction()->getKind() ==
      SqlWindowFunctionKind::COUNT) {
    switch (window_func_ti.get_type()) {
      case kFLOAT: {
        window_func_init_val = cgen_state_->llFp(float(0));
        break;
      }
      case kDOUBLE: {
        window_func_init_val = cgen_state_->llFp(double(0));
        break;
      }
      default: {
        window_func_init_val = cgen_state_->llInt(int64_t(0));
        break;
      }
    }
  } else {
    window_func_init_val = window_func_null_val;
  }
  const auto pi32_type =
      llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
  switch (window_func_ti.get_type()) {
    case kDOUBLE: {
      cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
      break;
    }
    case kFLOAT: {
      aggregate_state =
          cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
      cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
      break;
    }
    default: {
      cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
      break;
    }
  }
}

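The initialization rule above is: COUNT starts from zero, every other window aggregate starts from the type's null sentinel, which pairs with the "_skip_val" aggregate variants so that a partition with no accepted values yields NULL. A compact model of the rule, with an illustrative sentinel standing in for inlineIntNull():

#include <cstdint>
#include <iostream>
#include <limits>

int64_t window_init_val_i64(bool is_count) {
  // COUNT begins at 0; other aggregates begin at a reserved NULL sentinel.
  return is_count ? 0 : std::numeric_limits<int64_t>::min();
}

int main() {
  std::cout << "COUNT init: " << window_init_val_i64(true) << '\n';
  std::cout << "SUM init (null sentinel): " << window_init_val_i64(false) << '\n';
}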
llvm::BasicBlock * Executor::codegenWindowResetStateControlFlow ( )
private

Definition at line 167 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, WindowProjectNodeContext::getActiveWindowFunctionContext(), CodeGenerator::posArg(), and CodeGenerator::toBool().

llvm::BasicBlock* Executor::codegenWindowResetStateControlFlow() {
  AUTOMATIC_IR_METADATA(cgen_state_.get());
  const auto window_func_context =
      WindowProjectNodeContext::getActiveWindowFunctionContext(this);
  const auto bitset = cgen_state_->llInt(
      reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
  const auto min_val = cgen_state_->llInt(int64_t(0));
  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
  CodeGenerator code_generator(this);
  const auto reset_state =
      code_generator.toBool(cgen_state_->emitCall("bit_is_set",
                                                  {bitset,
                                                   code_generator.posArg(nullptr),
                                                   min_val,
                                                   max_val,
                                                   null_val,
                                                   null_bool_val}));
  const auto reset_state_true_bb = llvm::BasicBlock::Create(
      cgen_state_->context_, "reset_state.true", cgen_state_->current_func_);
  const auto reset_state_false_bb = llvm::BasicBlock::Create(
      cgen_state_->context_, "reset_state.false", cgen_state_->current_func_);
  cgen_state_->ir_builder_.CreateCondBr(
      reset_state, reset_state_true_bb, reset_state_false_bb);
  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
  return reset_state_false_bb;
}

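Note the asymmetry in the return value: the method leaves the IR insert point inside "reset_state.true" but returns "reset_state.false". The caller (codegenWindowFunctionAggregate) therefore emits the state-reset code first, then branches to and continues in the false block. Sketched as commented steps (plain C++, not IRBuilder calls; the Block type is invented):

#include <iostream>
#include <string>

struct Block { std::string name; };

int main() {
  Block reset_true{"reset_state.true"};    // insert point on return
  Block reset_false{"reset_state.false"};  // returned to the caller
  // Caller's sequence:
  //   1. emit state initialization into reset_state.true
  //   2. emit an unconditional branch reset_state.true -> reset_state.false
  //   3. move the insert point to reset_state.false and emit aggregate calls
  std::cout << "emit into " << reset_true.name << ", continue in "
            << reset_false.name << '\n';
}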
ResultSetPtr Executor::collectAllDeviceResults ( SharedKernelContext &  shared_context,
const RelAlgExecutionUnit &  ra_exe_unit,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)
private

Definition at line 2327 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), catalog_, collectAllDeviceShardedTopResults(), DEBUG_TIMER, SharedKernelContext::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, reduceMultiDeviceResults(), reduceSpeculativeTopN(), GroupByAndAggregate::shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and use_speculative_top_n().

Referenced by executeWorkUnitImpl().

ResultSetPtr Executor::collectAllDeviceResults(
    SharedKernelContext& shared_context,
    const RelAlgExecutionUnit& ra_exe_unit,
    const QueryMemoryDescriptor& query_mem_desc,
    const ExecutorDeviceType device_type,
    std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
  auto timer = DEBUG_TIMER(__func__);
  auto& result_per_device = shared_context.getFragmentResults();
  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
                                       QueryDescriptionType::NonGroupedAggregate) {
    return build_row_for_empty_input(
        ra_exe_unit.target_exprs, query_mem_desc, device_type);
  }
  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
    try {
      return reduceSpeculativeTopN(
          ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
    } catch (const std::bad_alloc&) {
      throw SpeculativeTopNFailed("Failed during multi-device reduction.");
    }
  }
  const auto shard_count =
      device_type == ExecutorDeviceType::GPU
          ? GroupByAndAggregate::shard_count_for_top_groups(ra_exe_unit, *catalog_)
          : 0;

  if (shard_count && !result_per_device.empty()) {
    return collectAllDeviceShardedTopResults(shared_context, ra_exe_unit);
  }
  return reduceMultiDeviceResults(
      ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
}

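Restated, the reduction strategy is chosen in this order: synthesize a single row for empty non-grouped aggregates, try the speculative top-n reduction (falling back if it exhausts memory), merge per-shard top-n results on GPU sharded top queries, and otherwise run the general multi-device reduction. The sketch below encodes just that decision order; ReductionInput and the string results are stand-ins invented for the example.

#include <cstddef>
#include <iostream>
#include <string>

struct ReductionInput {
  bool results_empty = false;
  bool non_grouped_aggregate = false;
  bool speculative_top_n = false;   // use_speculative_top_n(...)
  std::size_t gpu_shard_count = 0;  // shard_count_for_top_groups(...) on GPU, else 0
};

std::string pick_reduction(const ReductionInput& in) {
  if (in.results_empty && in.non_grouped_aggregate) {
    return "build_row_for_empty_input";  // e.g. COUNT(*) over zero fragments
  }
  if (in.speculative_top_n) {
    return "reduceSpeculativeTopN";  // may throw SpeculativeTopNFailed
  }
  if (in.gpu_shard_count > 0 && !in.results_empty) {
    return "collectAllDeviceShardedTopResults";  // per-shard top-n merge
  }
  return "reduceMultiDeviceResults";  // general case
}

int main() {
  std::cout << pick_reduction({false, false, false, 4}) << '\n';
}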
ResultSetPtr Executor::collectAllDeviceShardedTopResults ( SharedKernelContext &  shared_context,
const RelAlgExecutionUnit &  ra_exe_unit 
) const
private

Definition at line 2442 of file Execute.cpp.

References blockSize(), catalog_, CHECK, CHECK_EQ, CHECK_LE, SharedKernelContext::getFragmentResults(), gridSize(), SortInfo::limit, SortInfo::offset, SortInfo::order_entries, anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), and RelAlgExecutionUnit::sort_info.

Referenced by collectAllDeviceResults().

ResultSetPtr Executor::collectAllDeviceShardedTopResults(
    SharedKernelContext& shared_context,
    const RelAlgExecutionUnit& ra_exe_unit) const {
  auto& result_per_device = shared_context.getFragmentResults();
  const auto first_result_set = result_per_device.front().first;
  CHECK(first_result_set);
  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
  const auto top_n = ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
  top_query_mem_desc.setEntryCount(0);
  for (auto& result : result_per_device) {
    const auto result_set = result.first;
    CHECK(result_set);
    result_set->sort(ra_exe_unit.sort_info.order_entries, top_n, this);
    size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
    top_query_mem_desc.setEntryCount(new_entry_cnt);
  }
  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
                                                    first_result_set->getDeviceType(),
                                                    top_query_mem_desc,
                                                    first_result_set->getRowSetMemOwner(),
                                                    catalog_,
                                                    blockSize(),
                                                    gridSize());
  auto top_storage = top_result_set->allocateStorage();
  size_t top_output_row_idx{0};
  for (auto& result : result_per_device) {
    const auto result_set = result.first;
    CHECK(result_set);
    const auto& top_permutation = result_set->getPermutationBuffer();
    CHECK_LE(top_permutation.size(), top_n);
    if (top_query_mem_desc.didOutputColumnar()) {
      top_output_row_idx = permute_storage_columnar(result_set->getStorage(),
                                                    result_set->getQueryMemDesc(),
                                                    top_storage,
                                                    top_output_row_idx,
                                                    top_query_mem_desc,
                                                    top_permutation);
    } else {
      top_output_row_idx = permute_storage_row_wise(result_set->getStorage(),
                                                    top_storage,
                                                    top_output_row_idx,
                                                    top_query_mem_desc,
                                                    top_permutation);
    }
  }
  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
  return top_result_set;
}

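The merge works because each shard already holds a superset of the global top-n after its local sort: every per-device result is sorted and truncated to limit + offset rows, then appended into one output buffer in permutation order. A toy model with std::vector standing in for ResultSet storage (the real code permutes columnar or row-wise buffers instead of sorting ints):

#include <algorithm>
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

int main() {
  const std::size_t top_n = 3;  // limit + offset
  std::vector<std::vector<int>> per_device = {{5, 1, 9}, {7, 2}, {8, 8, 0, 4}};
  std::vector<int> merged;
  for (auto& device_rows : per_device) {
    std::sort(device_rows.begin(), device_rows.end(), std::greater<int>());
    if (device_rows.size() > top_n) {
      device_rows.resize(top_n);  // local top-n, like ResultSet::sort(..., top_n)
    }
    merged.insert(merged.end(), device_rows.begin(), device_rows.end());
  }
  // merged now contains every global top-n candidate; the final ordering and
  // limit are applied later on the combined result set.
  for (int v : merged) {
    std::cout << v << ' ';
  }
  std::cout << '\n';
}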
bool Executor::compileBody ( const RelAlgExecutionUnit &  ra_exe_unit,
GroupByAndAggregate &  group_by_and_aggregate,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context = {} 
)
private

Definition at line 3203 of file NativeCodegen.cpp.

bool Executor::compileBody(const RelAlgExecutionUnit& ra_exe_unit,
                           GroupByAndAggregate& group_by_and_aggregate,
                           QueryMemoryDescriptor& query_mem_desc,
                           const CompilationOptions& co,
                           const GpuSharedMemoryContext& gpu_smem_context) {
  AUTOMATIC_IR_METADATA(cgen_state_.get());

  // Switch the code generation into a separate filter function if enabled.
  // Note that accesses to function arguments are still codegenned from the
  // row function's arguments, then later automatically forwarded and
  // remapped into filter function arguments by redeclareFilterFunction().
  cgen_state_->row_func_bb_ = cgen_state_->ir_builder_.GetInsertBlock();
  llvm::Value* loop_done{nullptr};
  std::unique_ptr<Executor::FetchCacheAnchor> fetch_cache_anchor;
  if (cgen_state_->filter_func_) {
    if (cgen_state_->row_func_bb_->getName() == "loop_body") {
      auto row_func_entry_bb = &cgen_state_->row_func_->getEntryBlock();
      cgen_state_->ir_builder_.SetInsertPoint(row_func_entry_bb,
                                              row_func_entry_bb->begin());
      loop_done = cgen_state_->ir_builder_.CreateAlloca(
          get_int_type(1, cgen_state_->context_), nullptr, "loop_done");
      cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
      cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(true), loop_done);
    }
    cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->filter_func_bb_);
    cgen_state_->current_func_ = cgen_state_->filter_func_;
    fetch_cache_anchor = std::make_unique<Executor::FetchCacheAnchor>(cgen_state_.get());
  }

  // generate the code for the filter
  std::vector<Analyzer::Expr*> primary_quals;
  std::vector<Analyzer::Expr*> deferred_quals;
  bool short_circuited = CodeGenerator::prioritizeQuals(
      ra_exe_unit, primary_quals, deferred_quals, plan_state_->hoisted_filters_);
  if (short_circuited) {
    VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
            << "short-circuited and deferred " << std::to_string(deferred_quals.size())
            << " quals";
  }
  llvm::Value* filter_lv = cgen_state_->llBool(true);
  CodeGenerator code_generator(this);
  for (auto expr : primary_quals) {
    // Generate the filter for primary quals
    auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
    filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
  }
  CHECK(filter_lv->getType()->isIntegerTy(1));
  llvm::BasicBlock* sc_false{nullptr};
  if (!deferred_quals.empty()) {
    auto sc_true = llvm::BasicBlock::Create(
        cgen_state_->context_, "sc_true", cgen_state_->current_func_);
    sc_false = llvm::BasicBlock::Create(
        cgen_state_->context_, "sc_false", cgen_state_->current_func_);
    cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
    cgen_state_->ir_builder_.SetInsertPoint(sc_false);
    if (ra_exe_unit.join_quals.empty()) {
      cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
    }
    cgen_state_->ir_builder_.SetInsertPoint(sc_true);
    filter_lv = cgen_state_->llBool(true);
  }
  for (auto expr : deferred_quals) {
    filter_lv = cgen_state_->ir_builder_.CreateAnd(
        filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
  }

  CHECK(filter_lv->getType()->isIntegerTy(1));
  auto ret = group_by_and_aggregate.codegen(
      filter_lv, sc_false, query_mem_desc, co, gpu_smem_context);

  // Switch the code generation back to the row function if a filter
  // function was enabled.
  if (cgen_state_->filter_func_) {
    if (cgen_state_->row_func_bb_->getName() == "loop_body") {
      cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(false), loop_done);
      cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
    }

    cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
    cgen_state_->current_func_ = cgen_state_->row_func_;
    cgen_state_->filter_func_call_ =
        cgen_state_->ir_builder_.CreateCall(cgen_state_->filter_func_, {});

    // Create real filter function declaration after placeholder call
    // is emitted.
    redeclareFilterFunction();

    if (cgen_state_->row_func_bb_->getName() == "loop_body") {
      auto loop_done_true = llvm::BasicBlock::Create(
          cgen_state_->context_, "loop_done_true", cgen_state_->row_func_);
      auto loop_done_false = llvm::BasicBlock::Create(
          cgen_state_->context_, "loop_done_false", cgen_state_->row_func_);
      auto loop_done_flag = cgen_state_->ir_builder_.CreateLoad(
          loop_done->getType()->getPointerElementType(), loop_done);
      cgen_state_->ir_builder_.CreateCondBr(
          loop_done_flag, loop_done_true, loop_done_false);
      cgen_state_->ir_builder_.SetInsertPoint(loop_done_true);
      cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
      cgen_state_->ir_builder_.SetInsertPoint(loop_done_false);
    } else {
      cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
    }
  }
  return ret;
}
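The qual split is the interesting part: "primary" (cheap) predicates are ANDed unconditionally, while "deferred" (expensive) predicates are only evaluated on the sc_true path, i.e. after every primary qual has passed. The model below mirrors that evaluation order in plain C++; the predicate lists and eval_row() are invented for the example.

#include <functional>
#include <iostream>
#include <vector>

using Qual = std::function<bool(int)>;

bool eval_row(int row,
              const std::vector<Qual>& primary_quals,
              const std::vector<Qual>& deferred_quals) {
  bool filter = true;
  for (const auto& q : primary_quals) {
    const bool pass = q(row);  // evaluated unconditionally, like CreateAnd
    filter = filter && pass;
  }
  if (!filter) {
    return false;  // sc_false block: deferred quals are never evaluated
  }
  for (const auto& q : deferred_quals) {  // sc_true block
    filter = filter && q(row);
  }
  return filter;
}

int main() {
  std::vector<Qual> primary = {[](int r) { return r > 0; }};
  std::vector<Qual> deferred = {[](int r) {
    std::cout << "(expensive qual evaluated)\n";
    return r % 2 == 0;
  }};
  std::cout << eval_row(-1, primary, deferred) << '\n';  // expensive qual skipped
  std::cout << eval_row(4, primary, deferred) << '\n';
}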
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const PlanState::DeletedColumnsMap deleted_cols_map,
const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache,
RenderInfo render_info = nullptr 
)
private

Definition at line 2656 of file NativeCodegen.cpp.

std::tuple<CompilationResult, std::unique_ptr<QueryMemoryDescriptor>>
Executor::compileWorkUnit(const std::vector<InputTableInfo>& query_infos,
                          const PlanState::DeletedColumnsMap& deleted_cols_map,
                          const RelAlgExecutionUnit& ra_exe_unit,
                          const CompilationOptions& co,
                          const ExecutionOptions& eo,
                          const CudaMgr_Namespace::CudaMgr* cuda_mgr,
                          const bool allow_lazy_fetch,
                          std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                          const size_t max_groups_buffer_entry_guess,
                          const int8_t crt_min_byte_width,
                          const bool has_cardinality_estimation,
                          ColumnCacheMap& column_cache,
                          RenderInfo* render_info) {
  auto timer = DEBUG_TIMER(__func__);

  if (co.device_type == ExecutorDeviceType::GPU) {
    const auto cuda_mgr = data_mgr_->getCudaMgr();
    if (!cuda_mgr) {
      throw QueryMustRunOnCpu();
    }
  }

#ifndef NDEBUG
  static std::uint64_t counter = 0;
  ++counter;
  VLOG(1) << "CODEGEN #" << counter << ":";
  LOG(IR) << "CODEGEN #" << counter << ":";
  LOG(PTX) << "CODEGEN #" << counter << ":";
  LOG(ASM) << "CODEGEN #" << counter << ":";
#endif

  // cgenstate_manager uses RAII pattern to manage the live time of
  // CgenState instances.
  Executor::CgenStateManager cgenstate_manager(*this,
                                               allow_lazy_fetch,
                                               query_infos,
                                               deleted_cols_map,
                                               &ra_exe_unit);  // locks compilation_mutex

  addTransientStringLiterals(ra_exe_unit, row_set_mem_owner);

  GroupByAndAggregate group_by_and_aggregate(
      this,
      co.device_type,
      ra_exe_unit,
      query_infos,
      row_set_mem_owner,
      has_cardinality_estimation ? std::optional<int64_t>(max_groups_buffer_entry_guess)
                                 : std::nullopt);
  auto query_mem_desc =
      group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
                                                       max_groups_buffer_entry_guess,
                                                       crt_min_byte_width,
                                                       render_info,
                                                       eo.output_columnar_hint);

  if (query_mem_desc->getQueryDescriptionType() ==
          QueryDescriptionType::GroupByBaselineHash &&
      !has_cardinality_estimation &&
      (!render_info || !render_info->isPotentialInSituRender()) && !eo.just_explain) {
    const auto col_range_info = group_by_and_aggregate.getColRangeInfo();
    throw CardinalityEstimationRequired(col_range_info.max - col_range_info.min);
  }

  const bool output_columnar = query_mem_desc->didOutputColumnar();
  const bool gpu_shared_mem_optimization =
      is_gpu_shared_mem_supported(query_mem_desc.get(),
                                  ra_exe_unit,
                                  cuda_mgr,
                                  co.device_type,
                                  cuda_mgr ? this->blockSize() : 1,
                                  cuda_mgr ? this->numBlocksPerMP() : 1);
  if (gpu_shared_mem_optimization) {
    // disable interleaved bins optimization on the GPU
    query_mem_desc->setHasInterleavedBinsOnGpu(false);
    LOG(DEBUG1) << "GPU shared memory is used for the " +
                       query_mem_desc->queryDescTypeToString() + " query(" +
                       std::to_string(get_shared_memory_size(gpu_shared_mem_optimization,
                                                             query_mem_desc.get())) +
                       " out of " + std::to_string(g_gpu_smem_threshold) + " bytes).";
  }

  const GpuSharedMemoryContext gpu_smem_context(
      get_shared_memory_size(gpu_shared_mem_optimization, query_mem_desc.get()));

  if (co.device_type == ExecutorDeviceType::GPU) {
    const size_t num_count_distinct_descs =
        query_mem_desc->getCountDistinctDescriptorsSize();
    for (size_t i = 0; i < num_count_distinct_descs; i++) {
      const auto& count_distinct_descriptor =
          query_mem_desc->getCountDistinctDescriptor(i);
      if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::UnorderedSet ||
          (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
           !co.hoist_literals)) {
        throw QueryMustRunOnCpu();
      }
    }

    // we currently do not support varlen projection based on baseline groupby when
    // 1) target table is multi-fragmented and 2) multiple gpus are involved for query
    // processing in this case, we punt the query to cpu to avoid server crash
    for (const auto expr : ra_exe_unit.target_exprs) {
      if (auto gby_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
        bool has_multiple_gpus = cuda_mgr ? cuda_mgr->getDeviceCount() > 1 : false;
        if (gby_expr->get_aggtype() == SQLAgg::kSAMPLE && has_multiple_gpus &&
            !g_leaf_count) {
          std::set<const Analyzer::ColumnVar*,
                   bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
              colvar_set(Analyzer::ColumnVar::colvar_comp);
          gby_expr->collect_column_var(colvar_set, true);
          for (const auto cv : colvar_set) {
            if (cv->get_type_info().is_varlen()) {
              const auto tbl_id = cv->get_table_id();
              std::for_each(query_infos.begin(),
                            query_infos.end(),
                            [tbl_id](const InputTableInfo& input_table_info) {
                              if (input_table_info.table_id == tbl_id &&
                                  input_table_info.info.fragments.size() > 1) {
                                throw QueryMustRunOnCpu();
                              }
                            });
            }
          }
        }
      }
    }
  }

  // Read the module template and target either CPU or GPU
  // by binding the stream position functions to the right implementation:
  // stride access for GPU, contiguous for CPU
  CHECK(cgen_state_->module_ == nullptr);
  cgen_state_->set_module_shallow_copy(get_rt_module(), /*always_clone=*/true);

  auto is_gpu = co.device_type == ExecutorDeviceType::GPU;
  if (is_gpu) {
    cgen_state_->module_->setDataLayout(get_gpu_data_layout());
    cgen_state_->module_->setTargetTriple(get_gpu_target_triple_string());
  }
  if (has_udf_module(/*is_gpu=*/is_gpu)) {
    CodeGenerator::link_udf_module(
        get_udf_module(/*is_gpu=*/is_gpu), *cgen_state_->module_, cgen_state_.get());
  }
  if (has_rt_udf_module(/*is_gpu=*/is_gpu)) {
    CodeGenerator::link_udf_module(
        get_rt_udf_module(/*is_gpu=*/is_gpu), *cgen_state_->module_, cgen_state_.get());
  }

  AUTOMATIC_IR_METADATA(cgen_state_.get());

  auto agg_fnames =
      get_agg_fnames(ra_exe_unit.target_exprs, !ra_exe_unit.groupby_exprs.empty());

  const auto agg_slot_count = ra_exe_unit.estimator ? size_t(1) : agg_fnames.size();

  const bool is_group_by{query_mem_desc->isGroupBy()};
  auto [query_func, row_func_call] = is_group_by
                                         ? query_group_by_template(cgen_state_->module_,
                                                                   co.hoist_literals,
                                                                   *query_mem_desc,
                                                                   co.device_type,
                                                                   ra_exe_unit.scan_limit,
                                                                   gpu_smem_context)
                                         : query_template(cgen_state_->module_,
                                                          agg_slot_count,
                                                          co.hoist_literals,
                                                          !!ra_exe_unit.estimator,
                                                          gpu_smem_context);
  bind_pos_placeholders("pos_start", true, query_func, cgen_state_->module_);
  bind_pos_placeholders("group_buff_idx", false, query_func, cgen_state_->module_);
  bind_pos_placeholders("pos_step", false, query_func, cgen_state_->module_);

  cgen_state_->query_func_ = query_func;
  cgen_state_->row_func_call_ = row_func_call;
  cgen_state_->query_func_entry_ir_builder_.SetInsertPoint(
      &query_func->getEntryBlock().front());

  // Generate the function signature and column head fetches s.t.
  // double indirection isn't needed in the inner loop
  auto& fetch_bb = query_func->front();
  llvm::IRBuilder<> fetch_ir_builder(&fetch_bb);
  fetch_ir_builder.SetInsertPoint(&*fetch_bb.begin());
  auto col_heads = generate_column_heads_load(ra_exe_unit.input_col_descs.size(),
                                              query_func->args().begin(),
                                              fetch_ir_builder,
                                              cgen_state_->context_);
  CHECK_EQ(ra_exe_unit.input_col_descs.size(), col_heads.size());

  cgen_state_->row_func_ = create_row_function(ra_exe_unit.input_col_descs.size(),
                                               is_group_by ? 0 : agg_slot_count,
                                               co.hoist_literals,
                                               cgen_state_->module_,
                                               cgen_state_->context_);
  CHECK(cgen_state_->row_func_);
  cgen_state_->row_func_bb_ =
      llvm::BasicBlock::Create(cgen_state_->context_, "entry", cgen_state_->row_func_);

  if (g_enable_filter_function) {
    auto filter_func_ft =
        llvm::FunctionType::get(get_int_type(32, cgen_state_->context_), {}, false);
    cgen_state_->filter_func_ = llvm::Function::Create(filter_func_ft,
                                                       llvm::Function::ExternalLinkage,
                                                       "filter_func",
                                                       cgen_state_->module_);
    CHECK(cgen_state_->filter_func_);
    cgen_state_->filter_func_bb_ = llvm::BasicBlock::Create(
        cgen_state_->context_, "entry", cgen_state_->filter_func_);
  }

  cgen_state_->current_func_ = cgen_state_->row_func_;
  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);

  preloadFragOffsets(ra_exe_unit.input_descs, query_infos);
  RelAlgExecutionUnit body_execution_unit = ra_exe_unit;
  const auto join_loops =
      buildJoinLoops(body_execution_unit, co, eo, query_infos, column_cache);

  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
  for (auto& simple_qual : ra_exe_unit.simple_quals) {
    plan_state_->addSimpleQual(simple_qual);
  }
  const auto is_not_deleted_bb = codegenSkipDeletedOuterTableRow(ra_exe_unit, co);
  if (is_not_deleted_bb) {
    cgen_state_->row_func_bb_ = is_not_deleted_bb;
  }
  if (!join_loops.empty()) {
    codegenJoinLoops(join_loops,
                     body_execution_unit,
                     group_by_and_aggregate,
                     query_func,
                     cgen_state_->row_func_bb_,
                     *(query_mem_desc.get()),
                     co,
                     eo);
  } else {
    const bool can_return_error = compileBody(
        ra_exe_unit, group_by_and_aggregate, *query_mem_desc, co, gpu_smem_context);
    if (can_return_error || cgen_state_->needs_error_check_ || eo.with_dynamic_watchdog ||
        eo.allow_runtime_query_interrupt) {
      createErrorCheckControlFlow(query_func,
                                  eo.with_dynamic_watchdog,
                                  eo.allow_runtime_query_interrupt,
                                  join_loops,
                                  co.device_type,
                                  group_by_and_aggregate.query_infos_);
    }
  }
  std::vector<llvm::Value*> hoisted_literals;

  if (co.hoist_literals) {
    VLOG(1) << "number of hoisted literals: "
            << cgen_state_->query_func_literal_loads_.size()
            << " / literal buffer usage: " << cgen_state_->getLiteralBufferUsage(0)
            << " bytes";
  }

  if (co.hoist_literals && !cgen_state_->query_func_literal_loads_.empty()) {
    // we have some hoisted literals...
    hoisted_literals = inlineHoistedLiterals();
  }

  // replace the row func placeholder call with the call to the actual row func
  std::vector<llvm::Value*> row_func_args;
  for (size_t i = 0; i < cgen_state_->row_func_call_->getNumArgOperands(); ++i) {
    row_func_args.push_back(cgen_state_->row_func_call_->getArgOperand(i));
  }
  row_func_args.insert(row_func_args.end(), col_heads.begin(), col_heads.end());
  row_func_args.push_back(get_arg_by_name(query_func, "join_hash_tables"));
  // push hoisted literals arguments, if any
  row_func_args.insert(
      row_func_args.end(), hoisted_literals.begin(), hoisted_literals.end());
  llvm::ReplaceInstWithInst(
      cgen_state_->row_func_call_,
      llvm::CallInst::Create(cgen_state_->row_func_, row_func_args, ""));

  // replace the filter func placeholder call with the call to the actual filter func
  if (cgen_state_->filter_func_) {
    std::vector<llvm::Value*> filter_func_args;
    for (auto arg_it = cgen_state_->filter_func_args_.begin();
         arg_it != cgen_state_->filter_func_args_.end();
         ++arg_it) {
      filter_func_args.push_back(arg_it->first);
    }
    llvm::ReplaceInstWithInst(
        cgen_state_->filter_func_call_,
        llvm::CallInst::Create(cgen_state_->filter_func_, filter_func_args, ""));
  }

  // Aggregate
  plan_state_->init_agg_vals_ =
      init_agg_val_vec(ra_exe_unit.target_exprs, ra_exe_unit.quals, *query_mem_desc);

  /*
   * If we have decided to use GPU shared memory (decision is not made here), then
   * we generate proper code for extra components that it needs (buffer initialization and
   * gpu reduction from shared memory to global memory). We then replace these functions
   * into the already compiled query_func (replacing two placeholders, write_back_nop and
   * init_smem_nop). The rest of the code should be as before (row_func, etc.).
   */
  if (gpu_smem_context.isSharedMemoryUsed()) {
    if (query_mem_desc->getQueryDescriptionType() ==
        QueryDescriptionType::GroupByPerfectHash) {
      GpuSharedMemCodeBuilder gpu_smem_code(
          cgen_state_->module_,
          cgen_state_->context_,
          *query_mem_desc,
          target_exprs_to_infos(ra_exe_unit.target_exprs, *query_mem_desc),
          plan_state_->init_agg_vals_,
          executor_id_);
      gpu_smem_code.codegen();
      gpu_smem_code.injectFunctionsInto(query_func);

      // helper functions are used for caching purposes later
      cgen_state_->helper_functions_.push_back(gpu_smem_code.getReductionFunction());
      cgen_state_->helper_functions_.push_back(gpu_smem_code.getInitFunction());
      LOG(IR) << gpu_smem_code.toString();
    }
  }

  auto multifrag_query_func = cgen_state_->module_->getFunction(
      "multifrag_query" + std::string(co.hoist_literals ? "_hoisted_literals" : ""));
  CHECK(multifrag_query_func);

  if (eo.allow_runtime_query_interrupt) {
    insertErrorCodeChecker(
        multifrag_query_func, co.hoist_literals, eo.allow_runtime_query_interrupt);
  }

  bind_query(query_func,
             "query_stub" + std::string(co.hoist_literals ? "_hoisted_literals" : ""),
             multifrag_query_func,
             cgen_state_->module_);

  std::vector<llvm::Function*> root_funcs{query_func, cgen_state_->row_func_};
  if (cgen_state_->filter_func_) {
    root_funcs.push_back(cgen_state_->filter_func_);
  }
  auto live_funcs = CodeGenerator::markDeadRuntimeFuncs(
      *cgen_state_->module_, root_funcs, {multifrag_query_func});

  // Always inline the row function and the filter function.
  // We don't want register spills in the inner loops.
  // LLVM seems to correctly free up alloca instructions
  // in these functions even when they are inlined.
  mark_function_always_inline(cgen_state_->row_func_);
  if (cgen_state_->filter_func_) {
    mark_function_always_inline(cgen_state_->filter_func_);
  }

#ifndef NDEBUG
  // Add helpful metadata to the LLVM IR for debugging.
  AUTOMATIC_IR_METADATA_DONE();
#endif

  // Serialize the important LLVM IR functions to text for SQL EXPLAIN.
  std::string llvm_ir;
  if (eo.just_explain) {
    if (co.explain_type == ExecutorExplainType::Optimized) {
#ifdef WITH_JIT_DEBUG
      throw std::runtime_error(
          "Explain optimized not available when JIT runtime debug symbols are enabled");
#else
      // Note that we don't run the NVVM reflect pass here. Use LOG(IR) to get the
      // optimized IR after NVVM reflect
      llvm::legacy::PassManager pass_manager;
      optimize_ir(query_func,
                  cgen_state_->module_,
                  pass_manager,
                  live_funcs,
                  gpu_smem_context.isSharedMemoryUsed(),
                  co);
#endif  // WITH_JIT_DEBUG
    }
    llvm_ir =
        serialize_llvm_object(multifrag_query_func) + serialize_llvm_object(query_func) +
        serialize_llvm_object(cgen_state_->row_func_) +
        (cgen_state_->filter_func_ ? serialize_llvm_object(cgen_state_->filter_func_)
                                   : "");

#ifndef NDEBUG
    llvm_ir += serialize_llvm_metadata_footnotes(query_func, cgen_state_.get());
#endif
  }

  LOG(IR) << "\n\n" << query_mem_desc->toString() << "\n";
  LOG(IR) << "IR for the "
          << (co.device_type == ExecutorDeviceType::CPU ? "CPU:\n" : "GPU:\n");
#ifdef NDEBUG
  LOG(IR) << serialize_llvm_object(query_func)
          << serialize_llvm_object(cgen_state_->row_func_)
          << (cgen_state_->filter_func_ ? serialize_llvm_object(cgen_state_->filter_func_)
                                        : "")
          << "\nEnd of IR";
#else
  LOG(IR) << serialize_llvm_object(cgen_state_->module_) << "\nEnd of IR";
#endif

  // Insert calls to "register_buffer_with_executor_rsm" for allocations
  // in runtime functions (i.e. from RBC) without it
  AutoTrackBuffersInRuntimeIR();

  // Run some basic validation checks on the LLVM IR before code is generated below.
  verify_function_ir(cgen_state_->row_func_);
  if (cgen_state_->filter_func_) {
    verify_function_ir(cgen_state_->filter_func_);
  }

  // Generate final native code from the LLVM IR.
  return std::make_tuple(
      CompilationResult{
          co.device_type == ExecutorDeviceType::CPU
              ? optimizeAndCodegenCPU(query_func, multifrag_query_func, live_funcs, co)
              : optimizeAndCodegenGPU(query_func,
                                      multifrag_query_func,
                                      live_funcs,
                                      is_group_by || ra_exe_unit.estimator,
                                      cuda_mgr,
                                      gpu_smem_context.isSharedMemoryUsed(),
                                      co),
          cgen_state_->getLiterals(),
          output_columnar,
          llvm_ir,
          std::move(gpu_smem_context)},
      std::move(query_mem_desc));
}
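One detail worth calling out from the body above: literal hoisting changes the names of the generated entry points, so lookups and stubs must agree on the suffix. The helper below restates the naming rule exactly as it appears in the listing; the functions themselves are illustrative, not Executor API.

#include <iostream>
#include <string>

std::string multifrag_entry_name(bool hoist_literals) {
  return "multifrag_query" + std::string(hoist_literals ? "_hoisted_literals" : "");
}

std::string query_stub_name(bool hoist_literals) {
  return "query_stub" + std::string(hoist_literals ? "_hoisted_literals" : "");
}

int main() {
  std::cout << multifrag_entry_name(true) << '\n';  // multifrag_query_hoisted_literals
  std::cout << query_stub_name(false) << '\n';      // query_stub
}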
AggregatedColRange Executor::computeColRangesCache ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 4356 of file Execute.cpp.

References catalog_, CHECK, getLeafColumnRange(), Catalog_Namespace::Catalog::getMetadataForColumn(), getTableInfo(), AggregatedColRange::setColRange(), and ExpressionRange::typeSupportsRange().

Referenced by setupCaching().

AggregatedColRange Executor::computeColRangesCache(
    const std::unordered_set<PhysicalInput>& phys_inputs) {
  AggregatedColRange agg_col_range_cache;
  CHECK(catalog_);
  std::unordered_set<int> phys_table_ids;
  for (const auto& phys_input : phys_inputs) {
    phys_table_ids.insert(phys_input.table_id);
  }
  std::vector<InputTableInfo> query_infos;
  for (const int table_id : phys_table_ids) {
    query_infos.emplace_back(InputTableInfo{table_id, getTableInfo(table_id)});
  }
  for (const auto& phys_input : phys_inputs) {
    const auto cd =
        catalog_->getMetadataForColumn(phys_input.table_id, phys_input.col_id);
    CHECK(cd);
    if (ExpressionRange::typeSupportsRange(cd->columnType)) {
      const auto col_var = boost::make_unique<Analyzer::ColumnVar>(
          cd->columnType, phys_input.table_id, phys_input.col_id, 0);
      const auto col_range = getLeafColumnRange(col_var.get(), query_infos, this, false);
      agg_col_range_cache.setColRange(phys_input, col_range);
    }
  }
  return agg_col_range_cache;
}

StringDictionaryGenerations Executor::computeStringDictionaryGenerations ( const std::unordered_set< PhysicalInput > &  phys_inputs)
private

Definition at line 4382 of file Execute.cpp.

References catalog_, CHECK, Catalog_Namespace::Catalog::getMetadataForColumn(), Catalog_Namespace::Catalog::getMetadataForDict(), kENCODING_DICT, anonymous_namespace{Execute.cpp}::prepare_string_dictionaries(), and StringDictionaryGenerations::setGeneration().

Referenced by setupCaching().

StringDictionaryGenerations Executor::computeStringDictionaryGenerations(
    const std::unordered_set<PhysicalInput>& phys_inputs) {
  StringDictionaryGenerations string_dictionary_generations;
  CHECK(catalog_);
  // Foreign tables may have not populated dictionaries for encoded columns. If this is
  // the case then we need to populate them here to make sure that the generations are set
  // correctly.
  prepare_string_dictionaries(phys_inputs, *catalog_);
  for (const auto& phys_input : phys_inputs) {
    const auto cd =
        catalog_->getMetadataForColumn(phys_input.table_id, phys_input.col_id);
    CHECK(cd);
    const auto& col_ti =
        cd->columnType.is_array() ? cd->columnType.get_elem_type() : cd->columnType;
    if (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) {
      const int dict_id = col_ti.get_comp_param();
      const auto dd = catalog_->getMetadataForDict(dict_id);
      CHECK(dd && dd->stringDict);
      string_dictionary_generations.setGeneration(dict_id,
                                                  dd->stringDict->storageEntryCount());
    }
  }
  return string_dictionary_generations;
}

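A "generation" here is simply the dictionary's entry count captured when the query starts, so that lookups during execution ignore strings appended concurrently. A minimal model of that snapshot (the map-based setup is invented for the example; the real code reads storageEntryCount() from the catalog's DictDescriptor):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<int, std::vector<std::string>> dictionaries = {
      {1, {"red", "green"}}, {2, {"a", "b", "c"}}};
  std::map<int, uint64_t> generations;  // models StringDictionaryGenerations
  for (const auto& [dict_id, entries] : dictionaries) {
    generations[dict_id] = entries.size();  // storageEntryCount() at snapshot time
  }
  dictionaries[1].push_back("blue");  // a concurrent append after the snapshot
  std::cout << "dict 1 generation: " << generations[1]
            << ", current size: " << dictionaries[1].size() << '\n';
}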
TableGenerations Executor::computeTableGenerations ( std::unordered_set< int >  phys_table_ids)
private

Definition at line 4407 of file Execute.cpp.

References getTableInfo(), and TableGenerations::setGeneration().

Referenced by setupCaching().

TableGenerations Executor::computeTableGenerations(
    std::unordered_set<int> phys_table_ids) {
  TableGenerations table_generations;
  for (const int table_id : phys_table_ids) {
    const auto table_info = getTableInfo(table_id);
    table_generations.setGeneration(
        table_id,
        TableGeneration{static_cast<int64_t>(table_info.getPhysicalNumTuples()), 0});
  }
  return table_generations;
}

bool Executor::containsLeftDeepOuterJoin ( ) const
inline

Definition at line 559 of file Execute.h.

References cgen_state_.

bool containsLeftDeepOuterJoin() const {
  return cgen_state_->contains_left_deep_outer_join_;
}
void Executor::createErrorCheckControlFlow ( llvm::Function *  query_func,
bool  run_with_dynamic_watchdog,
bool  run_with_allowing_runtime_interrupt,
const std::vector< JoinLoop > &  join_loops,
ExecutorDeviceType  device_type,
const std::vector< InputTableInfo > &  input_table_infos 
)
private

Definition at line 1919 of file NativeCodegen.cpp.

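The most intricate part of this method is the GPU interrupt-frequency heuristic: the number of times each thread increments `pos` is estimated from the outer table size and the launch dimensions, scaled by g_running_query_interrupt_freq, and snapped to a power of two with floor and ceiling corrections. The standalone model below reproduces that arithmetic so it can be tested in isolation; exp_of_two() stands in for shared::getExpOfTwo() and the inputs in main() are example values.

#include <cmath>
#include <cstdint>
#include <iostream>

uint64_t exp_of_two(uint64_t n) {  // floor(log2(n)), stand-in for shared::getExpOfTwo
  uint64_t e = 0;
  while (n >>= 1) {
    ++e;
  }
  return e;
}

uint64_t interrupt_checking_freq(uint64_t num_outer_table_tuples,
                                 unsigned grid_size,
                                 unsigned block_size,
                                 double freq_control_knob) {
  uint64_t freq = 32;  // default when the table size is unknown
  if (num_outer_table_tuples > 0) {
    // pos_step is gridSize * blockSize * 2 (dispatched blocks are doubled),
    // so max_inc approximates the iteration count K per thread.
    auto max_inc = uint64_t(
        std::floor(double(num_outer_table_tuples) / (grid_size * block_size * 2)));
    if (max_inc < 2) {
      max_inc = 2;  // keep the frequency valid (> 0)
    }
    auto calibrated_inc = uint64_t(std::floor(max_inc * (1 - freq_control_knob)));
    freq = uint64_t(std::pow(2, exp_of_two(calibrated_inc)));
    if (freq > max_inc) {
      freq = max_inc / 2;  // keep every thread able to reach the checker
    }
    if (freq < 8) {
      freq = 8;  // avoid checking the interrupt status too often
    }
  }
  return freq;
}

int main() {
  // A 10M-row outer table on an 84x1024 launch with a 0.1 knob -> 32.
  std::cout << interrupt_checking_freq(10'000'000, 84, 1024, 0.1) << '\n';
}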
1925  {
1927 
1928  // check whether the row processing was successful; currently, it can
1929  // fail by running out of group by buffer slots
1930 
1931  if (run_with_dynamic_watchdog && run_with_allowing_runtime_interrupt) {
1932  // when both dynamic watchdog and runtime interrupt turns on
1933  // we use dynamic watchdog
1934  run_with_allowing_runtime_interrupt = false;
1935  }
1936 
1937  {
1938  // disable injecting query interrupt checker if the session info is invalid
1939  mapd_shared_lock<mapd_shared_mutex> session_read_lock(executor_session_mutex_);
1940  if (current_query_session_.empty()) {
1941  run_with_allowing_runtime_interrupt = false;
1942  }
1943  }
1944 
1945  llvm::Value* row_count = nullptr;
1946  if ((run_with_dynamic_watchdog || run_with_allowing_runtime_interrupt) &&
1947  device_type == ExecutorDeviceType::GPU) {
1948  row_count =
1949  find_variable_in_basic_block<llvm::LoadInst>(query_func, ".entry", "row_count");
1950  }
1951 
1952  bool done_splitting = false;
1953  for (auto bb_it = query_func->begin(); bb_it != query_func->end() && !done_splitting;
1954  ++bb_it) {
1955  llvm::Value* pos = nullptr;
1956  for (auto inst_it = bb_it->begin(); inst_it != bb_it->end(); ++inst_it) {
1957  if ((run_with_dynamic_watchdog || run_with_allowing_runtime_interrupt) &&
1958  llvm::isa<llvm::PHINode>(*inst_it)) {
1959  if (inst_it->getName() == "pos") {
1960  pos = &*inst_it;
1961  }
1962  continue;
1963  }
1964  if (!llvm::isa<llvm::CallInst>(*inst_it)) {
1965  continue;
1966  }
1967  auto& row_func_call = llvm::cast<llvm::CallInst>(*inst_it);
1968  if (std::string(row_func_call.getCalledFunction()->getName()) == "row_process") {
1969  auto next_inst_it = inst_it;
1970  ++next_inst_it;
1971  auto new_bb = bb_it->splitBasicBlock(next_inst_it);
1972  auto& br_instr = bb_it->back();
1973  llvm::IRBuilder<> ir_builder(&br_instr);
1974  llvm::Value* err_lv = &*inst_it;
1975  llvm::Value* err_lv_returned_from_row_func = nullptr;
1976  if (run_with_dynamic_watchdog) {
1977  CHECK(pos);
1978  llvm::Value* call_watchdog_lv = nullptr;
1979  if (device_type == ExecutorDeviceType::GPU) {
1980  // In order to make sure all threads within a block see the same barrier,
1981  // only those blocks whose none of their threads have experienced the critical
1982  // edge will go through the dynamic watchdog computation
1983  CHECK(row_count);
1984  auto crit_edge_rem =
1985  (blockSize() & (blockSize() - 1))
1986  ? ir_builder.CreateSRem(
1987  row_count,
1988  cgen_state_->llInt(static_cast<int64_t>(blockSize())))
1989  : ir_builder.CreateAnd(
1990  row_count,
1991  cgen_state_->llInt(static_cast<int64_t>(blockSize() - 1)));
1992  auto crit_edge_threshold = ir_builder.CreateSub(row_count, crit_edge_rem);
1993  crit_edge_threshold->setName("crit_edge_threshold");
1994 
1995  // only those threads where pos < crit_edge_threshold go through dynamic
1996  // watchdog call
1997  call_watchdog_lv =
1998  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, pos, crit_edge_threshold);
1999  } else {
2000  // CPU path: run watchdog for every 64th row
2001  auto dw_predicate = ir_builder.CreateAnd(pos, uint64_t(0x3f));
2002  call_watchdog_lv = ir_builder.CreateICmp(
2003  llvm::ICmpInst::ICMP_EQ, dw_predicate, cgen_state_->llInt(int64_t(0LL)));
2004  }
2005  CHECK(call_watchdog_lv);
2006  auto error_check_bb = bb_it->splitBasicBlock(
2007  llvm::BasicBlock::iterator(br_instr), ".error_check");
2008  auto& watchdog_br_instr = bb_it->back();
2009 
2010  auto watchdog_check_bb = llvm::BasicBlock::Create(
2011  cgen_state_->context_, ".watchdog_check", query_func, error_check_bb);
2012  llvm::IRBuilder<> watchdog_ir_builder(watchdog_check_bb);
2013  auto detected_timeout = watchdog_ir_builder.CreateCall(
2014  cgen_state_->module_->getFunction("dynamic_watchdog"), {});
2015  auto timeout_err_lv = watchdog_ir_builder.CreateSelect(
2016  detected_timeout, cgen_state_->llInt(Executor::ERR_OUT_OF_TIME), err_lv);
2017  watchdog_ir_builder.CreateBr(error_check_bb);
2018 
2019  llvm::ReplaceInstWithInst(
2020  &watchdog_br_instr,
2021  llvm::BranchInst::Create(
2022  watchdog_check_bb, error_check_bb, call_watchdog_lv));
2023  ir_builder.SetInsertPoint(&br_instr);
2024  auto unified_err_lv = ir_builder.CreatePHI(err_lv->getType(), 2);
2025 
2026  unified_err_lv->addIncoming(timeout_err_lv, watchdog_check_bb);
2027  unified_err_lv->addIncoming(err_lv, &*bb_it);
2028  err_lv = unified_err_lv;
2029  } else if (run_with_allowing_runtime_interrupt) {
2030  CHECK(pos);
2031  llvm::Value* call_check_interrupt_lv{nullptr};
2032  llvm::Value* interrupt_err_lv{nullptr};
2033  llvm::BasicBlock* error_check_bb{nullptr};
2034  llvm::BasicBlock* interrupt_check_bb{nullptr};
2035  llvm::Instruction* check_interrupt_br_instr{nullptr};
2036 
2037  auto has_loop_join = std::any_of(
2038  join_loops.begin(), join_loops.end(), [](const JoinLoop& join_loop) {
2039  return join_loop.isNestedLoopJoin();
2040  });
2041  auto codegen_interrupt_checker = [&]() {
2042  error_check_bb = bb_it->splitBasicBlock(llvm::BasicBlock::iterator(br_instr),
2043  ".error_check");
2044  check_interrupt_br_instr = &bb_it->back();
2045 
2046  interrupt_check_bb = llvm::BasicBlock::Create(
2047  cgen_state_->context_, ".interrupt_check", query_func, error_check_bb);
2048  llvm::IRBuilder<> interrupt_checker_ir_builder(interrupt_check_bb);
2049  auto detected_interrupt = interrupt_checker_ir_builder.CreateCall(
2050  cgen_state_->module_->getFunction("check_interrupt"), {});
2051  interrupt_err_lv = interrupt_checker_ir_builder.CreateSelect(
2052  detected_interrupt,
2054  err_lv);
2055  interrupt_checker_ir_builder.CreateBr(error_check_bb);
2056  };
2057  if (has_loop_join) {
2058  codegen_interrupt_checker();
2059  CHECK(interrupt_check_bb);
2060  CHECK(check_interrupt_br_instr);
2061  llvm::ReplaceInstWithInst(check_interrupt_br_instr,
2062  llvm::BranchInst::Create(interrupt_check_bb));
2063  ir_builder.SetInsertPoint(&br_instr);
2064  err_lv = interrupt_err_lv;
2065  } else {
2066  if (device_type == ExecutorDeviceType::GPU) {
2067  // approximate how many times the %pos variable
2068  // is increased --> the number of iteration
2069  // here we calculate the # bit shift by considering grid/block/fragment
2070  // sizes since if we use the fixed one (i.e., per 64-th increment) some CUDA
2071  // threads cannot enter the interrupt checking block depending on the
2072  // fragment size --> a thread may not take care of 64 threads if an outer
2073  // table is not sufficiently large, and so cannot be interrupted
2074  int32_t num_shift_by_gridDim = shared::getExpOfTwo(gridSize());
2075  int32_t num_shift_by_blockDim = shared::getExpOfTwo(blockSize());
2076  int64_t total_num_shift = num_shift_by_gridDim + num_shift_by_blockDim;
2077  uint64_t interrupt_checking_freq = 32;
2078  auto freq_control_knob = g_running_query_interrupt_freq;
2079  CHECK_GT(freq_control_knob, 0);
2080  CHECK_LE(freq_control_knob, 1.0);
2081  if (!input_table_infos.empty()) {
2082  const auto& outer_table_info = *input_table_infos.begin();
2083  auto num_outer_table_tuples =
2084  outer_table_info.info.getFragmentNumTuplesUpperBound();
2085  if (num_outer_table_tuples > 0) {
2086  // gridSize * blockSize --> pos_step (index of the next row per thread)
2087  // we additionally multiply pos_step by two since the number of
2088  // dispatched blocks is double the gridSize
2089  // # tuples (of fragment) / pos_step --> maximum # increments (K)
2090  // we also multiply K by (1 - freq_control_knob) to control the frequency:
2091  // to check the interrupt status more frequently, make K
2092  // smaller
2093  auto max_inc = uint64_t(
2094  floor(num_outer_table_tuples / (gridSize() * blockSize() * 2)));
2095  if (max_inc < 2) {
2096  // `max_inc` is too small, so this correction is necessary to keep
2097  // `interrupt_checking_freq` valid (i.e., larger than zero)
2098  max_inc = 2;
2099  }
2100  auto calibrated_inc =
2101  uint64_t(floor(max_inc * (1 - freq_control_knob)));
2102  interrupt_checking_freq =
2103  uint64_t(pow(2, shared::getExpOfTwo(calibrated_inc)));
2104  // handle the case where interrupt_checking_freq > K:
2105  // some threads would then never branch to the interrupt checker,
2106  // so we instead use a smaller frequency close to max_inc
2107  if (interrupt_checking_freq > max_inc) {
2108  interrupt_checking_freq = max_inc / 2;
2109  }
2110  if (interrupt_checking_freq < 8) {
2111  // such a small freq would check the interrupt status too frequently,
2112  // so we clamp it to a reasonable minimum value
2113  interrupt_checking_freq = 8;
2114  }
2115  }
2116  }
2117  VLOG(1) << "Set the running query interrupt checking frequency: "
2118  << interrupt_checking_freq;
2119  // check the interrupt flag for every interrupt_checking_freq-th iteration
2120  llvm::Value* pos_shifted_per_iteration =
2121  ir_builder.CreateLShr(pos, cgen_state_->llInt(total_num_shift));
2122  auto interrupt_predicate = ir_builder.CreateAnd(pos_shifted_per_iteration,
2123  interrupt_checking_freq);
2124  call_check_interrupt_lv =
2125  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
2126  interrupt_predicate,
2127  cgen_state_->llInt(int64_t(0LL)));
2128  } else {
2129  // CPU path: run interrupt checker for every 64th row
2130  auto interrupt_predicate = ir_builder.CreateAnd(pos, uint64_t(0x3f));
2131  call_check_interrupt_lv =
2132  ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
2133  interrupt_predicate,
2134  cgen_state_->llInt(int64_t(0LL)));
2135  }
2136  codegen_interrupt_checker();
2137  CHECK(call_check_interrupt_lv);
2138  CHECK(interrupt_err_lv);
2139  CHECK(interrupt_check_bb);
2140  CHECK(error_check_bb);
2141  CHECK(check_interrupt_br_instr);
2142  llvm::ReplaceInstWithInst(
2143  check_interrupt_br_instr,
2144  llvm::BranchInst::Create(
2145  interrupt_check_bb, error_check_bb, call_check_interrupt_lv));
2146  ir_builder.SetInsertPoint(&br_instr);
2147  auto unified_err_lv = ir_builder.CreatePHI(err_lv->getType(), 2);
2148 
2149  unified_err_lv->addIncoming(interrupt_err_lv, interrupt_check_bb);
2150  unified_err_lv->addIncoming(err_lv, &*bb_it);
2151  err_lv = unified_err_lv;
2152  }
2153  }
2154  if (!err_lv_returned_from_row_func) {
2155  err_lv_returned_from_row_func = err_lv;
2156  }
2157  if (device_type == ExecutorDeviceType::GPU && g_enable_dynamic_watchdog) {
2158  // let kernel execution finish as expected regardless of the observed error,
2159  // unless the error comes from the dynamic watchdog, in which case all
2160  // threads within that block return together.
2161  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_EQ,
2162  err_lv,
2163  cgen_state_->llInt(Executor::ERR_OUT_OF_TIME));
2164  } else {
2165  err_lv = ir_builder.CreateICmp(llvm::ICmpInst::ICMP_NE,
2166  err_lv,
2167  cgen_state_->llInt(static_cast<int32_t>(0)));
2168  }
2169  auto error_bb = llvm::BasicBlock::Create(
2170  cgen_state_->context_, ".error_exit", query_func, new_bb);
2171  const auto error_code_arg = get_arg_by_name(query_func, "error_code");
2172  llvm::CallInst::Create(
2173  cgen_state_->module_->getFunction("record_error_code"),
2174  std::vector<llvm::Value*>{err_lv_returned_from_row_func, error_code_arg},
2175  "",
2176  error_bb);
2177  llvm::ReturnInst::Create(cgen_state_->context_, error_bb);
2178  llvm::ReplaceInstWithInst(&br_instr,
2179  llvm::BranchInst::Create(error_bb, new_bb, err_lv));
2180  done_splitting = true;
2181  break;
2182  }
2183  }
2184  }
2185  CHECK(done_splitting);
2186 }
static mapd_shared_mutex executor_session_mutex_
Definition: Execute.h:1303
double g_running_query_interrupt_freq
Definition: Execute.cpp:129
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1351
QuerySessionId current_query_session_
Definition: Execute.h:1305
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1222
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:80
#define CHECK_GT(x, y)
Definition: Logger.h:235
unsigned getExpOfTwo(unsigned n)
Definition: MathUtils.cpp:23
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1350
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LE(x, y)
Definition: Logger.h:234
unsigned gridSize() const
Definition: Execute.cpp:3848
#define CHECK(condition)
Definition: Logger.h:223
unsigned blockSize() const
Definition: Execute.cpp:3862
#define VLOG(n)
Definition: Logger.h:317
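The GPU branch above packs the frequency heuristic into IR-builder calls, which makes the arithmetic hard to see. The following standalone sketch restates the same calculation in plain C++; the function name, parameters, and the sample inputs in main() are illustrative, not part of the source:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Standalone restatement of the interrupt-frequency heuristic above.
// Inputs mirror gridSize(), blockSize(), the outer fragment's tuple-count
// upper bound, and g_running_query_interrupt_freq (the "knob" in (0, 1]).
uint64_t interrupt_checking_freq(uint64_t num_outer_table_tuples,
                                 uint64_t grid_size,
                                 uint64_t block_size,
                                 double freq_control_knob) {
  uint64_t freq = 32;  // default when no tuple-count estimate is available
  if (num_outer_table_tuples > 0) {
    // pos_step: each thread strides by gridSize * blockSize * 2 rows,
    // since twice as many blocks as the grid size are dispatched
    auto max_inc = num_outer_table_tuples / (grid_size * block_size * 2);
    max_inc = std::max<uint64_t>(max_inc, 2);  // keep the result non-zero
    const auto calibrated_inc =
        uint64_t(std::floor(max_inc * (1 - freq_control_knob)));
    // round down to a power of two, as pow(2, getExpOfTwo(calibrated_inc))
    freq = calibrated_inc ? uint64_t(1) << uint64_t(std::log2(calibrated_inc))
                          : 1;
    if (freq > max_inc) {
      freq = max_inc / 2;  // every thread must be able to reach the checker
    }
    freq = std::max<uint64_t>(freq, 8);  // avoid checking too often
  }
  return freq;
}

int main() {
  // e.g., a 10M-row fragment on 80 blocks x 1024 threads with knob = 0.1:
  // max_inc = 61, calibrated_inc = 54, freq = 2^5 = 32
  std::cout << interrupt_checking_freq(10'000'000, 80, 1024, 0.1) << "\n";
}

On the CPU path, by contrast, the mask is simply fixed at 0x3f, i.e., the interrupt flag is checked on every 64th row.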
std::vector< std::unique_ptr< ExecutionKernel > > Executor::createKernels ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
ColumnFetcher column_fetcher,
const std::vector< InputTableInfo > &  table_infos,
const ExecutionOptions eo,
const bool  is_agg,
const bool  allow_single_frag_table_opt,
const size_t  context_count,
const QueryCompilationDescriptor query_comp_desc,
const QueryMemoryDescriptor query_mem_desc,
RenderInfo render_info,
std::unordered_set< int > &  available_gpus,
int &  available_cpus 
)
private

Determines execution dispatch mode and required fragments for a given query step, then creates kernels to execute the query and returns them for launch.

Definition at line 2518 of file Execute.cpp.

References ExecutionOptions::allow_multifrag, catalog_, CHECK, CHECK_GE, CHECK_GT, anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), data_mgr_, deviceCount(), g_inner_join_fragment_skipping, getColLazyFetchInfo(), QueryCompilationDescriptor::getDeviceType(), QueryMemoryDescriptor::getEntryCount(), SharedKernelContext::getFragOffsets(), Data_Namespace::DataMgr::getMemoryInfo(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, ExecutionOptions::gpu_input_mem_limit_percent, Data_Namespace::GPU_LEVEL, anonymous_namespace{Execute.cpp}::has_lazy_fetched_columns(), logger::INFO, RelAlgExecutionUnit::input_descs, KernelPerFragment, LOG, MultifragmentKernel, ExecutionOptions::outer_fragment_indices, plan_state_, Projection, query_mem_desc, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::toString(), RelAlgExecutionUnit::use_bump_allocator, VLOG, and ExecutionOptions::with_watchdog.

Referenced by executeWorkUnitImpl().

2531  {
2532  std::vector<std::unique_ptr<ExecutionKernel>> execution_kernels;
2533 
2534  QueryFragmentDescriptor fragment_descriptor(
2535  ra_exe_unit,
2536  table_infos,
2537  query_comp_desc.getDeviceType() == ExecutorDeviceType::GPU
2538  ? data_mgr_->getMemoryInfo(Data_Namespace::GPU_LEVEL)
2539  : std::vector<Data_Namespace::MemoryInfo>{},
2540  eo.gpu_input_mem_limit_percent,
2541  eo.outer_fragment_indices);
2542  CHECK(!ra_exe_unit.input_descs.empty());
2543 
2544  const auto device_type = query_comp_desc.getDeviceType();
2545  const bool uses_lazy_fetch =
2546  plan_state_->allow_lazy_fetch_ &&
2547  has_lazy_fetched_columns(getColLazyFetchInfo(ra_exe_unit.target_exprs));
2548  const bool use_multifrag_kernel = (device_type == ExecutorDeviceType::GPU) &&
2549  eo.allow_multifrag && (!uses_lazy_fetch || is_agg);
2550  const auto device_count = deviceCount(device_type);
2551  CHECK_GT(device_count, 0);
2552 
2553  fragment_descriptor.buildFragmentKernelMap(ra_exe_unit,
2554  shared_context.getFragOffsets(),
2555  device_count,
2556  device_type,
2557  use_multifrag_kernel,
2558  g_inner_join_fragment_skipping,
2559  this);
2560  if (eo.with_watchdog && fragment_descriptor.shouldCheckWorkUnitWatchdog()) {
2561  checkWorkUnitWatchdog(ra_exe_unit, table_infos, *catalog_, device_type, device_count);
2562  }
2563 
2564  if (use_multifrag_kernel) {
2565  VLOG(1) << "Creating multifrag execution kernels";
2566  VLOG(1) << query_mem_desc.toString();
2567 
2568  // NB: We should never be on this path when the query is retried because of running
2569  // out of group by slots; also, for scan only queries on CPU we want the
2570  // high-granularity, fragment by fragment execution instead. For scan only queries on
2571  // GPU, we want the multifrag kernel path to save the overhead of allocating an output
2572  // buffer per fragment.
2573  auto multifrag_kernel_dispatch = [&ra_exe_unit,
2574  &execution_kernels,
2575  &column_fetcher,
2576  &eo,
2577  &query_comp_desc,
2578  &query_mem_desc,
2579  render_info](const int device_id,
2580  const FragmentsList& frag_list,
2581  const int64_t rowid_lookup_key) {
2582  execution_kernels.emplace_back(
2583  std::make_unique<ExecutionKernel>(ra_exe_unit,
2584  ExecutorDeviceType::GPU,
2585  device_id,
2586  eo,
2587  column_fetcher,
2588  query_comp_desc,
2589  query_mem_desc,
2590  frag_list,
2591  ExecutorDispatchMode::MultifragmentKernel,
2592  render_info,
2593  rowid_lookup_key));
2594  };
2595  fragment_descriptor.assignFragsToMultiDispatch(multifrag_kernel_dispatch);
2596  } else {
2597  VLOG(1) << "Creating one execution kernel per fragment";
2598  VLOG(1) << query_mem_desc.toString();
2599 
2600  if (!ra_exe_unit.use_bump_allocator && allow_single_frag_table_opt &&
2601  (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) &&
2602  table_infos.size() == 1 && table_infos.front().table_id > 0) {
2603  const auto max_frag_size =
2604  table_infos.front().info.getFragmentNumTuplesUpperBound();
2605  if (max_frag_size < query_mem_desc.getEntryCount()) {
2606  LOG(INFO) << "Lowering scan limit from " << query_mem_desc.getEntryCount()
2607  << " to match max fragment size " << max_frag_size
2608  << " for kernel per fragment execution path.";
2609  throw CompilationRetryNewScanLimit(max_frag_size);
2610  }
2611  }
2612 
2613  size_t frag_list_idx{0};
2614  auto fragment_per_kernel_dispatch = [&ra_exe_unit,
2615  &execution_kernels,
2616  &column_fetcher,
2617  &eo,
2618  &frag_list_idx,
2619  &device_type,
2620  &query_comp_desc,
2621  &query_mem_desc,
2622  render_info](const int device_id,
2623  const FragmentsList& frag_list,
2624  const int64_t rowid_lookup_key) {
2625  if (!frag_list.size()) {
2626  return;
2627  }
2628  CHECK_GE(device_id, 0);
2629 
2630  execution_kernels.emplace_back(
2631  std::make_unique<ExecutionKernel>(ra_exe_unit,
2632  device_type,
2633  device_id,
2634  eo,
2635  column_fetcher,
2636  query_comp_desc,
2637  query_mem_desc,
2638  frag_list,
2639  ExecutorDispatchMode::KernelPerFragment,
2640  render_info,
2641  rowid_lookup_key));
2642  ++frag_list_idx;
2643  };
2644 
2645  fragment_descriptor.assignFragsToKernelDispatch(fragment_per_kernel_dispatch,
2646  ra_exe_unit);
2647  }
2648 
2649  return execution_kernels;
2650 }
bool is_agg(const Analyzer::Expr *expr)
std::vector< Analyzer::Expr * > target_exprs
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1287
ExecutorDeviceType getDeviceType() const
const std::vector< uint64_t > & getFragOffsets()
std::string toString() const
#define LOG(tag)
Definition: Logger.h:217
std::vector< size_t > outer_fragment_indices
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:729
std::vector< InputDescriptor > input_descs
#define CHECK_GE(x, y)
Definition: Logger.h:236
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:1038
#define CHECK_GT(x, y)
Definition: Logger.h:235
std::vector< FragmentsPerTable > FragmentsList
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:91
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1286
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1252
void checkWorkUnitWatchdog(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const Catalog_Namespace::Catalog &cat, const ExecutorDeviceType device_type, const int device_count)
Definition: Execute.cpp:1514
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel) const
Definition: DataMgr.cpp:352
#define CHECK(condition)
Definition: Logger.h:223
double gpu_input_mem_limit_percent
bool has_lazy_fetched_columns(const std::vector< ColumnLazyFetchInfo > &fetched_cols)
Definition: Execute.cpp:2507
#define VLOG(n)
Definition: Logger.h:317
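Before any kernels are created, the code above decides between a single multi-fragment kernel and one kernel per fragment. A condensed restatement of that predicate (the free-standing function and enum here are illustrative; the real code uses ExecutorDeviceType and values computed inline):

// Condensed restatement of the dispatch-mode decision in createKernels().
enum class DeviceType { CPU, GPU };  // stand-in for ExecutorDeviceType

bool use_multifrag_kernel(DeviceType device_type,
                          bool allow_multifrag,  // eo.allow_multifrag
                          bool uses_lazy_fetch,  // lazy fetch enabled + used
                          bool is_agg) {
  // Multi-fragment kernels run only on GPU when the execution options
  // allow them; lazily fetched columns force per-fragment kernels
  // unless the query is an aggregate.
  return device_type == DeviceType::GPU && allow_multifrag &&
         (!uses_lazy_fetch || is_agg);
}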


CudaMgr_Namespace::CudaMgr* Executor::cudaMgr ( ) const
inline private

Definition at line 672 of file Execute.h.

References CHECK, data_mgr_, and Data_Namespace::DataMgr::getCudaMgr().

Referenced by deviceCount(), deviceCycles(), isArchPascalOrLater(), numBlocksPerMP(), and warpSize().

672  {
673  CHECK(data_mgr_);
674  auto cuda_mgr = data_mgr_->getCudaMgr();
675  CHECK(cuda_mgr);
676  return cuda_mgr;
677  }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:222
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1287
#define CHECK(condition)
Definition: Logger.h:223


int Executor::deviceCount ( const ExecutorDeviceType  device_type) const

Definition at line 1038 of file Execute.cpp.

References cudaMgr(), CudaMgr_Namespace::CudaMgr::getDeviceCount(), and GPU.

Referenced by createKernels(), and deviceCountForMemoryLevel().

1038  {
1039  if (device_type == ExecutorDeviceType::GPU) {
1040  return cudaMgr()->getDeviceCount();
1041  } else {
1042  return 1;
1043  }
1044 }
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:672
int getDeviceCount() const
Definition: CudaMgr.h:86


int Executor::deviceCountForMemoryLevel ( const Data_Namespace::MemoryLevel  memory_level) const
private

Definition at line 1046 of file Execute.cpp.

References CPU, deviceCount(), GPU, and Data_Namespace::GPU_LEVEL.

Referenced by buildHashTableForQualifier().

1047  {
1048  return memory_level == GPU_LEVEL ? deviceCount(ExecutorDeviceType::GPU)
1049  : deviceCount(ExecutorDeviceType::CPU);
1050 }
ExecutorDeviceType
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:1038


int64_t Executor::deviceCycles ( int  milliseconds) const
private

Definition at line 3876 of file Execute.cpp.

References cudaMgr(), and CudaMgr_Namespace::CudaMgr::getAllDeviceProperties().

3876  {
3877  const auto& dev_props = cudaMgr()->getAllDeviceProperties();
3878  return static_cast<int64_t>(dev_props.front().clockKhz) * milliseconds;
3879 }
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:672
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:127
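As a worked example with illustrative numbers: if the first device reports clockKhz = 1,455,000 (a 1.455 GHz clock), then deviceCycles(100) returns 1,455,000 * 100 = 145,500,000, roughly the number of clock ticks elapsed in 100 ms on that device. Note that only the first device's properties are consulted.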


std::string Executor::dumpCache ( ) const

Definition at line 4852 of file Execute.cpp.

References agg_col_range_cache_, TableGenerations::asMap(), AggregatedColRange::asMap(), row_set_mem_owner_, and table_generations_.

4852  {
4853  std::stringstream ss;
4854  ss << "colRangeCache: ";
4855  for (auto& [phys_input, exp_range] : agg_col_range_cache_.asMap()) {
4856  ss << "{" << phys_input.col_id << ", " << phys_input.table_id
4857  << "} = " << exp_range.toString() << ", ";
4858  }
4859  ss << "stringDictGenerations: ";
4860  for (auto& [key, val] : row_set_mem_owner_->getStringDictionaryGenerations().asMap()) {
4861  ss << "{" << key << "} = " << val << ", ";
4862  }
4863  ss << "tableGenerations: ";
4864  for (auto& [key, val] : table_generations_.asMap()) {
4865  ss << "{" << key << "} = {" << val.tuple_count << ", " << val.start_rowid << "}, ";
4866  }
4867  ss << "\n";
4868  return ss.str();
4869 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1301
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1253
const std::unordered_map< uint32_t, TableGeneration > & asMap() const
TableGenerations table_generations_
Definition: Execute.h:1302
const std::unordered_map< PhysicalInput, ExpressionRange > & asMap() const
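The resulting string is a single line of comma-separated entries, one group per cache. An illustrative example of the shape of the output (the ids, generations, and counts below are made up, and the expression-range text depends on ExpressionRange::toString()):

colRangeCache: {1, 10} = <expression range>, stringDictGenerations: {2} = 4096, tableGenerations: {10} = {1000000, 0},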


void Executor::enableRuntimeQueryInterrupt ( const double  runtime_query_check_freq,
const unsigned  pending_query_check_freq 
) const

Definition at line 4732 of file Execute.cpp.

References g_enable_runtime_query_interrupt, g_pending_query_interrupt_freq, and g_running_query_interrupt_freq.

4734  {
4735  // The only scenario in which we intentionally call this function is
4736  // to allow runtime query interrupt in QueryRunner for test cases:
4737  // the test machines' default settings leave runtime query interrupt
4738  // disabled, so test code has to turn it on when necessary.
4739  g_enable_runtime_query_interrupt = true;
4740  g_pending_query_interrupt_freq = pending_query_check_freq;
4741  g_running_query_interrupt_freq = runtime_query_check_freq;
4742  if (!g_running_query_interrupt_freq) {
4743  g_running_query_interrupt_freq = 0.5;
4744  }
4745 }
double g_running_query_interrupt_freq
Definition: Execute.cpp:129
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:128
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:125
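A minimal usage sketch for tests, assuming a live Executor instance such as the one QueryRunner holds; the wrapper function and the two frequency values are illustrative:

#include "QueryEngine/Execute.h"

// Test-only: enable runtime query interrupt before running queries.
// Per the frequency heuristic shown earlier, a larger runtime knob
// (in (0, 1]) makes running kernels check the interrupt flag more often.
void enable_interrupt_for_tests(Executor* executor) {
  executor->enableRuntimeQueryInterrupt(/*runtime_query_check_freq=*/0.1,
                                        /*pending_query_check_freq=*/1000);
}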
void Executor::enrollQuerySession ( const QuerySessionId query_session,
const std::string &  query_str,
const std::string &  submitted_time_str,
const size_t  executor_id,
const QuerySessionStatus::QueryStatus  query_session_status 
)

Definition at line 4550 of file Execute.cpp.

References addToQuerySessionList(), current_query_session_, and executor_session_mutex_.

4555  {
4556  // enroll the query session into the Executor's session map
4557  mapd_unique_lock<mapd_shared_mutex> session_write_lock(executor_session_mutex_);
4558  if (query_session.empty()) {
4559  return;
4560  }
4561 
4562  addToQuerySessionList(query_session,