OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Executor Class Reference

#include <Execute.h>

+ Collaboration diagram for Executor:

Classes

class  CgenStateManager
 
struct  ExecutorMutexHolder
 
class  FetchCacheAnchor
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

enum  ExtModuleKinds {
  ExtModuleKinds::template_module, ExtModuleKinds::udf_cpu_module, ExtModuleKinds::udf_gpu_module, ExtModuleKinds::rt_udf_cpu_module,
  ExtModuleKinds::rt_udf_gpu_module, ExtModuleKinds::rt_geos_module, ExtModuleKinds::rt_libdevice_module
}
 
using ExecutorId = size_t
 
using CachedCardinality = std::pair< bool, size_t >
 

Public Member Functions

 Executor (const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
 
void clearCaches (bool runtime_only=false)
 
std::string dumpCache () const
 
void reset (bool discard_runtime_modules_only=false)
 
const std::unique_ptr
< llvm::Module > & 
get_rt_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_rt_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_geos_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_libdevice_module () const
 
bool has_rt_module () const
 
bool has_udf_module (bool is_gpu=false) const
 
bool has_rt_udf_module (bool is_gpu=false) const
 
bool has_geos_module () const
 
bool has_libdevice_module () const
 
const TemporaryTables * getTemporaryTables ()
 
StringDictionaryProxy * getStringDictionaryProxy (const int dict_id, const bool with_generation) const
 
StringDictionaryProxy * getStringDictionaryProxy (const int dictId, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap * 
getStringProxyTranslationMap (const int source_dict_id, const int dest_dict_id, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap * 
getJoinIntersectionStringProxyTranslationMap (const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
 
const
StringDictionaryProxy::TranslationMap
< Datum > * 
getStringProxyNumericTranslationMap (const int source_dict_id, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
const Catalog_Namespace::Catalog * getCatalog () const
 
void setCatalog (const Catalog_Namespace::Catalog *catalog)
 
Data_Namespace::DataMgr * getDataMgr () const
 
const std::shared_ptr
< RowSetMemoryOwner > 
getRowSetMemoryOwner () const
 
const TemporaryTables * getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const int table_id) const
 
const TableGeneration & getTableGeneration (const int table_id) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow (const std::set< int > &table_ids_to_fetch) const
 
bool hasLazyFetchColumns (const std::vector< Analyzer::Expr * > &target_exprs) const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void interrupt (const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
 
void resetInterrupt ()
 
void enableRuntimeQueryInterrupt (const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
 
int8_t warpSize () const
 
unsigned gridSize () const
 
void setGridSize (unsigned grid_size)
 
void resetGridSize ()
 
unsigned numBlocksPerMP () const
 
unsigned blockSize () const
 
void setBlockSize (unsigned block_size)
 
void resetBlockSize ()
 
size_t maxGpuSlabSize () const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
TableUpdateMetadata executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
 
void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
 
int deviceCount (const ExecutorDeviceType) const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
 
void setColRangeCache (const AggregatedColRange &aggregated_col_range)
 
ExecutorId getExecutorId () const
 
QuerySessionId & getCurrentQuerySession (heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
QuerySessionStatus::QueryStatus getQuerySessionStatus (const QuerySessionId &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkCurrentQuerySession (const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
void invalidateRunningQuerySession (heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool addToQuerySessionList (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool removeFromQuerySessionList (const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
void setQuerySessionAsInterrupted (const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool checkIsQuerySessionInterrupted (const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkIsQuerySessionEnrolled (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool updateQuerySessionStatusWithLock (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool updateQuerySessionExecutorAssignment (const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
std::vector< QuerySessionStatus > getQuerySessionInfo (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
heavyai::shared_mutex & getSessionLock ()
 
CurrentQueryStatus attachExecutorToQuerySession (const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
 
void checkPendingQueryStatus (const QuerySessionId &query_session)
 
void clearQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str)
 
void updateQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
 
void enrollQuerySession (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
 
size_t getNumCurentSessionsEnrolled () const
 
const std::vector< size_t > getExecutorIdsRunningQuery (const QuerySessionId &interrupt_session) const
 
bool checkNonKernelTimeInterrupted () const
 
void registerExtractedQueryPlanDag (const QueryPlanDAG &query_plan_dag)
 
const QueryPlanDAG getLatestQueryPlanDagExtracted () const
 
void addToCardinalityCache (const std::string &cache_key, const size_t cache_value)
 
CachedCardinality getCachedCardinality (const std::string &cache_key)
 
heavyai::shared_mutex & getDataRecyclerLock ()
 
QueryPlanDagCache & getQueryPlanDagCache ()
 
ResultSetRecyclerHolder & getRecultSetRecyclerHolder ()
 
CgenState * getCgenStatePtr () const
 
PlanState * getPlanStatePtr () const
 
llvm::LLVMContext & getContext ()
 
void update_extension_modules (bool update_runtime_modules_only=false)
 

Static Public Member Functions

static void clearExternalCaches (bool for_update, const TableDescriptor *td, const int current_db_id)
 
template<typename F >
static void registerExtensionFunctions (F register_extension_functions)
 
static std::shared_ptr< Executor > getExecutor (const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static size_t getArenaBlockSize ()
 
static void addUdfIrToModule (const std::string &udf_ir_filename, const bool is_cuda_ir)
 
static void initialize_extension_module_sources ()
 
static void registerActiveModule (void *module, const int device_id)
 
static void unregisterActiveModule (const int device_id)
 
static std::pair< int64_t,
int32_t > 
reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void update_after_registration (bool update_runtime_modules_only=false)
 

Public Attributes

std::mutex compilation_mutex_
 
const logger::ThreadId thread_id_
 

Static Public Attributes

static const ExecutorId UNITARY_EXECUTOR_ID = 0
 
static const ExecutorId INVALID_EXECUTOR_ID = SIZE_MAX
 
static std::map
< ExtModuleKinds, std::string > 
extension_module_sources
 
static const size_t high_scan_limit
 
static const int32_t ERR_DIV_BY_ZERO {1}
 
static const int32_t ERR_OUT_OF_GPU_MEM {2}
 
static const int32_t ERR_OUT_OF_SLOTS {3}
 
static const int32_t ERR_UNSUPPORTED_SELF_JOIN {4}
 
static const int32_t ERR_OUT_OF_RENDER_MEM {5}
 
static const int32_t ERR_OUT_OF_CPU_MEM {6}
 
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW {7}
 
static const int32_t ERR_OUT_OF_TIME {9}
 
static const int32_t ERR_INTERRUPTED {10}
 
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11}
 
static const int32_t ERR_TOO_MANY_LITERALS {12}
 
static const int32_t ERR_STRING_CONST_IN_RESULTSET {13}
 
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14}
 
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES {15}
 
static const int32_t ERR_GEOS {16}
 
static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT {17}
 
static std::mutex register_runtime_extension_functions_mutex_
 
static std::mutex kernel_mutex_
 

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenWindowFunctionAggregate (const CompilationOptions &co)
 
llvm::BasicBlock * codegenWindowResetStateControlFlow ()
 
void codegenWindowFunctionStateInit (llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
llvm::Value * codegenWindowFunctionOnFrame (const CompilationOptions &co)
 
llvm::Value * codegenCurrentPartitionIndex (const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
 
llvm::Value * codegenFrameBoundExpr (const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenWindowFrameBound (WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *current_row_pos_lv, llvm::Value *current_partition_start_offset_lv, llvm::Value *order_key_buf_ptr_lv, llvm::Value *order_key_col_null_val_lv, llvm::Value *frame_start_bound_expr_lv, llvm::Value *frame_end_bound_expr_lv, llvm::Value *num_elem_current_partition_lv, llvm::Value *target_partition_rowid_ptr_lv, llvm::Value *target_partition_sorted_rowid_ptr_lv, llvm::Value *null_start_pos_lv, llvm::Value *null_end_pos_lv, CodeGenerator &code_generator)
 
void codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
 
llvm::Value * codegenAggregateWindowState ()
 
llvm::Value * aggregateWindowStatePtr ()
 
CudaMgr_Namespace::CudaMgr * cudaMgr () const
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
bool needLinearizeAllFragments (const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
 Compiles and dispatches a work unit per fragment processing results with the per fragment callback. Currently used for computing metrics over fragments (metadata). More...
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
 Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps. More...
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
 
std::unordered_map< int, const
Analyzer::BinOper * > 
getInnerTabIdToJoinCond () const
 
std::vector< std::unique_ptr
< ExecutionKernel > > 
createKernels (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
 
void launchKernels (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
FetchResult fetchUnionChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
std::pair< std::vector
< std::vector< int64_t >
>, std::vector< std::vector
< uint64_t > > > 
getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
void buildSelectedFragsMappingForUnion (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int outer_table_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const int64_t rows_to_process=-1)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const int64_t rows_to_process=-1)
 
ResultSetPtr resultsUnion (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< int8_t * > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
void AutoTrackBuffersInRuntimeIR ()
 
std::tuple< CompilationResult,
std::unique_ptr
< QueryMemoryDescriptor > > 
compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoop > buildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const int inner_table_id, const CompilationOptions &co)
 
std::function< llvm::Value
*(const std::vector
< llvm::Value * >
&, llvm::Value *)> 
buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< HashJoin > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
void redeclareFilterFunction ()
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
 
void insertErrorCodeChecker (llvm::Function *query_func, bool hoist_literals, bool allow_runtime_query_interrupt)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
 
std::shared_ptr
< CompilationContext > 
optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::shared_ptr
< CompilationContext > 
optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
std::tuple
< RelAlgExecutionUnit,
PlanState::DeletedColumnsMap > 
addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
bool isFragmentFullyDeleted (const int table_id, const Fragmenter_Namespace::FragmentInfo &fragment)
 
FragmentSkipStatus canSkipFragmentForFpQual (const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (std::unordered_set< int > phys_table_ids)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
const std::unique_ptr
< llvm::Module > & 
get_extension_module (ExtModuleKinds kind) const
 
bool has_extension_module (ExtModuleKinds kind) const
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 
ExecutorMutexHolder acquireExecuteMutex ()
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

const ExecutorId executor_id_
 
std::unique_ptr
< llvm::LLVMContext > 
context_
 
std::unique_ptr< CgenState > cgen_state_
 
std::map< ExtModuleKinds,
std::unique_ptr< llvm::Module > > 
extension_modules_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr
< RowSetMemoryOwner > 
row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::atomic< bool > interrupted_
 
std::mutex str_dict_mutex_
 
std::unique_ptr
< llvm::TargetMachine > 
nvptx_target_machine_
 
unsigned block_size_x_
 
unsigned grid_size_x_
 
const size_t max_gpu_slab_size_
 
const std::string debug_dir_
 
const std::string debug_file_
 
const Catalog_Namespace::Catalog * catalog_
 
Data_Namespace::DataMgr * data_mgr_
 
const TemporaryTables * temporary_tables_
 
TableIdToNodeMap table_id_to_node_map_
 
int64_t kernel_queue_time_ms_ = 0
 
int64_t compilation_queue_time_ms_ = 0
 
std::unique_ptr
< WindowProjectNodeContext > 
window_project_node_context_owned_
 
WindowFunctionContext * active_window_function_ {nullptr}
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
TableGenerations table_generations_
 
QuerySessionId current_query_session_
 

Static Private Attributes

static const int max_gpu_count {16}
 
static std::mutex gpu_active_modules_mutex_
 
static uint32_t gpu_active_modules_device_mask_ {0x0}
 
static void * gpu_active_modules_ [max_gpu_count]
 
static const size_t baseline_threshold
 
static heavyai::shared_mutex executor_session_mutex_
 
static InterruptFlagMap queries_interrupt_flag_
 
static QuerySessionMap queries_session_map_
 
static std::map< int,
std::shared_ptr< Executor > > 
executors_
 
static heavyai::shared_mutex execute_mutex_
 
static heavyai::shared_mutex executors_cache_mutex_
 
static QueryPlanDagCache query_plan_dag_cache_
 
static heavyai::shared_mutex recycler_mutex_
 
static std::unordered_map
< std::string, size_t > 
cardinality_cache_
 
static ResultSetRecyclerHolder resultset_recycler_holder_
 
static QueryPlanDAG latest_query_plan_extracted_ {EMPTY_QUERY_PLAN}
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
struct DiamondCodegen
 
class ExecutionKernel
 
class KernelSubtask
 
class HashJoin
 
class OverlapsJoinHashTable
 
class RangeJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class StringDictionaryTranslationMgr
 
class LeafAggregator
 
class PerfectJoinHashTable
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
class WindowProjectNodeContext
 

Detailed Description

Definition at line 368 of file Execute.h.

Member Typedef Documentation

using Executor::CachedCardinality = std::pair<bool, size_t>

Definition at line 1209 of file Execute.h.

using Executor::ExecutorId = size_t

Definition at line 375 of file Execute.h.

Definition at line 739 of file Execute.h.

Member Enumeration Documentation

Enumerator
template_module 
udf_cpu_module 
udf_gpu_module 
rt_udf_cpu_module 
rt_udf_gpu_module 
rt_geos_module 
rt_libdevice_module 

Definition at line 469 of file Execute.h.

469  {
470  template_module, // RuntimeFunctions.bc
471  udf_cpu_module, // Load-time UDFs for CPU execution
472  udf_gpu_module, // Load-time UDFs for GPU execution
473  rt_udf_cpu_module, // Run-time UDF/UDTFs for CPU execution
474  rt_udf_gpu_module, // Run-time UDF/UDTFs for GPU execution
475  rt_geos_module, // geos functions
476  rt_libdevice_module // math library functions for GPU execution
477  };
std::unique_ptr< llvm::Module > udf_gpu_module
std::unique_ptr< llvm::Module > udf_cpu_module

Constructor & Destructor Documentation

Executor::Executor ( const ExecutorId  id,
Data_Namespace::DataMgr * data_mgr,
const size_t  block_size_x,
const size_t  grid_size_x,
const size_t  max_gpu_slab_size,
const std::string &  debug_dir,
const std::string &  debug_file 
)

Definition at line 244 of file Execute.cpp.

251  : executor_id_(executor_id)
252  , context_(new llvm::LLVMContext())
253  , cgen_state_(new CgenState({}, false, this))
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
const ExecutorId executor_id_
Definition: Execute.h:1242
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1243

Member Function Documentation

ExecutorMutexHolder Executor::acquireExecuteMutex ( )
inlineprivate

Definition at line 1357 of file Execute.h.

References execute_mutex_, executor_id_, Executor::ExecutorMutexHolder::shared_lock, Executor::ExecutorMutexHolder::unique_lock, and UNITARY_EXECUTOR_ID.

1357  {
1358  ExecutorMutexHolder ret;
1360  // Only one unitary executor can run at a time
1362  } else {
1364  }
1365  return ret;
1366  }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1351
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1242
std::unique_lock< T > unique_lock
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:376
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > Executor::addDeletedColumn ( const RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co 
)
private

Definition at line 3958 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::add_deleted_col_to_map(), catalog_, CHECK, CompilationOptions::filter_on_deleted_column, Catalog_Namespace::Catalog::getDeletedColumnIfRowsDeleted(), Catalog_Namespace::Catalog::getMetadataForTable(), and TABLE.

Referenced by executeWorkUnitImpl(), and executeWorkUnitPerFragment().

3960  {
3961  if (!co.filter_on_deleted_column) {
3962  return std::make_tuple(ra_exe_unit, PlanState::DeletedColumnsMap{});
3963  }
3964  auto ra_exe_unit_with_deleted = ra_exe_unit;
3965  PlanState::DeletedColumnsMap deleted_cols_map;
3966  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
3967  if (input_table.getSourceType() != InputSourceType::TABLE) {
3968  continue;
3969  }
3970  const auto td = catalog_->getMetadataForTable(input_table.getTableId());
3971  CHECK(td);
3972  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
3973  if (!deleted_cd) {
3974  continue;
3975  }
3976  CHECK(deleted_cd->columnType.is_boolean());
3977  // check deleted column is not already present
3978  bool found = false;
3979  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
3980  if (input_col.get()->getColId() == deleted_cd->columnId &&
3981  input_col.get()->getScanDesc().getTableId() == deleted_cd->tableId &&
3982  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
3983  found = true;
3984  add_deleted_col_to_map(deleted_cols_map, deleted_cd);
3985  break;
3986  }
3987  }
3988  if (!found) {
3989  // add deleted column
3990  ra_exe_unit_with_deleted.input_col_descs.emplace_back(new InputColDescriptor(
3991  deleted_cd->columnId, deleted_cd->tableId, input_table.getNestLevel()));
3992  add_deleted_col_to_map(deleted_cols_map, deleted_cd);
3993  }
3994  }
3995  return std::make_tuple(ra_exe_unit_with_deleted, deleted_cols_map);
3996 }
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1323
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:3679
std::unordered_map< TableId, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
#define CHECK(condition)
Definition: Logger.h:222
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
void add_deleted_col_to_map(PlanState::DeletedColumnsMap &deleted_cols_map, const ColumnDescriptor *deleted_cd)
Definition: Execute.cpp:3946

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 1094 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

1095  {
1097  // Iterators are added for loop-outer joins when the head of the loop is generated,
1098  // then once again when the body is generated. Allow this instead of special handling
1099  // of call sites.
1100  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
1101  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
1102  return it->second;
1103  }
1104  CHECK(!prev_iters.empty());
1105  llvm::Value* matching_row_index = prev_iters.back();
1106  const auto it_ok =
1107  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
1108  CHECK(it_ok.second);
1109  return matching_row_index;
1110 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:222
void Executor::addToCardinalityCache ( const std::string &  cache_key,
const size_t  cache_value 
)

Definition at line 4760 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, recycler_mutex_, and VLOG.

4761  {
4764  cardinality_cache_[cache_key] = cache_value;
4765  VLOG(1) << "Put estimated cardinality to the cache";
4766  }
4767 }
static std::unordered_map< std::string, size_t > cardinality_cache_
Definition: Execute.h:1372
std::unique_lock< T > unique_lock
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1371
bool g_use_estimator_result_cache
Definition: Execute.cpp:127
#define VLOG(n)
Definition: Logger.h:316
bool Executor::addToQuerySessionList ( const QuerySessionId &  query_session,
const std::string &  query_str,
const std::string &  submitted_time_str,
const size_t  executor_id,
const QuerySessionStatus::QueryStatus  query_status,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 4591 of file Execute.cpp.

References queries_interrupt_flag_, and queries_session_map_.

Referenced by enrollQuerySession().

4597  {
4598  // an internal API that enrolls the query session into the Executor's session map
4599  if (queries_session_map_.count(query_session)) {
4600  if (queries_session_map_.at(query_session).count(submitted_time_str)) {
4601  queries_session_map_.at(query_session).erase(submitted_time_str);
4602  queries_session_map_.at(query_session)
4603  .emplace(submitted_time_str,
4604  QuerySessionStatus(query_session,
4605  executor_id,
4606  query_str,
4607  submitted_time_str,
4608  query_status));
4609  } else {
4610  queries_session_map_.at(query_session)
4611  .emplace(submitted_time_str,
4612  QuerySessionStatus(query_session,
4613  executor_id,
4614  query_str,
4615  submitted_time_str,
4616  query_status));
4617  }
4618  } else {
4619  std::map<std::string, QuerySessionStatus> executor_per_query_map;
4620  executor_per_query_map.emplace(
4621  submitted_time_str,
4623  query_session, executor_id, query_str, submitted_time_str, query_status));
4624  queries_session_map_.emplace(query_session, executor_per_query_map);
4625  }
4626  return queries_interrupt_flag_.emplace(query_session, false).second;
4627 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1346
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1344

+ Here is the caller graph for this function:

void Executor::addTransientStringLiterals ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::shared_ptr< RowSetMemoryOwner > &  row_set_mem_owner 
)

Definition at line 2163 of file Execute.cpp.

References CHECK, getStringDictionaryProxy(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, kSINGLE_VALUE, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_union, and ScalarExprVisitor< T >::visit().

2165  {
2166  TransientDictIdVisitor dict_id_visitor;
2167 
2168  auto visit_expr =
2169  [this, &dict_id_visitor, &row_set_mem_owner](const Analyzer::Expr* expr) {
2170  if (!expr) {
2171  return;
2172  }
2173  const auto dict_id = dict_id_visitor.visit(expr);
2174  if (dict_id >= 0) {
2175  auto sdp = getStringDictionaryProxy(dict_id, row_set_mem_owner, true);
2176  CHECK(sdp);
2177  TransientStringLiteralsVisitor visitor(sdp, this);
2178  visitor.visit(expr);
2179  }
2180  };
2181 
2182  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2183  visit_expr(group_expr.get());
2184  }
2185 
2186  for (const auto& group_expr : ra_exe_unit.quals) {
2187  visit_expr(group_expr.get());
2188  }
2189 
2190  for (const auto& group_expr : ra_exe_unit.simple_quals) {
2191  visit_expr(group_expr.get());
2192  }
2193 
2194  const auto visit_target_expr = [&](const Analyzer::Expr* target_expr) {
2195  const auto& target_type = target_expr->get_type_info();
2196  if (!target_type.is_string() || target_type.get_compression() == kENCODING_DICT) {
2197  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2198  if (agg_expr) {
2199  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
2200  agg_expr->get_aggtype() == kSAMPLE) {
2201  visit_expr(agg_expr->get_arg());
2202  }
2203  } else {
2204  visit_expr(target_expr);
2205  }
2206  }
2207  };
2208  const auto& target_exprs = ra_exe_unit.target_exprs;
2209  std::for_each(target_exprs.begin(), target_exprs.end(), visit_target_expr);
2210  const auto& target_exprs_union = ra_exe_unit.target_exprs_union;
2211  std::for_each(target_exprs_union.begin(), target_exprs_union.end(), visit_target_expr);
2212 }
std::vector< Analyzer::Expr * > target_exprs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
T visit(const Analyzer::Expr *expr) const
std::vector< Analyzer::Expr * > target_exprs_union
StringDictionaryProxy * getStringDictionaryProxy(const int dict_id, const bool with_generation) const
Definition: Execute.h:529
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:222
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

void Executor::addUdfIrToModule ( const std::string &  udf_ir_filename,
const bool  is_cuda_ir 
)
static

Definition at line 1857 of file NativeCodegen.cpp.

Referenced by DBHandler::initialize().

1858  {
1862  udf_ir_filename;
1863 }
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:479

+ Here is the caller graph for this function:

llvm::Value * Executor::aggregateWindowStatePtr ( )
private

Definition at line 211 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.

211  {
213  const auto window_func_context =
215  const auto window_func = window_func_context->getWindowFunction();
216  const auto arg_ti = get_adjusted_window_type_info(window_func);
217  llvm::Type* aggregate_state_type =
218  arg_ti.get_type() == kFLOAT
219  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
220  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
221  const auto aggregate_state_i64 = cgen_state_->llInt(
222  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
223  return cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_i64,
224  aggregate_state_type);
225 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1243
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inlinestaticprivate

Definition at line 1234 of file Execute.h.

Referenced by serializeLiterals().

1234  {
1235  size_t off = off_in;
1236  if (off % alignment != 0) {
1237  off += (alignment - off % alignment);
1238  }
1239  return off;
1240  }

+ Here is the caller graph for this function:

CurrentQueryStatus Executor::attachExecutorToQuerySession ( const QuerySessionId &  query_session_id,
const std::string &  query_str,
const std::string &  query_submitted_time 
)

Definition at line 4489 of file Execute.cpp.

References executor_id_, executor_session_mutex_, updateQuerySessionExecutorAssignment(), and updateQuerySessionStatusWithLock().

4492  {
4493  if (!query_session_id.empty()) {
4494  // if session is valid, do update 1) the exact executor id and 2) query status
4497  query_session_id, query_submitted_time, executor_id_, write_lock);
4498  updateQuerySessionStatusWithLock(query_session_id,
4499  query_submitted_time,
4500  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR,
4501  write_lock);
4502  }
4503  return {query_session_id, query_str};
4504 }
heavyai::unique_lock< heavyai::shared_mutex > write_lock
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4629
const ExecutorId executor_id_
Definition: Execute.h:1242
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4655
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1340

+ Here is the call graph for this function:

void Executor::AutoTrackBuffersInRuntimeIR ( )
private

Definition at line 2200 of file NativeCodegen.cpp.

References CHECK.

2200  {
2201  llvm::Module* M = cgen_state_->module_;
2202  if (M->getFunction("allocate_varlen_buffer") == nullptr)
2203  return;
2204 
2205  // read metadata
2206  bool should_track = false;
2207  auto* flag = M->getModuleFlag("manage_memory_buffer");
2208  if (auto* cnt = llvm::mdconst::extract_or_null<llvm::ConstantInt>(flag)) {
2209  if (cnt->getZExtValue() == 1) {
2210  should_track = true;
2211  }
2212  }
2213 
2214  if (!should_track) {
2215  // metadata is not present
2216  return;
2217  }
2218 
2219  LOG(INFO) << "Found 'manage_memory_buffer' metadata.";
2220  llvm::SmallVector<llvm::CallInst*, 4> calls_to_analyze;
2221 
2222  for (llvm::Function& F : *M) {
2223  for (llvm::BasicBlock& BB : F) {
2224  for (llvm::Instruction& I : BB) {
2225  if (llvm::CallInst* CI = llvm::dyn_cast<llvm::CallInst>(&I)) {
2226  // Keep track of calls to "allocate_varlen_buffer" for later processing
2227  llvm::Function* called = CI->getCalledFunction();
2228  if (called) {
2229  if (called->getName() == "allocate_varlen_buffer") {
2230  calls_to_analyze.push_back(CI);
2231  }
2232  }
2233  }
2234  }
2235  }
2236  }
2237 
2238  // for each call to "allocate_varlen_buffer", check if there's a corresponding
2239  // call to "register_buffer_with_executor_rsm". If not, add a call to it
2240  llvm::IRBuilder<> Builder(cgen_state_->context_);
2241  auto i64 = get_int_type(64, cgen_state_->context_);
2242  auto i8p = get_int_ptr_type(8, cgen_state_->context_);
2243  auto void_ = llvm::Type::getVoidTy(cgen_state_->context_);
2244  llvm::FunctionType* fnty = llvm::FunctionType::get(void_, {i64, i8p}, false);
2245  llvm::FunctionCallee register_buffer_fn =
2246  M->getOrInsertFunction("register_buffer_with_executor_rsm", fnty, {});
2247 
2248  int64_t executor_addr = reinterpret_cast<int64_t>(this);
2249  for (llvm::CallInst* CI : calls_to_analyze) {
2250  bool found = false;
2251  // for each user of the function, check if its a callinst
2252  // and if the callinst is calling "register_buffer_with_executor_rsm"
2253  // if no such instruction exist, add one registering the buffer
2254  for (llvm::User* U : CI->users()) {
2255  if (llvm::CallInst* call = llvm::dyn_cast<llvm::CallInst>(U)) {
2256  if (call->getCalledFunction() and
2257  call->getCalledFunction()->getName() == "register_buffer_with_executor_rsm") {
2258  found = true;
2259  break;
2260  }
2261  }
2262  }
2263  if (!found) {
2264  Builder.SetInsertPoint(CI->getNextNode());
2265  Builder.CreateCall(register_buffer_fn,
2266  {ll_int(executor_addr, cgen_state_->context_), CI});
2267  }
2268  }
2269 }
#define LOG(tag)
Definition: Logger.h:216
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Type * get_int_ptr_type(const int width, llvm::LLVMContext &context)
unsigned Executor::blockSize ( ) const

Definition at line 3850 of file Execute.cpp.

References block_size_x_, CHECK, data_mgr_, CudaMgr_Namespace::CudaMgr::getAllDeviceProperties(), and Data_Namespace::DataMgr::getCudaMgr().

Referenced by collectAllDeviceShardedTopResults(), executePlanWithGroupBy(), executePlanWithoutGroupBy(), executeTableFunction(), executeWorkUnitImpl(), reduceMultiDeviceResults(), reduceMultiDeviceResultSets(), and resultsUnion().

3850  {
3851  CHECK(data_mgr_);
3852  const auto cuda_mgr = data_mgr_->getCudaMgr();
3853  if (!cuda_mgr) {
3854  return 0;
3855  }
3856  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
3857  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
3858 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:224
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1324
unsigned block_size_x_
Definition: Execute.h:1317
#define CHECK(condition)
Definition: Logger.h:222
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< HashJoin > Executor::buildCurrentLevelHashTable ( const JoinCondition &  current_level_join_conditions,
size_t  level_idx,
RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 935 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), AUTOMATIC_IR_METADATA, anonymous_namespace{IRCodegen.cpp}::check_valid_join_qual(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, RelAlgExecutionUnit::hash_table_build_plan_dag, IS_EQUIVALENCE, LEFT, OneToOne, JoinCondition::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::table_id_to_node_map, JoinCondition::type, and VLOG.

942  {
944  std::shared_ptr<HashJoin> current_level_hash_table;
945  auto handleNonHashtableQual = [&ra_exe_unit, &level_idx, this](
946  JoinType join_type,
947  std::shared_ptr<Analyzer::Expr> qual) {
948  if (join_type == JoinType::LEFT) {
949  plan_state_->addNonHashtableQualForLeftJoin(level_idx, qual);
950  } else {
951  add_qualifier_to_execution_unit(ra_exe_unit, qual);
952  }
953  };
954  for (const auto& join_qual : current_level_join_conditions.quals) {
955  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
956  if (current_level_hash_table || !qual_bin_oper ||
957  !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
958  handleNonHashtableQual(current_level_join_conditions.type, join_qual);
959  if (!current_level_hash_table) {
960  fail_reasons.emplace_back("No equijoin expression found");
961  }
962  continue;
963  }
964  check_valid_join_qual(qual_bin_oper);
965  JoinHashTableOrError hash_table_or_error;
966  if (!current_level_hash_table) {
967  hash_table_or_error = buildHashTableForQualifier(
968  qual_bin_oper,
969  query_infos,
972  current_level_join_conditions.type,
974  column_cache,
975  ra_exe_unit.hash_table_build_plan_dag,
976  ra_exe_unit.query_hint,
977  ra_exe_unit.table_id_to_node_map);
978  current_level_hash_table = hash_table_or_error.hash_table;
979  }
980  if (hash_table_or_error.hash_table) {
981  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
982  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
983  } else {
984  fail_reasons.push_back(hash_table_or_error.fail_reason);
985  if (!current_level_hash_table) {
986  VLOG(2) << "Building a hashtable based on a qual " << qual_bin_oper->toString()
987  << " fails: " << hash_table_or_error.fail_reason;
988  }
989  handleNonHashtableQual(current_level_join_conditions.type, qual_bin_oper);
990  }
991  }
992  return current_level_hash_table;
993 }
JoinType
Definition: sqldefs.h:157
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:68
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
TableIdToNodeMap table_id_to_node_map
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1298
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
Definition: IRCodegen.cpp:474
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:3794
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define VLOG(n)
Definition: Logger.h:316
HashTableBuildDagMap hash_table_build_plan_dag
void check_valid_join_qual(std::shared_ptr< Analyzer::BinOper > &bin_oper)
Definition: IRCodegen.cpp:504

+ Here is the call graph for this function:

Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
ColumnCacheMap column_cache,
const HashTableBuildDagMap hashtable_build_dag_map,
const RegisteredQueryHint query_hint,
const TableIdToNodeMap table_id_to_node_map 
)
private

Definition at line 3794 of file Execute.cpp.

References deviceCountForMemoryLevel(), ERR_INTERRUPTED, g_enable_dynamic_watchdog, g_enable_overlaps_hashjoin, HashJoin::getInstance(), and interrupted_.

3803  {
3804  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
3805  return {nullptr, "Overlaps hash join disabled, attempting to fall back to loop join"};
3806  }
3807  if (g_enable_dynamic_watchdog && interrupted_.load()) {
3809  }
3810  try {
3811  auto tbl = HashJoin::getInstance(qual_bin_oper,
3812  query_infos,
3813  memory_level,
3814  join_type,
3815  preferred_hash_type,
3816  deviceCountForMemoryLevel(memory_level),
3817  column_cache,
3818  this,
3819  hashtable_build_dag_map,
3820  query_hint,
3821  table_id_to_node_map);
3822  return {tbl, ""};
3823  } catch (const HashJoinFail& e) {
3824  return {nullptr, e.what()};
3825  }
3826 }
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1388
std::atomic< bool > interrupted_
Definition: Execute.h:1308
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:80
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:1064
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query&#39;s parse tree etc.
Definition: HashJoin.cpp:283

+ Here is the call graph for this function:

JoinLoop::HoistedFiltersCallback Executor::buildHoistLeftHandSideFiltersCb ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  level_idx,
const int  inner_table_id,
const CompilationOptions &  co 
)
private

Definition at line 768 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CodeGenerator::codegen(), g_enable_left_join_filter_hoisting, RelAlgExecutionUnit::join_quals, LEFT, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, CodeGenerator::toBool(), and VLOG.

772  {
774  return nullptr;
775  }
776 
777  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
778  if (level_idx == 0 && current_level_join_conditions.type == JoinType::LEFT) {
779  const auto& condition = current_level_join_conditions.quals.front();
780  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(condition.get());
781  CHECK(bin_oper) << condition->toString();
782  const auto rhs =
783  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_right_operand());
784  const auto lhs =
785  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_left_operand());
786  if (lhs && rhs && lhs->get_table_id() != rhs->get_table_id()) {
787  const Analyzer::ColumnVar* selected_lhs{nullptr};
788  // grab the left hand side column -- this is somewhat similar to normalize column
789  // pair, and a better solution may be to hoist that function out of the join
790  // framework and normalize columns at the top of build join loops
791  if (lhs->get_table_id() == inner_table_id) {
792  selected_lhs = rhs;
793  } else if (rhs->get_table_id() == inner_table_id) {
794  selected_lhs = lhs;
795  }
796  if (selected_lhs) {
797  std::list<std::shared_ptr<Analyzer::Expr>> hoisted_quals;
798  // get all LHS-only filters
799  auto should_hoist_qual = [&hoisted_quals](const auto& qual, const int table_id) {
800  CHECK(qual);
801 
802  ExprTableIdVisitor visitor;
803  const auto table_ids = visitor.visit(qual.get());
804  if (table_ids.size() == 1 && table_ids.find(table_id) != table_ids.end()) {
805  hoisted_quals.push_back(qual);
806  }
807  };
808  for (const auto& qual : ra_exe_unit.simple_quals) {
809  should_hoist_qual(qual, selected_lhs->get_table_id());
810  }
811  for (const auto& qual : ra_exe_unit.quals) {
812  should_hoist_qual(qual, selected_lhs->get_table_id());
813  }
814 
815  // build the filters callback and return it
816  if (!hoisted_quals.empty()) {
817  return [this, hoisted_quals, co](llvm::BasicBlock* true_bb,
818  llvm::BasicBlock* exit_bb,
819  const std::string& loop_name,
820  llvm::Function* parent_func,
821  CgenState* cgen_state) -> llvm::BasicBlock* {
822  // make sure we have quals to hoist
823  bool has_quals_to_hoist = false;
824  for (const auto& qual : hoisted_quals) {
825  // check to see if the filter was previously hoisted. if all filters were
826  // previously hoisted, this callback becomes a noop
827  if (plan_state_->hoisted_filters_.count(qual) == 0) {
828  has_quals_to_hoist = true;
829  break;
830  }
831  }
832 
833  if (!has_quals_to_hoist) {
834  return nullptr;
835  }
836 
837  AUTOMATIC_IR_METADATA(cgen_state);
838 
839  llvm::IRBuilder<>& builder = cgen_state->ir_builder_;
840  auto& context = builder.getContext();
841 
842  const auto filter_bb =
843  llvm::BasicBlock::Create(context,
844  "hoisted_left_join_filters_" + loop_name,
845  parent_func,
846  /*insert_before=*/true_bb);
847  builder.SetInsertPoint(filter_bb);
848 
849  llvm::Value* filter_lv = cgen_state_->llBool(true);
850  CodeGenerator code_generator(this);
852  for (const auto& qual : hoisted_quals) {
853  if (plan_state_->hoisted_filters_.insert(qual).second) {
854  // qual was inserted into the hoisted filters map, which means we have not
855  // seen this qual before. Generate filter.
856  VLOG(1) << "Generating code for hoisted left hand side qualifier "
857  << qual->toString();
858  auto cond = code_generator.toBool(
859  code_generator.codegen(qual.get(), true, co).front());
860  filter_lv = builder.CreateAnd(filter_lv, cond);
861  }
862  }
863  CHECK(filter_lv->getType()->isIntegerTy(1));
864 
865  builder.CreateCondBr(filter_lv, true_bb, exit_bb);
866  return filter_bb;
867  };
868  }
869  }
870  }
871  }
872  return nullptr;
873 }
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:100
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1298
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:222
#define VLOG(n)
Definition: Logger.h:316
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  level_idx,
const CompilationOptions &  co 
)
private

Definition at line 876 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, CodeGenerator::codegen(), CompilationOptions::filter_on_deleted_column, RelAlgExecutionUnit::input_descs, TABLE, and CodeGenerator::toBool().

878  {
880  if (!co.filter_on_deleted_column) {
881  return nullptr;
882  }
883  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
884  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
885  if (input_desc.getSourceType() != InputSourceType::TABLE) {
886  return nullptr;
887  }
888 
889  const auto deleted_cd = plan_state_->getDeletedColForTable(input_desc.getTableId());
890  if (!deleted_cd) {
891  return nullptr;
892  }
893  CHECK(deleted_cd->columnType.is_boolean());
894  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
895  input_desc.getTableId(),
896  deleted_cd->columnId,
897  input_desc.getNestLevel());
898  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
899  llvm::Value* have_more_inner_rows) {
900  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
901  // Avoid fetching the deleted column from a position which is not valid.
902  // An invalid position can be returned by a one to one hash lookup (negative)
903  // or at the end of iteration over a set of matching values.
904  llvm::Value* is_valid_it{nullptr};
905  if (have_more_inner_rows) {
906  is_valid_it = have_more_inner_rows;
907  } else {
908  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
909  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
910  }
911  const auto it_valid_bb = llvm::BasicBlock::Create(
912  cgen_state_->context_, "it_valid", cgen_state_->current_func_);
913  const auto it_not_valid_bb = llvm::BasicBlock::Create(
914  cgen_state_->context_, "it_not_valid", cgen_state_->current_func_);
915  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
916  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
917  cgen_state_->context_, "row_is_deleted", cgen_state_->current_func_);
918  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
919  CodeGenerator code_generator(this);
920  const auto row_is_deleted = code_generator.toBool(
921  code_generator.codegen(deleted_expr.get(), true, co).front());
922  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
923  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
924  const auto row_is_deleted_default = cgen_state_->llBool(false);
925  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
926  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
927  auto row_is_deleted_or_default =
928  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
929  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
930  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
931  return row_is_deleted_or_default;
932  };
933 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1298
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:232
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1094
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache 
)
private

Definition at line 523 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), INJECT_TIMER, CgenState::ir_builder_, RelAlgExecutionUnit::join_quals, LEFT, PlanState::left_join_non_hashtable_quals_, CgenState::llBool(), MultiSet, OneToOne, CgenState::outer_join_match_found_per_level_, CodeGenerator::plan_state_, Set, Singleton, JoinLoopDomain::slot_lookup_result, CodeGenerator::toBool(), and JoinLoopDomain::values_buffer.

528  {
531  std::vector<JoinLoop> join_loops;
532  for (size_t level_idx = 0, current_hash_table_idx = 0;
533  level_idx < ra_exe_unit.join_quals.size();
534  ++level_idx) {
535  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
536  std::vector<std::string> fail_reasons;
537  const auto current_level_hash_table =
538  buildCurrentLevelHashTable(current_level_join_conditions,
539  level_idx,
540  ra_exe_unit,
541  co,
542  query_infos,
543  column_cache,
544  fail_reasons);
545  const auto found_outer_join_matches_cb =
546  [this, level_idx](llvm::Value* found_outer_join_matches) {
547  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
548  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
549  cgen_state_->outer_join_match_found_per_level_[level_idx] =
550  found_outer_join_matches;
551  };
552  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
553  auto rem_left_join_quals_it =
554  plan_state_->left_join_non_hashtable_quals_.find(level_idx);
555  bool has_remaining_left_join_quals =
556  rem_left_join_quals_it != plan_state_->left_join_non_hashtable_quals_.end() &&
557  !rem_left_join_quals_it->second.empty();
558  const auto outer_join_condition_remaining_quals_cb =
559  [this, level_idx, &co](const std::vector<llvm::Value*>& prev_iters) {
560  // When we have multiple quals for the left join in the current join level,
561  // we first try to build a hashtable using one of the possible quals,
562  // and handle the remaining quals as extra join conditions.
563  FetchCacheAnchor anchor(cgen_state_.get());
564  addJoinLoopIterator(prev_iters, level_idx + 1);
565  llvm::Value* left_join_cond = cgen_state_->llBool(true);
566  CodeGenerator code_generator(this);
567  auto it = plan_state_->left_join_non_hashtable_quals_.find(level_idx);
568  if (it != plan_state_->left_join_non_hashtable_quals_.end()) {
569  for (auto expr : it->second) {
570  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
571  left_join_cond,
572  code_generator.toBool(
573  code_generator.codegen(expr.get(), true, co).front()));
574  }
575  }
576  return left_join_cond;
577  };
578  if (current_level_hash_table) {
579  const auto hoisted_filters_cb = buildHoistLeftHandSideFiltersCb(
580  ra_exe_unit, level_idx, current_level_hash_table->getInnerTableId(), co);
581  if (current_level_hash_table->getHashType() == HashType::OneToOne) {
582  join_loops.emplace_back(
583  /*kind=*/JoinLoopKind::Singleton,
584  /*type=*/current_level_join_conditions.type,
585  /*iteration_domain_codegen=*/
586  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
587  const std::vector<llvm::Value*>& prev_iters) {
588  addJoinLoopIterator(prev_iters, level_idx);
589  JoinLoopDomain domain{{0}};
590  domain.slot_lookup_result =
591  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
592  return domain;
593  },
594  /*outer_condition_match=*/
595  current_level_join_conditions.type == JoinType::LEFT &&
596  has_remaining_left_join_quals
597  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
598  outer_join_condition_remaining_quals_cb)
599  : nullptr,
600  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
601  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
602  : nullptr,
603  /*hoisted_filters=*/hoisted_filters_cb,
604  /*is_deleted=*/is_deleted_cb,
605  /*nested_loop_join=*/false);
606  } else if (auto range_join_table =
607  dynamic_cast<RangeJoinHashTable*>(current_level_hash_table.get())) {
608  join_loops.emplace_back(
609  /* kind= */ JoinLoopKind::MultiSet,
610  /* type= */ current_level_join_conditions.type,
611  /* iteration_domain_codegen= */
612  [this,
613  range_join_table,
614  current_hash_table_idx,
615  level_idx,
616  current_level_hash_table,
617  &co](const std::vector<llvm::Value*>& prev_iters) {
618  addJoinLoopIterator(prev_iters, level_idx);
619  JoinLoopDomain domain{{0}};
620  CHECK(!prev_iters.empty());
621  const auto matching_set = range_join_table->codegenMatchingSetWithOffset(
622  co, current_hash_table_idx, prev_iters.back());
623  domain.values_buffer = matching_set.elements;
624  domain.element_count = matching_set.count;
625  return domain;
626  },
627  /* outer_condition_match= */
628  current_level_join_conditions.type == JoinType::LEFT
629  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
630  outer_join_condition_remaining_quals_cb)
631  : nullptr,
632  /* found_outer_matches= */
633  current_level_join_conditions.type == JoinType::LEFT
634  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
635  : nullptr,
636  /* hoisted_filters= */ nullptr, // <<! TODO
637  /* is_deleted= */ is_deleted_cb,
638  /*nested_loop_join=*/false);
639  } else {
640  join_loops.emplace_back(
641  /*kind=*/JoinLoopKind::Set,
642  /*type=*/current_level_join_conditions.type,
643  /*iteration_domain_codegen=*/
644  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
645  const std::vector<llvm::Value*>& prev_iters) {
646  addJoinLoopIterator(prev_iters, level_idx);
647  JoinLoopDomain domain{{0}};
648  const auto matching_set = current_level_hash_table->codegenMatchingSet(
649  co, current_hash_table_idx);
650  domain.values_buffer = matching_set.elements;
651  domain.element_count = matching_set.count;
652  return domain;
653  },
654  /*outer_condition_match=*/
655  current_level_join_conditions.type == JoinType::LEFT
656  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
657  outer_join_condition_remaining_quals_cb)
658  : nullptr,
659  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
660  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
661  : nullptr,
662  /*hoisted_filters=*/hoisted_filters_cb,
663  /*is_deleted=*/is_deleted_cb,
664  /*nested_loop_join=*/false);
665  }
666  ++current_hash_table_idx;
667  } else {
668  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
669  ? "No equijoin expression found"
670  : boost::algorithm::join(fail_reasons, " | ");
672  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
673  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
674  // condition.
675  VLOG(1) << "Unable to build hash table, falling back to loop join: "
676  << fail_reasons_str;
677  const auto outer_join_condition_cb =
678  [this, level_idx, &co, &current_level_join_conditions](
679  const std::vector<llvm::Value*>& prev_iters) {
680  // The values generated for the match path don't dominate all uses
681  // since on the non-match path nulls are generated. Reset the cache
682  // once the condition is generated to avoid incorrect reuse.
683  FetchCacheAnchor anchor(cgen_state_.get());
684  addJoinLoopIterator(prev_iters, level_idx + 1);
685  llvm::Value* left_join_cond = cgen_state_->llBool(true);
686  CodeGenerator code_generator(this);
687  for (auto expr : current_level_join_conditions.quals) {
688  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
689  left_join_cond,
690  code_generator.toBool(
691  code_generator.codegen(expr.get(), true, co).front()));
692  }
693  return left_join_cond;
694  };
695  join_loops.emplace_back(
696  /*kind=*/JoinLoopKind::UpperBound,
697  /*type=*/current_level_join_conditions.type,
698  /*iteration_domain_codegen=*/
699  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
700  addJoinLoopIterator(prev_iters, level_idx);
701  JoinLoopDomain domain{{0}};
702  auto* arg = get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan");
703  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
704  arg->getType()->getScalarType()->getPointerElementType(),
705  arg,
706  cgen_state_->llInt(int32_t(level_idx + 1)));
707  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(
708  rows_per_scan_ptr->getType()->getPointerElementType(),
709  rows_per_scan_ptr,
710  "num_rows_per_scan");
711  return domain;
712  },
713  /*outer_condition_match=*/
714  current_level_join_conditions.type == JoinType::LEFT
715  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
716  outer_join_condition_cb)
717  : nullptr,
718  /*found_outer_matches=*/
719  current_level_join_conditions.type == JoinType::LEFT
720  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
721  : nullptr,
722  /*hoisted_filters=*/nullptr,
723  /*is_deleted=*/is_deleted_cb,
724  /*nested_loop_join=*/true);
725  }
726  }
727  return join_loops;
728 }
llvm::Value * values_buffer
Definition: JoinLoop.h:49
std::string join(T const &container, std::string const &delim)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
#define INJECT_TIMER(DESC)
Definition: measure.h:93
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1298
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * slot_lookup_result
Definition: JoinLoop.h:47
#define CHECK_LT(x, y)
Definition: Logger.h:232
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:935
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1094
#define CHECK(condition)
Definition: Logger.h:222
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
Definition: IRCodegen.cpp:484
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const int inner_table_id, const CompilationOptions &co)
Definition: IRCodegen.cpp:768
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:523
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:876
#define VLOG(n)
Definition: Logger.h:316

+ Here is the call graph for this function:

void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3269 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LT, getFragmentCount(), RelAlgExecutionUnit::input_descs, and plan_state_.

Referenced by fetchChunks().

3274  {
3275  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
3276  size_t frag_pos{0};
3277  const auto& input_descs = ra_exe_unit.input_descs;
3278  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3279  const int table_id = input_descs[scan_idx].getTableId();
3280  CHECK_EQ(selected_fragments[scan_idx].table_id, table_id);
3281  selected_fragments_crossjoin.push_back(
3282  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
3283  for (const auto& col_id : col_global_ids) {
3284  CHECK(col_id);
3285  const auto& input_desc = col_id->getScanDesc();
3286  if (input_desc.getTableId() != table_id ||
3287  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
3288  continue;
3289  }
3290  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3291  CHECK(it != plan_state_->global_to_local_col_ids_.end());
3292  CHECK_LT(static_cast<size_t>(it->second),
3293  plan_state_->global_to_local_col_ids_.size());
3294  local_col_to_frag_pos[it->second] = frag_pos;
3295  }
3296  ++frag_pos;
3297  }
3298 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::vector< InputDescriptor > input_descs
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1298
#define CHECK_LT(x, y)
Definition: Logger.h:232
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3255
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::buildSelectedFragsMappingForUnion ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3300 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs.

Referenced by fetchUnionChunks().

3303  {
3304  const auto& input_descs = ra_exe_unit.input_descs;
3305  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3306  // selected_fragments is set in assignFragsToKernelDispatch execution_kernel.fragments
3307  if (selected_fragments[0].table_id == input_descs[scan_idx].getTableId()) {
3308  selected_fragments_crossjoin.push_back({size_t(1)});
3309  }
3310  }
3311 }
std::vector< InputDescriptor > input_descs

+ Here is the caller graph for this function:

FragmentSkipStatus Executor::canSkipFragmentForFpQual ( const Analyzer::BinOper comp_expr,
const Analyzer::ColumnVar lhs_col,
const Fragmenter_Namespace::FragmentInfo fragment,
const Analyzer::Constant rhs_const 
) const
private

Definition at line 4070 of file Execute.cpp.

References CHECK, extract_max_stat_fp_type(), extract_min_stat_fp_type(), Analyzer::ColumnVar::get_column_id(), Analyzer::Constant::get_constval(), Analyzer::BinOper::get_optype(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), INVALID, kDOUBLE, kEQ, kFLOAT, kGE, kGT, kLE, kLT, NOT_SKIPPABLE, and SKIPPABLE.

Referenced by skipFragment().

4074  {
4075  const int col_id = lhs_col->get_column_id();
4076  auto chunk_meta_it = fragment.getChunkMetadataMap().find(col_id);
4077  if (chunk_meta_it == fragment.getChunkMetadataMap().end()) {
4079  }
4080  double chunk_min{0.};
4081  double chunk_max{0.};
4082  const auto& chunk_type = lhs_col->get_type_info();
4083  chunk_min = extract_min_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4084  chunk_max = extract_max_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4085  if (chunk_min > chunk_max) {
4087  }
4088 
4089  const auto datum_fp = rhs_const->get_constval();
4090  const auto rhs_type = rhs_const->get_type_info().get_type();
4091  CHECK(rhs_type == kFLOAT || rhs_type == kDOUBLE);
4092 
4093  // Do we need to codegen the constant like the integer path does?
4094  const auto rhs_val = rhs_type == kFLOAT ? datum_fp.floatval : datum_fp.doubleval;
4095 
4096  // Todo: dedup the following comparison code with the integer/timestamp path; it is
4097  // slightly tricky to do cleanly as we do not have rowid on this path
4098  switch (comp_expr->get_optype()) {
4099  case kGE:
4100  if (chunk_max < rhs_val) {
4102  }
4103  break;
4104  case kGT:
4105  if (chunk_max <= rhs_val) {
4107  }
4108  break;
4109  case kLE:
4110  if (chunk_min > rhs_val) {
4112  }
4113  break;
4114  case kLT:
4115  if (chunk_min >= rhs_val) {
4117  }
4118  break;
4119  case kEQ:
4120  if (chunk_min > rhs_val || chunk_max < rhs_val) {
4122  }
4123  break;
4124  default:
4125  break;
4126  }
4128 }
double extract_max_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
Definition: sqldefs.h:34
Definition: sqldefs.h:35
Definition: sqldefs.h:29
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:379
SQLOps get_optype() const
Definition: Analyzer.h:447
double extract_min_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
const ChunkMetadataMap & getChunkMetadataMap() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:82
Definition: sqldefs.h:33
Datum get_constval() const
Definition: Analyzer.h:343
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqldefs.h:32
int get_column_id() const
Definition: Analyzer.h:202

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::castToFP ( llvm::Value *  value,
SQLTypeInfo const &  from_ti,
SQLTypeInfo const &  to_ti 
)
private

Definition at line 3885 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, exp_to_scale(), logger::FATAL, SQLTypeInfo::get_scale(), SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_number(), and LOG.

3887  {
3889  if (value->getType()->isIntegerTy() && from_ti.is_number() && to_ti.is_fp() &&
3890  (!from_ti.is_fp() || from_ti.get_size() != to_ti.get_size())) {
3891  llvm::Type* fp_type{nullptr};
3892  switch (to_ti.get_size()) {
3893  case 4:
3894  fp_type = llvm::Type::getFloatTy(cgen_state_->context_);
3895  break;
3896  case 8:
3897  fp_type = llvm::Type::getDoubleTy(cgen_state_->context_);
3898  break;
3899  default:
3900  LOG(FATAL) << "Unsupported FP size: " << to_ti.get_size();
3901  }
3902  value = cgen_state_->ir_builder_.CreateSIToFP(value, fp_type);
3903  if (from_ti.get_scale()) {
3904  value = cgen_state_->ir_builder_.CreateFDiv(
3905  value,
3906  llvm::ConstantFP::get(value->getType(), exp_to_scale(from_ti.get_scale())));
3907  }
3908  }
3909  return value;
3910 }
#define LOG(tag)
Definition: Logger.h:216
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
#define AUTOMATIC_IR_METADATA(CGENSTATE)
uint64_t exp_to_scale(const unsigned exp)

+ Here is the call graph for this function:

llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 3912 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, CHECK, CHECK_LT, and get_int_type().

3912  {
3914  CHECK(val->getType()->isPointerTy());
3915 
3916  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
3917  const auto val_type = val_ptr_type->getPointerElementType();
3918  size_t val_width = 0;
3919  if (val_type->isIntegerTy()) {
3920  val_width = val_type->getIntegerBitWidth();
3921  } else {
3922  if (val_type->isFloatTy()) {
3923  val_width = 32;
3924  } else {
3925  CHECK(val_type->isDoubleTy());
3926  val_width = 64;
3927  }
3928  }
3929  CHECK_LT(size_t(0), val_width);
3930  if (bitWidth == val_width) {
3931  return val;
3932  }
3933  return cgen_state_->ir_builder_.CreateBitCast(
3934  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
3935 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:232
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

bool Executor::checkCurrentQuerySession ( const std::string &  candidate_query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4462 of file Execute.cpp.

References current_query_session_.

4464  {
4465  // return true only when the candidate_query_session is non-empty
4466  // and equal to the current_query_session
4467  return !candidate_query_session.empty() &&
4468  (current_query_session_ == candidate_query_session);
4469 }
QuerySessionId current_query_session_
Definition: Execute.h:1342
bool Executor::checkIsQuerySessionEnrolled ( const QuerySessionId query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4736 of file Execute.cpp.

References queries_session_map_.

Referenced by executeWorkUnitImpl().

4738  {
4739  if (query_session.empty()) {
4740  return false;
4741  }
4742  return !query_session.empty() && queries_session_map_.count(query_session);
4743 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1346

+ Here is the caller graph for this function:

bool Executor::checkIsQuerySessionInterrupted ( const std::string &  query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4725 of file Execute.cpp.

References queries_interrupt_flag_.

Referenced by executePlanWithGroupBy(), executePlanWithoutGroupBy(), fetchChunks(), and fetchUnionChunks().

4727  {
4728  if (query_session.empty()) {
4729  return false;
4730  }
4731  auto flag_it = queries_interrupt_flag_.find(query_session);
4732  return !query_session.empty() && flag_it != queries_interrupt_flag_.end() &&
4733  flag_it->second;
4734 }
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1344

+ Here is the caller graph for this function:

bool Executor::checkNonKernelTimeInterrupted ( ) const

Definition at line 4813 of file Execute.cpp.

References current_query_session_, executor_id_, executor_session_mutex_, queries_interrupt_flag_, and UNITARY_EXECUTOR_ID.

4813  {
4814  // this function should be called within an executor which is assigned
4815  // to the specific query thread (which indicates the session is already enrolled);
4816  // check whether this is called from a non-unitary executor
4818  return false;
4819  };
4821  auto flag_it = queries_interrupt_flag_.find(current_query_session_);
4822  return !current_query_session_.empty() && flag_it != queries_interrupt_flag_.end() &&
4823  flag_it->second;
4824 }
QuerySessionId current_query_session_
Definition: Execute.h:1342
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1242
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1344
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1340
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:376
void Executor::checkPendingQueryStatus ( const QuerySessionId query_session)

Definition at line 4506 of file Execute.cpp.

References ERR_INTERRUPTED, executor_session_mutex_, queries_interrupt_flag_, queries_session_map_, and VLOG.

4506  {
4507  // check whether we are okay to execute the "pending" query
4508  // i.e., before running the query check if this query session is "ALREADY" interrupted
4510  if (query_session.empty()) {
4511  return;
4512  }
4513  if (queries_interrupt_flag_.find(query_session) == queries_interrupt_flag_.end()) {
4514  // something went wrong, since we assume this is the caller's responsibility
4515  // (call this function only for an enrolled query session)
4516  if (!queries_session_map_.count(query_session)) {
4517  VLOG(1) << "Interrupting pending query is not available since the query session is "
4518  "not enrolled";
4519  } else {
4520  // here the query session is enrolled but the interrupt flag is not registered
4521  VLOG(1)
4522  << "Interrupting pending query is not available since its interrupt flag is "
4523  "not registered";
4524  }
4525  return;
4526  }
4527  if (queries_interrupt_flag_[query_session]) {
4529  }
4530 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1346
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1388
std::shared_lock< T > shared_lock
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1344
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1340
#define VLOG(n)
Definition: Logger.h:316
void Executor::clearCaches ( bool  runtime_only = false)
static void Executor::clearExternalCaches ( bool  for_update,
const TableDescriptor td,
const int  current_db_id 
)
inlinestatic

Definition at line 391 of file Execute.h.

References TableDescriptor::getTableChunkKey(), CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable().

Referenced by Parser::InsertIntoTableAsSelectStmt::execute(), Parser::DropTableStmt::execute(), Parser::TruncateTableStmt::execute(), Parser::DropColumnStmt::execute(), Parser::CopyTableStmt::execute(), RelAlgExecutor::executeDelete(), and RelAlgExecutor::executeUpdate().

393  {
394  bool clearEntireCache = true;
395  if (td) {
396  const auto& table_chunk_key_prefix = td->getTableChunkKey(current_db_id);
397  if (!table_chunk_key_prefix.empty()) {
398  auto table_key = boost::hash_value(table_chunk_key_prefix);
400  if (for_update) {
402  } else {
404  }
405  clearEntireCache = false;
406  }
407  }
408  if (clearEntireCache) {
410  if (for_update) {
412  } else {
414  }
415  }
416  }
static void invalidateCachesByTable(size_t table_key)
static void invalidateCaches()
std::vector< int > getTableChunkKey(const int getCurrentDBId) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 501 of file Execute.cpp.

References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, execute_mutex_, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by DBHandler::clear_cpu_memory(), DBHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

501  {
502  switch (memory_level) {
506  execute_mutex_); // Don't flush memory while queries are running
507 
508  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
509  // The hash table cache uses CPU memory not managed by the buffer manager. In the
510  // future, we should manage these allocations with the buffer manager directly.
511  // For now, assume the user wants to purge the hash table cache when they clear
512  // CPU memory (currently used in ExecuteTest to lower memory pressure)
514  }
517  break;
518  }
519  default: {
520  throw std::runtime_error(
521  "Clearing memory levels other than the CPU level or GPU level is not "
522  "supported.");
523  }
524  }
525 }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1351
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:434
static void invalidateCaches()
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:232
static SysCatalog & instance()
Definition: SysCatalog.h:341
std::unique_lock< T > unique_lock

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMetaInfoCache ( )
private

Definition at line 788 of file Execute.cpp.

References agg_col_range_cache_, TableGenerations::clear(), AggregatedColRange::clear(), InputTableInfoCache::clear(), input_table_info_cache_, and table_generations_.

788  {
792 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1338
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1337
TableGenerations table_generations_
Definition: Execute.h:1339

+ Here is the call graph for this function:

void Executor::clearQuerySessionStatus ( const QuerySessionId query_session,
const std::string &  submitted_time_str 
)

Definition at line 4532 of file Execute.cpp.

References current_query_session_, executor_session_mutex_, invalidateRunningQuerySession(), removeFromQuerySessionList(), and resetInterrupt().

4533  {
4535  // clear the interrupt-related info for a finished query
4536  if (query_session.empty()) {
4537  return;
4538  }
4539  removeFromQuerySessionList(query_session, submitted_time_str, session_write_lock);
4540  if (query_session.compare(current_query_session_) == 0) {
4541  invalidateRunningQuerySession(session_write_lock);
4542  resetInterrupt();
4543  }
4544 }
QuerySessionId current_query_session_
Definition: Execute.h:1342
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4680
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1340
void resetInterrupt()
void invalidateRunningQuerySession(heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4484

+ Here is the call graph for this function:

llvm::Value * Executor::codegenAggregateWindowState ( )
private

Definition at line 1336 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.

1336  {
1338  const auto pi32_type =
1339  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1340  const auto pi64_type =
1341  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1342  const auto window_func_context =
1344  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
1345  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1346  const auto aggregate_state_type =
1347  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1348  auto aggregate_state = aggregateWindowStatePtr();
1349  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1350  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1351  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1352  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
1353  aggregate_state_count_i64, aggregate_state_type);
1354  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1355  switch (window_func_ti.get_type()) {
1356  case kFLOAT: {
1357  return cgen_state_->emitCall(
1358  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
1359  }
1360  case kDOUBLE: {
1361  return cgen_state_->emitCall(
1362  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
1363  }
1364  case kDECIMAL: {
1365  return cgen_state_->emitCall(
1366  "load_avg_decimal",
1367  {aggregate_state,
1368  aggregate_state_count,
1369  double_null_lv,
1370  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
1371  }
1372  default: {
1373  return cgen_state_->emitCall(
1374  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
1375  }
1376  }
1377  }
1378  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1379  return cgen_state_->ir_builder_.CreateLoad(
1380  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1381  }
1382  switch (window_func_ti.get_type()) {
1383  case kFLOAT: {
1384  return cgen_state_->emitCall("load_float", {aggregate_state});
1385  }
1386  case kDOUBLE: {
1387  return cgen_state_->emitCall("load_double", {aggregate_state});
1388  }
1389  default: {
1390  return cgen_state_->ir_builder_.CreateLoad(
1391  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1392  }
1393  }
1394 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2454
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenCurrentPartitionIndex ( const WindowFunctionContext window_func_context,
llvm::Value *  current_row_pos_lv 
)
private

Definition at line 603 of file WindowFunctionIR.cpp.

References get_int_type(), WindowFunctionContext::partitionCount(), and WindowFunctionContext::partitionNumCountBuf().

605  {
606  const auto pi64_type =
607  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
608  // given current row's pos, calculate the partition index that it belongs to
609  auto partition_count_lv = cgen_state_->llInt(window_func_context->partitionCount());
610  auto partition_num_count_buf_lv = cgen_state_->llInt(
611  reinterpret_cast<int64_t>(window_func_context->partitionNumCountBuf()));
612  auto partition_num_count_ptr_lv =
613  cgen_state_->ir_builder_.CreateIntToPtr(partition_num_count_buf_lv, pi64_type);
614  return cgen_state_->emitCall(
615  "compute_int64_t_lower_bound",
616  {partition_count_lv, current_row_pos_lv, partition_num_count_ptr_lv});
617 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
size_t partitionCount() const
const int64_t * partitionNumCountBuf() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenFrameBoundExpr ( const Analyzer::WindowFunction window_func,
const Analyzer::WindowFrame frame_bound,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 558 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegen(), EXPR_FOLLOWING, EXPR_PRECEDING, g_cluster, SQLTypeInfo::get_size(), Analyzer::Expr::get_type_info(), Analyzer::WindowFrame::getBoundExpr(), Analyzer::WindowFunction::getOrderKeys(), Analyzer::WindowFunction::hasRangeModeFraming(), kBIGINT, kINT, and kSMALLINT.

561  {
562  auto needs_bound_expr_codegen = [](const Analyzer::WindowFrame* window_frame) {
563  return window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_FOLLOWING ||
564  window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_PRECEDING;
565  };
566  const auto order_col_ti = window_func->getOrderKeys().front()->get_type_info();
567  auto encode_date_col_val = [&order_col_ti, this](llvm::Value* bound_expr_lv) {
568  if (order_col_ti.get_comp_param() == 16) {
569  return cgen_state_->emitCall(
570  "fixed_width_date_encode_noinline",
571  {bound_expr_lv,
572  cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(SQLTypeInfo(kSMALLINT)),
573  32),
574  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
575  } else {
576  return cgen_state_->emitCall("fixed_width_date_encode_noinline",
577  {bound_expr_lv,
578  cgen_state_->inlineIntNull(SQLTypeInfo(kINT)),
579  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
580  }
581  };
582  llvm::Value* bound_expr_lv{nullptr};
583  if (needs_bound_expr_codegen(frame_bound)) {
584  auto bound_expr_lvs = code_generator.codegen(frame_bound->getBoundExpr(), true, co);
585  bound_expr_lv = bound_expr_lvs.front();
586  if (order_col_ti.is_date() && window_func->hasRangeModeFraming()) {
587  if (g_cluster) {
588  throw std::runtime_error(
589  "Range mode with date type ordering column is not supported yet.");
590  }
591  bound_expr_lv = encode_date_col_val(bound_expr_lv);
592  }
593  if (frame_bound->getBoundExpr()->get_type_info().get_size() != 8) {
594  bound_expr_lv = cgen_state_->castToTypeIn(bound_expr_lv, 64);
595  }
596  } else {
597  bound_expr_lv = cgen_state_->llInt((int64_t)-1);
598  }
599  CHECK(bound_expr_lv);
600  return bound_expr_lv;
601 }
bool hasRangeModeFraming() const
Definition: Analyzer.h:2488
HOST DEVICE int get_size() const
Definition: sqltypes.h:389
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2462
const Analyzer::Expr * getBoundExpr() const
Definition: Analyzer.h:2388
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:82
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK(condition)
Definition: Logger.h:222
bool g_cluster
Definition: sqltypes.h:60

+ Here is the call graph for this function:

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const ExecutionOptions eo 
)
private

Definition at line 1112 of file IRCodegen.cpp.

References ExecutionOptions::allow_runtime_query_interrupt, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, JoinLoop::codegen(), CompilationOptions::device_type, JoinLoopDomain::element_count, get_int_array_type(), get_int_type(), INNER, MultiSet, CodeGenerator::posArg(), GroupByAndAggregate::query_infos_, query_mem_desc, Set, and ExecutionOptions::with_dynamic_watchdog.

1119  {
1121  const auto exit_bb =
1122  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->current_func_);
1123  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
1124  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1125  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1126  CodeGenerator code_generator(this);
1127 
1128  llvm::BasicBlock* loops_entry_bb{nullptr};
1129  auto has_range_join =
1130  std::any_of(join_loops.begin(), join_loops.end(), [](const auto& join_loop) {
1131  return join_loop.kind() == JoinLoopKind::MultiSet;
1132  });
1133  if (has_range_join) {
1134  CHECK_EQ(join_loops.size(), size_t(1));
1135  const auto element_count =
1136  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 9);
1137 
1138  auto compute_packed_offset = [](const int32_t x, const int32_t y) -> uint64_t {
1139  const uint64_t y_shifted = static_cast<uint64_t>(y) << 32;
1140  return y_shifted | static_cast<uint32_t>(x);
1141  };
1142 
1143  const auto values_arr = std::vector<llvm::Constant*>{
1144  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
1145  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1146  compute_packed_offset(0, 1)),
1147  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1148  compute_packed_offset(0, -1)),
1149  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1150  compute_packed_offset(1, 0)),
1151  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1152  compute_packed_offset(1, 1)),
1153  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1154  compute_packed_offset(1, -1)),
1155  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1156  compute_packed_offset(-1, 0)),
1157  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1158  compute_packed_offset(-1, 1)),
1159  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1160  compute_packed_offset(-1, -1))};
1161 
1162  const auto constant_values_array = llvm::ConstantArray::get(
1163  get_int_array_type(64, 9, cgen_state_->context_), values_arr);
1164  CHECK(cgen_state_->module_);
1165  const auto values =
1166  new llvm::GlobalVariable(*cgen_state_->module_,
1167  get_int_array_type(64, 9, cgen_state_->context_),
1168  true,
1169  llvm::GlobalValue::LinkageTypes::InternalLinkage,
1170  constant_values_array);
1171  JoinLoop join_loop(
1174  [element_count, values](const std::vector<llvm::Value*>& v) {
1175  JoinLoopDomain domain{{0}};
1176  domain.element_count = element_count;
1177  domain.values_buffer = values;
1178  return domain;
1179  },
1180  nullptr,
1181  nullptr,
1182  nullptr,
1183  nullptr,
1184  "range_key_loop");
1185 
1186  loops_entry_bb = JoinLoop::codegen(
1187  {join_loop},
1188  [this,
1189  query_func,
1190  &query_mem_desc,
1191  &co,
1192  &eo,
1193  &group_by_and_aggregate,
1194  &join_loops,
1195  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1196  auto& builder = cgen_state_->ir_builder_;
1197 
1198  auto body_exit_bb =
1199  llvm::BasicBlock::Create(cgen_state_->context_,
1200  "range_key_inner_body_exit",
1201  builder.GetInsertBlock()->getParent());
1202 
1203  auto range_key_body_bb =
1204  llvm::BasicBlock::Create(cgen_state_->context_,
1205  "range_key_loop_body",
1206  builder.GetInsertBlock()->getParent());
1207  builder.SetInsertPoint(range_key_body_bb);
1208 
1209  const auto body_loops_entry_bb = JoinLoop::codegen(
1210  join_loops,
1211  [this,
1212  query_func,
1213  &query_mem_desc,
1214  &co,
1215  &eo,
1216  &group_by_and_aggregate,
1217  &join_loops,
1218  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1219  addJoinLoopIterator(prev_iters, join_loops.size());
1220  auto& builder = cgen_state_->ir_builder_;
1221  const auto loop_body_bb =
1222  llvm::BasicBlock::Create(builder.getContext(),
1223  "loop_body",
1224  builder.GetInsertBlock()->getParent());
1225  builder.SetInsertPoint(loop_body_bb);
1226  const bool can_return_error =
1227  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1228  if (can_return_error || cgen_state_->needs_error_check_ ||
1229  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1230  createErrorCheckControlFlow(query_func,
1231  eo.with_dynamic_watchdog,
1232  eo.allow_runtime_query_interrupt,
1233  join_loops,
1234  co.device_type,
1235  group_by_and_aggregate.query_infos_);
1236  }
1237  return loop_body_bb;
1238  },
1239  prev_iters.back(),
1240  body_exit_bb,
1241  cgen_state_.get());
1242 
1243  builder.SetInsertPoint(range_key_body_bb);
1244  cgen_state_->ir_builder_.CreateBr(body_loops_entry_bb);
1245 
1246  builder.SetInsertPoint(body_exit_bb);
1247  return range_key_body_bb;
1248  },
1249  code_generator.posArg(nullptr),
1250  exit_bb,
1251  cgen_state_.get());
1252  } else {
1253  loops_entry_bb = JoinLoop::codegen(
1254  join_loops,
1255  /*body_codegen=*/
1256  [this,
1257  query_func,
1258  &query_mem_desc,
1259  &co,
1260  &eo,
1261  &group_by_and_aggregate,
1262  &join_loops,
1263  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1265  addJoinLoopIterator(prev_iters, join_loops.size());
1266  auto& builder = cgen_state_->ir_builder_;
1267  const auto loop_body_bb = llvm::BasicBlock::Create(
1268  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
1269  builder.SetInsertPoint(loop_body_bb);
1270  const bool can_return_error =
1271  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1272  if (can_return_error || cgen_state_->needs_error_check_ ||
1273  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1274  createErrorCheckControlFlow(query_func,
1275  eo.with_dynamic_watchdog,
1276  eo.allow_runtime_query_interrupt,
1277  join_loops,
1278  co.device_type,
1279  group_by_and_aggregate.query_infos_);
1280  }
1281  return loop_body_bb;
1282  },
1283  /*outer_iter=*/code_generator.posArg(nullptr),
1284  exit_bb,
1285  cgen_state_.get());
1286  }
1287  CHECK(loops_entry_bb);
1288  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1289  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
1290 }
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
#define CHECK_EQ(x, y)
Definition: Logger.h:230
llvm::Value * element_count
Definition: JoinLoop.h:46
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, CgenState *cgen_state)
Definition: JoinLoop.cpp:50
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1094
#define CHECK(condition)
Definition: Logger.h:222
llvm::ArrayType * get_int_array_type(int const width, int count, llvm::LLVMContext &context)

+ Here is the call graph for this function:

llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3177 of file NativeCodegen.cpp.

3179  {
3181  if (!co.filter_on_deleted_column) {
3182  return nullptr;
3183  }
3184  CHECK(!ra_exe_unit.input_descs.empty());
3185  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
3186  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
3187  return nullptr;
3188  }
3189  const auto deleted_cd =
3190  plan_state_->getDeletedColForTable(outer_input_desc.getTableId());
3191  if (!deleted_cd) {
3192  return nullptr;
3193  }
3194  CHECK(deleted_cd->columnType.is_boolean());
3195  const auto deleted_expr =
3196  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
3197  outer_input_desc.getTableId(),
3198  deleted_cd->columnId,
3199  outer_input_desc.getNestLevel());
3200  CodeGenerator code_generator(this);
3201  const auto is_deleted =
3202  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
3203  const auto is_deleted_bb = llvm::BasicBlock::Create(
3204  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
3205  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
3206  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
3207  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
3208  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
3209  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3210  cgen_state_->ir_builder_.SetInsertPoint(bb);
3211  return bb;
3212 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1298
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:222
void Executor::codegenWindowAvgEpilogue ( llvm::Value *  crt_val,
llvm::Value *  window_func_null_val,
llvm::Value *  multiplicity_lv 
)
private

Definition at line 1299 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

1301  {
1303  const auto window_func_context =
1304      WindowProjectNodeContext::getActiveWindowFunctionContext(this);
1305  const auto window_func = window_func_context->getWindowFunction();
1306  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1307  const auto pi32_type =
1308  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1309  const auto pi64_type =
1310  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1311  const auto aggregate_state_type =
1312  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1313  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1314  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1315  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
1316  aggregate_state_count_i64, aggregate_state_type);
1317  std::string agg_count_func_name = "agg_count";
1318  switch (window_func_ti.get_type()) {
1319  case kFLOAT: {
1320  agg_count_func_name += "_float";
1321  break;
1322  }
1323  case kDOUBLE: {
1324  agg_count_func_name += "_double";
1325  break;
1326  }
1327  default: {
1328  break;
1329  }
1330  }
1331  agg_count_func_name += "_skip_val";
1332  cgen_state_->emitCall(agg_count_func_name,
1333  {aggregate_state_count, crt_val, window_func_null_val});
1334 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenWindowFrameBound ( WindowFunctionContext window_func_context,
const Analyzer::WindowFrame frame_start_bound,
const Analyzer::WindowFrame frame_end_bound,
llvm::Value *  current_row_pos_lv,
llvm::Value *  current_partition_start_offset_lv,
llvm::Value *  order_key_buf_ptr_lv,
llvm::Value *  order_key_col_null_val_lv,
llvm::Value *  frame_start_bound_expr_lv,
llvm::Value *  frame_end_bound_expr_lv,
llvm::Value *  num_elem_current_partition_lv,
llvm::Value *  target_partition_rowid_ptr_lv,
llvm::Value *  target_partition_sorted_rowid_ptr_lv,
llvm::Value *  null_start_pos_lv,
llvm::Value *  null_end_pos_lv,
CodeGenerator code_generator 
)
private

Definition at line 619 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegenWindowPosition(), CURRENT_ROW, EXPR_FOLLOWING, EXPR_PRECEDING, anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_int_type(), Analyzer::Expr::get_type_info(), Analyzer::WindowFrame::getBoundExpr(), Analyzer::WindowFrame::getBoundType(), WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getOrderKeyColumnBufferTypes(), WindowFunctionContext::getWindowFunction(), SQLTypeInfo::is_date(), SQLTypeInfo::is_timestamp(), UNBOUNDED_FOLLOWING, and UNBOUNDED_PRECEDING.

634  {
635  const auto window_func = window_func_context->getWindowFunction();
636  CHECK(window_func);
637  std::string order_col_type_name{""};
638  llvm::Value* current_col_value_lv{nullptr};
639  llvm::Value* frame_start_bound_lv{nullptr};
640  llvm::Value* frame_end_bound_lv{nullptr};
641 
642  if (window_func->hasRangeModeFraming()) {
643  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1);
644  CHECK(window_func->getOrderKeys().size() == 1UL);
645  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1UL);
646  const auto order_key_ti = window_func->getOrderKeys().front()->get_type_info();
647  const auto order_key_size = order_key_ti.get_size();
648  size_t order_key_size_in_byte = order_key_size * 8;
649  order_col_type_name = get_col_type_name_by_size(
650  order_key_size,
651  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
652 
653  // load column value of the current row (of ordering column)
654  auto rowid_in_partition_lv =
655  code_generator.codegenWindowPosition(window_func_context, current_row_pos_lv);
656  auto current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
657  get_int_type(order_key_size_in_byte, cgen_state_->context_),
658  order_key_buf_ptr_lv,
659  rowid_in_partition_lv);
660  current_col_value_lv = cgen_state_->ir_builder_.CreateLoad(
661  current_col_value_ptr_lv->getType()->getPointerElementType(),
662  current_col_value_ptr_lv,
663  "current_col_value");
664  }
665 
666  // compute frame start depending on the bound type
667  if (frame_start_bound->getBoundType() == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING) {
668  // frame starts at the first row of the partition
669  frame_start_bound_lv = cgen_state_->llInt((int64_t)0);
670  } else if (frame_start_bound->getBoundType() ==
671             SqlWindowFrameBoundType::EXPR_PRECEDING) {
672  // frame starts at the position before X rows of the current row
673  CHECK(frame_start_bound_expr_lv);
674  if (window_func->hasRowModeFraming()) {
675  frame_start_bound_lv = cgen_state_->emitCall("compute_row_mode_start_index_sub",
676  {current_row_pos_lv,
677  current_partition_start_offset_lv,
678  frame_start_bound_expr_lv});
679  } else {
680  CHECK(window_func->hasRangeModeFraming());
681  if (frame_start_bound->getBoundExpr()->get_type_info().is_date() ||
682  frame_start_bound->getBoundExpr()->get_type_info().is_timestamp()) {
683  std::string lower_bound_func_name{"compute_"};
684  lower_bound_func_name.append(order_col_type_name);
685  lower_bound_func_name.append("_lower_bound_from_ordered_index_for_timeinterval");
686  frame_start_bound_lv = cgen_state_->emitCall(
687  lower_bound_func_name,
688  {num_elem_current_partition_lv,
689  frame_start_bound_expr_lv,
690  order_key_buf_ptr_lv,
691  target_partition_rowid_ptr_lv,
692  target_partition_sorted_rowid_ptr_lv,
693  cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64),
694  null_start_pos_lv,
695  null_end_pos_lv});
696  } else {
697  std::string lower_bound_func_name{"range_mode_"};
698  lower_bound_func_name.append(order_col_type_name);
699  lower_bound_func_name.append("_sub_frame_lower_bound");
700  frame_start_bound_lv =
701  cgen_state_->emitCall(lower_bound_func_name,
702  {num_elem_current_partition_lv,
703  current_col_value_lv,
704  order_key_buf_ptr_lv,
705  target_partition_rowid_ptr_lv,
706  target_partition_sorted_rowid_ptr_lv,
707  frame_start_bound_expr_lv,
708  order_key_col_null_val_lv,
709  null_start_pos_lv,
710  null_end_pos_lv});
711  }
712  }
713  } else if (frame_start_bound->getBoundType() == SqlWindowFrameBoundType::CURRENT_ROW) {
714  // frame start at the current row
715  if (window_func->hasRowModeFraming()) {
716  frame_start_bound_lv = cgen_state_->emitCall("compute_row_mode_start_index_sub",
717  {current_row_pos_lv,
718  current_partition_start_offset_lv,
719  cgen_state_->llInt(((int64_t)0))});
720  } else {
721  CHECK(window_func->hasRangeModeFraming());
722  std::string lower_bound_func_name{"compute_"};
723  lower_bound_func_name.append(order_col_type_name);
724  lower_bound_func_name.append("_lower_bound_from_ordered_index");
725  frame_start_bound_lv = cgen_state_->emitCall(lower_bound_func_name,
726  {num_elem_current_partition_lv,
727  current_col_value_lv,
728  order_key_buf_ptr_lv,
729  target_partition_rowid_ptr_lv,
730  target_partition_sorted_rowid_ptr_lv,
731  order_key_col_null_val_lv,
732  null_start_pos_lv,
733  null_end_pos_lv});
734  }
735  } else if (frame_start_bound->getBoundType() ==
736             SqlWindowFrameBoundType::EXPR_FOLLOWING) {
737  // frame start at the position after X rows of the current row
738  CHECK(frame_start_bound_expr_lv);
739  if (window_func->hasRowModeFraming()) {
740  frame_start_bound_lv = cgen_state_->emitCall("compute_row_mode_start_index_add",
741  {current_row_pos_lv,
742  current_partition_start_offset_lv,
743  frame_start_bound_expr_lv,
744  num_elem_current_partition_lv});
745  } else {
746  CHECK(window_func->hasRangeModeFraming());
747  if (frame_start_bound->getBoundExpr()->get_type_info().is_date() ||
748  frame_start_bound->getBoundExpr()->get_type_info().is_timestamp()) {
749  std::string lower_bound_func_name{"compute_"};
750  lower_bound_func_name.append(order_col_type_name);
751  lower_bound_func_name.append("_lower_bound_from_ordered_index_for_timeinterval");
752  frame_start_bound_lv = cgen_state_->emitCall(
753  lower_bound_func_name,
754  {num_elem_current_partition_lv,
755  frame_start_bound_expr_lv,
756  order_key_buf_ptr_lv,
757  target_partition_rowid_ptr_lv,
758  target_partition_sorted_rowid_ptr_lv,
759  cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64),
760  null_start_pos_lv,
761  null_end_pos_lv});
762  } else {
763  std::string lower_bound_func_name{"range_mode_"};
764  lower_bound_func_name.append(order_col_type_name);
765  lower_bound_func_name.append("_add_frame_lower_bound");
766  frame_start_bound_lv =
767  cgen_state_->emitCall(lower_bound_func_name,
768  {num_elem_current_partition_lv,
769  current_col_value_lv,
770  order_key_buf_ptr_lv,
771  target_partition_rowid_ptr_lv,
772  target_partition_sorted_rowid_ptr_lv,
773  frame_start_bound_expr_lv,
774  order_key_col_null_val_lv,
775  null_start_pos_lv,
776  null_end_pos_lv});
777  }
778  }
779  } else {
780  CHECK(false) << "frame start cannot be UNBOUNDED FOLLOWING";
781  }
782 
783  // compute frame end
784  if (frame_end_bound->getBoundType() == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING) {
785  // frame ends at the first row of the partition
786  CHECK(false) << "frame end cannot be UNBOUNDED PRECEDING";
787  } else if (frame_end_bound->getBoundType() == SqlWindowFrameBoundType::EXPR_PRECEDING) {
788  // frame ends at the position X rows before the current row
789  CHECK(frame_end_bound_expr_lv);
790  if (window_func->hasRowModeFraming()) {
791  frame_end_bound_lv = cgen_state_->emitCall("compute_row_mode_end_index_sub",
792  {current_row_pos_lv,
793  current_partition_start_offset_lv,
794  frame_end_bound_expr_lv});
795  } else {
796  CHECK(window_func->hasRangeModeFraming());
797  if (frame_end_bound->getBoundExpr()->get_type_info().is_date() ||
798  frame_end_bound->getBoundExpr()->get_type_info().is_timestamp()) {
799  std::string upper_bound_func_name{"compute_"};
800  upper_bound_func_name.append(order_col_type_name);
801  upper_bound_func_name.append("_upper_bound_from_ordered_index_for_timeinterval");
802  frame_end_bound_lv = cgen_state_->emitCall(
803  upper_bound_func_name,
804  {num_elem_current_partition_lv,
805  frame_end_bound_expr_lv,
806  order_key_buf_ptr_lv,
807  target_partition_rowid_ptr_lv,
808  target_partition_sorted_rowid_ptr_lv,
809  cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64),
810  null_start_pos_lv,
811  null_end_pos_lv});
812  } else {
813  std::string upper_bound_func_name{"range_mode_"};
814  upper_bound_func_name.append(order_col_type_name);
815  upper_bound_func_name.append("_sub_frame_upper_bound");
816  frame_end_bound_lv = cgen_state_->emitCall(upper_bound_func_name,
817  {num_elem_current_partition_lv,
818  current_col_value_lv,
819  order_key_buf_ptr_lv,
820  target_partition_rowid_ptr_lv,
821  target_partition_sorted_rowid_ptr_lv,
822  frame_end_bound_expr_lv,
823  order_key_col_null_val_lv,
824  null_start_pos_lv,
825  null_end_pos_lv});
826  }
827  }
828  } else if (frame_end_bound->getBoundType() == SqlWindowFrameBoundType::CURRENT_ROW) {
829  // frame ends at the current row
830  if (window_func->hasRowModeFraming()) {
831  frame_end_bound_lv = cgen_state_->emitCall("compute_row_mode_end_index_sub",
832  {current_row_pos_lv,
833  current_partition_start_offset_lv,
834  cgen_state_->llInt((int64_t)0)});
835  } else {
836  CHECK(window_func->hasRangeModeFraming());
837  std::string upper_bound_func_name{"compute_"};
838  upper_bound_func_name.append(order_col_type_name);
839  upper_bound_func_name.append("_upper_bound_from_ordered_index");
840  frame_end_bound_lv = cgen_state_->emitCall(upper_bound_func_name,
841  {num_elem_current_partition_lv,
842  current_col_value_lv,
843  order_key_buf_ptr_lv,
844  target_partition_rowid_ptr_lv,
845  target_partition_sorted_rowid_ptr_lv,
846  order_key_col_null_val_lv,
847  null_start_pos_lv,
848  null_end_pos_lv});
849  }
850  } else if (frame_end_bound->getBoundType() == SqlWindowFrameBoundType::EXPR_FOLLOWING) {
851  // frame ends at the position X rows after the current row
852  CHECK(frame_end_bound_expr_lv);
853  if (window_func->hasRowModeFraming()) {
854  frame_end_bound_lv = cgen_state_->emitCall("compute_row_mode_end_index_add",
855  {current_row_pos_lv,
856  current_partition_start_offset_lv,
857  frame_end_bound_expr_lv,
858  num_elem_current_partition_lv});
859  } else {
860  CHECK(window_func->hasRangeModeFraming());
861  if (frame_end_bound->getBoundExpr()->get_type_info().is_date() ||
862  frame_end_bound->getBoundExpr()->get_type_info().is_timestamp()) {
863  std::string upper_bound_func_name{"compute_"};
864  upper_bound_func_name.append(order_col_type_name);
865  upper_bound_func_name.append("_upper_bound_from_ordered_index_for_timeinterval");
866  frame_end_bound_lv = cgen_state_->emitCall(
867  upper_bound_func_name,
868  {num_elem_current_partition_lv,
869  frame_end_bound_expr_lv,
870  order_key_buf_ptr_lv,
871  target_partition_rowid_ptr_lv,
872  target_partition_sorted_rowid_ptr_lv,
873  cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64),
874  null_start_pos_lv,
875  null_end_pos_lv});
876  } else {
877  std::string upper_bound_func_name{"range_mode_"};
878  upper_bound_func_name.append(order_col_type_name);
879  upper_bound_func_name.append("_add_frame_upper_bound");
880  frame_end_bound_lv = cgen_state_->emitCall(upper_bound_func_name,
881  {num_elem_current_partition_lv,
882  current_col_value_lv,
883  order_key_buf_ptr_lv,
884  target_partition_rowid_ptr_lv,
885  target_partition_sorted_rowid_ptr_lv,
886  frame_end_bound_expr_lv,
887  order_key_col_null_val_lv,
888  null_start_pos_lv,
889  null_end_pos_lv});
890  }
891  }
892  } else {
893  // frame ends at the last row of the partition
894  CHECK(frame_end_bound->getBoundType() ==
895        SqlWindowFrameBoundType::UNBOUNDED_FOLLOWING);
896  frame_end_bound_lv = num_elem_current_partition_lv;
897  }
898  CHECK(frame_start_bound_lv);
899  CHECK(frame_end_bound_lv);
900  return std::make_pair(frame_start_bound_lv, frame_end_bound_lv);
901 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
bool is_timestamp() const
Definition: sqltypes.h:995
const std::vector< SQLTypeInfo > & getOrderKeyColumnBufferTypes() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
SqlWindowFrameBoundType getBoundType() const
Definition: Analyzer.h:2386
llvm::Value * codegenWindowPosition(const WindowFunctionContext *window_func_context, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:230
const Analyzer::Expr * getBoundExpr() const
Definition: Analyzer.h:2388
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:82
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
#define CHECK(condition)
Definition: Logger.h:222
const Analyzer::WindowFunction * getWindowFunction() const
bool is_date() const
Definition: sqltypes.h:983

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 21 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, COUNT, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAG_IN_FRAME, LAST_VALUE, LEAD, LEAD_IN_FRAME, LOG, MAX, MIN, NTILE, PERCENT_RANK, RANK, ROW_NUMBER, and SUM.

22  {
24  CodeGenerator code_generator(this);
25 
26  const auto window_func_context =
28  target_index);
29  const auto window_func = window_func_context->getWindowFunction();
30  switch (window_func->getKind()) {
35  // they are always evaluated on the entire partition
36  return code_generator.codegenWindowPosition(window_func_context,
37  code_generator.posArg(nullptr));
38  }
41  // they are always evaluated on the entire partition
42  return cgen_state_->emitCall("percent_window_func",
43  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
44  window_func_context->output())),
45  code_generator.posArg(nullptr)});
46  }
51  // they are always evaluated on the current frame
53  const auto& args = window_func->getArgs();
54  CHECK(!args.empty());
55  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
56  CHECK_EQ(arg_lvs.size(), size_t(1));
57  return arg_lvs.front();
58  }
64  // they are always evaluated on the current frame
66  }
70  }
71  default: {
72  LOG(FATAL) << "Invalid window function kind";
73  }
74  }
75  return nullptr;
76 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
#define LOG(tag)
Definition: Logger.h:216
llvm::Value * codegenWindowFunctionOnFrame(const CompilationOptions &co)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
static const WindowProjectNodeContext * get(Executor *executor)
const WindowFunctionContext * activateWindowFunctionContext(Executor *executor, const size_t target_index) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:222
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregate ( const CompilationOptions co)
private

Definition at line 227 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, CHECK, WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().

227  {
229  const auto reset_state_false_bb = codegenWindowResetStateControlFlow();
230  auto aggregate_state = aggregateWindowStatePtr();
231  llvm::Value* aggregate_state_count = nullptr;
232  const auto window_func_context =
234  const auto window_func = window_func_context->getWindowFunction();
235  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
236  const auto aggregate_state_count_i64 = cgen_state_->llInt(
237  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
238  const auto pi64_type =
239  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
240  aggregate_state_count =
241  cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_count_i64, pi64_type);
242  }
243  codegenWindowFunctionStateInit(aggregate_state);
244  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
245  const auto count_zero = cgen_state_->llInt(int64_t(0));
246  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
247  }
248  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
249  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
251  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
252 }
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
static const WindowProjectNodeContext * get(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
#define CHECK(condition)
Definition: Logger.h:222
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
llvm::BasicBlock * codegenWindowResetStateControlFlow()

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 903 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kBIGINT, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, MAX, MIN, CodeGenerator::posArg(), and SUM.

904  {
906  const auto window_func_context =
908  const auto window_func = window_func_context->getWindowFunction();
909  const auto window_func_ti = get_adjusted_window_type_info(window_func);
910  const auto window_func_null_val =
911  window_func_ti.is_fp()
912  ? cgen_state_->inlineFpNull(window_func_ti)
913  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
914  const auto& args = window_func->getArgs();
915  llvm::Value* crt_val;
916  CodeGenerator code_generator(this);
917  if (args.empty()) {
918  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
919  crt_val = cgen_state_->llInt(int64_t(1));
920  } else {
921  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
922  CHECK_EQ(arg_lvs.size(), size_t(1));
923  if (window_func->getKind() == SqlWindowFunctionKind::SUM && !window_func_ti.is_fp()) {
924  crt_val = code_generator.codegenCastBetweenIntTypes(
925  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
926  } else {
927  crt_val = window_func_ti.get_type() == kFLOAT
928  ? arg_lvs.front()
929  : cgen_state_->castToTypeIn(arg_lvs.front(), 64);
930  }
931  }
932  if (window_func_context->needsToBuildAggregateTree()) {
933  // compute an aggregated value for each row of the window frame by using segment tree
934  // when constructing a window context, we build a necessary segment tree for it
935  // and use the tree array (so called `aggregate tree`) to query the aggregated value
936  // of the specific window frame
937  // we fall back to the non-framing window func evaluation logic if an input
938  // of the window function can be an empty one
939  const auto pi64_type =
940  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
941  const auto ppi64_type = llvm::PointerType::get(pi64_type, 0);
942  const auto pi32_type =
943  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
944 
945  // codegen frame bound expr if necessary
946  const auto frame_start_bound = window_func->getFrameStartBound();
947  const auto frame_end_bound = window_func->getFrameEndBound();
948  auto frame_start_bound_expr_lv =
949  codegenFrameBoundExpr(window_func, frame_start_bound, code_generator, co);
950  auto frame_end_bound_expr_lv =
951  codegenFrameBoundExpr(window_func, frame_end_bound, code_generator, co);
952  CHECK(frame_start_bound_expr_lv);
953  CHECK(frame_end_bound_expr_lv);
954 
955  // compute aggregated value over the computed frame range
956  auto current_row_pos_lv = code_generator.posArg(nullptr);
957  auto partition_index_lv =
958  codegenCurrentPartitionIndex(window_func_context, current_row_pos_lv);
959 
960  // ordering column buffer
961  const auto target_col_ti = window_func->getArgs().front()->get_type_info();
962  const auto target_col_size = target_col_ti.get_size();
963  const auto col_type_name =
964  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
965 
966  // partial sum of # elems of partitions
967  auto partition_start_offset_buf_lv = cgen_state_->llInt(
968  reinterpret_cast<int64_t>(window_func_context->partitionStartOffset()));
969  auto partition_start_offset_ptr_lv =
970  cgen_state_->ir_builder_.CreateIntToPtr(partition_start_offset_buf_lv, pi64_type);
971 
972  // get start offset of the current partition
973  auto current_partition_start_offset_ptr_lv =
974  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
975  partition_start_offset_ptr_lv,
976  partition_index_lv);
977  auto current_partition_start_offset_lv = cgen_state_->ir_builder_.CreateLoad(
978  current_partition_start_offset_ptr_lv->getType()->getPointerElementType(),
979  current_partition_start_offset_ptr_lv);
980 
981  // row_id buf of the current partition
982  const auto partition_rowid_buf_lv =
983  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->payload()));
984  const auto partition_rowid_ptr_lv =
985  cgen_state_->ir_builder_.CreateIntToPtr(partition_rowid_buf_lv, pi32_type);
986  auto target_partition_rowid_ptr_lv =
987  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
988  partition_rowid_ptr_lv,
989  current_partition_start_offset_lv);
990 
991  // row_id buf of ordered current partition
992  const auto sorted_rowid_lv = cgen_state_->llInt(
993  reinterpret_cast<int64_t>(window_func_context->sortedPartition()));
994  const auto sorted_rowid_ptr_lv =
995  cgen_state_->ir_builder_.CreateIntToPtr(sorted_rowid_lv, pi64_type);
996  auto target_partition_sorted_rowid_ptr_lv =
997  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
998  sorted_rowid_ptr_lv,
999  current_partition_start_offset_lv);
1000 
1001  // # elems per partition
1002  const auto partition_count_buf =
1003  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->counts()));
1004  auto partition_count_buf_ptr_lv =
1005  cgen_state_->ir_builder_.CreateIntToPtr(partition_count_buf, pi32_type);
1006 
1007  // # elems of the given partition
1008  const auto num_elem_current_partition_ptr =
1009  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
1010  partition_count_buf_ptr_lv,
1011  partition_index_lv);
1012  auto num_elem_current_partition_lv = cgen_state_->castToTypeIn(
1013  cgen_state_->ir_builder_.CreateLoad(
1014  num_elem_current_partition_ptr->getType()->getPointerElementType(),
1015  num_elem_current_partition_ptr),
1016  64);
1017 
1018  const auto order_key_ti = window_func->getOrderKeys().front()->get_type_info();
1019  const auto order_key_size = order_key_ti.get_size();
1020  const auto order_col_type_name = get_col_type_name_by_size(
1021  order_key_size,
1022  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
1023  size_t order_key_size_in_byte = order_key_size * 8;
1024 
1025  const auto order_key_buf_type = llvm::PointerType::get(
1026  get_int_type(order_key_size_in_byte, cgen_state_->context_), 0);
1027  const auto order_key_buf = cgen_state_->llInt(reinterpret_cast<int64_t>(
1028  window_func_context->getOrderKeyColumnBuffers().front()));
1029  auto order_key_buf_ptr_lv =
1030  cgen_state_->ir_builder_.CreateIntToPtr(order_key_buf, order_key_buf_type);
1031 
1032  // null value of the ordering column
1033  const auto order_key_buf_ti =
1034  window_func_context->getOrderKeyColumnBufferTypes().front();
1035  llvm::Value* order_key_col_null_val_lv{nullptr};
1036  switch (order_key_buf_ti.get_type()) {
1037  case kDATE:
1038  case kTIME:
1039  case kTIMESTAMP: {
1040  switch (order_key_buf_ti.get_size()) {
1041  case 1: {
1042  order_key_col_null_val_lv =
1044  break;
1045  }
1046  case 2: {
1047  order_key_col_null_val_lv =
1049  break;
1050  }
1051  case 4: {
1052  order_key_col_null_val_lv =
1053  cgen_state_->inlineNull(SQLTypeInfo(SQLTypes::kINT));
1054  break;
1055  }
1056  case 8: {
1057  order_key_col_null_val_lv =
1059  break;
1060  }
1061  default:
1062  break;
1063  }
1064  break;
1065  }
1066  default: {
1067  order_key_col_null_val_lv = cgen_state_->inlineNull(order_key_buf_ti);
1068  break;
1069  }
1070  }
1071 
1072  // null range of the aggregate tree
1073  const auto null_start_pos_buf = cgen_state_->llInt(
1074  reinterpret_cast<int64_t>(window_func_context->getNullValueStartPos()));
1075  const auto null_start_pos_buf_ptr =
1076  cgen_state_->ir_builder_.CreateIntToPtr(null_start_pos_buf, pi64_type);
1077  const auto null_start_pos_ptr =
1078  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
1079  null_start_pos_buf_ptr,
1080  partition_index_lv);
1081  auto null_start_pos_lv = cgen_state_->ir_builder_.CreateLoad(
1082  null_start_pos_ptr->getType()->getPointerElementType(),
1083  null_start_pos_ptr,
1084  "null_start_pos");
1085  const auto null_end_pos_buf = cgen_state_->llInt(
1086  reinterpret_cast<int64_t>(window_func_context->getNullValueEndPos()));
1087  const auto null_end_pos_buf_ptr =
1088  cgen_state_->ir_builder_.CreateIntToPtr(null_end_pos_buf, pi64_type);
1089  const auto null_end_pos_ptr =
1090  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
1091  null_end_pos_buf_ptr,
1092  partition_index_lv);
1093  auto null_end_pos_lv = cgen_state_->ir_builder_.CreateLoad(
1094  null_end_pos_ptr->getType()->getPointerElementType(),
1095  null_end_pos_ptr,
1096  "null_end_pos");
1097 
1098  llvm::Value* frame_start_bound_lv{nullptr};
1099  llvm::Value* frame_end_bound_lv{nullptr};
1100  std::tie(frame_start_bound_lv, frame_end_bound_lv) =
1101  codegenWindowFrameBound(window_func_context,
1102  frame_start_bound,
1103  frame_end_bound,
1104  current_row_pos_lv,
1105  current_partition_start_offset_lv,
1106  order_key_buf_ptr_lv,
1107  order_key_col_null_val_lv,
1108  frame_start_bound_expr_lv,
1109  frame_end_bound_expr_lv,
1110  num_elem_current_partition_lv,
1111  target_partition_rowid_ptr_lv,
1112  target_partition_sorted_rowid_ptr_lv,
1113  null_start_pos_lv,
1114  null_end_pos_lv,
1115  code_generator);
1116  CHECK(frame_start_bound_lv);
1117  CHECK(frame_end_bound_lv);
1118 
1119  // codegen to send a query with frame bound to aggregate tree searcher
1120  llvm::Value* aggregation_trees_lv{nullptr};
1121  llvm::Value* invalid_val_lv{nullptr};
1122  llvm::Value* null_val_lv{nullptr};
1123  std::string aggregation_tree_search_func_name{"search_"};
1124  std::string aggregation_tree_getter_func_name{"get_"};
1125 
1126  // prepare null values and aggregate_tree getter and searcher depending on
1127  // a type of the ordering column
1128  auto agg_expr_ti = args.front()->get_type_info();
1129  switch (agg_expr_ti.get_type()) {
1130  case SQLTypes::kTINYINT:
1131  case SQLTypes::kSMALLINT:
1132  case SQLTypes::kINT:
1133  case SQLTypes::kBIGINT:
1134  case SQLTypes::kNUMERIC:
1135  case SQLTypes::kDECIMAL: {
1136  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1137  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::max());
1138  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1139  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::lowest());
1140  } else {
1141  invalid_val_lv = cgen_state_->llInt((int64_t)0);
1142  }
1143  null_val_lv = cgen_state_->llInt(inline_int_null_value<int64_t>());
1144  aggregation_tree_search_func_name += "int64_t";
1145  aggregation_tree_getter_func_name += "integer";
1146  break;
1147  }
1148  case SQLTypes::kFLOAT:
1149  case SQLTypes::kDOUBLE: {
1150  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1151  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::max());
1152  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1153  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::lowest());
1154  } else {
1155  invalid_val_lv = cgen_state_->llFp((double)0);
1156  }
1157  null_val_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1158  aggregation_tree_search_func_name += "double";
1159  aggregation_tree_getter_func_name += "double";
1160  break;
1161  }
1162  default: {
1163  CHECK(false);
1164  break;
1165  }
1166  }
1167 
1168  // derived aggregation has a different code path
1169  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1170  aggregation_tree_search_func_name += "_derived";
1171  aggregation_tree_getter_func_name += "_derived";
1172  }
1173 
1174  // get a buffer holding aggregate trees for each partition
1175  if (agg_expr_ti.is_integer() || agg_expr_ti.is_decimal()) {
1176  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1177  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1178  window_func_context->getDerivedAggregationTreesForIntegerTypeWindowExpr()));
1179  } else {
1180  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1181  window_func_context->getAggregationTreesForIntegerTypeWindowExpr()));
1182  }
1183  } else if (agg_expr_ti.is_fp()) {
1184  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1185  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1186  window_func_context->getDerivedAggregationTreesForDoubleTypeWindowExpr()));
1187  } else {
1188  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1189  window_func_context->getAggregationTreesForDoubleTypeWindowExpr()));
1190  }
1191  }
1192 
1193  CHECK(aggregation_trees_lv);
1194  CHECK(invalid_val_lv);
1195  aggregation_tree_search_func_name += "_aggregation_tree";
1196  aggregation_tree_getter_func_name += "_aggregation_tree";
1197 
1198  // get the aggregate tree of the current partition from a window context
1199  auto aggregation_trees_ptr =
1200  cgen_state_->ir_builder_.CreateIntToPtr(aggregation_trees_lv, ppi64_type);
1201  auto target_aggregation_tree_lv = cgen_state_->emitCall(
1202  aggregation_tree_getter_func_name, {aggregation_trees_ptr, partition_index_lv});
1203 
1204  // a depth of segment tree
1205  const auto tree_depth_buf = cgen_state_->llInt(
1206  reinterpret_cast<int64_t>(window_func_context->getAggregateTreeDepth()));
1207  const auto tree_depth_buf_ptr =
1208  cgen_state_->ir_builder_.CreateIntToPtr(tree_depth_buf, pi64_type);
1209  const auto current_partition_tree_depth_buf_ptr = cgen_state_->ir_builder_.CreateGEP(
1210  get_int_type(64, cgen_state_->context_), tree_depth_buf_ptr, partition_index_lv);
1211  const auto current_partition_tree_depth_lv = cgen_state_->ir_builder_.CreateLoad(
1212  current_partition_tree_depth_buf_ptr->getType()->getPointerElementType(),
1213  current_partition_tree_depth_buf_ptr);
1214 
1215  // a fanout of the current partition's segment tree
1216  const auto aggregation_tree_fanout_lv = cgen_state_->llInt(
1217  static_cast<int64_t>(window_func_context->getAggregateTreeFanout()));
1218 
1219  // agg_type
1220  const auto agg_type_lv =
1221  cgen_state_->llInt(static_cast<int32_t>(window_func->getKind()));
1222 
1223  // send a query to the aggregate tree with the frame range:
1224  // `frame_start_bound_lv` ~ `frame_end_bound_lv`
1225  auto res_lv =
1226  cgen_state_->emitCall(aggregation_tree_search_func_name,
1227  {target_aggregation_tree_lv,
1228  frame_start_bound_lv,
1229  frame_end_bound_lv,
1230  current_partition_tree_depth_lv,
1231  aggregation_tree_fanout_lv,
1232  cgen_state_->llBool(agg_expr_ti.is_decimal()),
1233  cgen_state_->llInt((int64_t)agg_expr_ti.get_scale()),
1234  invalid_val_lv,
1235  null_val_lv,
1236  agg_type_lv});
1237 
1238  // handling returned null value if exists
1239  std::string null_handler_func_name{"handle_null_val_"};
1240  std::vector<llvm::Value*> null_handler_args{res_lv, null_val_lv};
1241 
1242  // determine null_handling function's name
1243  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1244  // average aggregate function returns a value as a double
1245  // (and our search* function also returns a double)
1246  if (agg_expr_ti.is_fp()) {
1247  // fp type: double null value
1248  null_handler_func_name += "double_double";
1249  } else {
1250  // non-fp type: int64_t null type
1251  null_handler_func_name += "double_int64_t";
1252  }
1253  } else if (agg_expr_ti.is_fp()) {
1254  // fp type: double null value
1255  null_handler_func_name += "double_double";
1256  } else {
1257  // non-fp type: int64_t null type
1258  null_handler_func_name += "int64_t_int64_t";
1259  }
1260  null_handler_func_name += "_window_framing_agg";
1261 
1262  // prepare null_val
1263  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1264  if (agg_expr_ti.is_fp()) {
1265  null_handler_args.push_back(cgen_state_->llFp((double)0));
1266  } else {
1267  null_handler_args.push_back(cgen_state_->llInt((int64_t)0));
1268  }
1269  } else if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1270  null_handler_args.push_back(cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE)));
1271  } else {
1272  null_handler_args.push_back(cgen_state_->castToTypeIn(window_func_null_val, 64));
1273  }
1274  res_lv = cgen_state_->emitCall(null_handler_func_name, null_handler_args);
1275 
1276  // when AGG_TYPE is double, we get a double type return value we expect an integer
1277  // type value for the count aggregation
1278  if (window_func->getKind() == SqlWindowFunctionKind::COUNT && agg_expr_ti.is_fp()) {
1279  return cgen_state_->ir_builder_.CreateFPToSI(
1280  res_lv, get_int_type(64, cgen_state_->context_));
1281  }
1282  return res_lv;
1283  } else {
1284  llvm::Value* multiplicity_lv = nullptr;
1285  const auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
1286  if (args.empty()) {
1287  cgen_state_->emitCall(agg_name, {aggregate_state, crt_val});
1288  } else {
1289  cgen_state_->emitCall(agg_name + "_skip_val",
1290  {aggregate_state, crt_val, window_func_null_val});
1291  }
1292  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1293  codegenWindowAvgEpilogue(crt_val, window_func_null_val, multiplicity_lv);
1294  }
1295  return codegenAggregateWindowState();
1296  }
1297 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
#define CHECK_EQ(x, y)
Definition: Logger.h:230
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
Definition: sqltypes.h:64
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string get_window_agg_name(const SqlWindowFunctionKind kind, const SQLTypeInfo &window_func_ti)
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
Definition: sqltypes.h:68
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBound(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *current_row_pos_lv, llvm::Value *current_partition_start_offset_lv, llvm::Value *order_key_buf_ptr_lv, llvm::Value *order_key_col_null_val_lv, llvm::Value *frame_start_bound_expr_lv, llvm::Value *frame_end_bound_expr_lv, llvm::Value *num_elem_current_partition_lv, llvm::Value *target_partition_rowid_ptr_lv, llvm::Value *target_partition_sorted_rowid_ptr_lv, llvm::Value *null_start_pos_lv, llvm::Value *null_end_pos_lv, CodeGenerator &code_generator)
llvm::Value * codegenAggregateWindowState()
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:60
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionOnFrame ( const CompilationOptions co)
private

Definition at line 333 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size_with_encoding(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kENCODING_DATE_IN_DAYS, kSecsPerDay, LAG_IN_FRAME, and LEAD_IN_FRAME.

333  {
335  const auto window_func_context =
337  const auto window_func = window_func_context->getWindowFunction();
338  const auto window_func_kind = window_func->getKind();
339  const auto& args = window_func->getArgs();
340  CHECK(args.size() >= 1 && args.size() <= 3);
341  CodeGenerator code_generator(this);
342  const auto offset_lv =
343  cgen_state_->castToTypeIn(code_generator.codegen(args[1].get(), true, co)[0], 64);
344 
345  // codegen frame bound expr if necessary
346  const auto frame_start_bound = window_func->getFrameStartBound();
347  const auto frame_end_bound = window_func->getFrameEndBound();
348  auto frame_start_bound_expr_lv =
349  codegenFrameBoundExpr(window_func, frame_start_bound, code_generator, co);
350  auto frame_end_bound_expr_lv =
351  codegenFrameBoundExpr(window_func, frame_end_bound, code_generator, co);
352  CHECK(frame_start_bound_expr_lv);
353  CHECK(frame_end_bound_expr_lv);
354 
355  auto current_row_pos_lv = code_generator.posArg(nullptr);
356  auto partition_index_lv =
357  codegenCurrentPartitionIndex(window_func_context, current_row_pos_lv);
358 
359  llvm::Value* res_lv{nullptr};
360  // currently, we only support below two window functions on frame
361  // todo (yonnmin): remove this when supporting more window functions on frame
362  CHECK(window_func_kind == SqlWindowFunctionKind::LEAD_IN_FRAME ||
363  window_func_kind == SqlWindowFunctionKind::LAG_IN_FRAME);
364  const auto pi32_type =
365  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
366  const auto pi64_type =
367  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
368  bool is_lag_in_frame = window_func_kind == SqlWindowFunctionKind::LAG_IN_FRAME;
369 
370  // ordering column buffer
371  const auto target_col_ti = window_func->getArgs().front()->get_type_info();
372  const auto target_col_size = target_col_ti.get_size();
373  const auto target_col_type_name =
374  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
375  const auto target_col_logical_type_name = get_col_type_name_by_size(
376  window_func->get_type_info().get_size(), window_func->get_type_info().is_fp());
377 
378  // when target_column is fixed encoded, we store the actual column value by
379  // considering it, but our resultset analyzer only considers the type without encoding
380  // scheme so we handle them separately
381  auto logical_null_val_lv =
382  get_null_value_by_size(cgen_state_.get(), window_func->get_type_info());
383  auto target_col_null_val_lv =
385  size_t target_col_size_in_byte = target_col_size * 8;
386  llvm::Type* col_buf_ptr_type =
387  target_col_ti.is_fp()
388  ? get_fp_type(target_col_size_in_byte, cgen_state_->context_)
389  : get_int_type(target_col_size_in_byte, cgen_state_->context_);
390  auto col_buf_type = llvm::PointerType::get(col_buf_ptr_type, 0);
391  auto target_col_buf_ptr_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
392  window_func_context->getColumnBufferForWindowFunctionExpressions().front()));
393  auto target_col_buf_lv =
394  cgen_state_->ir_builder_.CreateIntToPtr(target_col_buf_ptr_lv, col_buf_type);
395 
396  // partial sum of # elems of partitions
397  auto partition_start_offset_buf_lv = cgen_state_->llInt(
398  reinterpret_cast<int64_t>(window_func_context->partitionStartOffset()));
399  auto partition_start_offset_ptr_lv =
400  cgen_state_->ir_builder_.CreateIntToPtr(partition_start_offset_buf_lv, pi64_type);
401 
402  // get start offset of the current partition
403  auto current_partition_start_offset_ptr_lv =
404  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
405  partition_start_offset_ptr_lv,
406  partition_index_lv);
407  auto current_partition_start_offset_lv = cgen_state_->ir_builder_.CreateLoad(
408  current_partition_start_offset_ptr_lv->getType()->getPointerElementType(),
409  current_partition_start_offset_ptr_lv);
410 
411  // row_id buf of the current partition
412  const auto partition_rowid_buf_lv =
413  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->payload()));
414  const auto partition_rowid_ptr_lv =
415  cgen_state_->ir_builder_.CreateIntToPtr(partition_rowid_buf_lv, pi32_type);
416  auto target_partition_rowid_ptr_lv =
417  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
418  partition_rowid_ptr_lv,
419  current_partition_start_offset_lv);
420 
421  // row_id buf of ordered current partition
422  const auto sorted_rowid_lv = cgen_state_->llInt(
423  reinterpret_cast<int64_t>(window_func_context->sortedPartition()));
424  const auto sorted_rowid_ptr_lv =
425  cgen_state_->ir_builder_.CreateIntToPtr(sorted_rowid_lv, pi64_type);
426  auto target_partition_sorted_rowid_ptr_lv =
427  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
428  sorted_rowid_ptr_lv,
429  current_partition_start_offset_lv);
430 
431  // # elems per partition
432  const auto partition_count_buf =
433  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->counts()));
434  auto partition_count_buf_ptr_lv =
435  cgen_state_->ir_builder_.CreateIntToPtr(partition_count_buf, pi32_type);
436 
437  // # elems of the given partition
438  const auto num_elem_current_partition_ptr =
439  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
440  partition_count_buf_ptr_lv,
441  partition_index_lv);
442  auto num_elem_current_partition_lv = cgen_state_->castToTypeIn(
443  cgen_state_->ir_builder_.CreateLoad(
444  num_elem_current_partition_ptr->getType()->getPointerElementType(),
445  num_elem_current_partition_ptr),
446  64);
447 
448  const auto order_key_ti = window_func->getOrderKeys().front()->get_type_info();
449  const auto order_key_size = order_key_ti.get_size();
450  const auto order_col_type_name = get_col_type_name_by_size(
451  order_key_size,
452  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
453  size_t order_key_size_in_byte = order_key_size * 8;
454 
455  const auto order_key_buf_type = llvm::PointerType::get(
456  get_int_type(order_key_size_in_byte, cgen_state_->context_), 0);
457  const auto order_key_buf = cgen_state_->llInt(
458  reinterpret_cast<int64_t>(window_func_context->getOrderKeyColumnBuffers().front()));
459  auto order_key_buf_ptr_lv =
460  cgen_state_->ir_builder_.CreateIntToPtr(order_key_buf, order_key_buf_type);
461 
462  // null value of the ordering column
463  const auto order_key_buf_ti =
464  window_func_context->getOrderKeyColumnBufferTypes().front();
465  auto order_key_col_null_val_lv =
466  get_null_value_by_size_with_encoding(cgen_state_.get(), order_key_buf_ti);
467 
468  // null range of the aggregate tree
469  const auto null_start_pos_buf = cgen_state_->llInt(
470  reinterpret_cast<int64_t>(window_func_context->getNullValueStartPos()));
471  const auto null_start_pos_buf_ptr =
472  cgen_state_->ir_builder_.CreateIntToPtr(null_start_pos_buf, pi64_type);
473  const auto null_start_pos_ptr =
474  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
475  null_start_pos_buf_ptr,
476  partition_index_lv);
477  auto null_start_pos_lv = cgen_state_->ir_builder_.CreateLoad(
478  null_start_pos_ptr->getType()->getPointerElementType(),
479  null_start_pos_ptr,
480  "null_start_pos");
481  const auto null_end_pos_buf = cgen_state_->llInt(
482  reinterpret_cast<int64_t>(window_func_context->getNullValueEndPos()));
483  const auto null_end_pos_buf_ptr =
484  cgen_state_->ir_builder_.CreateIntToPtr(null_end_pos_buf, pi64_type);
485  const auto null_end_pos_ptr = cgen_state_->ir_builder_.CreateGEP(
486  get_int_type(64, cgen_state_->context_), null_end_pos_buf_ptr, partition_index_lv);
487  auto null_end_pos_lv = cgen_state_->ir_builder_.CreateLoad(
488  null_end_pos_ptr->getType()->getPointerElementType(),
489  null_end_pos_ptr,
490  "null_end_pos");
491 
492  std::string compute_row_idx_on_frame_func_name = "compute_";
493  compute_row_idx_on_frame_func_name += order_col_type_name + "_current_row_idx_in_frame";
494  auto cur_row_idx_in_frame_lv =
495  cgen_state_->emitCall(compute_row_idx_on_frame_func_name,
496  {num_elem_current_partition_lv,
497  current_row_pos_lv,
498  order_key_buf_ptr_lv,
499  target_partition_rowid_ptr_lv,
500  target_partition_sorted_rowid_ptr_lv,
501  order_key_col_null_val_lv,
502  null_start_pos_lv,
503  null_end_pos_lv});
504 
505  llvm::Value* frame_start_bound_lv{nullptr};
506  llvm::Value* frame_end_bound_lv{nullptr};
507  std::tie(frame_start_bound_lv, frame_end_bound_lv) =
508  codegenWindowFrameBound(window_func_context,
509  frame_start_bound,
510  frame_end_bound,
511  cur_row_idx_in_frame_lv,
512  cgen_state_->llInt((int64_t)0),
513  order_key_buf_ptr_lv,
514  order_key_col_null_val_lv,
515  frame_start_bound_expr_lv,
516  frame_end_bound_expr_lv,
517  num_elem_current_partition_lv,
518  target_partition_rowid_ptr_lv,
519  target_partition_sorted_rowid_ptr_lv,
520  null_start_pos_lv,
521  null_end_pos_lv,
522  code_generator);
523  CHECK(frame_start_bound_lv);
524  CHECK(frame_end_bound_lv);
525 
526  llvm::Value* modified_cur_row_idx_in_frame_lv{nullptr};
527  if (is_lag_in_frame) {
528  modified_cur_row_idx_in_frame_lv =
529  cgen_state_->ir_builder_.CreateSub(cur_row_idx_in_frame_lv, offset_lv);
530  } else {
531  modified_cur_row_idx_in_frame_lv =
532  cgen_state_->ir_builder_.CreateAdd(cur_row_idx_in_frame_lv, offset_lv);
533  }
534  CHECK(modified_cur_row_idx_in_frame_lv);
535 
536  std::string target_func_name = "get_";
537  target_func_name += target_col_type_name + "_value_";
538  target_func_name += target_col_logical_type_name + "_type_";
539  target_func_name += "in_frame";
540  res_lv = cgen_state_->emitCall(target_func_name,
541  {modified_cur_row_idx_in_frame_lv,
542  frame_start_bound_lv,
543  frame_end_bound_lv,
544  target_col_buf_lv,
545  target_partition_rowid_ptr_lv,
546  target_partition_sorted_rowid_ptr_lv,
547  logical_null_val_lv,
548  target_col_null_val_lv});
549  if (target_col_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
550  res_lv = cgen_state_->emitCall(
551  "encode_date",
552  {res_lv, logical_null_val_lv, cgen_state_->llInt((int64_t)kSecsPerDay)});
553  }
554  CHECK(res_lv);
555  return res_lv;
556 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
static constexpr int64_t kSecsPerDay
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBound(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *current_row_pos_lv, llvm::Value *current_partition_start_offset_lv, llvm::Value *order_key_buf_ptr_lv, llvm::Value *order_key_col_null_val_lv, llvm::Value *frame_start_bound_expr_lv, llvm::Value *frame_end_bound_expr_lv, llvm::Value *num_elem_current_partition_lv, llvm::Value *target_partition_rowid_ptr_lv, llvm::Value *target_partition_sorted_rowid_ptr_lv, llvm::Value *null_start_pos_lv, llvm::Value *null_end_pos_lv, CodeGenerator &code_generator)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1243
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:222
llvm::Value * get_null_value_by_size(CgenState *cgen_state, SQLTypeInfo col_ti)
llvm::Value * get_null_value_by_size_with_encoding(CgenState *cgen_state, SQLTypeInfo col_ti)

+ Here is the call graph for this function:

void Executor::codegenWindowFunctionStateInit ( llvm::Value *  aggregate_state)
private

Definition at line 283 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

// Executor::codegenWindowFunctionStateInit — emits IR that initializes the
// aggregate-state slot for a window function before rows are accumulated:
// zero for COUNT-style accumulation, the type's null sentinel otherwise.
// NOTE(review): this is a Doxygen listing; tokens that were hyperlinks were
// dropped by extraction (listing lines 284, 286 and 295). Per the References
// section above these are presumably AUTOMATIC_IR_METADATA, the
// WindowProjectNodeContext::getActiveWindowFunctionContext(...) call, and the
// SqlWindowFunctionKind COUNT comparison — confirm against WindowFunctionIR.cpp.
283  {
285  const auto window_func_context =
287  const auto window_func = window_func_context->getWindowFunction();
288  const auto window_func_ti = get_adjusted_window_type_info(window_func);
// Null sentinel for the window function's type: fp null for float/double,
// otherwise the integer null widened to 64 bits.
289  const auto window_func_null_val =
290  window_func_ti.is_fp()
291  ? cgen_state_->inlineFpNull(window_func_ti)
292  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
293  llvm::Value* window_func_init_val;
// COUNT-like kinds (comparison target stripped by extraction, see NOTE above)
// start from zero of the matching width/type; all others start from null.
294  if (window_func_context->getWindowFunction()->getKind() ==
296  switch (window_func_ti.get_type()) {
297  case kFLOAT: {
298  window_func_init_val = cgen_state_->llFp(float(0));
299  break;
300  }
301  case kDOUBLE: {
302  window_func_init_val = cgen_state_->llFp(double(0));
303  break;
304  }
305  default: {
306  window_func_init_val = cgen_state_->llInt(int64_t(0));
307  break;
308  }
309  }
310  } else {
311  window_func_init_val = window_func_null_val;
312  }
313  const auto pi32_type =
314  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
// Store the initial value via the agg_id* runtime helpers; float needs the
// state pointer bitcast to i32* first so the 32-bit store has the right type.
315  switch (window_func_ti.get_type()) {
316  case kDOUBLE: {
317  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
318  break;
319  }
320  case kFLOAT: {
321  aggregate_state =
322  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
323  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
324  break;
325  }
326  default: {
327  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
328  break;
329  }
330  }
331 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::BasicBlock * Executor::codegenWindowResetStateControlFlow ( )
private

Definition at line 254 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, WindowProjectNodeContext::getActiveWindowFunctionContext(), CodeGenerator::posArg(), and CodeGenerator::toBool().

// Executor::codegenWindowResetStateControlFlow — emits a conditional branch
// that routes partition-start rows into a state-reset block. Returns the
// "no reset" continuation block; the IR builder is left positioned in the
// "reset" block so the caller can emit the reset code there.
// NOTE(review): Doxygen listing; hyperlinked tokens on listing lines 255 and
// 257 were dropped by extraction (per the References section, presumably
// AUTOMATIC_IR_METADATA and
// WindowProjectNodeContext::getActiveWindowFunctionContext(...)).
254  {
256  const auto window_func_context =
// The partitionStart() bitset address is baked into the generated code as a
// 64-bit integer literal.
258  const auto bitset = cgen_state_->llInt(
259  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
260  const auto min_val = cgen_state_->llInt(int64_t(0));
261  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
262  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
263  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
264  CodeGenerator code_generator(this);
// reset_state is true when the current row position's bit is set in the
// partition-start bitset.
265  const auto reset_state =
266  code_generator.toBool(cgen_state_->emitCall("bit_is_set",
267  {bitset,
268  code_generator.posArg(nullptr),
269  min_val,
270  max_val,
271  null_val,
272  null_bool_val}));
273  const auto reset_state_true_bb = llvm::BasicBlock::Create(
274  cgen_state_->context_, "reset_state.true", cgen_state_->current_func_);
275  const auto reset_state_false_bb = llvm::BasicBlock::Create(
276  cgen_state_->context_, "reset_state.false", cgen_state_->current_func_);
277  cgen_state_->ir_builder_.CreateCondBr(
278  reset_state, reset_state_true_bb, reset_state_false_bb);
279  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
280  return reset_state_false_bb;
281 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)

+ Here is the call graph for this function:

ResultSetPtr Executor::collectAllDeviceResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
)
private

Definition at line 2347 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), catalog_, collectAllDeviceShardedTopResults(), DEBUG_TIMER, SharedKernelContext::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, reduceMultiDeviceResults(), reduceSpeculativeTopN(), GroupByAndAggregate::shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and use_speculative_top_n().

Referenced by executeWorkUnitImpl().

// Executor::collectAllDeviceResults — reduces per-device fragment results into
// a single ResultSet, choosing among: an empty-input placeholder row,
// speculative top-n reduction, sharded top-n collection (GPU only), or the
// generic multi-device reduction.
// NOTE(review): Doxygen listing; hyperlinked tokens were dropped by extraction
// (listing lines 2356-2357 and 2370). Per the References section these
// presumably involve QueryDescriptionType::NonGroupedAggregate with
// build_row_for_empty_input(...) and
// GroupByAndAggregate::shard_count_for_top_groups(...) — confirm in Execute.cpp.
2352  {
2353  auto timer = DEBUG_TIMER(__func__);
2354  auto& result_per_device = shared_context.getFragmentResults();
// Non-grouped aggregates over empty input still produce one row (gap below —
// see NOTE above).
2355  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
2358  ra_exe_unit.target_exprs, query_mem_desc, device_type);
2359  }
// Speculative top-n path: fall back with a dedicated error if reduction runs
// out of memory, rather than surfacing a raw bad_alloc.
2360  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
2361  try {
2362  return reduceSpeculativeTopN(
2363  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2364  } catch (const std::bad_alloc&) {
2365  throw SpeculativeTopNFailed("Failed during multi-device reduction.");
2366  }
2367  }
// Shard count is only relevant on GPU (CPU forces 0, disabling the sharded
// top-n path); the GPU-side computation was an extraction-stripped link.
2368  const auto shard_count =
2369  device_type == ExecutorDeviceType::GPU
2371  : 0;
2372 
2373  if (shard_count && !result_per_device.empty()) {
2374  return collectAllDeviceShardedTopResults(shared_context, ra_exe_unit);
2375  }
2376  return reduceMultiDeviceResults(
2377  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2378  }
std::vector< Analyzer::Expr * > target_exprs
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1429
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1323
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1322
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
Definition: Execute.cpp:2462
QueryDescriptionType getQueryDescriptionType() const
ResultSetPtr build_row_for_empty_input(const std::vector< Analyzer::Expr * > &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2305
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define DEBUG_TIMER(name)
Definition: Logger.h:371
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ResultSetPtr Executor::collectAllDeviceShardedTopResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit 
) const
private

Definition at line 2462 of file Execute.cpp.

References blockSize(), catalog_, CHECK, CHECK_EQ, CHECK_LE, SharedKernelContext::getFragmentResults(), gridSize(), SortInfo::limit, SortInfo::offset, SortInfo::order_entries, anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), run_benchmark_import::result, and RelAlgExecutionUnit::sort_info.

Referenced by collectAllDeviceResults().

// Executor::collectAllDeviceShardedTopResults — merges per-device top-n result
// sets from a sharded query into one combined ResultSet: each device's set is
// sorted by the query's order entries, then its rows are copied into shared
// output storage in sorted (permutation) order.
2464  {
2465  auto& result_per_device = shared_context.getFragmentResults();
2466  const auto first_result_set = result_per_device.front().first;
2467  CHECK(first_result_set);
// The merged set reuses the first device's memory descriptor as a template;
// only its entry count is recomputed below.
2468  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
2469  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
// Each device keeps limit + offset rows so the offset can be applied after
// the merge.
2470  const auto top_n = ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
2471  top_query_mem_desc.setEntryCount(0);
// First pass: sort every per-device result set and accumulate the total
// entry count for the merged output descriptor.
2472  for (auto& result : result_per_device) {
2473  const auto result_set = result.first;
2474  CHECK(result_set);
2475  result_set->sort(ra_exe_unit.sort_info.order_entries, top_n, this);
2476  size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
2477  top_query_mem_desc.setEntryCount(new_entry_cnt);
2478  }
2479  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
2480  first_result_set->getDeviceType(),
2481  top_query_mem_desc,
2482  first_result_set->getRowSetMemOwner(),
2483  catalog_,
2484  blockSize(),
2485  gridSize());
2486  auto top_storage = top_result_set->allocateStorage();
2487  size_t top_output_row_idx{0};
// Second pass: append each device's rows, in its sort-permutation order, into
// the combined storage; the copy routine depends on the output layout.
2488  for (auto& result : result_per_device) {
2489  const auto result_set = result.first;
2490  CHECK(result_set);
2491  const auto& top_permutation = result_set->getPermutationBuffer();
2492  CHECK_LE(top_permutation.size(), top_n);
2493  if (top_query_mem_desc.didOutputColumnar()) {
2494  top_output_row_idx = permute_storage_columnar(result_set->getStorage(),
2495  result_set->getQueryMemDesc(),
2496  top_storage,
2497  top_output_row_idx,
2498  top_query_mem_desc,
2499  top_permutation);
2500  } else {
2501  top_output_row_idx = permute_storage_row_wise(result_set->getStorage(),
2502  top_storage,
2503  top_output_row_idx,
2504  top_query_mem_desc,
2505  top_permutation);
2506  }
2507  }
// Every reserved entry must have been filled exactly once.
2508  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
2509  return top_result_set;
2510 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
const std::list< Analyzer::OrderEntry > order_entries
size_t permute_storage_row_wise(const ResultSetStorage *input_storage, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2441
const size_t limit
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1323
#define CHECK_LE(x, y)
Definition: Logger.h:233
unsigned gridSize() const
Definition: Execute.cpp:3836
size_t permute_storage_columnar(const ResultSetStorage *input_storage, const QueryMemoryDescriptor &input_query_mem_desc, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2391
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define CHECK(condition)
Definition: Logger.h:222
unsigned blockSize() const
Definition: Execute.cpp:3850
const size_t offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Executor::compileBody ( const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const GpuSharedMemoryContext gpu_smem_context = {} 
)
private

Definition at line 3214 of file NativeCodegen.cpp.

// Executor::compileBody — codegens the per-row body of a query: evaluates the
// filter qualifiers (with optional short-circuiting) and hands control to
// GroupByAndAggregate::codegen. When a separate filter function is enabled,
// generation temporarily switches into it and is stitched back into the row
// function at the end.
// NOTE(review): Doxygen listing; hyperlinked tokens on listing lines 3219 and
// 3299 were dropped by extraction — given the surrounding comments these look
// like AUTOMATIC_IR_METADATA and the redeclareFilterFunction() call
// referenced on line 3224; confirm against NativeCodegen.cpp.
3218  {
3220 
3221  // Switch the code generation into a separate filter function if enabled.
3222  // Note that accesses to function arguments are still codegenned from the
3223  // row function's arguments, then later automatically forwarded and
3224  // remapped into filter function arguments by redeclareFilterFunction().
3225  cgen_state_->row_func_bb_ = cgen_state_->ir_builder_.GetInsertBlock();
3226  llvm::Value* loop_done{nullptr};
3227  std::unique_ptr<Executor::FetchCacheAnchor> fetch_cache_anchor;
3228  if (cgen_state_->filter_func_) {
// For join loops ("loop_body"), track completion through an i1 alloca created
// in the row function's entry block and set true before entering the filter.
3229  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3230  auto row_func_entry_bb = &cgen_state_->row_func_->getEntryBlock();
3231  cgen_state_->ir_builder_.SetInsertPoint(row_func_entry_bb,
3232  row_func_entry_bb->begin());
3233  loop_done = cgen_state_->ir_builder_.CreateAlloca(
3234  get_int_type(1, cgen_state_->context_), nullptr, "loop_done");
3235  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3236  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(true), loop_done);
3237  }
3238  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->filter_func_bb_);
3239  cgen_state_->current_func_ = cgen_state_->filter_func_;
3240  fetch_cache_anchor = std::make_unique<Executor::FetchCacheAnchor>(cgen_state_.get());
3241  }
3242 
3243  // generate the code for the filter
3244  std::vector<Analyzer::Expr*> primary_quals;
3245  std::vector<Analyzer::Expr*> deferred_quals;
3246  bool short_circuited = CodeGenerator::prioritizeQuals(
3247  ra_exe_unit, primary_quals, deferred_quals, plan_state_->hoisted_filters_);
3248  if (short_circuited) {
3249  VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
3250  << "short-circuited and deferred " << std::to_string(deferred_quals.size())
3251  << " quals";
3252  }
// filter_lv starts at true and is AND-ed with each primary qual's condition.
3253  llvm::Value* filter_lv = cgen_state_->llBool(true);
3254  CodeGenerator code_generator(this);
3255  for (auto expr : primary_quals) {
3256  // Generate the filter for primary quals
3257  auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
3258  filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
3259  }
3260  CHECK(filter_lv->getType()->isIntegerTy(1));
// Deferred (short-circuited) quals: branch to sc_true only when the primary
// filter passed; sc_false either returns early (no join quals) or is handed
// to the group-by codegen below.
3261  llvm::BasicBlock* sc_false{nullptr};
3262  if (!deferred_quals.empty()) {
3263  auto sc_true = llvm::BasicBlock::Create(
3264  cgen_state_->context_, "sc_true", cgen_state_->current_func_);
3265  sc_false = llvm::BasicBlock::Create(
3266  cgen_state_->context_, "sc_false", cgen_state_->current_func_);
3267  cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
3268  cgen_state_->ir_builder_.SetInsertPoint(sc_false);
3269  if (ra_exe_unit.join_quals.empty()) {
3270  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
3271  }
3272  cgen_state_->ir_builder_.SetInsertPoint(sc_true);
3273  filter_lv = cgen_state_->llBool(true);
3274  }
3275  for (auto expr : deferred_quals) {
3276  filter_lv = cgen_state_->ir_builder_.CreateAnd(
3277  filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
3278  }
3279 
3280  CHECK(filter_lv->getType()->isIntegerTy(1));
3281  auto ret = group_by_and_aggregate.codegen(
3282  filter_lv, sc_false, query_mem_desc, co, gpu_smem_context);
3283 
3284  // Switch the code generation back to the row function if a filter
3285  // function was enabled.
3286  if (cgen_state_->filter_func_) {
3287  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3288  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(false), loop_done);
3289  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3290  }
3291 
3292  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3293  cgen_state_->current_func_ = cgen_state_->row_func_;
3294  cgen_state_->filter_func_call_ =
3295  cgen_state_->ir_builder_.CreateCall(cgen_state_->filter_func_, {});
3296 
3297  // Create real filter function declaration after placeholder call
3298  // is emitted.
3300 
// Join-loop case: propagate the filter function's return value only when the
// loop completed (loop_done still true); otherwise continue the loop.
3301  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3302  auto loop_done_true = llvm::BasicBlock::Create(
3303  cgen_state_->context_, "loop_done_true", cgen_state_->row_func_);
3304  auto loop_done_false = llvm::BasicBlock::Create(
3305  cgen_state_->context_, "loop_done_false", cgen_state_->row_func_);
3306  auto loop_done_flag = cgen_state_->ir_builder_.CreateLoad(
3307  loop_done->getType()->getPointerElementType(), loop_done);
3308  cgen_state_->ir_builder_.CreateCondBr(
3309  loop_done_flag, loop_done_true, loop_done_false);
3310  cgen_state_->ir_builder_.SetInsertPoint(loop_done_true);
3311  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3312  cgen_state_->ir_builder_.SetInsertPoint(loop_done_false);
3313  } else {
3314  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3315  }
3316  }
3317  return ret;
3318 }
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1268
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1298
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr * > &primary_quals, std::vector< Analyzer::Expr * > &deferred_quals, const PlanState::HoistedFiltersSet &hoisted_quals)
Definition: LogicalIR.cpp:157
#define CHECK(condition)
Definition: Logger.h:222
void redeclareFilterFunction()
Definition: IRCodegen.cpp:995
#define VLOG(n)
Definition: Logger.h:316
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const PlanState::DeletedColumnsMap deleted_cols_map,
const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache,
RenderInfo render_info = nullptr 
)
private

Definition at line 2668 of file NativeCodegen.cpp.

2680  {
2681  auto timer = DEBUG_TIMER(__func__);
2682 
2684  if (!cuda_mgr) {
2685  throw QueryMustRunOnCpu();
2686  }
2687  }
2688 
2689 #ifndef NDEBUG
2690  static std::uint64_t counter = 0;
2691  ++counter;
2692  VLOG(1) << "CODEGEN #" << counter << ":";
2693  LOG(IR) << "CODEGEN #" << counter << ":";
2694  LOG(PTX) << "CODEGEN #" << counter << ":";
2695  LOG(ASM) << "CODEGEN #" << counter << ":";
2696 #endif
2697 
2698  // cgenstate_manager uses RAII pattern to manage the live time of
2699  // CgenState instances.
2700  Executor::CgenStateManager cgenstate_manager(*this,
2701  allow_lazy_fetch,
2702  query_infos,
2703  deleted_cols_map,
2704  &ra_exe_unit); // locks compilation_mutex
2705 
2706  addTransientStringLiterals(ra_exe_unit, row_set_mem_owner);
2707 
2708  GroupByAndAggregate group_by_and_aggregate(
2709  this,
2710  co.device_type,
2711  ra_exe_unit,
2712  query_infos,
2713  row_set_mem_owner,
2714  has_cardinality_estimation ? std::optional<int64_t>(max_groups_buffer_entry_guess)
2715  : std::nullopt);
2716  auto query_mem_desc =
2717  group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
2718  max_groups_buffer_entry_guess,
2719  crt_min_byte_width,
2720  render_info,
2722 
2723  if (query_mem_desc->getQueryDescriptionType() ==
2725  !has_cardinality_estimation && (!render_info || !render_info->isInSitu()) &&
2726  !eo.just_explain) {
2727  const auto col_range_info = group_by_and_aggregate.getColRangeInfo();
2728  throw CardinalityEstimationRequired(col_range_info.max - col_range_info.min);
2729  }
2730 
2731  const bool output_columnar = query_mem_desc->didOutputColumnar();
2732  const bool gpu_shared_mem_optimization =
2734  ra_exe_unit,
2735  cuda_mgr,
2736  co.device_type,
2737  cuda_mgr ? this->blockSize() : 1,
2738  cuda_mgr ? this->numBlocksPerMP() : 1);
2739  if (gpu_shared_mem_optimization) {
2740  // disable interleaved bins optimization on the GPU
2741  query_mem_desc->setHasInterleavedBinsOnGpu(false);
2742  LOG(DEBUG1) << "GPU shared memory is used for the " +
2743  query_mem_desc->queryDescTypeToString() + " query(" +
2744  std::to_string(get_shared_memory_size(gpu_shared_mem_optimization,
2745  query_mem_desc.get())) +
2746  " out of " + std::to_string(g_gpu_smem_threshold) + " bytes).";
2747  }
2748 
2749  const GpuSharedMemoryContext gpu_smem_context(
2750  get_shared_memory_size(gpu_shared_mem_optimization, query_mem_desc.get()));
2751 
2753  const size_t num_count_distinct_descs =
2754  query_mem_desc->getCountDistinctDescriptorsSize();
2755  for (size_t i = 0; i < num_count_distinct_descs; i++) {
2756  const auto& count_distinct_descriptor =
2757  query_mem_desc->getCountDistinctDescriptor(i);
2758  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::UnorderedSet ||
2759  (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
2760  !co.hoist_literals)) {
2761  throw QueryMustRunOnCpu();
2762  }
2763  }
2764 
2765  // we currently do not support varlen projection based on baseline groupby when
2766  // 1) target table is multi-fragmented and 2) multiple gpus are involved for query
2767  // processing in this case, we punt the query to cpu to avoid server crash
2768  for (const auto expr : ra_exe_unit.target_exprs) {
2769  if (auto gby_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
2770  bool has_multiple_gpus = cuda_mgr ? cuda_mgr->getDeviceCount() > 1 : false;
2771  if (gby_expr->get_aggtype() == SQLAgg::kSAMPLE && has_multiple_gpus &&
2772  !g_leaf_count) {
2773  std::set<const Analyzer::ColumnVar*,
2774  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
2776  gby_expr->collect_column_var(colvar_set, true);
2777  for (const auto cv : colvar_set) {
2778  if (cv->get_type_info().is_varlen()) {
2779  const auto tbl_id = cv->get_table_id();
2780  std::for_each(query_infos.begin(),
2781  query_infos.end(),
2782  [tbl_id](const InputTableInfo& input_table_info) {
2783  if (input_table_info.table_id == tbl_id &&
2784  input_table_info.info.fragments.size() > 1) {
2785  throw QueryMustRunOnCpu();
2786  }
2787  });
2788  }
2789  }
2790  }
2791  }
2792  }
2793  }
2794 
2795  // Read the module template and target either CPU or GPU
2796  // by binding the stream position functions to the right implementation:
2797  // stride access for GPU, contiguous for CPU
2798  CHECK(cgen_state_->module_ == nullptr);
2799  cgen_state_->set_module_shallow_copy(get_rt_module(), /*always_clone=*/true);
2800 
2801  auto is_gpu = co.device_type == ExecutorDeviceType::GPU;
2802  if (is_gpu) {
2803  cgen_state_->module_->setDataLayout(get_gpu_data_layout());
2804  cgen_state_->module_->setTargetTriple(get_gpu_target_triple_string());
2805  }
2806  if (has_udf_module(/*is_gpu=*/is_gpu)) {
2808  get_udf_module(/*is_gpu=*/is_gpu), *cgen_state_->module_, cgen_state_.get());
2809  }
2810  if (has_rt_udf_module(/*is_gpu=*/is_gpu)) {
2812  get_rt_udf_module(/*is_gpu=*/is_gpu), *cgen_state_->module_, cgen_state_.get());
2813  }
2814 
2816 
2817  auto agg_fnames =
2818  get_agg_fnames(ra_exe_unit.target_exprs, !ra_exe_unit.groupby_exprs.empty());
2819 
2820  const auto agg_slot_count = ra_exe_unit.estimator ? size_t(1) : agg_fnames.size();
2821 
2822  const bool is_group_by{query_mem_desc->isGroupBy()};
2823  auto [query_func, row_func_call] = is_group_by
2825  co.hoist_literals,
2826  *query_mem_desc,
2827  co.device_type,
2828  ra_exe_unit.scan_limit,
2829  gpu_smem_context)
2830  : query_template(cgen_state_->module_,
2831  agg_slot_count,
2832  co.hoist_literals,
2833  !!ra_exe_unit.estimator,
2834  gpu_smem_context);
2835  bind_pos_placeholders("pos_start", true, query_func, cgen_state_->module_);
2836  bind_pos_placeholders("group_buff_idx", false, query_func, cgen_state_->module_);
2837  bind_pos_placeholders("pos_step", false, query_func, cgen_state_->module_);
2838 
2839  cgen_state_->query_func_ = query_func;
2840  cgen_state_->row_func_call_ = row_func_call;
2841  cgen_state_->query_func_entry_ir_builder_.SetInsertPoint(
2842  &query_func->getEntryBlock().front());
2843 
2844  // Generate the function signature and column head fetches s.t.
2845  // double indirection isn't needed in the inner loop
2846  auto& fetch_bb = query_func->front();
2847  llvm::IRBuilder<> fetch_ir_builder(&fetch_bb);
2848  fetch_ir_builder.SetInsertPoint(&*fetch_bb.begin());
2849  auto col_heads = generate_column_heads_load(ra_exe_unit.input_col_descs.size(),
2850  query_func->args().begin(),
2851  fetch_ir_builder,
2852  cgen_state_->context_);
2853  CHECK_EQ(ra_exe_unit.input_col_descs.size(), col_heads.size());
2854 
2855  cgen_state_->row_func_ = create_row_function(ra_exe_unit.input_col_descs.size(),
2856  is_group_by ? 0 : agg_slot_count,
2857  co.hoist_literals,
2858  cgen_state_->module_,
2859  cgen_state_->context_);
2860  CHECK(cgen_state_->row_func_);
2861  cgen_state_->row_func_bb_ =
2862  llvm::BasicBlock::Create(cgen_state_->context_, "entry", cgen_state_->row_func_);
2863 
2865  auto filter_func_ft =
2866  llvm::FunctionType::get(get_int_type(32, cgen_state_->context_), {}, false);
2867  cgen_state_->filter_func_ = llvm::Function::Create(filter_func_ft,
2868  llvm::Function::ExternalLinkage,
2869  "filter_func",
2870  cgen_state_->module_);
2871  CHECK(cgen_state_->filter_func_);
2872  cgen_state_->filter_func_bb_ = llvm::BasicBlock::Create(
2873  cgen_state_->context_, "entry", cgen_state_->filter_func_);
2874  }
2875 
2876  cgen_state_->current_func_ = cgen_state_->row_func_;
2877  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
2878 
2879  preloadFragOffsets(ra_exe_unit.input_descs, query_infos);
2880  RelAlgExecutionUnit body_execution_unit = ra_exe_unit;
2881  const auto join_loops =
2882  buildJoinLoops(body_execution_unit, co, eo, query_infos, column_cache);
2883 
2884  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
2885  for (auto& simple_qual : ra_exe_unit.simple_quals) {
2886  plan_state_->addSimpleQual(simple_qual);
2887  }
2888  const auto is_not_deleted_bb = codegenSkipDeletedOuterTableRow(ra_exe_unit, co);
2889  if (is_not_deleted_bb) {
2890  cgen_state_->row_func_bb_ = is_not_deleted_bb;
2891  }
2892  if (!join_loops.empty()) {
2893  codegenJoinLoops(join_loops,
2894  body_execution_unit,
2895  group_by_and_aggregate,
2896  query_func,
2897  cgen_state_->row_func_bb_,
2898  *(query_mem_desc.get()),
2899  co,
2900  eo);
2901  } else {
2902  const bool can_return_error = compileBody(
2903  ra_exe_unit, group_by_and_aggregate, *query_mem_desc, co, gpu_smem_context);
2904  if (can_return_error || cgen_state_->needs_error_check_ || eo.with_dynamic_watchdog ||
2906  createErrorCheckControlFlow(query_func,
2909  join_loops,
2910  co.device_type,
2911  group_by_and_aggregate.query_infos_);
2912  }
2913  }
2914  std::vector<llvm::Value*> hoisted_literals;
2915 
2916  if (co.hoist_literals) {
2917  VLOG(1) << "number of hoisted literals: "
2918  << cgen_state_->query_func_literal_loads_.size()
2919  << " / literal buffer usage: " << cgen_state_->getLiteralBufferUsage(0)
2920  << " bytes";
2921  }
2922 
2923  if (co.hoist_literals && !cgen_state_->query_func_literal_loads_.empty()) {
2924  // we have some hoisted literals...
2925  hoisted_literals = inlineHoistedLiterals();
2926  }
2927 
2928  // replace the row func placeholder call with the call to the actual row func
2929  std::vector<llvm::Value*> row_func_args;
2930  for (size_t i = 0; i < cgen_state_->row_func_call_->getNumOperands() - 1; ++i) {
2931  row_func_args.push_back(cgen_state_->row_func_call_->getArgOperand(i));
2932  }
2933  row_func_args.insert(row_func_args.end(), col_heads.begin(), col_heads.end());
2934  row_func_args.push_back(get_arg_by_name(query_func, "join_hash_tables"));
2935  // push hoisted literals arguments, if any
2936  row_func_args.insert(
2937  row_func_args.end(), hoisted_literals.begin(), hoisted_literals.end());
2938  llvm::ReplaceInstWithInst(
2939  cgen_state_->row_func_call_,
2940  llvm::CallInst::Create(cgen_state_->row_func_, row_func_args, ""));
2941 
2942  // replace the filter func placeholder call with the call to the actual filter func
2943  if (cgen_state_->filter_func_) {
2944  std::vector<llvm::Value*> filter_func_args;
2945  for (auto arg_it = cgen_state_->filter_func_args_.begin();
2946  arg_it != cgen_state_->filter_func_args_.end();
2947  ++arg_it) {
2948  filter_func_args.push_back(arg_it->first);
2949  }
2950  llvm::ReplaceInstWithInst(
2951  cgen_state_->filter_func_call_,
2952  llvm::CallInst::Create(cgen_state_->filter_func_, filter_func_args, ""));
2953  }
2954 
2955  // Aggregate
2956  plan_state_->init_agg_vals_ =
2957  init_agg_val_vec(ra_exe_unit.target_exprs, ra_exe_unit.quals, *query_mem_desc);
2958 
2959  /*
2960  * If we have decided to use GPU shared memory (decision is not made here), then
2961  * we generate proper code for extra components that it needs (buffer initialization and
2962  * gpu reduction from shared memory to global memory). We then replace these functions
2963  * into the already compiled query_func (replacing two placeholders, write_back_nop and
2964  * init_smem_nop). The rest of the code should be as before (row_func, etc.).
2965  */
2966  if (gpu_smem_context.isSharedMemoryUsed()) {
2967  if (query_mem_desc->getQueryDescriptionType() ==
2969  GpuSharedMemCodeBuilder gpu_smem_code(
2970  cgen_state_->module_,
2971  cgen_state_->context_,
2972  *query_mem_desc,
2974  plan_state_->init_agg_vals_,
2975  executor_id_);
2976  gpu_smem_code.codegen();
2977  gpu_smem_code.injectFunctionsInto(query_func);
2978 
2979  // helper functions are used for caching purposes later
2980  cgen_state_->helper_functions_.push_back(gpu_smem_code.getReductionFunction());
2981  cgen_state_->helper_functions_.push_back(gpu_smem_code.getInitFunction());
2982  LOG(IR) << gpu_smem_code.toString();
2983  }
2984  }
2985 
2986  auto multifrag_query_func = cgen_state_->module_->getFunction(
2987  "multifrag_query" + std::string(co.hoist_literals ? "_hoisted_literals" : ""));
2988  CHECK(multifrag_query_func);
2989 
2992  multifrag_query_func, co.hoist_literals, eo.allow_runtime_query_interrupt);
2993  }
2994 
2995  bind_query(query_func,
2996  "query_stub" + std::string(co.hoist_literals ? "_hoisted_literals" : ""),
2997  multifrag_query_func,
2998  cgen_state_->module_);
2999 
3000  std::vector<llvm::Function*> root_funcs{query_func, cgen_state_->row_func_};
3001  if (cgen_state_->filter_func_) {
3002  root_funcs.push_back(cgen_state_->filter_func_);
3003  }
3004  auto live_funcs = CodeGenerator::markDeadRuntimeFuncs(
3005  *cgen_state_->module_, root_funcs, {multifrag_query_func});
3006 
3007  // Always inline the row function and the filter function.
3008  // We don't want register spills in the inner loops.
3009  // LLVM seems to correctly free up alloca instructions
3010  // in these functions even when they are inlined.
3012  if (cgen_state_->filter_func_) {
3014  }
3015 
3016 #ifndef NDEBUG
3017  // Add helpful metadata to the LLVM IR for debugging.
3019 #endif
3020 
3021  // Serialize the important LLVM IR functions to text for SQL EXPLAIN.
3022  std::string llvm_ir;
3023  if (eo.just_explain) {
3025 #ifdef WITH_JIT_DEBUG
3026  throw std::runtime_error(
3027  "Explain optimized not available when JIT runtime debug symbols are enabled");
3028