OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Executor Class Reference

#include <Execute.h>

+ Collaboration diagram for Executor:

Classes

class  CgenStateManager
 
struct  ExecutorMutexHolder
 
class  FetchCacheAnchor
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

enum  ExtModuleKinds {
  ExtModuleKinds::template_module, ExtModuleKinds::udf_cpu_module, ExtModuleKinds::udf_gpu_module, ExtModuleKinds::rt_udf_cpu_module,
  ExtModuleKinds::rt_udf_gpu_module, ExtModuleKinds::rt_geos_module, ExtModuleKinds::rt_libdevice_module
}
 
using ExecutorId = size_t
 
using CachedCardinality = std::pair< bool, size_t >
 

Public Member Functions

 Executor (const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
 
void clearCaches (bool runtime_only=false)
 
std::string dumpCache () const
 
void reset (bool discard_runtime_modules_only=false)
 
const std::unique_ptr
< llvm::Module > & 
get_rt_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_rt_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_geos_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_libdevice_module () const
 
bool has_rt_module () const
 
bool has_udf_module (bool is_gpu=false) const
 
bool has_rt_udf_module (bool is_gpu=false) const
 
bool has_geos_module () const
 
bool has_libdevice_module () const
 
const TemporaryTables* getTemporaryTables ()
 
StringDictionaryProxy* getStringDictionaryProxy (const int dict_id, const bool with_generation) const
 
StringDictionaryProxy* getStringDictionaryProxy (const int dictId, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap * 
getStringProxyTranslationMap (const int source_dict_id, const int dest_dict_id, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap * 
getJoinIntersectionStringProxyTranslationMap (const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor* getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor* getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
const Catalog_Namespace::Catalog* getCatalog () const
 
void setCatalog (const Catalog_Namespace::Catalog *catalog)
 
Data_Namespace::DataMgr* getDataMgr () const
 
const std::shared_ptr
< RowSetMemoryOwner > 
getRowSetMemoryOwner () const
 
const TemporaryTables* getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const int table_id) const
 
const TableGeneration& getTableGeneration (const int table_id) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow (const std::set< int > &table_ids_to_fetch) const
 
bool hasLazyFetchColumns (const std::vector< Analyzer::Expr * > &target_exprs) const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void interrupt (const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
 
void resetInterrupt ()
 
void enableRuntimeQueryInterrupt (const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
 
int8_t warpSize () const
 
unsigned gridSize () const
 
unsigned numBlocksPerMP () const
 
unsigned blockSize () const
 
size_t maxGpuSlabSize () const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
TableUpdateMetadata executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
 
void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
 
int deviceCount (const ExecutorDeviceType) const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
 
void setColRangeCache (const AggregatedColRange &aggregated_col_range)
 
ExecutorId getExecutorId () const
 
QuerySessionId& getCurrentQuerySession (heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
QuerySessionStatus::QueryStatus getQuerySessionStatus (const QuerySessionId &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkCurrentQuerySession (const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
void invalidateRunningQuerySession (heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool addToQuerySessionList (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool removeFromQuerySessionList (const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
void setQuerySessionAsInterrupted (const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool checkIsQuerySessionInterrupted (const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkIsQuerySessionEnrolled (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool updateQuerySessionStatusWithLock (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool updateQuerySessionExecutorAssignment (const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
std::vector< QuerySessionStatus > getQuerySessionInfo (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
heavyai::shared_mutex& getSessionLock ()
 
CurrentQueryStatus attachExecutorToQuerySession (const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
 
void checkPendingQueryStatus (const QuerySessionId &query_session)
 
void clearQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str)
 
void updateQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
 
void enrollQuerySession (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
 
size_t getNumCurentSessionsEnrolled () const
 
const std::vector< size_t > getExecutorIdsRunningQuery (const QuerySessionId &interrupt_session) const
 
bool checkNonKernelTimeInterrupted () const
 
void registerExtractedQueryPlanDag (const QueryPlanDAG &query_plan_dag)
 
const QueryPlanDAG getLatestQueryPlanDagExtracted () const
 
void addToCardinalityCache (const std::string &cache_key, const size_t cache_value)
 
CachedCardinality getCachedCardinality (const std::string &cache_key)
 
heavyai::shared_mutex& getDataRecyclerLock ()
 
QueryPlanDagCache& getQueryPlanDagCache ()
 
ResultSetRecyclerHolder& getRecultSetRecyclerHolder ()
 
CgenState* getCgenStatePtr () const
 
PlanState* getPlanStatePtr () const
 
llvm::LLVMContext & getContext ()
 
void update_extension_modules (bool update_runtime_modules_only=false)
 

Static Public Member Functions

static void clearExternalCaches (bool for_update, const TableDescriptor *td, const int current_db_id)
 
template<typename F >
static void registerExtensionFunctions (F register_extension_functions)
 
static std::shared_ptr< Executor > getExecutor (const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static size_t getArenaBlockSize ()
 
static void addUdfIrToModule (const std::string &udf_ir_filename, const bool is_cuda_ir)
 
static void initialize_extension_module_sources ()
 
static void registerActiveModule (void *module, const int device_id)
 
static void unregisterActiveModule (const int device_id)
 
static std::pair< int64_t,
int32_t > 
reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void update_after_registration (bool update_runtime_modules_only=false)
 

Public Attributes

std::mutex compilation_mutex_
 
const logger::ThreadId thread_id_
 

Static Public Attributes

static const ExecutorId UNITARY_EXECUTOR_ID = 0
 
static const ExecutorId INVALID_EXECUTOR_ID = SIZE_MAX
 
static std::map
< ExtModuleKinds, std::string > 
extension_module_sources
 
static const size_t high_scan_limit
 
static const int32_t ERR_DIV_BY_ZERO {1}
 
static const int32_t ERR_OUT_OF_GPU_MEM {2}
 
static const int32_t ERR_OUT_OF_SLOTS {3}
 
static const int32_t ERR_UNSUPPORTED_SELF_JOIN {4}
 
static const int32_t ERR_OUT_OF_RENDER_MEM {5}
 
static const int32_t ERR_OUT_OF_CPU_MEM {6}
 
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW {7}
 
static const int32_t ERR_OUT_OF_TIME {9}
 
static const int32_t ERR_INTERRUPTED {10}
 
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11}
 
static const int32_t ERR_TOO_MANY_LITERALS {12}
 
static const int32_t ERR_STRING_CONST_IN_RESULTSET {13}
 
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14}
 
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES {15}
 
static const int32_t ERR_GEOS {16}
 
static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT {17}
 
static std::mutex register_runtime_extension_functions_mutex_
 
static std::mutex kernel_mutex_
 

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenWindowFunctionAggregate (const CompilationOptions &co)
 
llvm::BasicBlock * codegenWindowResetStateControlFlow ()
 
void codegenWindowFunctionStateInit (llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
llvm::Value * codegenWindowFunctionOnFrame (const CompilationOptions &co)
 
llvm::Value * codegenCurrentPartitionIndex (const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
 
llvm::Value * codegenFrameBoundExpr (const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenWindowFrameBound (WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *current_row_pos_lv, llvm::Value *current_partition_start_offset_lv, llvm::Value *order_key_buf_ptr_lv, llvm::Value *order_key_col_null_val_lv, llvm::Value *frame_start_bound_expr_lv, llvm::Value *frame_end_bound_expr_lv, llvm::Value *num_elem_current_partition_lv, llvm::Value *target_partition_rowid_ptr_lv, llvm::Value *target_partition_sorted_rowid_ptr_lv, llvm::Value *null_start_pos_lv, llvm::Value *null_end_pos_lv, CodeGenerator &code_generator)
 
void codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
 
llvm::Value * codegenAggregateWindowState ()
 
llvm::Value * aggregateWindowStatePtr ()
 
CudaMgr_Namespace::CudaMgr* cudaMgr () const
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
bool needLinearizeAllFragments (const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
 Compiles and dispatches a work unit per fragment processing results with the per fragment callback. Currently used for computing metrics over fragments (metadata). More...
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
 Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps. More...
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
 
std::unordered_map< int, const
Analyzer::BinOper * > 
getInnerTabIdToJoinCond () const
 
std::vector< std::unique_ptr
< ExecutionKernel > > 
createKernels (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
 
void launchKernels (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
FetchResult fetchUnionChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
std::pair< std::vector
< std::vector< int64_t >
>, std::vector< std::vector
< uint64_t > > > 
getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
void buildSelectedFragsMappingForUnion (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int outer_table_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const int64_t rows_to_process=-1)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const int64_t rows_to_process=-1)
 
ResultSetPtr resultsUnion (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< int8_t * > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
void AutoTrackBuffersInRuntimeIR ()
 
std::tuple< CompilationResult,
std::unique_ptr
< QueryMemoryDescriptor > > 
compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoopbuildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const int inner_table_id, const CompilationOptions &co)
 
std::function< llvm::Value
*(const std::vector
< llvm::Value * >
&, llvm::Value *)> 
buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< HashJoin > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
void redeclareFilterFunction ()
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
 
void insertErrorCodeChecker (llvm::Function *query_func, bool hoist_literals, bool allow_runtime_query_interrupt)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
 
std::shared_ptr
< CompilationContext > 
optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::shared_ptr
< CompilationContext > 
optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
std::tuple
< RelAlgExecutionUnit,
PlanState::DeletedColumnsMap > 
addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
bool isFragmentFullyDeleted (const int table_id, const Fragmenter_Namespace::FragmentInfo &fragment)
 
FragmentSkipStatus canSkipFragmentForFpQual (const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (std::unordered_set< int > phys_table_ids)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
const std::unique_ptr
< llvm::Module > & 
get_extension_module (ExtModuleKinds kind) const
 
bool has_extension_module (ExtModuleKinds kind) const
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 
ExecutorMutexHolder acquireExecuteMutex ()
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

const ExecutorId executor_id_
 
std::unique_ptr
< llvm::LLVMContext > 
context_
 
std::unique_ptr< CgenState > cgen_state_
 
std::map< ExtModuleKinds,
std::unique_ptr< llvm::Module > > 
extension_modules_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr
< RowSetMemoryOwner > 
row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::atomic< bool > interrupted_
 
std::mutex str_dict_mutex_
 
std::unique_ptr
< llvm::TargetMachine > 
nvptx_target_machine_
 
const unsigned block_size_x_
 
const unsigned grid_size_x_
 
const size_t max_gpu_slab_size_
 
const std::string debug_dir_
 
const std::string debug_file_
 
const Catalog_Namespace::Catalog* catalog_
 
Data_Namespace::DataMgr* data_mgr_
 
const TemporaryTables* temporary_tables_
 
TableIdToNodeMap table_id_to_node_map_
 
int64_t kernel_queue_time_ms_ = 0
 
int64_t compilation_queue_time_ms_ = 0
 
std::unique_ptr
< WindowProjectNodeContext > 
window_project_node_context_owned_
 
WindowFunctionContext* active_window_function_ {nullptr}
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
TableGenerations table_generations_
 
QuerySessionId current_query_session_
 

Static Private Attributes

static const int max_gpu_count {16}
 
static std::mutex gpu_active_modules_mutex_
 
static uint32_t gpu_active_modules_device_mask_ {0x0}
 
static void * gpu_active_modules_ [max_gpu_count]
 
static const size_t baseline_threshold
 
static heavyai::shared_mutex executor_session_mutex_
 
static InterruptFlagMap queries_interrupt_flag_
 
static QuerySessionMap queries_session_map_
 
static std::map< int,
std::shared_ptr< Executor > > 
executors_
 
static heavyai::shared_mutex execute_mutex_
 
static heavyai::shared_mutex executors_cache_mutex_
 
static QueryPlanDagCache query_plan_dag_cache_
 
static heavyai::shared_mutex recycler_mutex_
 
static std::unordered_map
< std::string, size_t > 
cardinality_cache_
 
static ResultSetRecyclerHolder resultset_recycler_holder_
 
static QueryPlanDAG latest_query_plan_extracted_ {EMPTY_QUERY_PLAN}
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
struct DiamondCodegen
 
class ExecutionKernel
 
class KernelSubtask
 
class HashJoin
 
class OverlapsJoinHashTable
 
class RangeJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class StringDictionaryTranslationMgr
 
class LeafAggregator
 
class PerfectJoinHashTable
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
class WindowProjectNodeContext
 

Detailed Description

Definition at line 368 of file Execute.h.

Member Typedef Documentation

using Executor::CachedCardinality = std::pair<bool, size_t>

Definition at line 1199 of file Execute.h.

using Executor::ExecutorId = size_t

Definition at line 375 of file Execute.h.

Definition at line 729 of file Execute.h.

Member Enumeration Documentation

Enumerator
template_module 
udf_cpu_module 
udf_gpu_module 
rt_udf_cpu_module 
rt_udf_gpu_module 
rt_geos_module 
rt_libdevice_module 

Definition at line 469 of file Execute.h.

469  {
470  template_module, // RuntimeFunctions.bc
471  udf_cpu_module, // Load-time UDFs for CPU execution
472  udf_gpu_module, // Load-time UDFs for GPU execution
473  rt_udf_cpu_module, // Run-time UDF/UDTFs for CPU execution
474  rt_udf_gpu_module, // Run-time UDF/UDTFs for GPU execution
475  rt_geos_module, // geos functions
476  rt_libdevice_module // math library functions for GPU execution
477  };
std::unique_ptr< llvm::Module > udf_gpu_module
std::unique_ptr< llvm::Module > udf_cpu_module

Constructor & Destructor Documentation

Executor::Executor ( const ExecutorId  id,
Data_Namespace::DataMgr data_mgr,
const size_t  block_size_x,
const size_t  grid_size_x,
const size_t  max_gpu_slab_size,
const std::string &  debug_dir,
const std::string &  debug_file 
)

Definition at line 244 of file Execute.cpp.

251  : executor_id_(executor_id)
252  , context_(new llvm::LLVMContext())
253  , cgen_state_(new CgenState({}, false, this))
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
const ExecutorId executor_id_
Definition: Execute.h:1232
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1233

Member Function Documentation

ExecutorMutexHolder Executor::acquireExecuteMutex ( )
inlineprivate

Definition at line 1347 of file Execute.h.

References execute_mutex_, executor_id_, Executor::ExecutorMutexHolder::shared_lock, Executor::ExecutorMutexHolder::unique_lock, and UNITARY_EXECUTOR_ID.

1347  {
1348  ExecutorMutexHolder ret;
1350  // Only one unitary executor can run at a time
1352  } else {
1354  }
1355  return ret;
1356  }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1341
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1232
std::unique_lock< T > unique_lock
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:376
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > Executor::addDeletedColumn ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3918 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::add_deleted_col_to_map(), catalog_, CHECK, CompilationOptions::filter_on_deleted_column, Catalog_Namespace::Catalog::getDeletedColumnIfRowsDeleted(), Catalog_Namespace::Catalog::getMetadataForTable(), and TABLE.

Referenced by executeWorkUnitImpl(), and executeWorkUnitPerFragment().

3920  {
3921  if (!co.filter_on_deleted_column) {
3922  return std::make_tuple(ra_exe_unit, PlanState::DeletedColumnsMap{});
3923  }
3924  auto ra_exe_unit_with_deleted = ra_exe_unit;
3925  PlanState::DeletedColumnsMap deleted_cols_map;
3926  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
3927  if (input_table.getSourceType() != InputSourceType::TABLE) {
3928  continue;
3929  }
3930  const auto td = catalog_->getMetadataForTable(input_table.getTableId());
3931  CHECK(td);
3932  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
3933  if (!deleted_cd) {
3934  continue;
3935  }
3936  CHECK(deleted_cd->columnType.is_boolean());
3937  // check deleted column is not already present
3938  bool found = false;
3939  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
3940  if (input_col.get()->getColId() == deleted_cd->columnId &&
3941  input_col.get()->getScanDesc().getTableId() == deleted_cd->tableId &&
3942  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
3943  found = true;
3944  add_deleted_col_to_map(deleted_cols_map, deleted_cd);
3945  break;
3946  }
3947  }
3948  if (!found) {
3949  // add deleted column
3950  ra_exe_unit_with_deleted.input_col_descs.emplace_back(new InputColDescriptor(
3951  deleted_cd->columnId, deleted_cd->tableId, input_table.getNestLevel()));
3952  add_deleted_col_to_map(deleted_cols_map, deleted_cd);
3953  }
3954  }
3955  return std::make_tuple(ra_exe_unit_with_deleted, deleted_cols_map);
3956 }
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1313
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:3679
std::unordered_map< TableId, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
#define CHECK(condition)
Definition: Logger.h:222
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
void add_deleted_col_to_map(PlanState::DeletedColumnsMap &deleted_cols_map, const ColumnDescriptor *deleted_cd)
Definition: Execute.cpp:3906

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 1094 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

1095  {
1097  // Iterators are added for loop-outer joins when the head of the loop is generated,
1098  // then once again when the body if generated. Allow this instead of special handling
1099  // of call sites.
1100  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
1101  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
1102  return it->second;
1103  }
1104  CHECK(!prev_iters.empty());
1105  llvm::Value* matching_row_index = prev_iters.back();
1106  const auto it_ok =
1107  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
1108  CHECK(it_ok.second);
1109  return matching_row_index;
1110 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:222
void Executor::addToCardinalityCache ( const std::string &  cache_key,
const size_t  cache_value 
)

Definition at line 4720 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, recycler_mutex_, and VLOG.

4721  {
4724  cardinality_cache_[cache_key] = cache_value;
4725  VLOG(1) << "Put estimated cardinality to the cache";
4726  }
4727 }
static std::unordered_map< std::string, size_t > cardinality_cache_
Definition: Execute.h:1362
std::unique_lock< T > unique_lock
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1361
bool g_use_estimator_result_cache
Definition: Execute.cpp:127
#define VLOG(n)
Definition: Logger.h:316
bool Executor::addToQuerySessionList ( const QuerySessionId query_session,
const std::string &  query_str,
const std::string &  submitted,
const size_t  executor_id,
const QuerySessionStatus::QueryStatus  query_status,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 4551 of file Execute.cpp.

References queries_interrupt_flag_, and queries_session_map_.

Referenced by enrollQuerySession().

4557  {
4558  // an internal API that enrolls the query session into the Executor's session map
4559  if (queries_session_map_.count(query_session)) {
4560  if (queries_session_map_.at(query_session).count(submitted_time_str)) {
4561  queries_session_map_.at(query_session).erase(submitted_time_str);
4562  queries_session_map_.at(query_session)
4563  .emplace(submitted_time_str,
4564  QuerySessionStatus(query_session,
4565  executor_id,
4566  query_str,
4567  submitted_time_str,
4568  query_status));
4569  } else {
4570  queries_session_map_.at(query_session)
4571  .emplace(submitted_time_str,
4572  QuerySessionStatus(query_session,
4573  executor_id,
4574  query_str,
4575  submitted_time_str,
4576  query_status));
4577  }
4578  } else {
4579  std::map<std::string, QuerySessionStatus> executor_per_query_map;
4580  executor_per_query_map.emplace(
4581  submitted_time_str,
4583  query_session, executor_id, query_str, submitted_time_str, query_status));
4584  queries_session_map_.emplace(query_session, executor_per_query_map);
4585  }
4586  return queries_interrupt_flag_.emplace(query_session, false).second;
4587 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1336
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1334

+ Here is the caller graph for this function:

void Executor::addTransientStringLiterals ( const RelAlgExecutionUnit ra_exe_unit,
const std::shared_ptr< RowSetMemoryOwner > &  row_set_mem_owner 
)

Definition at line 2139 of file Execute.cpp.

References CHECK, getStringDictionaryProxy(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, kSINGLE_VALUE, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_union, and ScalarExprVisitor< T >::visit().

2141  {
2142  TransientDictIdVisitor dict_id_visitor;
2143 
2144  auto visit_expr =
2145  [this, &dict_id_visitor, &row_set_mem_owner](const Analyzer::Expr* expr) {
2146  if (!expr) {
2147  return;
2148  }
2149  const auto dict_id = dict_id_visitor.visit(expr);
2150  if (dict_id >= 0) {
2151  auto sdp = getStringDictionaryProxy(dict_id, row_set_mem_owner, true);
2152  CHECK(sdp);
2153  TransientStringLiteralsVisitor visitor(sdp, this);
2154  visitor.visit(expr);
2155  }
2156  };
2157 
2158  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2159  visit_expr(group_expr.get());
2160  }
2161 
2162  for (const auto& group_expr : ra_exe_unit.quals) {
2163  visit_expr(group_expr.get());
2164  }
2165 
2166  for (const auto& group_expr : ra_exe_unit.simple_quals) {
2167  visit_expr(group_expr.get());
2168  }
2169 
2170  const auto visit_target_expr = [&](const Analyzer::Expr* target_expr) {
2171  const auto& target_type = target_expr->get_type_info();
2172  if (!target_type.is_string() || target_type.get_compression() == kENCODING_DICT) {
2173  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2174  if (agg_expr) {
2175  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
2176  agg_expr->get_aggtype() == kSAMPLE) {
2177  visit_expr(agg_expr->get_arg());
2178  }
2179  } else {
2180  visit_expr(target_expr);
2181  }
2182  }
2183  };
2184  const auto& target_exprs = ra_exe_unit.target_exprs;
2185  std::for_each(target_exprs.begin(), target_exprs.end(), visit_target_expr);
2186  const auto& target_exprs_union = ra_exe_unit.target_exprs_union;
2187  std::for_each(target_exprs_union.begin(), target_exprs_union.end(), visit_target_expr);
2188 }
std::vector< Analyzer::Expr * > target_exprs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
T visit(const Analyzer::Expr *expr) const
std::vector< Analyzer::Expr * > target_exprs_union
StringDictionaryProxy * getStringDictionaryProxy(const int dict_id, const bool with_generation) const
Definition: Execute.h:529
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:222
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

void Executor::addUdfIrToModule ( const std::string &  udf_ir_filename,
const bool  is_cuda_ir 
)
static

Definition at line 1857 of file NativeCodegen.cpp.

Referenced by DBHandler::initialize().

1858  {
1862  udf_ir_filename;
1863 }
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:479

+ Here is the caller graph for this function:

llvm::Value * Executor::aggregateWindowStatePtr ( )
private

Definition at line 211 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.

211  {
213  const auto window_func_context =
215  const auto window_func = window_func_context->getWindowFunction();
216  const auto arg_ti = get_adjusted_window_type_info(window_func);
217  llvm::Type* aggregate_state_type =
218  arg_ti.get_type() == kFLOAT
219  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
220  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
221  const auto aggregate_state_i64 = cgen_state_->llInt(
222  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
223  return cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_i64,
224  aggregate_state_type);
225 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1233
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inlinestaticprivate

Definition at line 1224 of file Execute.h.

Referenced by serializeLiterals().

1224  {
1225  size_t off = off_in;
1226  if (off % alignment != 0) {
1227  off += (alignment - off % alignment);
1228  }
1229  return off;
1230  }

+ Here is the caller graph for this function:

CurrentQueryStatus Executor::attachExecutorToQuerySession ( const QuerySessionId query_session_id,
const std::string &  query_str,
const std::string &  query_submitted_time 
)

Definition at line 4449 of file Execute.cpp.

References executor_id_, executor_session_mutex_, updateQuerySessionExecutorAssignment(), and updateQuerySessionStatusWithLock().

4452  {
4453  if (!query_session_id.empty()) {
4454  // if session is valid, do update 1) the exact executor id and 2) query status
4457  query_session_id, query_submitted_time, executor_id_, write_lock);
4458  updateQuerySessionStatusWithLock(query_session_id,
4459  query_submitted_time,
4460  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR,
4461  write_lock);
4462  }
4463  return {query_session_id, query_str};
4464 }
heavyai::unique_lock< heavyai::shared_mutex > write_lock
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4589
const ExecutorId executor_id_
Definition: Execute.h:1232
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4615
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1330

+ Here is the call graph for this function:

void Executor::AutoTrackBuffersInRuntimeIR ( )
private

Definition at line 2200 of file NativeCodegen.cpp.

References CHECK.

2200  {
2201  llvm::Module* M = cgen_state_->module_;
2202  if (M->getFunction("allocate_varlen_buffer") == nullptr)
2203  return;
2204 
2205  // read metadata
2206  bool should_track = false;
2207  auto* flag = M->getModuleFlag("manage_memory_buffer");
2208  if (auto* cnt = llvm::mdconst::extract_or_null<llvm::ConstantInt>(flag)) {
2209  if (cnt->getZExtValue() == 1) {
2210  should_track = true;
2211  }
2212  }
2213 
2214  if (!should_track) {
2215  // metadata is not present
2216  return;
2217  }
2218 
2219  LOG(INFO) << "Found 'manage_memory_buffer' metadata.";
2220  llvm::SmallVector<llvm::CallInst*, 4> calls_to_analyze;
2221 
2222  for (llvm::Function& F : *M) {
2223  for (llvm::BasicBlock& BB : F) {
2224  for (llvm::Instruction& I : BB) {
2225  if (llvm::CallInst* CI = llvm::dyn_cast<llvm::CallInst>(&I)) {
2226  // Keep track of calls to "allocate_varlen_buffer" for later processing
2227  llvm::Function* called = CI->getCalledFunction();
2228  if (called) {
2229  if (called->getName() == "allocate_varlen_buffer") {
2230  calls_to_analyze.push_back(CI);
2231  }
2232  }
2233  }
2234  }
2235  }
2236  }
2237 
2238  // for each call to "allocate_varlen_buffer", check if there's a corresponding
2239  // call to "register_buffer_with_executor_rsm". If not, add a call to it
2240  llvm::IRBuilder<> Builder(cgen_state_->context_);
2241  auto i64 = get_int_type(64, cgen_state_->context_);
2242  auto i8p = get_int_ptr_type(8, cgen_state_->context_);
2243  auto void_ = llvm::Type::getVoidTy(cgen_state_->context_);
2244  llvm::FunctionType* fnty = llvm::FunctionType::get(void_, {i64, i8p}, false);
2245  llvm::FunctionCallee register_buffer_fn =
2246  M->getOrInsertFunction("register_buffer_with_executor_rsm", fnty, {});
2247 
2248  int64_t executor_addr = reinterpret_cast<int64_t>(this);
2249  for (llvm::CallInst* CI : calls_to_analyze) {
2250  bool found = false;
2251  // for each user of the function, check if its a callinst
2252  // and if the callinst is calling "register_buffer_with_executor_rsm"
2253  // if no such instruction exist, add one registering the buffer
2254  for (llvm::User* U : CI->users()) {
2255  if (llvm::CallInst* call = llvm::dyn_cast<llvm::CallInst>(U)) {
2256  if (call->getCalledFunction() and
2257  call->getCalledFunction()->getName() == "register_buffer_with_executor_rsm") {
2258  found = true;
2259  break;
2260  }
2261  }
2262  }
2263  if (!found) {
2264  Builder.SetInsertPoint(CI->getNextNode());
2265  Builder.CreateCall(register_buffer_fn,
2266  {ll_int(executor_addr, cgen_state_->context_), CI});
2267  }
2268  }
2269 }
#define LOG(tag)
Definition: Logger.h:216
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Type * get_int_ptr_type(const int width, llvm::LLVMContext &context)
unsigned Executor::blockSize ( ) const

Definition at line 3826 of file Execute.cpp.

References block_size_x_, CHECK, data_mgr_, CudaMgr_Namespace::CudaMgr::getAllDeviceProperties(), and Data_Namespace::DataMgr::getCudaMgr().

Referenced by collectAllDeviceShardedTopResults(), executePlanWithGroupBy(), executePlanWithoutGroupBy(), executeTableFunction(), executeWorkUnitImpl(), reduceMultiDeviceResults(), reduceMultiDeviceResultSets(), and resultsUnion().

3826  {
3827  CHECK(data_mgr_);
3828  const auto cuda_mgr = data_mgr_->getCudaMgr();
3829  if (!cuda_mgr) {
3830  return 0;
3831  }
3832  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
3833  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
3834 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:224
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1314
const unsigned block_size_x_
Definition: Execute.h:1307
#define CHECK(condition)
Definition: Logger.h:222
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< HashJoin > Executor::buildCurrentLevelHashTable ( const JoinCondition current_level_join_conditions,
size_t  level_idx,
RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 935 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), AUTOMATIC_IR_METADATA, anonymous_namespace{IRCodegen.cpp}::check_valid_join_qual(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, RelAlgExecutionUnit::hash_table_build_plan_dag, IS_EQUIVALENCE, LEFT, OneToOne, JoinCondition::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::table_id_to_node_map, JoinCondition::type, and VLOG.

942  {
944  std::shared_ptr<HashJoin> current_level_hash_table;
945  auto handleNonHashtableQual = [&ra_exe_unit, &level_idx, this](
946  JoinType join_type,
947  std::shared_ptr<Analyzer::Expr> qual) {
948  if (join_type == JoinType::LEFT) {
949  plan_state_->addNonHashtableQualForLeftJoin(level_idx, qual);
950  } else {
951  add_qualifier_to_execution_unit(ra_exe_unit, qual);
952  }
953  };
954  for (const auto& join_qual : current_level_join_conditions.quals) {
955  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
956  if (current_level_hash_table || !qual_bin_oper ||
957  !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
958  handleNonHashtableQual(current_level_join_conditions.type, join_qual);
959  if (!current_level_hash_table) {
960  fail_reasons.emplace_back("No equijoin expression found");
961  }
962  continue;
963  }
964  check_valid_join_qual(qual_bin_oper);
965  JoinHashTableOrError hash_table_or_error;
966  if (!current_level_hash_table) {
967  hash_table_or_error = buildHashTableForQualifier(
968  qual_bin_oper,
969  query_infos,
972  current_level_join_conditions.type,
974  column_cache,
975  ra_exe_unit.hash_table_build_plan_dag,
976  ra_exe_unit.query_hint,
977  ra_exe_unit.table_id_to_node_map);
978  current_level_hash_table = hash_table_or_error.hash_table;
979  }
980  if (hash_table_or_error.hash_table) {
981  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
982  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
983  } else {
984  fail_reasons.push_back(hash_table_or_error.fail_reason);
985  if (!current_level_hash_table) {
986  VLOG(2) << "Building a hashtable based on a qual " << qual_bin_oper->toString()
987  << " fails: " << hash_table_or_error.fail_reason;
988  }
989  handleNonHashtableQual(current_level_join_conditions.type, qual_bin_oper);
990  }
991  }
992  return current_level_hash_table;
993 }
JoinType
Definition: sqldefs.h:156
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:68
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
TableIdToNodeMap table_id_to_node_map
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1288
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
Definition: IRCodegen.cpp:474
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:3770
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define VLOG(n)
Definition: Logger.h:316
HashTableBuildDagMap hash_table_build_plan_dag
void check_valid_join_qual(std::shared_ptr< Analyzer::BinOper > &bin_oper)
Definition: IRCodegen.cpp:504

+ Here is the call graph for this function:

Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
ColumnCacheMap column_cache,
const HashTableBuildDagMap hashtable_build_dag_map,
const RegisteredQueryHint query_hint,
const TableIdToNodeMap table_id_to_node_map 
)
private

Definition at line 3770 of file Execute.cpp.

References deviceCountForMemoryLevel(), ERR_INTERRUPTED, g_enable_dynamic_watchdog, g_enable_overlaps_hashjoin, HashJoin::getInstance(), and interrupted_.

3779  {
3780  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
3781  return {nullptr, "Overlaps hash join disabled, attempting to fall back to loop join"};
3782  }
3783  if (g_enable_dynamic_watchdog && interrupted_.load()) {
3785  }
3786  try {
3787  auto tbl = HashJoin::getInstance(qual_bin_oper,
3788  query_infos,
3789  memory_level,
3790  join_type,
3791  preferred_hash_type,
3792  deviceCountForMemoryLevel(memory_level),
3793  column_cache,
3794  this,
3795  hashtable_build_dag_map,
3796  query_hint,
3797  table_id_to_node_map);
3798  return {tbl, ""};
3799  } catch (const HashJoinFail& e) {
3800  return {nullptr, e.what()};
3801  }
3802 }
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1378
std::atomic< bool > interrupted_
Definition: Execute.h:1298
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:80
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:1040
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query&#39;s parse tree etc.
Definition: HashJoin.cpp:283

+ Here is the call graph for this function:

JoinLoop::HoistedFiltersCallback Executor::buildHoistLeftHandSideFiltersCb ( const RelAlgExecutionUnit ra_exe_unit,
const size_t  level_idx,
const int  inner_table_id,
const CompilationOptions co 
)
private

Definition at line 768 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CodeGenerator::codegen(), g_enable_left_join_filter_hoisting, RelAlgExecutionUnit::join_quals, LEFT, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, CodeGenerator::toBool(), and VLOG.

772  {
774  return nullptr;
775  }
776 
777  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
778  if (level_idx == 0 && current_level_join_conditions.type == JoinType::LEFT) {
779  const auto& condition = current_level_join_conditions.quals.front();
780  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(condition.get());
781  CHECK(bin_oper) << condition->toString();
782  const auto rhs =
783  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_right_operand());
784  const auto lhs =
785  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_left_operand());
786  if (lhs && rhs && lhs->get_table_id() != rhs->get_table_id()) {
787  const Analyzer::ColumnVar* selected_lhs{nullptr};
788  // grab the left hand side column -- this is somewhat similar to normalize column
789  // pair, and a better solution may be to hoist that function out of the join
790  // framework and normalize columns at the top of build join loops
791  if (lhs->get_table_id() == inner_table_id) {
792  selected_lhs = rhs;
793  } else if (rhs->get_table_id() == inner_table_id) {
794  selected_lhs = lhs;
795  }
796  if (selected_lhs) {
797  std::list<std::shared_ptr<Analyzer::Expr>> hoisted_quals;
798  // get all LHS-only filters
799  auto should_hoist_qual = [&hoisted_quals](const auto& qual, const int table_id) {
800  CHECK(qual);
801 
802  ExprTableIdVisitor visitor;
803  const auto table_ids = visitor.visit(qual.get());
804  if (table_ids.size() == 1 && table_ids.find(table_id) != table_ids.end()) {
805  hoisted_quals.push_back(qual);
806  }
807  };
808  for (const auto& qual : ra_exe_unit.simple_quals) {
809  should_hoist_qual(qual, selected_lhs->get_table_id());
810  }
811  for (const auto& qual : ra_exe_unit.quals) {
812  should_hoist_qual(qual, selected_lhs->get_table_id());
813  }
814 
815  // build the filters callback and return it
816  if (!hoisted_quals.empty()) {
817  return [this, hoisted_quals, co](llvm::BasicBlock* true_bb,
818  llvm::BasicBlock* exit_bb,
819  const std::string& loop_name,
820  llvm::Function* parent_func,
821  CgenState* cgen_state) -> llvm::BasicBlock* {
822  // make sure we have quals to hoist
823  bool has_quals_to_hoist = false;
824  for (const auto& qual : hoisted_quals) {
825  // check to see if the filter was previously hoisted. if all filters were
826  // previously hoisted, this callback becomes a noop
827  if (plan_state_->hoisted_filters_.count(qual) == 0) {
828  has_quals_to_hoist = true;
829  break;
830  }
831  }
832 
833  if (!has_quals_to_hoist) {
834  return nullptr;
835  }
836 
837  AUTOMATIC_IR_METADATA(cgen_state);
838 
839  llvm::IRBuilder<>& builder = cgen_state->ir_builder_;
840  auto& context = builder.getContext();
841 
842  const auto filter_bb =
843  llvm::BasicBlock::Create(context,
844  "hoisted_left_join_filters_" + loop_name,
845  parent_func,
846  /*insert_before=*/true_bb);
847  builder.SetInsertPoint(filter_bb);
848 
849  llvm::Value* filter_lv = cgen_state_->llBool(true);
850  CodeGenerator code_generator(this);
852  for (const auto& qual : hoisted_quals) {
853  if (plan_state_->hoisted_filters_.insert(qual).second) {
854  // qual was inserted into the hoisted filters map, which means we have not
855  // seen this qual before. Generate filter.
856  VLOG(1) << "Generating code for hoisted left hand side qualifier "
857  << qual->toString();
858  auto cond = code_generator.toBool(
859  code_generator.codegen(qual.get(), true, co).front());
860  filter_lv = builder.CreateAnd(filter_lv, cond);
861  }
862  }
863  CHECK(filter_lv->getType()->isIntegerTy(1));
864 
865  builder.CreateCondBr(filter_lv, true_bb, exit_bb);
866  return filter_bb;
867  };
868  }
869  }
870  }
871  }
872  return nullptr;
873 }
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:100
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1288
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:222
#define VLOG(n)
Definition: Logger.h:316
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit ra_exe_unit,
const size_t  level_idx,
const CompilationOptions co 
)
private

Definition at line 876 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, CodeGenerator::codegen(), CompilationOptions::filter_on_deleted_column, RelAlgExecutionUnit::input_descs, TABLE, and CodeGenerator::toBool().

878  {
880  if (!co.filter_on_deleted_column) {
881  return nullptr;
882  }
883  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
884  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
885  if (input_desc.getSourceType() != InputSourceType::TABLE) {
886  return nullptr;
887  }
888 
889  const auto deleted_cd = plan_state_->getDeletedColForTable(input_desc.getTableId());
890  if (!deleted_cd) {
891  return nullptr;
892  }
893  CHECK(deleted_cd->columnType.is_boolean());
894  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
895  input_desc.getTableId(),
896  deleted_cd->columnId,
897  input_desc.getNestLevel());
898  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
899  llvm::Value* have_more_inner_rows) {
900  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
901  // Avoid fetching the deleted column from a position which is not valid.
902  // An invalid position can be returned by a one to one hash lookup (negative)
903  // or at the end of iteration over a set of matching values.
904  llvm::Value* is_valid_it{nullptr};
905  if (have_more_inner_rows) {
906  is_valid_it = have_more_inner_rows;
907  } else {
908  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
909  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
910  }
911  const auto it_valid_bb = llvm::BasicBlock::Create(
912  cgen_state_->context_, "it_valid", cgen_state_->current_func_);
913  const auto it_not_valid_bb = llvm::BasicBlock::Create(
914  cgen_state_->context_, "it_not_valid", cgen_state_->current_func_);
915  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
916  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
917  cgen_state_->context_, "row_is_deleted", cgen_state_->current_func_);
918  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
919  CodeGenerator code_generator(this);
920  const auto row_is_deleted = code_generator.toBool(
921  code_generator.codegen(deleted_expr.get(), true, co).front());
922  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
923  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
924  const auto row_is_deleted_default = cgen_state_->llBool(false);
925  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
926  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
927  auto row_is_deleted_or_default =
928  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
929  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
930  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
931  return row_is_deleted_or_default;
932  };
933 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1288
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:232
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1094
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache 
)
private

Definition at line 523 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), INJECT_TIMER, CgenState::ir_builder_, RelAlgExecutionUnit::join_quals, LEFT, PlanState::left_join_non_hashtable_quals_, CgenState::llBool(), MultiSet, OneToOne, CgenState::outer_join_match_found_per_level_, CodeGenerator::plan_state_, Set, Singleton, JoinLoopDomain::slot_lookup_result, CodeGenerator::toBool(), and JoinLoopDomain::values_buffer.

528  {
531  std::vector<JoinLoop> join_loops;
532  for (size_t level_idx = 0, current_hash_table_idx = 0;
533  level_idx < ra_exe_unit.join_quals.size();
534  ++level_idx) {
535  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
536  std::vector<std::string> fail_reasons;
537  const auto current_level_hash_table =
538  buildCurrentLevelHashTable(current_level_join_conditions,
539  level_idx,
540  ra_exe_unit,
541  co,
542  query_infos,
543  column_cache,
544  fail_reasons);
545  const auto found_outer_join_matches_cb =
546  [this, level_idx](llvm::Value* found_outer_join_matches) {
547  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
548  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
549  cgen_state_->outer_join_match_found_per_level_[level_idx] =
550  found_outer_join_matches;
551  };
552  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
553  auto rem_left_join_quals_it =
554  plan_state_->left_join_non_hashtable_quals_.find(level_idx);
555  bool has_remaining_left_join_quals =
556  rem_left_join_quals_it != plan_state_->left_join_non_hashtable_quals_.end() &&
557  !rem_left_join_quals_it->second.empty();
558  const auto outer_join_condition_remaining_quals_cb =
559  [this, level_idx, &co](const std::vector<llvm::Value*>& prev_iters) {
560  // when we have multiple quals for the left join in the current join level
561  // we first try to build a hashtable by using one of the possible qual,
562  // and deal with remaining quals as extra join conditions
563  FetchCacheAnchor anchor(cgen_state_.get());
564  addJoinLoopIterator(prev_iters, level_idx + 1);
565  llvm::Value* left_join_cond = cgen_state_->llBool(true);
566  CodeGenerator code_generator(this);
567  auto it = plan_state_->left_join_non_hashtable_quals_.find(level_idx);
568  if (it != plan_state_->left_join_non_hashtable_quals_.end()) {
569  for (auto expr : it->second) {
570  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
571  left_join_cond,
572  code_generator.toBool(
573  code_generator.codegen(expr.get(), true, co).front()));
574  }
575  }
576  return left_join_cond;
577  };
578  if (current_level_hash_table) {
579  const auto hoisted_filters_cb = buildHoistLeftHandSideFiltersCb(
580  ra_exe_unit, level_idx, current_level_hash_table->getInnerTableId(), co);
581  if (current_level_hash_table->getHashType() == HashType::OneToOne) {
582  join_loops.emplace_back(
583  /*kind=*/JoinLoopKind::Singleton,
584  /*type=*/current_level_join_conditions.type,
585  /*iteration_domain_codegen=*/
586  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
587  const std::vector<llvm::Value*>& prev_iters) {
588  addJoinLoopIterator(prev_iters, level_idx);
589  JoinLoopDomain domain{{0}};
590  domain.slot_lookup_result =
591  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
592  return domain;
593  },
594  /*outer_condition_match=*/
595  current_level_join_conditions.type == JoinType::LEFT &&
596  has_remaining_left_join_quals
597  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
598  outer_join_condition_remaining_quals_cb)
599  : nullptr,
600  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
601  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
602  : nullptr,
603  /*hoisted_filters=*/hoisted_filters_cb,
604  /*is_deleted=*/is_deleted_cb,
605  /*nested_loop_join=*/false);
606  } else if (auto range_join_table =
607  dynamic_cast<RangeJoinHashTable*>(current_level_hash_table.get())) {
608  join_loops.emplace_back(
609  /* kind= */ JoinLoopKind::MultiSet,
610  /* type= */ current_level_join_conditions.type,
611  /* iteration_domain_codegen= */
612  [this,
613  range_join_table,
614  current_hash_table_idx,
615  level_idx,
616  current_level_hash_table,
617  &co](const std::vector<llvm::Value*>& prev_iters) {
618  addJoinLoopIterator(prev_iters, level_idx);
619  JoinLoopDomain domain{{0}};
620  CHECK(!prev_iters.empty());
621  const auto matching_set = range_join_table->codegenMatchingSetWithOffset(
622  co, current_hash_table_idx, prev_iters.back());
623  domain.values_buffer = matching_set.elements;
624  domain.element_count = matching_set.count;
625  return domain;
626  },
627  /* outer_condition_match= */
628  current_level_join_conditions.type == JoinType::LEFT
629  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
630  outer_join_condition_remaining_quals_cb)
631  : nullptr,
632  /* found_outer_matches= */
633  current_level_join_conditions.type == JoinType::LEFT
634  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
635  : nullptr,
636  /* hoisted_filters= */ nullptr, // <<! TODO
637  /* is_deleted= */ is_deleted_cb,
638  /*nested_loop_join=*/false);
639  } else {
640  join_loops.emplace_back(
641  /*kind=*/JoinLoopKind::Set,
642  /*type=*/current_level_join_conditions.type,
643  /*iteration_domain_codegen=*/
644  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
645  const std::vector<llvm::Value*>& prev_iters) {
646  addJoinLoopIterator(prev_iters, level_idx);
647  JoinLoopDomain domain{{0}};
648  const auto matching_set = current_level_hash_table->codegenMatchingSet(
649  co, current_hash_table_idx);
650  domain.values_buffer = matching_set.elements;
651  domain.element_count = matching_set.count;
652  return domain;
653  },
654  /*outer_condition_match=*/
655  current_level_join_conditions.type == JoinType::LEFT
656  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
657  outer_join_condition_remaining_quals_cb)
658  : nullptr,
659  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
660  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
661  : nullptr,
662  /*hoisted_filters=*/hoisted_filters_cb,
663  /*is_deleted=*/is_deleted_cb,
664  /*nested_loop_join=*/false);
665  }
666  ++current_hash_table_idx;
667  } else {
668  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
669  ? "No equijoin expression found"
670  : boost::algorithm::join(fail_reasons, " | ");
672  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
673  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
674  // condition.
675  VLOG(1) << "Unable to build hash table, falling back to loop join: "
676  << fail_reasons_str;
677  const auto outer_join_condition_cb =
678  [this, level_idx, &co, &current_level_join_conditions](
679  const std::vector<llvm::Value*>& prev_iters) {
680  // The values generated for the match path don't dominate all uses
681  // since on the non-match path nulls are generated. Reset the cache
682  // once the condition is generated to avoid incorrect reuse.
683  FetchCacheAnchor anchor(cgen_state_.get());
684  addJoinLoopIterator(prev_iters, level_idx + 1);
685  llvm::Value* left_join_cond = cgen_state_->llBool(true);
686  CodeGenerator code_generator(this);
687  for (auto expr : current_level_join_conditions.quals) {
688  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
689  left_join_cond,
690  code_generator.toBool(
691  code_generator.codegen(expr.get(), true, co).front()));
692  }
693  return left_join_cond;
694  };
695  join_loops.emplace_back(
696  /*kind=*/JoinLoopKind::UpperBound,
697  /*type=*/current_level_join_conditions.type,
698  /*iteration_domain_codegen=*/
699  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
700  addJoinLoopIterator(prev_iters, level_idx);
701  JoinLoopDomain domain{{0}};
702  auto* arg = get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan");
703  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
704  arg->getType()->getScalarType()->getPointerElementType(),
705  arg,
706  cgen_state_->llInt(int32_t(level_idx + 1)));
707  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(
708  rows_per_scan_ptr->getType()->getPointerElementType(),
709  rows_per_scan_ptr,
710  "num_rows_per_scan");
711  return domain;
712  },
713  /*outer_condition_match=*/
714  current_level_join_conditions.type == JoinType::LEFT
715  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
716  outer_join_condition_cb)
717  : nullptr,
718  /*found_outer_matches=*/
719  current_level_join_conditions.type == JoinType::LEFT
720  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
721  : nullptr,
722  /*hoisted_filters=*/nullptr,
723  /*is_deleted=*/is_deleted_cb,
724  /*nested_loop_join=*/true);
725  }
726  }
727  return join_loops;
728 }
llvm::Value * values_buffer
Definition: JoinLoop.h:49
std::string join(T const &container, std::string const &delim)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
#define INJECT_TIMER(DESC)
Definition: measure.h:93
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1288
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * slot_lookup_result
Definition: JoinLoop.h:47
#define CHECK_LT(x, y)
Definition: Logger.h:232
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:935
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1094
#define CHECK(condition)
Definition: Logger.h:222
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
Definition: IRCodegen.cpp:484
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const int inner_table_id, const CompilationOptions &co)
Definition: IRCodegen.cpp:768
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:523
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:876
#define VLOG(n)
Definition: Logger.h:316

+ Here is the call graph for this function:

void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3245 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LT, getFragmentCount(), RelAlgExecutionUnit::input_descs, and plan_state_.

Referenced by fetchChunks().

3250  {
3251  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
3252  size_t frag_pos{0};
3253  const auto& input_descs = ra_exe_unit.input_descs;
3254  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3255  const int table_id = input_descs[scan_idx].getTableId();
3256  CHECK_EQ(selected_fragments[scan_idx].table_id, table_id);
3257  selected_fragments_crossjoin.push_back(
3258  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
3259  for (const auto& col_id : col_global_ids) {
3260  CHECK(col_id);
3261  const auto& input_desc = col_id->getScanDesc();
3262  if (input_desc.getTableId() != table_id ||
3263  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
3264  continue;
3265  }
3266  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3267  CHECK(it != plan_state_->global_to_local_col_ids_.end());
3268  CHECK_LT(static_cast<size_t>(it->second),
3269  plan_state_->global_to_local_col_ids_.size());
3270  local_col_to_frag_pos[it->second] = frag_pos;
3271  }
3272  ++frag_pos;
3273  }
3274 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::vector< InputDescriptor > input_descs
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1288
#define CHECK_LT(x, y)
Definition: Logger.h:232
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3231
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::buildSelectedFragsMappingForUnion ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3276 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs.

Referenced by fetchUnionChunks().

3279  {
3280  const auto& input_descs = ra_exe_unit.input_descs;
3281  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3282  // selected_fragments is set in assignFragsToKernelDispatch execution_kernel.fragments
3283  if (selected_fragments[0].table_id == input_descs[scan_idx].getTableId()) {
3284  selected_fragments_crossjoin.push_back({size_t(1)});
3285  }
3286  }
3287 }
std::vector< InputDescriptor > input_descs

+ Here is the caller graph for this function:

FragmentSkipStatus Executor::canSkipFragmentForFpQual ( const Analyzer::BinOper comp_expr,
const Analyzer::ColumnVar lhs_col,
const Fragmenter_Namespace::FragmentInfo fragment,
const Analyzer::Constant rhs_const 
) const
private

Definition at line 4030 of file Execute.cpp.

References CHECK, extract_max_stat_fp_type(), extract_min_stat_fp_type(), Analyzer::ColumnVar::get_column_id(), Analyzer::Constant::get_constval(), Analyzer::BinOper::get_optype(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), INVALID, kDOUBLE, kEQ, kFLOAT, kGE, kGT, kLE, kLT, NOT_SKIPPABLE, and SKIPPABLE.

Referenced by skipFragment().

4034  {
4035  const int col_id = lhs_col->get_column_id();
4036  auto chunk_meta_it = fragment.getChunkMetadataMap().find(col_id);
4037  if (chunk_meta_it == fragment.getChunkMetadataMap().end()) {
4039  }
4040  double chunk_min{0.};
4041  double chunk_max{0.};
4042  const auto& chunk_type = lhs_col->get_type_info();
4043  chunk_min = extract_min_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4044  chunk_max = extract_max_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4045  if (chunk_min > chunk_max) {
4047  }
4048 
4049  const auto datum_fp = rhs_const->get_constval();
4050  const auto rhs_type = rhs_const->get_type_info().get_type();
4051  CHECK(rhs_type == kFLOAT || rhs_type == kDOUBLE);
4052 
4053  // Do we need to codegen the constant like the integer path does?
4054  const auto rhs_val = rhs_type == kFLOAT ? datum_fp.floatval : datum_fp.doubleval;
4055 
4056  // Todo: dedup the following comparison code with the integer/timestamp path, it is
4057  // slightly tricky due to do cleanly as we do not have rowid on this path
4058  switch (comp_expr->get_optype()) {
4059  case kGE:
4060  if (chunk_max < rhs_val) {
4062  }
4063  break;
4064  case kGT:
4065  if (chunk_max <= rhs_val) {
4067  }
4068  break;
4069  case kLE:
4070  if (chunk_min > rhs_val) {
4072  }
4073  break;
4074  case kLT:
4075  if (chunk_min >= rhs_val) {
4077  }
4078  break;
4079  case kEQ:
4080  if (chunk_min > rhs_val || chunk_max < rhs_val) {
4082  }
4083  break;
4084  default:
4085  break;
4086  }
4088 }
double extract_max_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
Definition: sqldefs.h:34
Definition: sqldefs.h:35
Definition: sqldefs.h:29
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
SQLOps get_optype() const
Definition: Analyzer.h:447
double extract_min_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
const ChunkMetadataMap & getChunkMetadataMap() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:82
Definition: sqldefs.h:33
Datum get_constval() const
Definition: Analyzer.h:343
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqldefs.h:32
int get_column_id() const
Definition: Analyzer.h:202

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::castToFP ( llvm::Value *  value,
SQLTypeInfo const &  from_ti,
SQLTypeInfo const &  to_ti 
)
private

Definition at line 3845 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, exp_to_scale(), logger::FATAL, SQLTypeInfo::get_scale(), SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_number(), and LOG.

3847  {
3849  if (value->getType()->isIntegerTy() && from_ti.is_number() && to_ti.is_fp() &&
3850  (!from_ti.is_fp() || from_ti.get_size() != to_ti.get_size())) {
3851  llvm::Type* fp_type{nullptr};
3852  switch (to_ti.get_size()) {
3853  case 4:
3854  fp_type = llvm::Type::getFloatTy(cgen_state_->context_);
3855  break;
3856  case 8:
3857  fp_type = llvm::Type::getDoubleTy(cgen_state_->context_);
3858  break;
3859  default:
3860  LOG(FATAL) << "Unsupported FP size: " << to_ti.get_size();
3861  }
3862  value = cgen_state_->ir_builder_.CreateSIToFP(value, fp_type);
3863  if (from_ti.get_scale()) {
3864  value = cgen_state_->ir_builder_.CreateFDiv(
3865  value,
3866  llvm::ConstantFP::get(value->getType(), exp_to_scale(from_ti.get_scale())));
3867  }
3868  }
3869  return value;
3870 }
#define LOG(tag)
Definition: Logger.h:216
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
#define AUTOMATIC_IR_METADATA(CGENSTATE)
uint64_t exp_to_scale(const unsigned exp)

+ Here is the call graph for this function:

llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 3872 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, CHECK, CHECK_LT, and get_int_type().

3872  {
3874  CHECK(val->getType()->isPointerTy());
3875 
3876  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
3877  const auto val_type = val_ptr_type->getPointerElementType();
3878  size_t val_width = 0;
3879  if (val_type->isIntegerTy()) {
3880  val_width = val_type->getIntegerBitWidth();
3881  } else {
3882  if (val_type->isFloatTy()) {
3883  val_width = 32;
3884  } else {
3885  CHECK(val_type->isDoubleTy());
3886  val_width = 64;
3887  }
3888  }
3889  CHECK_LT(size_t(0), val_width);
3890  if (bitWidth == val_width) {
3891  return val;
3892  }
3893  return cgen_state_->ir_builder_.CreateBitCast(
3894  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
3895 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:232
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

bool Executor::checkCurrentQuerySession ( const std::string &  candidate_query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4422 of file Execute.cpp.

References current_query_session_.

4424  {
4425  // if current_query_session is equal to the candidate_query_session,
4426  // or it is empty session we consider
4427  return !candidate_query_session.empty() &&
4428  (current_query_session_ == candidate_query_session);
4429 }
QuerySessionId current_query_session_
Definition: Execute.h:1332
bool Executor::checkIsQuerySessionEnrolled ( const QuerySessionId query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4696 of file Execute.cpp.

References queries_session_map_.

Referenced by executeWorkUnitImpl().

4698  {
4699  if (query_session.empty()) {
4700  return false;
4701  }
4702  return !query_session.empty() && queries_session_map_.count(query_session);
4703 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1336

+ Here is the caller graph for this function:

bool Executor::checkIsQuerySessionInterrupted ( const std::string &  query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4685 of file Execute.cpp.

References queries_interrupt_flag_.

Referenced by executePlanWithGroupBy(), executePlanWithoutGroupBy(), fetchChunks(), and fetchUnionChunks().

4687  {
4688  if (query_session.empty()) {
4689  return false;
4690  }
4691  auto flag_it = queries_interrupt_flag_.find(query_session);
4692  return !query_session.empty() && flag_it != queries_interrupt_flag_.end() &&
4693  flag_it->second;
4694 }
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1334

+ Here is the caller graph for this function:

bool Executor::checkNonKernelTimeInterrupted ( ) const

Definition at line 4773 of file Execute.cpp.

References current_query_session_, executor_id_, executor_session_mutex_, queries_interrupt_flag_, and UNITARY_EXECUTOR_ID.

4773  {
4774  // this function should be called within an executor which is assigned
4775  // to the specific query thread (that indicates we already enroll the session)
4776  // check whether this is called from non unitary executor
4778  return false;
4779  };
4781  auto flag_it = queries_interrupt_flag_.find(current_query_session_);
4782  return !current_query_session_.empty() && flag_it != queries_interrupt_flag_.end() &&
4783  flag_it->second;
4784 }
QuerySessionId current_query_session_
Definition: Execute.h:1332
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1232
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1334
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1330
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:376
void Executor::checkPendingQueryStatus ( const QuerySessionId query_session)

Definition at line 4466 of file Execute.cpp.

References ERR_INTERRUPTED, executor_session_mutex_, queries_interrupt_flag_, queries_session_map_, and VLOG.

4466  {
4467  // check whether we are okay to execute the "pending" query
4468  // i.e., before running the query check if this query session is "ALREADY" interrupted
4470  if (query_session.empty()) {
4471  return;
4472  }
4473  if (queries_interrupt_flag_.find(query_session) == queries_interrupt_flag_.end()) {
4474  // something goes wrong since we assume this is caller's responsibility
4475  // (call this function only for enrolled query session)
4476  if (!queries_session_map_.count(query_session)) {
4477  VLOG(1) << "Interrupting pending query is not available since the query session is "
4478  "not enrolled";
4479  } else {
4480  // here the query session is enrolled but the interrupt flag is not registered
4481  VLOG(1)
4482  << "Interrupting pending query is not available since its interrupt flag is "
4483  "not registered";
4484  }
4485  return;
4486  }
4487  if (queries_interrupt_flag_[query_session]) {
4489  }
4490 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1336
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1378
std::shared_lock< T > shared_lock
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1334
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1330
#define VLOG(n)
Definition: Logger.h:316
void Executor::clearCaches ( bool  runtime_only = false)
static void Executor::clearExternalCaches ( bool  for_update,
const TableDescriptor td,
const int  current_db_id 
)
inlinestatic

Definition at line 391 of file Execute.h.

References TableDescriptor::getTableChunkKey(), CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable().

Referenced by Parser::InsertIntoTableAsSelectStmt::execute(), Parser::DropTableStmt::execute(), Parser::TruncateTableStmt::execute(), Parser::DropColumnStmt::execute(), Parser::CopyTableStmt::execute(), RelAlgExecutor::executeDelete(), and RelAlgExecutor::executeUpdate().

// Invalidates external caches for a single table when a descriptor is given,
// otherwise clears them entirely. `for_update` selects which invalidator set
// is used (update-triggered vs. general), per the branch structure below.
// NOTE(review): this is a Doxygen-rendered listing; the invalidator calls that
// occupied source lines 399/401/403 and 409/411/413 were hyperlinks dropped by
// extraction (the References list names
// CacheInvalidator<...>::invalidateCachesByTable() and ::invalidateCaches()),
// which is why the if/else branches below appear empty — confirm against
// Execute.h:391 before relying on this listing.
393  {
394  bool clearEntireCache = true;
395  if (td) {
// Per-table path: derive the table's chunk-key prefix and hash it to a key.
396  const auto& table_chunk_key_prefix = td->getTableChunkKey(current_db_id);
397  if (!table_chunk_key_prefix.empty()) {
398  auto table_key = boost::hash_value(table_chunk_key_prefix);
// (dropped) invalidateCachesByTable(table_key) calls for the chosen invalidator set.
400  if (for_update) {
402  } else {
404  }
405  clearEntireCache = false;
406  }
407  }
408  if (clearEntireCache) {
// Fallback path: no usable table key, so (dropped) invalidateCaches() calls
// clear everything for the chosen invalidator set.
410  if (for_update) {
412  } else {
414  }
415  }
416  }
static void invalidateCachesByTable(size_t table_key)
static void invalidateCaches()
std::vector< int > getTableChunkKey(const int getCurrentDBId) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 501 of file Execute.cpp.

References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, execute_mutex_, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by DBHandler::clear_cpu_memory(), DBHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

// Clears buffer-manager memory at the requested level (CPU or GPU); any other
// level throws. Serialized against running queries via execute_mutex_.
// NOTE(review): Doxygen-rendered listing with extraction loss — the case
// labels for CPU_LEVEL/GPU_LEVEL (lines 503-505), the unique_lock on
// execute_mutex_, the CacheInvalidator<...>::invalidateCaches() call (line
// 513) and the DataMgr::clearMemory(memory_level) call (lines 515-516) were
// hyperlinks and are missing below; the References list names all of them.
// Confirm against Execute.cpp:501 before relying on this listing.
501  {
502  switch (memory_level) {
// (dropped) case Data_Namespace::MemoryLevel::CPU_LEVEL / GPU_LEVEL labels and
// the unique_lock acquiring execute_mutex_ precede this line:
506  execute_mutex_); // Don't flush memory while queries are running
507 
508  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
509  // The hash table cache uses CPU memory not managed by the buffer manager. In the
510  // future, we should manage these allocations with the buffer manager directly.
511  // For now, assume the user wants to purge the hash table cache when they clear
512  // CPU memory (currently used in ExecuteTest to lower memory pressure)
// (dropped) CacheInvalidator invalidateCaches() call:
514  }
// (dropped) SysCatalog::instance().getDataMgr().clearMemory(memory_level) call:
517  break;
518  }
519  default: {
520  throw std::runtime_error(
521  "Clearing memory levels other than the CPU level or GPU level is not "
522  "supported.");
523  }
524  }
525 }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1341
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:434
static void invalidateCaches()
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:232
static SysCatalog & instance()
Definition: SysCatalog.h:341
std::unique_lock< T > unique_lock

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMetaInfoCache ( )
private

Definition at line 764 of file Execute.cpp.

References agg_col_range_cache_, TableGenerations::clear(), AggregatedColRange::clear(), InputTableInfoCache::clear(), input_table_info_cache_, and table_generations_.

764  {
768 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1328
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1327
TableGenerations table_generations_
Definition: Execute.h:1329

+ Here is the call graph for this function:

void Executor::clearQuerySessionStatus ( const QuerySessionId query_session,
const std::string &  submitted_time_str 
)

Definition at line 4492 of file Execute.cpp.

References current_query_session_, executor_session_mutex_, invalidateRunningQuerySession(), removeFromQuerySessionList(), and resetInterrupt().

4493  {
4495  // clear the interrupt-related info for a finished query
4496  if (query_session.empty()) {
4497  return;
4498  }
4499  removeFromQuerySessionList(query_session, submitted_time_str, session_write_lock);
4500  if (query_session.compare(current_query_session_) == 0) {
4501  invalidateRunningQuerySession(session_write_lock);
4502  resetInterrupt();
4503  }
4504 }
QuerySessionId current_query_session_
Definition: Execute.h:1332
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4640
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1330
void resetInterrupt()
void invalidateRunningQuerySession(heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4444

+ Here is the call graph for this function:

llvm::Value * Executor::codegenAggregateWindowState ( )
private

Definition at line 1336 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.

1336  {
1338  const auto pi32_type =
1339  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1340  const auto pi64_type =
1341  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1342  const auto window_func_context =
1344  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
1345  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1346  const auto aggregate_state_type =
1347  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1348  auto aggregate_state = aggregateWindowStatePtr();
1349  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1350  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1351  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1352  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
1353  aggregate_state_count_i64, aggregate_state_type);
1354  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1355  switch (window_func_ti.get_type()) {
1356  case kFLOAT: {
1357  return cgen_state_->emitCall(
1358  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
1359  }
1360  case kDOUBLE: {
1361  return cgen_state_->emitCall(
1362  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
1363  }
1364  case kDECIMAL: {
1365  return cgen_state_->emitCall(
1366  "load_avg_decimal",
1367  {aggregate_state,
1368  aggregate_state_count,
1369  double_null_lv,
1370  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
1371  }
1372  default: {
1373  return cgen_state_->emitCall(
1374  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
1375  }
1376  }
1377  }
1378  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1379  return cgen_state_->ir_builder_.CreateLoad(
1380  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1381  }
1382  switch (window_func_ti.get_type()) {
1383  case kFLOAT: {
1384  return cgen_state_->emitCall("load_float", {aggregate_state});
1385  }
1386  case kDOUBLE: {
1387  return cgen_state_->emitCall("load_double", {aggregate_state});
1388  }
1389  default: {
1390  return cgen_state_->ir_builder_.CreateLoad(
1391  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1392  }
1393  }
1394 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2406
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenCurrentPartitionIndex ( const WindowFunctionContext window_func_context,
llvm::Value *  current_row_pos_lv 
)
private

Definition at line 603 of file WindowFunctionIR.cpp.

References get_int_type(), WindowFunctionContext::partitionCount(), and WindowFunctionContext::partitionNumCountBuf().

605  {
606  const auto pi64_type =
607  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
608  // Given the current row's position, calculate the index of the partition it belongs to.
609  auto partition_count_lv = cgen_state_->llInt(window_func_context->partitionCount());
610  auto partition_num_count_buf_lv = cgen_state_->llInt(
611  reinterpret_cast<int64_t>(window_func_context->partitionNumCountBuf()));
612  auto partition_num_count_ptr_lv =
613  cgen_state_->ir_builder_.CreateIntToPtr(partition_num_count_buf_lv, pi64_type);
614  return cgen_state_->emitCall(
615  "compute_int64_t_lower_bound",
616  {partition_count_lv, current_row_pos_lv, partition_num_count_ptr_lv});
617 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
size_t partitionCount() const
const int64_t * partitionNumCountBuf() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenFrameBoundExpr ( const Analyzer::WindowFunction window_func,
const Analyzer::WindowFrame frame_bound,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 558 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegen(), EXPR_FOLLOWING, EXPR_PRECEDING, g_cluster, SQLTypeInfo::get_size(), Analyzer::Expr::get_type_info(), Analyzer::WindowFrame::getBoundExpr(), Analyzer::WindowFunction::getOrderKeys(), Analyzer::WindowFunction::hasRangeModeFraming(), kBIGINT, kINT, and kSMALLINT.

561  {
562  auto needs_bound_expr_codegen = [](const Analyzer::WindowFrame* window_frame) {
563  return window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_FOLLOWING ||
564  window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_PRECEDING;
565  };
566  const auto order_col_ti = window_func->getOrderKeys().front()->get_type_info();
567  auto encode_date_col_val = [&order_col_ti, this](llvm::Value* bound_expr_lv) {
568  if (order_col_ti.get_comp_param() == 16) {
569  return cgen_state_->emitCall(
570  "fixed_width_date_encode_noinline",
571  {bound_expr_lv,
572  cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(SQLTypeInfo(kSMALLINT)),
573  32),
574  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
575  } else {
576  return cgen_state_->emitCall("fixed_width_date_encode_noinline",
577  {bound_expr_lv,
578  cgen_state_->inlineIntNull(SQLTypeInfo(kINT)),
579  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
580  }
581  };
582  llvm::Value* bound_expr_lv{nullptr};
583  if (needs_bound_expr_codegen(frame_bound)) {
584  auto bound_expr_lvs = code_generator.codegen(frame_bound->getBoundExpr(), true, co);
585  bound_expr_lv = bound_expr_lvs.front();
586  if (order_col_ti.is_date() && window_func->hasRangeModeFraming()) {
587  if (g_cluster) {
588  throw std::runtime_error(
589  "Range mode with date type ordering column is not supported yet.");
590  }
591  bound_expr_lv = encode_date_col_val(bound_expr_lv);
592  }
593  if (frame_bound->getBoundExpr()->get_type_info().get_size() != 8) {
594  bound_expr_lv = cgen_state_->castToTypeIn(bound_expr_lv, 64);
595  }
596  } else {
597  bound_expr_lv = cgen_state_->llInt((int64_t)-1);
598  }
599  CHECK(bound_expr_lv);
600  return bound_expr_lv;
601 }
bool hasRangeModeFraming() const
Definition: Analyzer.h:2440
HOST DEVICE int get_size() const
Definition: sqltypes.h:414
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2414
const Analyzer::Expr * getBoundExpr() const
Definition: Analyzer.h:2340
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:82
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK(condition)
Definition: Logger.h:222
bool g_cluster
Definition: sqltypes.h:59

+ Here is the call graph for this function:

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const ExecutionOptions eo 
)
private

Definition at line 1112 of file IRCodegen.cpp.

References ExecutionOptions::allow_runtime_query_interrupt, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, JoinLoop::codegen(), CompilationOptions::device_type, JoinLoopDomain::element_count, get_int_array_type(), get_int_type(), INNER, MultiSet, CodeGenerator::posArg(), GroupByAndAggregate::query_infos_, query_mem_desc, Set, and ExecutionOptions::with_dynamic_watchdog.

1119  {
1121  const auto exit_bb =
1122  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->current_func_);
1123  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
1124  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1125  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1126  CodeGenerator code_generator(this);
1127 
1128  llvm::BasicBlock* loops_entry_bb{nullptr};
1129  auto has_range_join =
1130  std::any_of(join_loops.begin(), join_loops.end(), [](const auto& join_loop) {
1131  return join_loop.kind() == JoinLoopKind::MultiSet;
1132  });
1133  if (has_range_join) {
1134  CHECK_EQ(join_loops.size(), size_t(1));
1135  const auto element_count =
1136  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 9);
1137 
1138  auto compute_packed_offset = [](const int32_t x, const int32_t y) -> uint64_t {
1139  const uint64_t y_shifted = static_cast<uint64_t>(y) << 32;
1140  return y_shifted | static_cast<uint32_t>(x);
1141  };
1142 
1143  const auto values_arr = std::vector<llvm::Constant*>{
1144  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
1145  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1146  compute_packed_offset(0, 1)),
1147  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1148  compute_packed_offset(0, -1)),
1149  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1150  compute_packed_offset(1, 0)),
1151  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1152  compute_packed_offset(1, 1)),
1153  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1154  compute_packed_offset(1, -1)),
1155  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1156  compute_packed_offset(-1, 0)),
1157  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1158  compute_packed_offset(-1, 1)),
1159  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1160  compute_packed_offset(-1, -1))};
1161 
1162  const auto constant_values_array = llvm::ConstantArray::get(
1163  get_int_array_type(64, 9, cgen_state_->context_), values_arr);
1164  CHECK(cgen_state_->module_);
1165  const auto values =
1166  new llvm::GlobalVariable(*cgen_state_->module_,
1167  get_int_array_type(64, 9, cgen_state_->context_),
1168  true,
1169  llvm::GlobalValue::LinkageTypes::InternalLinkage,
1170  constant_values_array);
1171  JoinLoop join_loop(
1174  [element_count, values](const std::vector<llvm::Value*>& v) {
1175  JoinLoopDomain domain{{0}};
1176  domain.element_count = element_count;
1177  domain.values_buffer = values;
1178  return domain;
1179  },
1180  nullptr,
1181  nullptr,
1182  nullptr,
1183  nullptr,
1184  "range_key_loop");
1185 
1186  loops_entry_bb = JoinLoop::codegen(
1187  {join_loop},
1188  [this,
1189  query_func,
1190  &query_mem_desc,
1191  &co,
1192  &eo,
1193  &group_by_and_aggregate,
1194  &join_loops,
1195  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1196  auto& builder = cgen_state_->ir_builder_;
1197 
1198  auto body_exit_bb =
1199  llvm::BasicBlock::Create(cgen_state_->context_,
1200  "range_key_inner_body_exit",
1201  builder.GetInsertBlock()->getParent());
1202 
1203  auto range_key_body_bb =
1204  llvm::BasicBlock::Create(cgen_state_->context_,
1205  "range_key_loop_body",
1206  builder.GetInsertBlock()->getParent());
1207  builder.SetInsertPoint(range_key_body_bb);
1208 
1209  const auto body_loops_entry_bb = JoinLoop::codegen(
1210  join_loops,
1211  [this,
1212  query_func,
1213  &query_mem_desc,
1214  &co,
1215  &eo,
1216  &group_by_and_aggregate,
1217  &join_loops,
1218  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1219  addJoinLoopIterator(prev_iters, join_loops.size());
1220  auto& builder = cgen_state_->ir_builder_;
1221  const auto loop_body_bb =
1222  llvm::BasicBlock::Create(builder.getContext(),
1223  "loop_body",
1224  builder.GetInsertBlock()->getParent());
1225  builder.SetInsertPoint(loop_body_bb);
1226  const bool can_return_error =
1227  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1228  if (can_return_error || cgen_state_->needs_error_check_ ||
1229  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1230  createErrorCheckControlFlow(query_func,
1231  eo.with_dynamic_watchdog,
1232  eo.allow_runtime_query_interrupt,
1233  join_loops,
1234  co.device_type,
1235  group_by_and_aggregate.query_infos_);
1236  }
1237  return loop_body_bb;
1238  },
1239  prev_iters.back(),
1240  body_exit_bb,
1241  cgen_state_.get());
1242 
1243  builder.SetInsertPoint(range_key_body_bb);
1244  cgen_state_->ir_builder_.CreateBr(body_loops_entry_bb);
1245 
1246  builder.SetInsertPoint(body_exit_bb);
1247  return range_key_body_bb;
1248  },
1249  code_generator.posArg(nullptr),
1250  exit_bb,
1251  cgen_state_.get());
1252  } else {
1253  loops_entry_bb = JoinLoop::codegen(
1254  join_loops,
1255  /*body_codegen=*/
1256  [this,
1257  query_func,
1258  &query_mem_desc,
1259  &co,
1260  &eo,
1261  &group_by_and_aggregate,
1262  &join_loops,
1263  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1265  addJoinLoopIterator(prev_iters, join_loops.size());
1266  auto& builder = cgen_state_->ir_builder_;
1267  const auto loop_body_bb = llvm::BasicBlock::Create(
1268  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
1269  builder.SetInsertPoint(loop_body_bb);
1270  const bool can_return_error =
1271  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1272  if (can_return_error || cgen_state_->needs_error_check_ ||
1273  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1274  createErrorCheckControlFlow(query_func,
1275  eo.with_dynamic_watchdog,
1276  eo.allow_runtime_query_interrupt,
1277  join_loops,
1278  co.device_type,
1279  group_by_and_aggregate.query_infos_);
1280  }
1281  return loop_body_bb;
1282  },
1283  /*outer_iter=*/code_generator.posArg(nullptr),
1284  exit_bb,
1285  cgen_state_.get());
1286  }
1287  CHECK(loops_entry_bb);
1288  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1289  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
1290 }
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
#define CHECK_EQ(x, y)
Definition: Logger.h:230
llvm::Value * element_count
Definition: JoinLoop.h:46
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, CgenState *cgen_state)
Definition: JoinLoop.cpp:50
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1094
#define CHECK(condition)
Definition: Logger.h:222
llvm::ArrayType * get_int_array_type(int const width, int count, llvm::LLVMContext &context)

+ Here is the call graph for this function:

llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3178 of file NativeCodegen.cpp.

3180  {
3182  if (!co.filter_on_deleted_column) {
3183  return nullptr;
3184  }
3185  CHECK(!ra_exe_unit.input_descs.empty());
3186  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
3187  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
3188  return nullptr;
3189  }
3190  const auto deleted_cd =
3191  plan_state_->getDeletedColForTable(outer_input_desc.getTableId());
3192  if (!deleted_cd) {
3193  return nullptr;
3194  }
3195  CHECK(deleted_cd->columnType.is_boolean());
3196  const auto deleted_expr =
3197  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
3198  outer_input_desc.getTableId(),
3199  deleted_cd->columnId,
3200  outer_input_desc.getNestLevel());
3201  CodeGenerator code_generator(this);
3202  const auto is_deleted =
3203  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
3204  const auto is_deleted_bb = llvm::BasicBlock::Create(
3205  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
3206  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
3207  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
3208  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
3209  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
3210  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3211  cgen_state_->ir_builder_.SetInsertPoint(bb);
3212  return bb;
3213 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1288
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:222
void Executor::codegenWindowAvgEpilogue ( llvm::Value *  crt_val,
llvm::Value *  window_func_null_val,
llvm::Value *  multiplicity_lv 
)
private

Definition at line 1299 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

1301  {
1303  const auto window_func_context =
1305  const auto window_func = window_func_context->getWindowFunction();
1306  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1307  const auto pi32_type =
1308  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1309  const auto pi64_type =
1310  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1311  const auto aggregate_state_type =
1312  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1313  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1314  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1315  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
1316  aggregate_state_count_i64, aggregate_state_type);
1317  std::string agg_count_func_name = "agg_count";
1318  switch (window_func_ti.get_type()) {
1319  case kFLOAT: {
1320  agg_count_func_name += "_float";
1321  break;
1322  }
1323  case kDOUBLE: {
1324  agg_count_func_name += "_double";
1325  break;
1326  }
1327  default: {
1328  break;
1329  }
1330  }
1331  agg_count_func_name += "_skip_val";
1332  cgen_state_->emitCall(agg_count_func_name,
1333  {aggregate_state_count, crt_val, window_func_null_val});
1334 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenWindowFrameBound ( WindowFunctionContext window_func_context,
const Analyzer::WindowFrame frame_start_bound,
const Analyzer::WindowFrame frame_end_bound,
llvm::Value *  current_row_pos_lv,
llvm::Value *  current_partition_start_offset_lv,
llvm::Value *  order_key_buf_ptr_lv,
llvm::Value *  order_key_col_null_val_lv,
llvm::Value *  frame_start_bound_expr_lv,
llvm::Value *  frame_end_bound_expr_lv,
llvm::Value *  num_elem_current_partition_lv,
llvm::Value *  target_partition_rowid_ptr_lv,
llvm::Value *  target_partition_sorted_rowid_ptr_lv,
llvm::Value *  null_start_pos_lv,
llvm::Value *  null_end_pos_lv,
CodeGenerator code_generator 
)
private

Definition at line 619 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegenWindowPosition(), CURRENT_ROW, EXPR_FOLLOWING, EXPR_PRECEDING, anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_int_type(), Analyzer::Expr::get_type_info(), Analyzer::WindowFrame::getBoundExpr(), Analyzer::WindowFrame::getBoundType(), WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getOrderKeyColumnBufferTypes(), WindowFunctionContext::getWindowFunction(), SQLTypeInfo::is_date(), SQLTypeInfo::is_timestamp(), UNBOUNDED_FOLLOWING, and UNBOUNDED_PRECEDING.

634  {
635  const auto window_func = window_func_context->getWindowFunction();
636  CHECK(window_func);
637  std::string order_col_type_name{""};
638  llvm::Value* current_col_value_lv{nullptr};
639  llvm::Value* frame_start_bound_lv{nullptr};
640  llvm::Value* frame_end_bound_lv{nullptr};
641 
642  if (window_func->hasRangeModeFraming()) {
643  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1);
644  CHECK(window_func->getOrderKeys().size() == 1UL);
645  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1UL);
646  const auto order_key_ti = window_func->getOrderKeys().front()->get_type_info();
647  const auto order_key_size = order_key_ti.get_size();
648  size_t order_key_size_in_byte = order_key_size * 8;
649  order_col_type_name = get_col_type_name_by_size(
650  order_key_size,
651  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
652 
653  // load column value of the current row (of ordering column)
654  auto rowid_in_partition_lv =
655  code_generator.codegenWindowPosition(window_func_context, current_row_pos_lv);
656  auto current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
657  get_int_type(order_key_size_in_byte, cgen_state_->context_),
658  order_key_buf_ptr_lv,
659  rowid_in_partition_lv);
660  current_col_value_lv = cgen_state_->ir_builder_.CreateLoad(
661  current_col_value_ptr_lv->getType()->getPointerElementType(),
662  current_col_value_ptr_lv,
663  "current_col_value");
664  }
665 
666  // compute frame start depending on the bound type
667  if (frame_start_bound->getBoundType() == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING) {
668  // frame starts at the first row of the partition
669  frame_start_bound_lv = cgen_state_->llInt((int64_t)0);
670  } else if (frame_start_bound->getBoundType() ==
672  // frame starts at the position X rows before the current row
673  CHECK(frame_start_bound_expr_lv);
674  if (window_func->hasRowModeFraming()) {
675  frame_start_bound_lv = cgen_state_->emitCall("compute_row_mode_start_index_sub",
676  {current_row_pos_lv,
677  current_partition_start_offset_lv,
678  frame_start_bound_expr_lv});
679  } else {
680  CHECK(window_func->hasRangeModeFraming());
681  if (frame_start_bound->getBoundExpr()->get_type_info().is_date() ||
682  frame_start_bound->getBoundExpr()->get_type_info().is_timestamp()) {
683  std::string lower_bound_func_name{"compute_"};
684  lower_bound_func_name.append(order_col_type_name);
685  lower_bound_func_name.append("_lower_bound_from_ordered_index_for_timeinterval");
686  frame_start_bound_lv = cgen_state_->emitCall(
687  lower_bound_func_name,
688  {num_elem_current_partition_lv,
689  frame_start_bound_expr_lv,
690  order_key_buf_ptr_lv,
691  target_partition_rowid_ptr_lv,
692  target_partition_sorted_rowid_ptr_lv,
693  cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64),
694  null_start_pos_lv,
695  null_end_pos_lv});
696  } else {
697  std::string lower_bound_func_name{"range_mode_"};
698  lower_bound_func_name.append(order_col_type_name);
699  lower_bound_func_name.append("_sub_frame_lower_bound");
700  frame_start_bound_lv =
701  cgen_state_->emitCall(lower_bound_func_name,
702  {num_elem_current_partition_lv,
703  current_col_value_lv,
704  order_key_buf_ptr_lv,
705  target_partition_rowid_ptr_lv,
706  target_partition_sorted_rowid_ptr_lv,
707  frame_start_bound_expr_lv,
708  order_key_col_null_val_lv,
709  null_start_pos_lv,
710  null_end_pos_lv});
711  }
712  }
713  } else if (frame_start_bound->getBoundType() == SqlWindowFrameBoundType::CURRENT_ROW) {
714  // frame starts at the current row
715  if (window_func->hasRowModeFraming()) {
716  frame_start_bound_lv = cgen_state_->emitCall("compute_row_mode_start_index_sub",
717  {current_row_pos_lv,
718  current_partition_start_offset_lv,
719  cgen_state_->llInt(((int64_t)0))});
720  } else {
721  CHECK(window_func->hasRangeModeFraming());
722  std::string lower_bound_func_name{"compute_"};
723  lower_bound_func_name.append(order_col_type_name);
724  lower_bound_func_name.append("_lower_bound_from_ordered_index");
725  frame_start_bound_lv = cgen_state_->emitCall(lower_bound_func_name,
726  {num_elem_current_partition_lv,
727  current_col_value_lv,
728  order_key_buf_ptr_lv,
729  target_partition_rowid_ptr_lv,
730  target_partition_sorted_rowid_ptr_lv,
731  order_key_col_null_val_lv,
732  null_start_pos_lv,
733  null_end_pos_lv});
734  }
735  } else if (frame_start_bound->getBoundType() ==
737  // frame starts at the position X rows after the current row
738  CHECK(frame_start_bound_expr_lv);
739  if (window_func->hasRowModeFraming()) {
740  frame_start_bound_lv = cgen_state_->emitCall("compute_row_mode_start_index_add",
741  {current_row_pos_lv,
742  current_partition_start_offset_lv,
743  frame_start_bound_expr_lv,
744  num_elem_current_partition_lv});
745  } else {
746  CHECK(window_func->hasRangeModeFraming());
747  if (frame_start_bound->getBoundExpr()->get_type_info().is_date() ||
748  frame_start_bound->getBoundExpr()->get_type_info().is_timestamp()) {
749  std::string lower_bound_func_name{"compute_"};
750  lower_bound_func_name.append(order_col_type_name);
751  lower_bound_func_name.append("_lower_bound_from_ordered_index_for_timeinterval");
752  frame_start_bound_lv = cgen_state_->emitCall(
753  lower_bound_func_name,
754  {num_elem_current_partition_lv,
755  frame_start_bound_expr_lv,
756  order_key_buf_ptr_lv,
757  target_partition_rowid_ptr_lv,
758  target_partition_sorted_rowid_ptr_lv,
759  cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64),
760  null_start_pos_lv,
761  null_end_pos_lv});
762  } else {
763  std::string lower_bound_func_name{"range_mode_"};
764  lower_bound_func_name.append(order_col_type_name);
765  lower_bound_func_name.append("_add_frame_lower_bound");
766  frame_start_bound_lv =
767  cgen_state_->emitCall(lower_bound_func_name,
768  {num_elem_current_partition_lv,
769  current_col_value_lv,
770  order_key_buf_ptr_lv,
771  target_partition_rowid_ptr_lv,
772  target_partition_sorted_rowid_ptr_lv,
773  frame_start_bound_expr_lv,
774  order_key_col_null_val_lv,
775  null_start_pos_lv,
776  null_end_pos_lv});
777  }
778  }
779  } else {
780  CHECK(false) << "frame start cannot be UNBOUNDED FOLLOWING";
781  }
782 
783  // compute frame end
785  // frame ends at the first row of the partition
786  CHECK(false) << "frame end cannot be UNBOUNDED PRECEDING";
787  } else if (frame_end_bound->getBoundType() == SqlWindowFrameBoundType::EXPR_PRECEDING) {
789  // frame ends at the position X rows before the current row
789  CHECK(frame_end_bound_expr_lv);
790  if (window_func->hasRowModeFraming()) {
791  frame_end_bound_lv = cgen_state_->emitCall("compute_row_mode_end_index_sub",
792  {current_row_pos_lv,
793  current_partition_start_offset_lv,
794  frame_end_bound_expr_lv});
795  } else {
796  CHECK(window_func->hasRangeModeFraming());
797  if (frame_end_bound->getBoundExpr()->get_type_info().is_date() ||
798  frame_end_bound->getBoundExpr()->get_type_info().is_timestamp()) {
799  std::string upper_bound_func_name{"compute_"};
800  upper_bound_func_name.append(order_col_type_name);
801  upper_bound_func_name.append("_upper_bound_from_ordered_index_for_timeinterval");
802  frame_end_bound_lv = cgen_state_->emitCall(
803  upper_bound_func_name,
804  {num_elem_current_partition_lv,
805  frame_end_bound_expr_lv,
806  order_key_buf_ptr_lv,
807  target_partition_rowid_ptr_lv,
808  target_partition_sorted_rowid_ptr_lv,
809  cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64),
810  null_start_pos_lv,
811  null_end_pos_lv});
812  } else {
813  std::string upper_bound_func_name{"range_mode_"};
814  upper_bound_func_name.append(order_col_type_name);
815  upper_bound_func_name.append("_sub_frame_upper_bound");
816  frame_end_bound_lv = cgen_state_->emitCall(upper_bound_func_name,
817  {num_elem_current_partition_lv,
818  current_col_value_lv,
819  order_key_buf_ptr_lv,
820  target_partition_rowid_ptr_lv,
821  target_partition_sorted_rowid_ptr_lv,
822  frame_end_bound_expr_lv,
823  order_key_col_null_val_lv,
824  null_start_pos_lv,
825  null_end_pos_lv});
826  }
827  }
828  } else if (frame_end_bound->getBoundType() == SqlWindowFrameBoundType::CURRENT_ROW) {
829  // frame ends at the current row
830  if (window_func->hasRowModeFraming()) {
831  frame_end_bound_lv = cgen_state_->emitCall("compute_row_mode_end_index_sub",
832  {current_row_pos_lv,
833  current_partition_start_offset_lv,
834  cgen_state_->llInt((int64_t)0)});
835  } else {
836  CHECK(window_func->hasRangeModeFraming());
837  std::string upper_bound_func_name{"compute_"};
838  upper_bound_func_name.append(order_col_type_name);
839  upper_bound_func_name.append("_upper_bound_from_ordered_index");
840  frame_end_bound_lv = cgen_state_->emitCall(upper_bound_func_name,
841  {num_elem_current_partition_lv,
842  current_col_value_lv,
843  order_key_buf_ptr_lv,
844  target_partition_rowid_ptr_lv,
845  target_partition_sorted_rowid_ptr_lv,
846  order_key_col_null_val_lv,
847  null_start_pos_lv,
848  null_end_pos_lv});
849  }
850  } else if (frame_end_bound->getBoundType() == SqlWindowFrameBoundType::EXPR_FOLLOWING) {
851  // frame ends at the position X rows after the current row
852  CHECK(frame_end_bound_expr_lv);
853  if (window_func->hasRowModeFraming()) {
854  frame_end_bound_lv = cgen_state_->emitCall("compute_row_mode_end_index_add",
855  {current_row_pos_lv,
856  current_partition_start_offset_lv,
857  frame_end_bound_expr_lv,
858  num_elem_current_partition_lv});
859  } else {
860  CHECK(window_func->hasRangeModeFraming());
861  if (frame_end_bound->getBoundExpr()->get_type_info().is_date() ||
862  frame_end_bound->getBoundExpr()->get_type_info().is_timestamp()) {
863  std::string upper_bound_func_name{"compute_"};
864  upper_bound_func_name.append(order_col_type_name);
865  upper_bound_func_name.append("_upper_bound_from_ordered_index_for_timeinterval");
866  frame_end_bound_lv = cgen_state_->emitCall(
867  upper_bound_func_name,
868  {num_elem_current_partition_lv,
869  frame_end_bound_expr_lv,
870  order_key_buf_ptr_lv,
871  target_partition_rowid_ptr_lv,
872  target_partition_sorted_rowid_ptr_lv,
873  cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64),
874  null_start_pos_lv,
875  null_end_pos_lv});
876  } else {
877  std::string upper_bound_func_name{"range_mode_"};
878  upper_bound_func_name.append(order_col_type_name);
879  upper_bound_func_name.append("_add_frame_upper_bound");
880  frame_end_bound_lv = cgen_state_->emitCall(upper_bound_func_name,
881  {num_elem_current_partition_lv,
882  current_col_value_lv,
883  order_key_buf_ptr_lv,
884  target_partition_rowid_ptr_lv,
885  target_partition_sorted_rowid_ptr_lv,
886  frame_end_bound_expr_lv,
887  order_key_col_null_val_lv,
888  null_start_pos_lv,
889  null_end_pos_lv});
890  }
891  }
892  } else {
893  // frame ends at the last row of the partition
894  CHECK(frame_end_bound->getBoundType() ==
896  frame_end_bound_lv = num_elem_current_partition_lv;
897  }
898  CHECK(frame_start_bound_lv);
899  CHECK(frame_end_bound_lv);
900  return std::make_pair(frame_start_bound_lv, frame_end_bound_lv);
901 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
bool is_timestamp() const
Definition: sqltypes.h:1020
const std::vector< SQLTypeInfo > & getOrderKeyColumnBufferTypes() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
SqlWindowFrameBoundType getBoundType() const
Definition: Analyzer.h:2338
llvm::Value * codegenWindowPosition(const WindowFunctionContext *window_func_context, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:230
const Analyzer::Expr * getBoundExpr() const
Definition: Analyzer.h:2340
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:82
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
#define CHECK(condition)
Definition: Logger.h:222
const Analyzer::WindowFunction * getWindowFunction() const
bool is_date() const
Definition: sqltypes.h:1008

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 21 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, COUNT, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAG_IN_FRAME, LAST_VALUE, LEAD, LEAD_IN_FRAME, LOG, MAX, MIN, NTILE, PERCENT_RANK, RANK, ROW_NUMBER, and SUM.

22  {
24  CodeGenerator code_generator(this);
25 
26  const auto window_func_context =
28  target_index);
29  const auto window_func = window_func_context->getWindowFunction();
30  switch (window_func->getKind()) {
35  // they are always evaluated on the entire partition
36  return code_generator.codegenWindowPosition(window_func_context,
37  code_generator.posArg(nullptr));
38  }
41  // they are always evaluated on the entire partition
42  return cgen_state_->emitCall("percent_window_func",
43  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
44  window_func_context->output())),
45  code_generator.posArg(nullptr)});
46  }
51  // they are always evaluated on the current frame
53  const auto& args = window_func->getArgs();
54  CHECK(!args.empty());
55  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
56  CHECK_EQ(arg_lvs.size(), size_t(1));
57  return arg_lvs.front();
58  }
64  // they are always evaluated on the current frame
66  }
70  }
71  default: {
72  LOG(FATAL) << "Invalid window function kind";
73  }
74  }
75  return nullptr;
76 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
#define LOG(tag)
Definition: Logger.h:216
llvm::Value * codegenWindowFunctionOnFrame(const CompilationOptions &co)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
static const WindowProjectNodeContext * get(Executor *executor)
const WindowFunctionContext * activateWindowFunctionContext(Executor *executor, const size_t target_index) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:222
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregate ( const CompilationOptions co)
private

Definition at line 227 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, CHECK, WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().

227  {
229  const auto reset_state_false_bb = codegenWindowResetStateControlFlow();
230  auto aggregate_state = aggregateWindowStatePtr();
231  llvm::Value* aggregate_state_count = nullptr;
232  const auto window_func_context =
234  const auto window_func = window_func_context->getWindowFunction();
235  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
236  const auto aggregate_state_count_i64 = cgen_state_->llInt(
237  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
238  const auto pi64_type =
239  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
240  aggregate_state_count =
241  cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_count_i64, pi64_type);
242  }
243  codegenWindowFunctionStateInit(aggregate_state);
244  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
245  const auto count_zero = cgen_state_->llInt(int64_t(0));
246  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
247  }
248  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
249  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
251  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
252 }
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
static const WindowProjectNodeContext * get(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
#define CHECK(condition)
Definition: Logger.h:222
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
llvm::BasicBlock * codegenWindowResetStateControlFlow()

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 903 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kBIGINT, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, MAX, MIN, CodeGenerator::posArg(), and SUM.

904  {
906  const auto window_func_context =
908  const auto window_func = window_func_context->getWindowFunction();
909  const auto window_func_ti = get_adjusted_window_type_info(window_func);
910  const auto window_func_null_val =
911  window_func_ti.is_fp()
912  ? cgen_state_->inlineFpNull(window_func_ti)
913  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
914  const auto& args = window_func->getArgs();
915  llvm::Value* crt_val;
916  CodeGenerator code_generator(this);
917  if (args.empty()) {
918  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
919  crt_val = cgen_state_->llInt(int64_t(1));
920  } else {
921  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
922  CHECK_EQ(arg_lvs.size(), size_t(1));
923  if (window_func->getKind() == SqlWindowFunctionKind::SUM && !window_func_ti.is_fp()) {
924  crt_val = code_generator.codegenCastBetweenIntTypes(
925  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
926  } else {
927  crt_val = window_func_ti.get_type() == kFLOAT
928  ? arg_lvs.front()
929  : cgen_state_->castToTypeIn(arg_lvs.front(), 64);
930  }
931  }
932  if (window_func_context->needsToBuildAggregateTree()) {
933  // compute an aggregated value for each row of the window frame by using segment tree
934  // when constructing a window context, we build a necessary segment tree for it
935  // and use the tree array (so called `aggregate tree`) to query the aggregated value
936  // of the specific window frame
937  // we fall back to the non-framing window func evaluation logic if an input
938  // of the window function can be an empty one
939  const auto pi64_type =
940  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
941  const auto ppi64_type = llvm::PointerType::get(pi64_type, 0);
942  const auto pi32_type =
943  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
944 
945  // codegen frame bound expr if necessary
946  const auto frame_start_bound = window_func->getFrameStartBound();
947  const auto frame_end_bound = window_func->getFrameEndBound();
948  auto frame_start_bound_expr_lv =
949  codegenFrameBoundExpr(window_func, frame_start_bound, code_generator, co);
950  auto frame_end_bound_expr_lv =
951  codegenFrameBoundExpr(window_func, frame_end_bound, code_generator, co);
952  CHECK(frame_start_bound_expr_lv);
953  CHECK(frame_end_bound_expr_lv);
954 
955  // compute aggregated value over the computed frame range
956  auto current_row_pos_lv = code_generator.posArg(nullptr);
957  auto partition_index_lv =
958  codegenCurrentPartitionIndex(window_func_context, current_row_pos_lv);
959 
960  // ordering column buffer
961  const auto target_col_ti = window_func->getArgs().front()->get_type_info();
962  const auto target_col_size = target_col_ti.get_size();
963  const auto col_type_name =
964  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
965 
966  // partial sum of # elems of partitions
967  auto partition_start_offset_buf_lv = cgen_state_->llInt(
968  reinterpret_cast<int64_t>(window_func_context->partitionStartOffset()));
969  auto partition_start_offset_ptr_lv =
970  cgen_state_->ir_builder_.CreateIntToPtr(partition_start_offset_buf_lv, pi64_type);
971 
972  // get start offset of the current partition
973  auto current_partition_start_offset_ptr_lv =
974  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
975  partition_start_offset_ptr_lv,
976  partition_index_lv);
977  auto current_partition_start_offset_lv = cgen_state_->ir_builder_.CreateLoad(
978  current_partition_start_offset_ptr_lv->getType()->getPointerElementType(),
979  current_partition_start_offset_ptr_lv);
980 
981  // row_id buf of the current partition
982  const auto partition_rowid_buf_lv =
983  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->payload()));
984  const auto partition_rowid_ptr_lv =
985  cgen_state_->ir_builder_.CreateIntToPtr(partition_rowid_buf_lv, pi32_type);
986  auto target_partition_rowid_ptr_lv =
987  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
988  partition_rowid_ptr_lv,
989  current_partition_start_offset_lv);
990 
991  // row_id buf of ordered current partition
992  const auto sorted_rowid_lv = cgen_state_->llInt(
993  reinterpret_cast<int64_t>(window_func_context->sortedPartition()));
994  const auto sorted_rowid_ptr_lv =
995  cgen_state_->ir_builder_.CreateIntToPtr(sorted_rowid_lv, pi64_type);
996  auto target_partition_sorted_rowid_ptr_lv =
997  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
998  sorted_rowid_ptr_lv,
999  current_partition_start_offset_lv);
1000 
1001  // # elems per partition
1002  const auto partition_count_buf =
1003  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->counts()));
1004  auto partition_count_buf_ptr_lv =
1005  cgen_state_->ir_builder_.CreateIntToPtr(partition_count_buf, pi32_type);
1006 
1007  // # elems of the given partition
1008  const auto num_elem_current_partition_ptr =
1009  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
1010  partition_count_buf_ptr_lv,
1011  partition_index_lv);
1012  auto num_elem_current_partition_lv = cgen_state_->castToTypeIn(
1013  cgen_state_->ir_builder_.CreateLoad(
1014  num_elem_current_partition_ptr->getType()->getPointerElementType(),
1015  num_elem_current_partition_ptr),
1016  64);
1017 
1018  const auto order_key_ti = window_func->getOrderKeys().front()->get_type_info();
1019  const auto order_key_size = order_key_ti.get_size();
1020  const auto order_col_type_name = get_col_type_name_by_size(
1021  order_key_size,
1022  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
1023  size_t order_key_size_in_byte = order_key_size * 8;
1024 
1025  const auto order_key_buf_type = llvm::PointerType::get(
1026  get_int_type(order_key_size_in_byte, cgen_state_->context_), 0);
1027  const auto order_key_buf = cgen_state_->llInt(reinterpret_cast<int64_t>(
1028  window_func_context->getOrderKeyColumnBuffers().front()));
1029  auto order_key_buf_ptr_lv =
1030  cgen_state_->ir_builder_.CreateIntToPtr(order_key_buf, order_key_buf_type);
1031 
1032  // null value of the ordering column
1033  const auto order_key_buf_ti =
1034  window_func_context->getOrderKeyColumnBufferTypes().front();
1035  llvm::Value* order_key_col_null_val_lv{nullptr};
1036  switch (order_key_buf_ti.get_type()) {
1037  case kDATE:
1038  case kTIME:
1039  case kTIMESTAMP: {
1040  switch (order_key_buf_ti.get_size()) {
1041  case 1: {
1042  order_key_col_null_val_lv =
1044  break;
1045  }
1046  case 2: {
1047  order_key_col_null_val_lv =
1049  break;
1050  }
1051  case 4: {
1052  order_key_col_null_val_lv =
1053  cgen_state_->inlineNull(SQLTypeInfo(SQLTypes::kINT));
1054  break;
1055  }
1056  case 8: {
1057  order_key_col_null_val_lv =
1059  break;
1060  }
1061  default:
1062  break;
1063  }
1064  break;
1065  }
1066  default: {
1067  order_key_col_null_val_lv = cgen_state_->inlineNull(order_key_buf_ti);
1068  break;
1069  }
1070  }
1071 
1072  // null range of the aggregate tree
1073  const auto null_start_pos_buf = cgen_state_->llInt(
1074  reinterpret_cast<int64_t>(window_func_context->getNullValueStartPos()));
1075  const auto null_start_pos_buf_ptr =
1076  cgen_state_->ir_builder_.CreateIntToPtr(null_start_pos_buf, pi64_type);
1077  const auto null_start_pos_ptr =
1078  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
1079  null_start_pos_buf_ptr,
1080  partition_index_lv);
1081  auto null_start_pos_lv = cgen_state_->ir_builder_.CreateLoad(
1082  null_start_pos_ptr->getType()->getPointerElementType(),
1083  null_start_pos_ptr,
1084  "null_start_pos");
1085  const auto null_end_pos_buf = cgen_state_->llInt(
1086  reinterpret_cast<int64_t>(window_func_context->getNullValueEndPos()));
1087  const auto null_end_pos_buf_ptr =
1088  cgen_state_->ir_builder_.CreateIntToPtr(null_end_pos_buf, pi64_type);
1089  const auto null_end_pos_ptr =
1090  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
1091  null_end_pos_buf_ptr,
1092  partition_index_lv);
1093  auto null_end_pos_lv = cgen_state_->ir_builder_.CreateLoad(
1094  null_end_pos_ptr->getType()->getPointerElementType(),
1095  null_end_pos_ptr,
1096  "null_end_pos");
1097 
1098  llvm::Value* frame_start_bound_lv{nullptr};
1099  llvm::Value* frame_end_bound_lv{nullptr};
1100  std::tie(frame_start_bound_lv, frame_end_bound_lv) =
1101  codegenWindowFrameBound(window_func_context,
1102  frame_start_bound,
1103  frame_end_bound,
1104  current_row_pos_lv,
1105  current_partition_start_offset_lv,
1106  order_key_buf_ptr_lv,
1107  order_key_col_null_val_lv,
1108  frame_start_bound_expr_lv,
1109  frame_end_bound_expr_lv,
1110  num_elem_current_partition_lv,
1111  target_partition_rowid_ptr_lv,
1112  target_partition_sorted_rowid_ptr_lv,
1113  null_start_pos_lv,
1114  null_end_pos_lv,
1115  code_generator);
1116  CHECK(frame_start_bound_lv);
1117  CHECK(frame_end_bound_lv);
1118 
1119  // codegen to send a query with frame bound to aggregate tree searcher
1120  llvm::Value* aggregation_trees_lv{nullptr};
1121  llvm::Value* invalid_val_lv{nullptr};
1122  llvm::Value* null_val_lv{nullptr};
1123  std::string aggregation_tree_search_func_name{"search_"};
1124  std::string aggregation_tree_getter_func_name{"get_"};
1125 
1126  // prepare null values and aggregate_tree getter and searcher depending on
1127  // a type of the ordering column
1128  auto agg_expr_ti = args.front()->get_type_info();
1129  switch (agg_expr_ti.get_type()) {
1130  case SQLTypes::kTINYINT:
1131  case SQLTypes::kSMALLINT:
1132  case SQLTypes::kINT:
1133  case SQLTypes::kBIGINT:
1134  case SQLTypes::kNUMERIC:
1135  case SQLTypes::kDECIMAL: {
1136  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1137  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::max());
1138  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1139  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::lowest());
1140  } else {
1141  invalid_val_lv = cgen_state_->llInt((int64_t)0);
1142  }
1143  null_val_lv = cgen_state_->llInt(inline_int_null_value<int64_t>());
1144  aggregation_tree_search_func_name += "int64_t";
1145  aggregation_tree_getter_func_name += "integer";
1146  break;
1147  }
1148  case SQLTypes::kFLOAT:
1149  case SQLTypes::kDOUBLE: {
1150  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1151  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::max());
1152  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1153  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::lowest());
1154  } else {
1155  invalid_val_lv = cgen_state_->llFp((double)0);
1156  }
1157  null_val_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1158  aggregation_tree_search_func_name += "double";
1159  aggregation_tree_getter_func_name += "double";
1160  break;
1161  }
1162  default: {
1163  CHECK(false);
1164  break;
1165  }
1166  }
1167 
1168  // derived aggregation has a different code path
1169  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1170  aggregation_tree_search_func_name += "_derived";
1171  aggregation_tree_getter_func_name += "_derived";
1172  }
1173 
1174  // get a buffer holding aggregate trees for each partition
1175  if (agg_expr_ti.is_integer() || agg_expr_ti.is_decimal()) {
1176  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1177  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1178  window_func_context->getDerivedAggregationTreesForIntegerTypeWindowExpr()));
1179  } else {
1180  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1181  window_func_context->getAggregationTreesForIntegerTypeWindowExpr()));
1182  }
1183  } else if (agg_expr_ti.is_fp()) {
1184  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1185  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1186  window_func_context->getDerivedAggregationTreesForDoubleTypeWindowExpr()));
1187  } else {
1188  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1189  window_func_context->getAggregationTreesForDoubleTypeWindowExpr()));
1190  }
1191  }
1192 
1193  CHECK(aggregation_trees_lv);
1194  CHECK(invalid_val_lv);
1195  aggregation_tree_search_func_name += "_aggregation_tree";
1196  aggregation_tree_getter_func_name += "_aggregation_tree";
1197 
1198  // get the aggregate tree of the current partition from a window context
1199  auto aggregation_trees_ptr =
1200  cgen_state_->ir_builder_.CreateIntToPtr(aggregation_trees_lv, ppi64_type);
1201  auto target_aggregation_tree_lv = cgen_state_->emitCall(
1202  aggregation_tree_getter_func_name, {aggregation_trees_ptr, partition_index_lv});
1203 
1204  // a depth of segment tree
1205  const auto tree_depth_buf = cgen_state_->llInt(
1206  reinterpret_cast<int64_t>(window_func_context->getAggregateTreeDepth()));
1207  const auto tree_depth_buf_ptr =
1208  cgen_state_->ir_builder_.CreateIntToPtr(tree_depth_buf, pi64_type);
1209  const auto current_partition_tree_depth_buf_ptr = cgen_state_->ir_builder_.CreateGEP(
1210  get_int_type(64, cgen_state_->context_), tree_depth_buf_ptr, partition_index_lv);
1211  const auto current_partition_tree_depth_lv = cgen_state_->ir_builder_.CreateLoad(
1212  current_partition_tree_depth_buf_ptr->getType()->getPointerElementType(),
1213  current_partition_tree_depth_buf_ptr);
1214 
1215  // a fanout of the current partition's segment tree
1216  const auto aggregation_tree_fanout_lv = cgen_state_->llInt(
1217  static_cast<int64_t>(window_func_context->getAggregateTreeFanout()));
1218 
1219  // agg_type
1220  const auto agg_type_lv =
1221  cgen_state_->llInt(static_cast<int32_t>(window_func->getKind()));
1222 
1223  // send a query to the aggregate tree with the frame range:
1224  // `frame_start_bound_lv` ~ `frame_end_bound_lv`
1225  auto res_lv =
1226  cgen_state_->emitCall(aggregation_tree_search_func_name,
1227  {target_aggregation_tree_lv,
1228  frame_start_bound_lv,
1229  frame_end_bound_lv,
1230  current_partition_tree_depth_lv,
1231  aggregation_tree_fanout_lv,
1232  cgen_state_->llBool(agg_expr_ti.is_decimal()),
1233  cgen_state_->llInt((int64_t)agg_expr_ti.get_scale()),
1234  invalid_val_lv,
1235  null_val_lv,
1236  agg_type_lv});
1237 
1238  // handling returned null value if exists
1239  std::string null_handler_func_name{"handle_null_val_"};
1240  std::vector<llvm::Value*> null_handler_args{res_lv, null_val_lv};
1241 
1242  // determine null_handling function's name
1243  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1244  // average aggregate function returns a value as a double
1245  // (and our search* function also returns a double)
1246  if (agg_expr_ti.is_fp()) {
1247  // fp type: double null value
1248  null_handler_func_name += "double_double";
1249  } else {
1250  // non-fp type: int64_t null type
1251  null_handler_func_name += "double_int64_t";
1252  }
1253  } else if (agg_expr_ti.is_fp()) {
1254  // fp type: double null value
1255  null_handler_func_name += "double_double";
1256  } else {
1257  // non-fp type: int64_t null type
1258  null_handler_func_name += "int64_t_int64_t";
1259  }
1260  null_handler_func_name += "_window_framing_agg";
1261 
1262  // prepare null_val
1263  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1264  if (agg_expr_ti.is_fp()) {
1265  null_handler_args.push_back(cgen_state_->llFp((double)0));
1266  } else {
1267  null_handler_args.push_back(cgen_state_->llInt((int64_t)0));
1268  }
1269  } else if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1270  null_handler_args.push_back(cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE)));
1271  } else {
1272  null_handler_args.push_back(cgen_state_->castToTypeIn(window_func_null_val, 64));
1273  }
1274  res_lv = cgen_state_->emitCall(null_handler_func_name, null_handler_args);
1275 
1276  // when AGG_TYPE is double, we get a double-type return value, but we expect an
1277  // integer-type value for the count aggregation
1278  if (window_func->getKind() == SqlWindowFunctionKind::COUNT && agg_expr_ti.is_fp()) {
1279  return cgen_state_->ir_builder_.CreateFPToSI(
1280  res_lv, get_int_type(64, cgen_state_->context_));
1281  }
1282  return res_lv;
1283  } else {
1284  llvm::Value* multiplicity_lv = nullptr;
1285  const auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
1286  if (args.empty()) {
1287  cgen_state_->emitCall(agg_name, {aggregate_state, crt_val});
1288  } else {
1289  cgen_state_->emitCall(agg_name + "_skip_val",
1290  {aggregate_state, crt_val, window_func_null_val});
1291  }
1292  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1293  codegenWindowAvgEpilogue(crt_val, window_func_null_val, multiplicity_lv);
1294  }
1295  return codegenAggregateWindowState();
1296  }
1297 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
#define CHECK_EQ(x, y)
Definition: Logger.h:230
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
Definition: sqltypes.h:63
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string get_window_agg_name(const SqlWindowFunctionKind kind, const SQLTypeInfo &window_func_ti)
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
Definition: sqltypes.h:67
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBound(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *current_row_pos_lv, llvm::Value *current_partition_start_offset_lv, llvm::Value *order_key_buf_ptr_lv, llvm::Value *order_key_col_null_val_lv, llvm::Value *frame_start_bound_expr_lv, llvm::Value *frame_end_bound_expr_lv, llvm::Value *num_elem_current_partition_lv, llvm::Value *target_partition_rowid_ptr_lv, llvm::Value *target_partition_sorted_rowid_ptr_lv, llvm::Value *null_start_pos_lv, llvm::Value *null_end_pos_lv, CodeGenerator &code_generator)
llvm::Value * codegenAggregateWindowState()
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:59
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionOnFrame ( const CompilationOptions co)
private

Definition at line 333 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size_with_encoding(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kENCODING_DATE_IN_DAYS, kSecsPerDay, LAG_IN_FRAME, and LEAD_IN_FRAME.

333  {
335  const auto window_func_context =
337  const auto window_func = window_func_context->getWindowFunction();
338  const auto window_func_kind = window_func->getKind();
339  const auto& args = window_func->getArgs();
340  CHECK(args.size() >= 1 && args.size() <= 3);
341  CodeGenerator code_generator(this);
342  const auto offset_lv =
343  cgen_state_->castToTypeIn(code_generator.codegen(args[1].get(), true, co)[0], 64);
344 
345  // codegen frame bound expr if necessary
346  const auto frame_start_bound = window_func->getFrameStartBound();
347  const auto frame_end_bound = window_func->getFrameEndBound();
348  auto frame_start_bound_expr_lv =
349  codegenFrameBoundExpr(window_func, frame_start_bound, code_generator, co);
350  auto frame_end_bound_expr_lv =
351  codegenFrameBoundExpr(window_func, frame_end_bound, code_generator, co);
352  CHECK(frame_start_bound_expr_lv);
353  CHECK(frame_end_bound_expr_lv);
354 
355  auto current_row_pos_lv = code_generator.posArg(nullptr);
356  auto partition_index_lv =
357  codegenCurrentPartitionIndex(window_func_context, current_row_pos_lv);
358 
359  llvm::Value* res_lv{nullptr};
360  // currently, we only support below two window functions on frame
361  // todo (yonnmin): remove this when supporting more window functions on frame
362  CHECK(window_func_kind == SqlWindowFunctionKind::LEAD_IN_FRAME ||
363  window_func_kind == SqlWindowFunctionKind::LAG_IN_FRAME);
364  const auto pi32_type =
365  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
366  const auto pi64_type =
367  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
368  bool is_lag_in_frame = window_func_kind == SqlWindowFunctionKind::LAG_IN_FRAME;
369 
370  // ordering column buffer
371  const auto target_col_ti = window_func->getArgs().front()->get_type_info();
372  const auto target_col_size = target_col_ti.get_size();
373  const auto target_col_type_name =
374  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
375  const auto target_col_logical_type_name = get_col_type_name_by_size(
376  window_func->get_type_info().get_size(), window_func->get_type_info().is_fp());
377 
378  // when target_column is fixed encoded, we store the actual column value by
379  // considering it, but our resultset analyzer only considers the type without encoding
380  // scheme so we handle them separately
381  auto logical_null_val_lv =
382  get_null_value_by_size(cgen_state_.get(), window_func->get_type_info());
383  auto target_col_null_val_lv =
385  size_t target_col_size_in_byte = target_col_size * 8;
386  llvm::Type* col_buf_ptr_type =
387  target_col_ti.is_fp()
388  ? get_fp_type(target_col_size_in_byte, cgen_state_->context_)
389  : get_int_type(target_col_size_in_byte, cgen_state_->context_);
390  auto col_buf_type = llvm::PointerType::get(col_buf_ptr_type, 0);
391  auto target_col_buf_ptr_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
392  window_func_context->getColumnBufferForWindowFunctionExpressions().front()));
393  auto target_col_buf_lv =
394  cgen_state_->ir_builder_.CreateIntToPtr(target_col_buf_ptr_lv, col_buf_type);
395 
396  // partial sum of # elems of partitions
397  auto partition_start_offset_buf_lv = cgen_state_->llInt(
398  reinterpret_cast<int64_t>(window_func_context->partitionStartOffset()));
399  auto partition_start_offset_ptr_lv =
400  cgen_state_->ir_builder_.CreateIntToPtr(partition_start_offset_buf_lv, pi64_type);
401 
402  // get start offset of the current partition
403  auto current_partition_start_offset_ptr_lv =
404  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
405  partition_start_offset_ptr_lv,
406  partition_index_lv);
407  auto current_partition_start_offset_lv = cgen_state_->ir_builder_.CreateLoad(
408  current_partition_start_offset_ptr_lv->getType()->getPointerElementType(),
409  current_partition_start_offset_ptr_lv);
410 
411  // row_id buf of the current partition
412  const auto partition_rowid_buf_lv =
413  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->payload()));
414  const auto partition_rowid_ptr_lv =
415  cgen_state_->ir_builder_.CreateIntToPtr(partition_rowid_buf_lv, pi32_type);
416  auto target_partition_rowid_ptr_lv =
417  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
418  partition_rowid_ptr_lv,
419  current_partition_start_offset_lv);
420 
421  // row_id buf of ordered current partition
422  const auto sorted_rowid_lv = cgen_state_->llInt(
423  reinterpret_cast<int64_t>(window_func_context->sortedPartition()));
424  const auto sorted_rowid_ptr_lv =
425  cgen_state_->ir_builder_.CreateIntToPtr(sorted_rowid_lv, pi64_type);
426  auto target_partition_sorted_rowid_ptr_lv =
427  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
428  sorted_rowid_ptr_lv,
429  current_partition_start_offset_lv);
430 
431  // # elems per partition
432  const auto partition_count_buf =
433  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->counts()));
434  auto partition_count_buf_ptr_lv =
435  cgen_state_->ir_builder_.CreateIntToPtr(partition_count_buf, pi32_type);
436 
437  // # elems of the given partition
438  const auto num_elem_current_partition_ptr =
439  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
440  partition_count_buf_ptr_lv,
441  partition_index_lv);
442  auto num_elem_current_partition_lv = cgen_state_->castToTypeIn(
443  cgen_state_->ir_builder_.CreateLoad(
444  num_elem_current_partition_ptr->getType()->getPointerElementType(),
445  num_elem_current_partition_ptr),
446  64);
447 
448  const auto order_key_ti = window_func->getOrderKeys().front()->get_type_info();
449  const auto order_key_size = order_key_ti.get_size();
450  const auto order_col_type_name = get_col_type_name_by_size(
451  order_key_size,
452  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
453  size_t order_key_size_in_byte = order_key_size * 8;
454 
455  const auto order_key_buf_type = llvm::PointerType::get(
456  get_int_type(order_key_size_in_byte, cgen_state_->context_), 0);
457  const auto order_key_buf = cgen_state_->llInt(
458  reinterpret_cast<int64_t>(window_func_context->getOrderKeyColumnBuffers().front()));
459  auto order_key_buf_ptr_lv =
460  cgen_state_->ir_builder_.CreateIntToPtr(order_key_buf, order_key_buf_type);
461 
462  // null value of the ordering column
463  const auto order_key_buf_ti =
464  window_func_context->getOrderKeyColumnBufferTypes().front();
465  auto order_key_col_null_val_lv =
466  get_null_value_by_size_with_encoding(cgen_state_.get(), order_key_buf_ti);
467 
468  // null range of the aggregate tree
469  const auto null_start_pos_buf = cgen_state_->llInt(
470  reinterpret_cast<int64_t>(window_func_context->getNullValueStartPos()));
471  const auto null_start_pos_buf_ptr =
472  cgen_state_->ir_builder_.CreateIntToPtr(null_start_pos_buf, pi64_type);
473  const auto null_start_pos_ptr =
474  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
475  null_start_pos_buf_ptr,
476  partition_index_lv);
477  auto null_start_pos_lv = cgen_state_->ir_builder_.CreateLoad(
478  null_start_pos_ptr->getType()->getPointerElementType(),
479  null_start_pos_ptr,
480  "null_start_pos");
481  const auto null_end_pos_buf = cgen_state_->llInt(
482  reinterpret_cast<int64_t>(window_func_context->getNullValueEndPos()));
483  const auto null_end_pos_buf_ptr =
484  cgen_state_->ir_builder_.CreateIntToPtr(null_end_pos_buf, pi64_type);
485  const auto null_end_pos_ptr = cgen_state_->ir_builder_.CreateGEP(
486  get_int_type(64, cgen_state_->context_), null_end_pos_buf_ptr, partition_index_lv);
487  auto null_end_pos_lv = cgen_state_->ir_builder_.CreateLoad(
488  null_end_pos_ptr->getType()->getPointerElementType(),
489  null_end_pos_ptr,
490  "null_end_pos");
491 
492  std::string compute_row_idx_on_frame_func_name = "compute_";
493  compute_row_idx_on_frame_func_name += order_col_type_name + "_current_row_idx_in_frame";
494  auto cur_row_idx_in_frame_lv =
495  cgen_state_->emitCall(compute_row_idx_on_frame_func_name,
496  {num_elem_current_partition_lv,
497  current_row_pos_lv,
498  order_key_buf_ptr_lv,
499  target_partition_rowid_ptr_lv,
500  target_partition_sorted_rowid_ptr_lv,
501  order_key_col_null_val_lv,
502  null_start_pos_lv,
503  null_end_pos_lv});
504 
505  llvm::Value* frame_start_bound_lv{nullptr};
506  llvm::Value* frame_end_bound_lv{nullptr};
507  std::tie(frame_start_bound_lv, frame_end_bound_lv) =
508  codegenWindowFrameBound(window_func_context,
509  frame_start_bound,
510  frame_end_bound,
511  cur_row_idx_in_frame_lv,
512  cgen_state_->llInt((int64_t)0),
513  order_key_buf_ptr_lv,
514  order_key_col_null_val_lv,
515  frame_start_bound_expr_lv,
516  frame_end_bound_expr_lv,
517  num_elem_current_partition_lv,
518  target_partition_rowid_ptr_lv,
519  target_partition_sorted_rowid_ptr_lv,
520  null_start_pos_lv,
521  null_end_pos_lv,
522  code_generator);
523  CHECK(frame_start_bound_lv);
524  CHECK(frame_end_bound_lv);
525 
526  llvm::Value* modified_cur_row_idx_in_frame_lv{nullptr};
527  if (is_lag_in_frame) {
528  modified_cur_row_idx_in_frame_lv =
529  cgen_state_->ir_builder_.CreateSub(cur_row_idx_in_frame_lv, offset_lv);
530  } else {
531  modified_cur_row_idx_in_frame_lv =
532  cgen_state_->ir_builder_.CreateAdd(cur_row_idx_in_frame_lv, offset_lv);
533  }
534  CHECK(modified_cur_row_idx_in_frame_lv);
535 
536  std::string target_func_name = "get_";
537  target_func_name += target_col_type_name + "_value_";
538  target_func_name += target_col_logical_type_name + "_type_";
539  target_func_name += "in_frame";
540  res_lv = cgen_state_->emitCall(target_func_name,
541  {modified_cur_row_idx_in_frame_lv,
542  frame_start_bound_lv,
543  frame_end_bound_lv,
544  target_col_buf_lv,
545  target_partition_rowid_ptr_lv,
546  target_partition_sorted_rowid_ptr_lv,
547  logical_null_val_lv,
548  target_col_null_val_lv});
549  if (target_col_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
550  res_lv = cgen_state_->emitCall(
551  "encode_date",
552  {res_lv, logical_null_val_lv, cgen_state_->llInt((int64_t)kSecsPerDay)});
553  }
554  CHECK(res_lv);
555  return res_lv;
556 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
static constexpr int64_t kSecsPerDay
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBound(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *current_row_pos_lv, llvm::Value *current_partition_start_offset_lv, llvm::Value *order_key_buf_ptr_lv, llvm::Value *order_key_col_null_val_lv, llvm::Value *frame_start_bound_expr_lv, llvm::Value *frame_end_bound_expr_lv, llvm::Value *num_elem_current_partition_lv, llvm::Value *target_partition_rowid_ptr_lv, llvm::Value *target_partition_sorted_rowid_ptr_lv, llvm::Value *null_start_pos_lv, llvm::Value *null_end_pos_lv, CodeGenerator &code_generator)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1233
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:222
llvm::Value * get_null_value_by_size(CgenState *cgen_state, SQLTypeInfo col_ti)
llvm::Value * get_null_value_by_size_with_encoding(CgenState *cgen_state, SQLTypeInfo col_ti)

+ Here is the call graph for this function:

void Executor::codegenWindowFunctionStateInit ( llvm::Value *  aggregate_state)
private

Definition at line 283 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

283  {
285  const auto window_func_context =
287  const auto window_func = window_func_context->getWindowFunction();
288  const auto window_func_ti = get_adjusted_window_type_info(window_func);
289  const auto window_func_null_val =
290  window_func_ti.is_fp()
291  ? cgen_state_->inlineFpNull(window_func_ti)
292  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
293  llvm::Value* window_func_init_val;
294  if (window_func_context->getWindowFunction()->getKind() ==
296  switch (window_func_ti.get_type()) {
297  case kFLOAT: {
298  window_func_init_val = cgen_state_->llFp(float(0));
299  break;
300  }
301  case kDOUBLE: {
302  window_func_init_val = cgen_state_->llFp(double(0));
303  break;
304  }
305  default: {
306  window_func_init_val = cgen_state_->llInt(int64_t(0));
307  break;
308  }
309  }
310  } else {
311  window_func_init_val = window_func_null_val;
312  }
313  const auto pi32_type =
314  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
315  switch (window_func_ti.get_type()) {
316  case kDOUBLE: {
317  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
318  break;
319  }
320  case kFLOAT: {
321  aggregate_state =
322  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
323  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
324  break;
325  }
326  default: {
327  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
328  break;
329  }
330  }
331 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::BasicBlock * Executor::codegenWindowResetStateControlFlow ( )
private

Definition at line 254 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, WindowProjectNodeContext::getActiveWindowFunctionContext(), CodeGenerator::posArg(), and CodeGenerator::toBool().

254  {
256  const auto window_func_context =
258  const auto bitset = cgen_state_->llInt(
259  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
260  const auto min_val = cgen_state_->llInt(int64_t(0));
261  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
262  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
263  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
264  CodeGenerator code_generator(this);
265  const auto reset_state =
266  code_generator.toBool(cgen_state_->emitCall("bit_is_set",
267  {bitset,
268  code_generator.posArg(nullptr),
269  min_val,
270  max_val,
271  null_val,
272  null_bool_val}));
273  const auto reset_state_true_bb = llvm::BasicBlock::Create(
274  cgen_state_->context_, "reset_state.true", cgen_state_->current_func_);
275  const auto reset_state_false_bb = llvm::BasicBlock::Create(
276  cgen_state_->context_, "reset_state.false", cgen_state_->current_func_);
277  cgen_state_->ir_builder_.CreateCondBr(
278  reset_state, reset_state_true_bb, reset_state_false_bb);
279  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
280  return reset_state_false_bb;
281 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)

+ Here is the call graph for this function:

ResultSetPtr Executor::collectAllDeviceResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
)
private

Definition at line 2323 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), catalog_, collectAllDeviceShardedTopResults(), DEBUG_TIMER, SharedKernelContext::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, reduceMultiDeviceResults(), reduceSpeculativeTopN(), GroupByAndAggregate::shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and use_speculative_top_n().

Referenced by executeWorkUnitImpl().

2328  {
2329  auto timer = DEBUG_TIMER(__func__);
2330  auto& result_per_device = shared_context.getFragmentResults();
2331  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
2334  ra_exe_unit.target_exprs, query_mem_desc, device_type);
2335  }
2336  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
2337  try {
2338  return reduceSpeculativeTopN(
2339  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2340  } catch (const std::bad_alloc&) {
2341  throw SpeculativeTopNFailed("Failed during multi-device reduction.");
2342  }
2343  }
2344  const auto shard_count =
2345  device_type == ExecutorDeviceType::GPU
2347  : 0;
2348 
2349  if (shard_count && !result_per_device.empty()) {
2350  return collectAllDeviceShardedTopResults(shared_context, ra_exe_unit);
2351  }
2352  return reduceMultiDeviceResults(
2353  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2354 }
std::vector< Analyzer::Expr * > target_exprs
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1405
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1313
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1298
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
Definition: Execute.cpp:2438
QueryDescriptionType getQueryDescriptionType() const
ResultSetPtr build_row_for_empty_input(const std::vector< Analyzer::Expr * > &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2281
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define DEBUG_TIMER(name)
Definition: Logger.h:371
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ResultSetPtr Executor::collectAllDeviceShardedTopResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit 
) const
private

Definition at line 2438 of file Execute.cpp.

References blockSize(), catalog_, CHECK, CHECK_EQ, CHECK_LE, SharedKernelContext::getFragmentResults(), gridSize(), SortInfo::limit, SortInfo::offset, SortInfo::order_entries, anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), run_benchmark_import::result, and RelAlgExecutionUnit::sort_info.

Referenced by collectAllDeviceResults().

2440  {
2441  auto& result_per_device = shared_context.getFragmentResults();
2442  const auto first_result_set = result_per_device.front().first;
2443  CHECK(first_result_set);
2444  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
2445  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
2446  const auto top_n = ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
2447  top_query_mem_desc.setEntryCount(0);
2448  for (auto& result : result_per_device) {
2449  const auto result_set = result.first;
2450  CHECK(result_set);
2451  result_set->sort(ra_exe_unit.sort_info.order_entries, top_n, this);
2452  size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
2453  top_query_mem_desc.setEntryCount(new_entry_cnt);
2454  }
2455  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
2456  first_result_set->getDeviceType(),
2457  top_query_mem_desc,
2458  first_result_set->getRowSetMemOwner(),
2459  catalog_,
2460  blockSize(),
2461  gridSize());
2462  auto top_storage = top_result_set->allocateStorage();
2463  size_t top_output_row_idx{0};
2464  for (auto& result : result_per_device) {
2465  const auto result_set = result.first;
2466  CHECK(result_set);
2467  const auto& top_permutation = result_set->getPermutationBuffer();
2468  CHECK_LE(top_permutation.size(), top_n);
2469  if (top_query_mem_desc.didOutputColumnar()) {
2470  top_output_row_idx = permute_storage_columnar(result_set->getStorage(),
2471  result_set->getQueryMemDesc(),
2472  top_storage,
2473  top_output_row_idx,
2474  top_query_mem_desc,
2475  top_permutation);
2476  } else {
2477  top_output_row_idx = permute_storage_row_wise(result_set->getStorage(),
2478  top_storage,
2479  top_output_row_idx,
2480  top_query_mem_desc,
2481  top_permutation);
2482  }
2483  }
2484  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
2485  return top_result_set;
2486 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
const std::list< Analyzer::OrderEntry > order_entries
size_t permute_storage_row_wise(const ResultSetStorage *input_storage, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2417
const size_t limit
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1313
#define CHECK_LE(x, y)
Definition: Logger.h:233
unsigned gridSize() const
Definition: Execute.cpp:3812
size_t permute_storage_columnar(const ResultSetStorage *input_storage, const QueryMemoryDescriptor &input_query_mem_desc, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2367
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define CHECK(condition)
Definition: Logger.h:222
unsigned blockSize() const
Definition: Execute.cpp:3826
const size_t offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Executor::compileBody ( const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const GpuSharedMemoryContext gpu_smem_context = {} 
)
private

Definition at line 3215 of file NativeCodegen.cpp.

3219  {
3221 
3222  // Switch the code generation into a separate filter function if enabled.
3223  // Note that accesses to function arguments are still codegenned from the
3224  // row function's arguments, then later automatically forwarded and
3225  // remapped into filter function arguments by redeclareFilterFunction().
3226  cgen_state_->row_func_bb_ = cgen_state_->ir_builder_.GetInsertBlock();
3227  llvm::Value* loop_done{nullptr};
3228  std::unique_ptr<Executor::FetchCacheAnchor> fetch_cache_anchor;
3229  if (cgen_state_->filter_func_) {
3230  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3231  auto row_func_entry_bb = &cgen_state_->row_func_->getEntryBlock();
3232  cgen_state_->ir_builder_.SetInsertPoint(row_func_entry_bb,
3233  row_func_entry_bb->begin());
3234  loop_done = cgen_state_->ir_builder_.CreateAlloca(
3235  get_int_type(1, cgen_state_->context_), nullptr, "loop_done");
3236  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3237  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(true), loop_done);
3238  }
3239  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->filter_func_bb_);
3240  cgen_state_->current_func_ = cgen_state_->filter_func_;
3241  fetch_cache_anchor = std::make_unique<Executor::FetchCacheAnchor>(cgen_state_.get());
3242  }
3243 
3244  // generate the code for the filter
3245  std::vector<Analyzer::Expr*> primary_quals;
3246  std::vector<Analyzer::Expr*> deferred_quals;
3247  bool short_circuited = CodeGenerator::prioritizeQuals(
3248  ra_exe_unit, primary_quals, deferred_quals, plan_state_->hoisted_filters_);
3249  if (short_circuited) {
3250  VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
3251  << "short-circuited and deferred " << std::to_string(deferred_quals.size())
3252  << " quals";
3253  }
3254  llvm::Value* filter_lv = cgen_state_->llBool(true);
3255  CodeGenerator code_generator(this);
3256  for (auto expr : primary_quals) {
3257  // Generate the filter for primary quals
3258  auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
3259  filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
3260  }
3261  CHECK(filter_lv->getType()->isIntegerTy(1));
3262  llvm::BasicBlock* sc_false{nullptr};
3263  if (!deferred_quals.empty()) {
3264  auto sc_true = llvm::BasicBlock::Create(
3265  cgen_state_->context_, "sc_true", cgen_state_->current_func_);
3266  sc_false = llvm::BasicBlock::Create(
3267  cgen_state_->context_, "sc_false", cgen_state_->current_func_);
3268  cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
3269  cgen_state_->ir_builder_.SetInsertPoint(sc_false);
3270  if (ra_exe_unit.join_quals.empty()) {
3271  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
3272  }
3273  cgen_state_->ir_builder_.SetInsertPoint(sc_true);
3274  filter_lv = cgen_state_->llBool(true);
3275  }
3276  for (auto expr : deferred_quals) {
3277  filter_lv = cgen_state_->ir_builder_.CreateAnd(
3278  filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
3279  }
3280 
3281  CHECK(filter_lv->getType()->isIntegerTy(1));
3282  auto ret = group_by_and_aggregate.codegen(
3283  filter_lv, sc_false, query_mem_desc, co, gpu_smem_context);
3284 
3285  // Switch the code generation back to the row function if a filter
3286  // function was enabled.
3287  if (cgen_state_->filter_func_) {
3288  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3289  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(false), loop_done);
3290  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3291  }
3292 
3293  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3294  cgen_state_->current_func_ = cgen_state_->row_func_;
3295  cgen_state_->filter_func_call_ =
3296  cgen_state_->ir_builder_.CreateCall(cgen_state_->filter_func_, {});
3297 
3298  // Create real filter function declaration after placeholder call
3299  // is emitted.
3301 
3302  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3303  auto loop_done_true = llvm::BasicBlock::Create(
3304  cgen_state_->context_, "loop_done_true", cgen_state_->row_func_);
3305  auto loop_done_false = llvm::BasicBlock::Create(
3306  cgen_state_->context_, "loop_done_false", cgen_state_->row_func_);
3307  auto loop_done_flag = cgen_state_->ir_builder_.CreateLoad(
3308  loop_done->getType()->getPointerElementType(), loop_done);
3309  cgen_state_->ir_builder_.CreateCondBr(
3310  loop_done_flag, loop_done_true, loop_done_false);
3311  cgen_state_->ir_builder_.SetInsertPoint(loop_done_true);
3312  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3313  cgen_state_->ir_builder_.SetInsertPoint(loop_done_false);
3314  } else {
3315  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3316  }
3317  }
3318  return ret;
3319 }
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1258
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1288
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr * > &primary_quals, std::vector< Analyzer::Expr * > &deferred_quals, const PlanState::HoistedFiltersSet &hoisted_quals)
Definition: LogicalIR.cpp:157
#define CHECK(condition)
Definition: Logger.h:222
void redeclareFilterFunction()
Definition: IRCodegen.cpp:995
#define VLOG(n)
Definition: Logger.h:316
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const PlanState::DeletedColumnsMap deleted_cols_map,
const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap column_cache,
RenderInfo render_info = nullptr 
)
private

Definition at line 2668 of file NativeCodegen.cpp.

2680  {
2681  auto timer = DEBUG_TIMER(__func__);
2682 
2684  const auto cuda_mgr = data_mgr_->getCudaMgr();
2685  if (!cuda_mgr) {
2686  throw QueryMustRunOnCpu();
2687  }
2688  }
2689 
2690 #ifndef NDEBUG
2691  static std::uint64_t counter = 0;
2692  ++counter;
2693  VLOG(1) << "CODEGEN #" << counter << ":";
2694  LOG(IR) << "CODEGEN #" << counter << ":";
2695  LOG(PTX) << "CODEGEN #" << counter << ":";
2696  LOG(ASM) << "CODEGEN #" << counter << ":";
2697 #endif
2698 
2699  // cgenstate_manager uses RAII pattern to manage the lifetime of
2700  // CgenState instances.
2701  Executor::CgenStateManager cgenstate_manager(*this,
2702  allow_lazy_fetch,
2703  query_infos,
2704  deleted_cols_map,
2705  &ra_exe_unit); // locks compilation_mutex
2706 
2707  addTransientStringLiterals(ra_exe_unit, row_set_mem_owner);
2708 
2709  GroupByAndAggregate group_by_and_aggregate(
2710  this,
2711  co.device_type,
2712  ra_exe_unit,
2713  query_infos,
2714  row_set_mem_owner,
2715  has_cardinality_estimation ? std::optional<int64_t>(max_groups_buffer_entry_guess)
2716  : std::nullopt);
2717  auto query_mem_desc =
2718  group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
2719  max_groups_buffer_entry_guess,
2720  crt_min_byte_width,
2721  render_info,
2723 
2724  if (query_mem_desc->getQueryDescriptionType() ==
2726  !has_cardinality_estimation && (!render_info || !render_info->isInSitu()) &&
2727  !eo.just_explain) {
2728  const auto col_range_info = group_by_and_aggregate.getColRangeInfo();
2729  throw CardinalityEstimationRequired(col_range_info.max - col_range_info.min);
2730  }
2731 
2732  const bool output_columnar = query_mem_desc->didOutputColumnar();
2733  const bool gpu_shared_mem_optimization =
2735  ra_exe_unit,
2736  cuda_mgr,
2737  co.device_type,
2738  cuda_mgr ? this->blockSize() : 1,
2739  cuda_mgr ? this->numBlocksPerMP() : 1);
2740  if (gpu_shared_mem_optimization) {
2741  // disable interleaved bins optimization on the GPU
2742  query_mem_desc->setHasInterleavedBinsOnGpu(false);
2743  LOG(DEBUG1) << "GPU shared memory is used for the " +
2744  query_mem_desc->queryDescTypeToString() + " query(" +
2745  std::to_string(get_shared_memory_size(gpu_shared_mem_optimization,
2746  query_mem_desc.get())) +
2747  " out of " + std::to_string(g_gpu_smem_threshold) + " bytes).";
2748  }
2749 
2750  const GpuSharedMemoryContext gpu_smem_context(
2751  get_shared_memory_size(gpu_shared_mem_optimization, query_mem_desc.get()));
2752 
2754  const size_t num_count_distinct_descs =
2755  query_mem_desc->getCountDistinctDescriptorsSize();
2756  for (size_t i = 0; i < num_count_distinct_descs; i++) {
2757  const auto& count_distinct_descriptor =
2758  query_mem_desc->getCountDistinctDescriptor(i);
2759  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::UnorderedSet ||
2760  (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
2761  !co.hoist_literals)) {
2762  throw QueryMustRunOnCpu();
2763  }
2764  }
2765 
2766  // we currently do not support varlen projection based on baseline groupby when
2767  // 1) target table is multi-fragmented and 2) multiple gpus are involved for query
2768  // processing. In this case, we punt the query to cpu to avoid server crash
2769  for (const auto expr : ra_exe_unit.target_exprs) {
2770  if (auto gby_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
2771  bool has_multiple_gpus = cuda_mgr ? cuda_mgr->getDeviceCount() > 1 : false;
2772  if (gby_expr->get_aggtype() == SQLAgg::kSAMPLE && has_multiple_gpus &&
2773  !g_leaf_count) {
2774  std::set<const Analyzer::ColumnVar*,
2775  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
2777  gby_expr->collect_column_var(colvar_set, true);
2778  for (const auto cv : colvar_set) {
2779  if (cv->get_type_info().is_varlen()) {
2780  const auto tbl_id = cv->get_table_id();
2781  std::for_each(query_infos.begin(),
2782  query_infos.end(),
2783  [tbl_id](const InputTableInfo& input_table_info) {
2784  if (input_table_info.table_id == tbl_id &&
2785  input_table_info.info.fragments.size() > 1) {
2786  throw QueryMustRunOnCpu();
2787  }
2788  });
2789  }
2790  }
2791  }
2792  }
2793  }
2794  }
2795 
2796  // Read the module template and target either CPU or GPU
2797  // by binding the stream position functions to the right implementation:
2798  // stride access for GPU, contiguous for CPU
2799  CHECK(cgen_state_->module_ == nullptr);
2800  cgen_state_->set_module_shallow_copy(get_rt_module(), /*always_clone=*/true);
2801 
2802  auto is_gpu = co.device_type == ExecutorDeviceType::GPU;
2803  if (is_gpu) {
2804  cgen_state_->module_->setDataLayout(get_gpu_data_layout());
2805  cgen_state_->module_->setTargetTriple(get_gpu_target_triple_string());
2806  }
2807  if (has_udf_module(/*is_gpu=*/is_gpu)) {
2809  get_udf_module(/*is_gpu=*/is_gpu), *cgen_state_->module_, cgen_state_.get());
2810  }
2811  if (has_rt_udf_module(/*is_gpu=*/is_gpu)) {
2813  get_rt_udf_module(/*is_gpu=*/is_gpu), *cgen_state_->module_, cgen_state_.get());
2814  }
2815 
2817 
2818  auto agg_fnames =
2819  get_agg_fnames(ra_exe_unit.target_exprs, !ra_exe_unit.groupby_exprs.empty());
2820 
2821  const auto agg_slot_count = ra_exe_unit.estimator ? size_t(1) : agg_fnames.size();
2822 
2823  const bool is_group_by{query_mem_desc->isGroupBy()};
2824  auto [query_func, row_func_call] = is_group_by
2826  co.hoist_literals,
2827  *query_mem_desc,
2828  co.device_type,
2829  ra_exe_unit.scan_limit,
2830  gpu_smem_context)
2831  : query_template(cgen_state_->module_,
2832  agg_slot_count,
2833  co.hoist_literals,
2834  !!ra_exe_unit.estimator,
2835  gpu_smem_context);
2836  bind_pos_placeholders("pos_start", true, query_func, cgen_state_->module_);
2837  bind_pos_placeholders("group_buff_idx", false, query_func, cgen_state_->module_);
2838  bind_pos_placeholders("pos_step", false, query_func, cgen_state_->module_);
2839 
2840  cgen_state_->query_func_ = query_func;
2841  cgen_state_->row_func_call_ = row_func_call;
2842  cgen_state_->query_func_entry_ir_builder_.SetInsertPoint(
2843  &query_func->getEntryBlock().front());
2844 
2845  // Generate the function signature and column head fetches s.t.
2846  // double indirection isn't needed in the inner loop
2847  auto& fetch_bb = query_func->front();
2848  llvm::IRBuilder<> fetch_ir_builder(&fetch_bb);
2849  fetch_ir_builder.SetInsertPoint(&*fetch_bb.begin());
2850  auto col_heads = generate_column_heads_load(ra_exe_unit.input_col_descs.size(),
2851  query_func->args().begin(),
2852  fetch_ir_builder,
2853  cgen_state_->context_);
2854  CHECK_EQ(ra_exe_unit.input_col_descs.size(), col_heads.size());
2855 
2856  cgen_state_->row_func_ = create_row_function(ra_exe_unit.input_col_descs.size(),
2857  is_group_by ? 0 : agg_slot_count,
2858  co.hoist_literals,
2859  cgen_state_->module_,
2860  cgen_state_->context_);
2861  CHECK(cgen_state_->row_func_);
2862  cgen_state_->row_func_bb_ =
2863  llvm::BasicBlock::Create(cgen_state_->context_, "entry", cgen_state_->row_func_);
2864 
2866  auto filter_func_ft =
2867  llvm::FunctionType::get(get_int_type(32, cgen_state_->context_), {}, false);
2868  cgen_state_->filter_func_ = llvm::Function::Create(filter_func_ft,
2869  llvm::Function::ExternalLinkage,
2870  "filter_func",
2871  cgen_state_->module_);
2872  CHECK(cgen_state_->filter_func_);
2873  cgen_state_->filter_func_bb_ = llvm::BasicBlock::Create(
2874  cgen_state_->context_, "entry", cgen_state_->filter_func_);
2875  }
2876 
2877  cgen_state_->current_func_ = cgen_state_->row_func_;
2878  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
2879 
2880  preloadFragOffsets(ra_exe_unit.input_descs, query_infos);
2881  RelAlgExecutionUnit body_execution_unit = ra_exe_unit;
2882  const auto join_loops =
2883  buildJoinLoops(body_execution_unit, co, eo, query_infos, column_cache);
2884 
2885  plan_state_->allocateLocalColumnIds(ra_exe_unit.input_col_descs);
2886  for (auto& simple_qual : ra_exe_unit.simple_quals) {
2887  plan_state_->addSimpleQual(simple_qual);
2888  }
2889  const auto is_not_deleted_bb = codegenSkipDeletedOuterTableRow(ra_exe_unit, co);
2890  if (is_not_deleted_bb) {
2891  cgen_state_->row_func_bb_ = is_not_deleted_bb;
2892  }
2893  if (!join_loops.empty()) {
2894  codegenJoinLoops(join_loops,
2895  body_execution_unit,
2896  group_by_and_aggregate,
2897  query_func,
2898  cgen_state_->row_func_bb_,
2899  *(query_mem_desc.get()),
2900  co,
2901  eo);
2902  } else {
2903  const bool can_return_error = compileBody(
2904  ra_exe_unit, group_by_and_aggregate, *query_mem_desc, co, gpu_smem_context);
2905  if (can_return_error || cgen_state_->needs_error_check_ || eo.with_dynamic_watchdog ||
2907  createErrorCheckControlFlow(query_func,
2910  join_loops,
2911  co.device_type,
2912  group_by_and_aggregate.query_infos_);
2913  }
2914  }
2915  std::vector<llvm::Value*> hoisted_literals;
2916 
2917  if (co.hoist_literals) {
2918  VLOG(1) << "number of hoisted literals: "
2919  << cgen_state_->query_func_literal_loads_.size()
2920  << " / literal buffer usage: " << cgen_state_->getLiteralBufferUsage(0)
2921  << " bytes";
2922  }
2923 
2924  if (co.hoist_literals && !cgen_state_->query_func_literal_loads_.empty()) {
2925  // we have some hoisted literals...
2926  hoisted_literals = inlineHoistedLiterals();
2927  }
2928 
2929  // replace the row func placeholder call with the call to the actual row func
2930  std::vector<llvm::Value*> row_func_args;
2931  for (size_t i = 0; i < cgen_state_->row_func_call_->getNumOperands() - 1; ++i) {
2932  row_func_args.push_back(cgen_state_->row_func_call_->getArgOperand(i));
2933  }
2934  row_func_args.insert(row_func_args.end(), col_heads.begin(), col_heads.end());
2935  row_func_args.push_back(get_arg_by_name(query_func, "join_hash_tables"));
2936  // push hoisted literals arguments, if any
2937  row_func_args.insert(
2938  row_func_args.end(), hoisted_literals.begin(), hoisted_literals.end());
2939  llvm::ReplaceInstWithInst(
2940  cgen_state_->row_func_call_,
2941  llvm::CallInst::Create(cgen_state_->row_func_, row_func_args, ""));
2942 
2943  // replace the filter func placeholder call with the call to the actual filter func
2944  if (cgen_state_->filter_func_) {
2945  std::vector<llvm::Value*> filter_func_args;
2946  for (auto arg_it = cgen_state_->filter_func_args_.begin();
2947  arg_it != cgen_state_->filter_func_args_.end();
2948  ++arg_it) {
2949  filter_func_args.push_back(arg_it->first);
2950  }
2951  llvm::ReplaceInstWithInst(
2952  cgen_state_->filter_func_call_,
2953  llvm::CallInst::Create(cgen_state_->filter_func_, filter_func_args, ""));
2954  }
2955 
2956  // Aggregate
2957  plan_state_->init_agg_vals_ =
2958  init_agg_val_vec(ra_exe_unit.target_exprs, ra_exe_unit.quals, *query_mem_desc);
2959 
2960  /*
2961  * If we have decided to use GPU shared memory (decision is not made here), then
2962  * we generate proper code for extra components that it needs (buffer initialization and
2963  * gpu reduction from shared memory to global memory). We then replace these functions
2964  * into the already compiled query_func (replacing two placeholders, write_back_nop and
2965  * init_smem_nop). The rest of the code should be as before (row_func, etc.).
2966  */
2967  if (gpu_smem_context.isSharedMemoryUsed()) {
2968  if (query_mem_desc->getQueryDescriptionType() ==
2970  GpuSharedMemCodeBuilder gpu_smem_code(
2971  cgen_state_->module_,
2972  cgen_state_->context_,
2973  *query_mem_desc,
2975  plan_state_->init_agg_vals_,
2976  executor_id_);
2977  gpu_smem_code.codegen();
2978  gpu_smem_code.injectFunctionsInto(query_func);
2979 
2980  // helper functions are used for caching purposes later
2981  cgen_state_->helper_functions_.push_back(gpu_smem_code.getReductionFunction());
2982  cgen_state_->helper_functions_.push_back(gpu_smem_code.getInitFunction());
2983  LOG(IR) << gpu_smem_code.toString();
2984  }
2985  }
2986 
2987  auto multifrag_query_func = cgen_state_->module_->getFunction(
2988  "multifrag_query" + std::string(co.hoist_literals ? "_hoisted_literals" : ""));
2989  CHECK(multifrag_query_func);
2990 
2993  multifrag_query_func, co.hoist_literals, eo.allow_runtime_query_interrupt);
2994  }
2995 
2996  bind_query(query_func,
2997  "query_stub" + std::string(co.hoist_literals ? "_hoisted_literals" : ""),
2998  multifrag_query_func,
2999  cgen_state_->module_);
3000 
3001  std::vector<llvm::Function*> root_funcs{query_func, cgen_state_->row_func_};
3002  if (cgen_state_->filter_func_) {
3003  root_funcs.push_back(cgen_state_->filter_func_);
3004  }
3005  auto live_funcs = CodeGenerator::markDeadRuntimeFuncs(
3006  *cgen_state_->module_, root_funcs, {multifrag_query_func});
3007 
3008  // Always inline the row function and the filter function.
3009  // We don't want register spills in the inner loops.
3010  // LLVM seems to correctly free up alloca instructions
3011  // in these functions even when they are inlined.
3013  if (cgen_state_->filter_func_) {
3015  }
3016 
3017 #ifndef NDEBUG
3018  // Add helpful metadata to the LLVM IR for debugging.
3020 #endif
3021 
3022  // Serialize the important LLVM IR functions to text for SQL EXPLAIN.
3023  std::string llvm_ir;
3024  if (eo.just_explain) {
3026 #ifdef WITH_JIT_DEBUG
3027  throw std::runtime_error(
3028  "Explain optimized not available when JIT runtime debug symbols are enabled");
3029 #else
3030  // Note that we don't run the NVVM reflect pass here. Use LOG(IR) to get the
3031  // optimized IR after NVVM reflect
3032  llvm::legacy::PassManager pass_manager;
3033  optimize_ir(query_func,
3034  cgen_state_->module_,
3035  pass_manager,
3036  live_funcs,
3037  gpu_smem_context.isSharedMemoryUsed(),
3038  co);
3039 #endif // WITH_JIT_DEBUG
3040  }
3041  llvm_ir =
3042  serialize_llvm_object(multifrag_query_func) + serialize_l