OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Executor Class Reference

#include <Execute.h>

+ Collaboration diagram for Executor:

Classes

class  CgenStateManager
 
struct  ExecutorMutexHolder
 
class  FetchCacheAnchor
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

enum  ExtModuleKinds {
  ExtModuleKinds::template_module, ExtModuleKinds::udf_cpu_module, ExtModuleKinds::udf_gpu_module, ExtModuleKinds::rt_udf_cpu_module,
  ExtModuleKinds::rt_udf_gpu_module, ExtModuleKinds::rt_geos_module, ExtModuleKinds::rt_libdevice_module
}
 
using ExecutorId = size_t
 
using CachedCardinality = std::pair< bool, size_t >
 

Public Member Functions

 Executor (const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
 
void clearCaches (bool runtime_only=false)
 
std::string dumpCache () const
 
void reset (bool discard_runtime_modules_only=false)
 
const std::unique_ptr
< llvm::Module > & 
get_rt_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_rt_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_geos_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_libdevice_module () const
 
bool has_rt_module () const
 
bool has_udf_module (bool is_gpu=false) const
 
bool has_rt_udf_module (bool is_gpu=false) const
 
bool has_geos_module () const
 
bool has_libdevice_module () const
 
const TemporaryTables * getTemporaryTables ()
 
StringDictionaryProxy * getStringDictionaryProxy (const shared::StringDictKey &dict_key, const bool with_generation) const
 
StringDictionaryProxy * getStringDictionaryProxy (const shared::StringDictKey &dict_key, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap *
getStringProxyTranslationMap (const shared::StringDictKey &source_dict_key, const shared::StringDictKey &dest_dict_key, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap *
getJoinIntersectionStringProxyTranslationMap (const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
 
const
StringDictionaryProxy::TranslationMap
< Datum > * 
getStringProxyNumericTranslationMap (const shared::StringDictKey &source_dict_key, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
Data_Namespace::DataMgr * getDataMgr () const
 
const std::shared_ptr
< RowSetMemoryOwner >
getRowSetMemoryOwner () const
 
const TemporaryTables * getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const shared::TableKey &table_key) const
 
const TableGeneration & getTableGeneration (const shared::TableKey &table_key) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow (const std::set< shared::TableKey > &table_keys_to_fetch) const
 
bool hasLazyFetchColumns (const std::vector< Analyzer::Expr * > &target_exprs) const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void interrupt (const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
 
void resetInterrupt ()
 
void enableRuntimeQueryInterrupt (const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
 
int8_t warpSize () const
 
unsigned gridSize () const
 
void setGridSize (unsigned grid_size)
 
void resetGridSize ()
 
unsigned numBlocksPerMP () const
 
unsigned blockSize () const
 
void setBlockSize (unsigned block_size)
 
void resetBlockSize ()
 
size_t maxGpuSlabSize () const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
TableUpdateMetadata executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
 
void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
 
int deviceCount (const ExecutorDeviceType) const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< shared::TableKey > &phys_table_keys)
 
void setColRangeCache (const AggregatedColRange &aggregated_col_range)
 
ExecutorId getExecutorId () const
 
QuerySessionId & getCurrentQuerySession (heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
QuerySessionStatus::QueryStatus getQuerySessionStatus (const QuerySessionId &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkCurrentQuerySession (const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
void invalidateRunningQuerySession (heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool addToQuerySessionList (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool removeFromQuerySessionList (const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
void setQuerySessionAsInterrupted (const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool checkIsQuerySessionInterrupted (const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkIsQuerySessionEnrolled (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool updateQuerySessionStatusWithLock (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool updateQuerySessionExecutorAssignment (const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
std::vector< QuerySessionStatus > getQuerySessionInfo (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
heavyai::shared_mutex & getSessionLock ()
 
CurrentQueryStatus attachExecutorToQuerySession (const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
 
void checkPendingQueryStatus (const QuerySessionId &query_session)
 
void clearQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str)
 
void updateQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
 
void enrollQuerySession (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
 
size_t getNumCurentSessionsEnrolled () const
 
const std::vector< size_t > getExecutorIdsRunningQuery (const QuerySessionId &interrupt_session) const
 
bool checkNonKernelTimeInterrupted () const
 
void registerExtractedQueryPlanDag (const QueryPlanDAG &query_plan_dag)
 
const QueryPlanDAG getLatestQueryPlanDagExtracted () const
 
void addToCardinalityCache (const std::string &cache_key, const size_t cache_value)
 
CachedCardinality getCachedCardinality (const std::string &cache_key)
 
heavyai::shared_mutex & getDataRecyclerLock ()
 
QueryPlanDagCache & getQueryPlanDagCache ()
 
ResultSetRecyclerHolder & getRecultSetRecyclerHolder ()
 
CgenState * getCgenStatePtr () const
 
PlanState * getPlanStatePtr () const
 
llvm::LLVMContext & getContext ()
 
void update_extension_modules (bool update_runtime_modules_only=false)
 

Static Public Member Functions

static void clearExternalCaches (bool for_update, const TableDescriptor *td, const int current_db_id)
 
template<typename F >
static void registerExtensionFunctions (F register_extension_functions)
 
static std::shared_ptr< Executor > getExecutor (const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static size_t getArenaBlockSize ()
 
static void addUdfIrToModule (const std::string &udf_ir_filename, const bool is_cuda_ir)
 
static void initialize_extension_module_sources ()
 
static void registerActiveModule (void *module, const int device_id)
 
static void unregisterActiveModule (const int device_id)
 
static std::pair< int64_t,
int32_t > 
reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void update_after_registration (bool update_runtime_modules_only=false)
 
static size_t getBaselineThreshold (bool for_count_distinct, ExecutorDeviceType device_type)
 

Public Attributes

std::mutex compilation_mutex_
 

Static Public Attributes

static const ExecutorId UNITARY_EXECUTOR_ID = 0
 
static const ExecutorId INVALID_EXECUTOR_ID = SIZE_MAX
 
static std::map
< ExtModuleKinds, std::string > 
extension_module_sources
 
static const size_t high_scan_limit
 
static const int32_t ERR_DIV_BY_ZERO {1}
 
static const int32_t ERR_OUT_OF_GPU_MEM {2}
 
static const int32_t ERR_OUT_OF_SLOTS {3}
 
static const int32_t ERR_UNSUPPORTED_SELF_JOIN {4}
 
static const int32_t ERR_OUT_OF_RENDER_MEM {5}
 
static const int32_t ERR_OUT_OF_CPU_MEM {6}
 
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW {7}
 
static const int32_t ERR_OUT_OF_TIME {9}
 
static const int32_t ERR_INTERRUPTED {10}
 
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11}
 
static const int32_t ERR_TOO_MANY_LITERALS {12}
 
static const int32_t ERR_STRING_CONST_IN_RESULTSET {13}
 
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14}
 
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES {15}
 
static const int32_t ERR_GEOS {16}
 
static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT {17}
 
static std::mutex register_runtime_extension_functions_mutex_
 
static std::mutex kernel_mutex_
 

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenConditionalAggregateCondValSelector (llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
 
llvm::Value * codegenWindowFunctionAggregate (const CompilationOptions &co)
 
llvm::BasicBlock * codegenWindowResetStateControlFlow ()
 
void codegenWindowFunctionStateInit (llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
llvm::Value * codegenWindowNavigationFunctionOnFrame (const CompilationOptions &co)
 
llvm::Value * codegenCurrentPartitionIndex (const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
 
llvm::Value * codegenFrameBoundExpr (const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
 
llvm::Value * codegenFrameBound (bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
 
std::pair< std::string,
llvm::Value * > 
codegenLoadOrderKeyBufPtr (WindowFunctionContext *window_func_context) const
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenFrameNullRange (WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
 
WindowPartitionBufferPtrs codegenLoadPartitionBuffers (WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenWindowFrameBounds (WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenFrameBoundRange (const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
 
std::vector< llvm::Value * > prepareRowModeFuncArgs (bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const
 
std::vector< llvm::Value * > prepareRangeModeFuncArgs (bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
 
const std::string getOrderKeyTypeName (WindowFunctionContext *window_func_context) const
 
llvm::Value * codegenLoadCurrentValueFromColBuf (WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
 
size_t getOrderKeySize (WindowFunctionContext *window_func_context) const
 
const SQLTypeInfo getFirstOrderColTypeInfo (WindowFunctionContext *window_func_context) const
 
std::string getFramingFuncName (const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
 
void codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val)
 
llvm::Value * codegenAggregateWindowState ()
 
llvm::Value * aggregateWindowStatePtr ()
 
CudaMgr_Namespace::CudaMgr * cudaMgr () const
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
bool needLinearizeAllFragments (const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
 Compiles and dispatches a work unit per fragment, processing the results with the per-fragment callback. Currently used for computing metrics over fragments (metadata). More...
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo)
 Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps. More...
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
 
std::unordered_map
< shared::TableKey, const
Analyzer::BinOper * > 
getInnerTabIdToJoinCond () const
 
std::vector< std::unique_ptr
< ExecutionKernel > > 
createKernels (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
 
void launchKernels (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< shared::TableKey, const TableFragments * > &selected_tables_fragments, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
FetchResult fetchUnionChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
std::pair< std::vector
< std::vector< int64_t >
>, std::vector< std::vector
< uint64_t > > > 
getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
void buildSelectedFragsMappingForUnion (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const shared::TableKey &outer_table_key, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
 
ResultSetPtr resultsUnion (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< int8_t * > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
void AutoTrackBuffersInRuntimeIR ()
 
std::tuple< CompilationResult,
std::unique_ptr
< QueryMemoryDescriptor > > 
compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoop > buildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
 
std::function< llvm::Value
*(const std::vector
< llvm::Value * >
&, llvm::Value *)> 
buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< HashJoin > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
void redeclareFilterFunction ()
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
 
void insertErrorCodeChecker (llvm::Function *query_func, bool hoist_literals, bool allow_runtime_query_interrupt)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
 
std::shared_ptr
< CompilationContext >
optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::shared_ptr
< CompilationContext >
optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
std::tuple
< RelAlgExecutionUnit,
PlanState::DeletedColumnsMap >
addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
bool isFragmentFullyDeleted (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &fragment)
 
FragmentSkipStatus canSkipFragmentForFpQual (const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (const std::unordered_set< shared::TableKey > &phys_table_keys)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
const std::unique_ptr
< llvm::Module > & 
get_extension_module (ExtModuleKinds kind) const
 
bool has_extension_module (ExtModuleKinds kind) const
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 
ExecutorMutexHolder acquireExecuteMutex ()
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

const ExecutorId executor_id_
 
std::unique_ptr
< llvm::LLVMContext > 
context_
 
std::unique_ptr< CgenState > cgen_state_
 
std::map< ExtModuleKinds,
std::unique_ptr< llvm::Module > > 
extension_modules_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr
< RowSetMemoryOwner >
row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::atomic< bool > interrupted_
 
std::mutex str_dict_mutex_
 
std::unique_ptr
< llvm::TargetMachine > 
nvptx_target_machine_
 
unsigned block_size_x_
 
unsigned grid_size_x_
 
const size_t max_gpu_slab_size_
 
const std::string debug_dir_
 
const std::string debug_file_
 
Data_Namespace::DataMgr * data_mgr_
 
const TemporaryTables * temporary_tables_
 
TableIdToNodeMap table_id_to_node_map_
 
int64_t kernel_queue_time_ms_ = 0
 
int64_t compilation_queue_time_ms_ = 0
 
std::unique_ptr
< WindowProjectNodeContext >
window_project_node_context_owned_
 
WindowFunctionContext * active_window_function_ {nullptr}
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
TableGenerations table_generations_
 
QuerySessionId current_query_session_
 

Static Private Attributes

static const int max_gpu_count {16}
 
static std::mutex gpu_active_modules_mutex_
 
static uint32_t gpu_active_modules_device_mask_ {0x0}
 
static void * gpu_active_modules_ [max_gpu_count]
 
static const size_t baseline_threshold
 
static heavyai::shared_mutex executor_session_mutex_
 
static InterruptFlagMap queries_interrupt_flag_
 
static QuerySessionMap queries_session_map_
 
static std::map< int,
std::shared_ptr< Executor > > 
executors_
 
static heavyai::shared_mutex execute_mutex_
 
static heavyai::shared_mutex executors_cache_mutex_
 
static QueryPlanDagCache query_plan_dag_cache_
 
static heavyai::shared_mutex recycler_mutex_
 
static std::unordered_map
< std::string, size_t > 
cardinality_cache_
 
static ResultSetRecyclerHolder resultset_recycler_holder_
 
static QueryPlanDAG latest_query_plan_extracted_ {EMPTY_QUERY_PLAN}
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
struct DiamondCodegen
 
class ExecutionKernel
 
class KernelSubtask
 
class HashJoin
 
class OverlapsJoinHashTable
 
class RangeJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class StringDictionaryTranslationMgr
 
class LeafAggregator
 
class PerfectJoinHashTable
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
class WindowProjectNodeContext
 

Detailed Description

Definition at line 365 of file Execute.h.

Member Typedef Documentation

using Executor::CachedCardinality = std::pair<bool, size_t>

Definition at line 1250 of file Execute.h.

using Executor::ExecutorId = size_t

Definition at line 372 of file Execute.h.

Definition at line 779 of file Execute.h.

Member Enumeration Documentation

Enumerator
template_module 
udf_cpu_module 
udf_gpu_module 
rt_udf_cpu_module 
rt_udf_gpu_module 
rt_geos_module 
rt_libdevice_module 

Definition at line 466 of file Execute.h.

466  {
467  template_module, // RuntimeFunctions.bc
468  udf_cpu_module, // Load-time UDFs for CPU execution
469  udf_gpu_module, // Load-time UDFs for GPU execution
470  rt_udf_cpu_module, // Run-time UDF/UDTFs for CPU execution
471  rt_udf_gpu_module, // Run-time UDF/UDTFs for GPU execution
472  rt_geos_module, // geos functions
473  rt_libdevice_module // math library functions for GPU execution
474  };
std::unique_ptr< llvm::Module > udf_gpu_module
std::unique_ptr< llvm::Module > udf_cpu_module

Constructor & Destructor Documentation

Executor::Executor ( const ExecutorId  id,
Data_Namespace::DataMgr *  data_mgr,
const size_t  block_size_x,
const size_t  grid_size_x,
const size_t  max_gpu_slab_size,
const std::string &  debug_dir,
const std::string &  debug_file 
)

Definition at line 244 of file Execute.cpp.

251  : executor_id_(executor_id)
252  , context_(new llvm::LLVMContext())
253  , cgen_state_(new CgenState({}, false, this))
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
const ExecutorId executor_id_
Definition: Execute.h:1291
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1292

Member Function Documentation

ExecutorMutexHolder Executor::acquireExecuteMutex ( )
inlineprivate

Definition at line 1405 of file Execute.h.

References execute_mutex_, executor_id_, Executor::ExecutorMutexHolder::shared_lock, Executor::ExecutorMutexHolder::unique_lock, and UNITARY_EXECUTOR_ID.

1405  {
1406  ExecutorMutexHolder ret;
1408  // Only one unitary executor can run at a time
1410  } else {
1412  }
1413  return ret;
1414  }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1399
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1291
std::unique_lock< T > unique_lock
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:373
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > Executor::addDeletedColumn ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3930 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::add_deleted_col_to_map(), CHECK, CompilationOptions::filter_on_deleted_column, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and TABLE.

Referenced by executeWorkUnitImpl(), and executeWorkUnitPerFragment().

3932  {
3933  if (!co.filter_on_deleted_column) {
3934  return std::make_tuple(ra_exe_unit, PlanState::DeletedColumnsMap{});
3935  }
3936  auto ra_exe_unit_with_deleted = ra_exe_unit;
3937  PlanState::DeletedColumnsMap deleted_cols_map;
3938  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
3939  if (input_table.getSourceType() != InputSourceType::TABLE) {
3940  continue;
3941  }
3942  const auto& table_key = input_table.getTableKey();
3943  const auto catalog =
3945  CHECK(catalog);
3946  const auto td = catalog->getMetadataForTable(table_key.table_id);
3947  CHECK(td);
3948  const auto deleted_cd = catalog->getDeletedColumnIfRowsDeleted(td);
3949  if (!deleted_cd) {
3950  continue;
3951  }
3952  CHECK(deleted_cd->columnType.is_boolean());
3953  // check deleted column is not already present
3954  bool found = false;
3955  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
3956  if (input_col.get()->getColId() == deleted_cd->columnId &&
3957  input_col.get()->getScanDesc().getTableKey() == table_key &&
3958  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
3959  found = true;
3960  add_deleted_col_to_map(deleted_cols_map, deleted_cd, table_key);
3961  break;
3962  }
3963  }
3964  if (!found) {
3965  // add deleted column
3966  ra_exe_unit_with_deleted.input_col_descs.emplace_back(
3967  new InputColDescriptor(deleted_cd->columnId,
3968  deleted_cd->tableId,
3969  table_key.db_id,
3970  input_table.getNestLevel()));
3971  add_deleted_col_to_map(deleted_cols_map, deleted_cd, table_key);
3972  }
3973  }
3974  return std::make_tuple(ra_exe_unit_with_deleted, deleted_cols_map);
3975 }
std::unordered_map< shared::TableKey, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:43
static SysCatalog & instance()
Definition: SysCatalog.h:343
void add_deleted_col_to_map(PlanState::DeletedColumnsMap &deleted_cols_map, const ColumnDescriptor *deleted_cd, const shared::TableKey &table_key)
Definition: Execute.cpp:3918
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 1119 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

1120  {
1122  // Iterators are added for loop-outer joins when the head of the loop is generated,
1123  // then once again when the body is generated. Allow this instead of special handling
1124  // of call sites.
1125  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
1126  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
1127  return it->second;
1128  }
1129  CHECK(!prev_iters.empty());
1130  llvm::Value* matching_row_index = prev_iters.back();
1131  const auto it_ok =
1132  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
1133  CHECK(it_ok.second);
1134  return matching_row_index;
1135 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291
void Executor::addToCardinalityCache ( const std::string &  cache_key,
const size_t  cache_value 
)

Definition at line 4744 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, recycler_mutex_, and VLOG.

4745  {
4748  cardinality_cache_[cache_key] = cache_value;
4749  VLOG(1) << "Put estimated cardinality to the cache";
4750  }
4751 }
static std::unordered_map< std::string, size_t > cardinality_cache_
Definition: Execute.h:1420
std::unique_lock< T > unique_lock
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1419
bool g_use_estimator_result_cache
Definition: Execute.cpp:127
#define VLOG(n)
Definition: Logger.h:387
bool Executor::addToQuerySessionList ( const QuerySessionId &  query_session,
const std::string &  query_str,
const std::string &  submitted,
const size_t  executor_id,
const QuerySessionStatus::QueryStatus  query_status,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 4575 of file Execute.cpp.

References queries_interrupt_flag_, and queries_session_map_.

Referenced by enrollQuerySession().

4581  {
4582  // an internal API that enrolls the query session into the Executor's session map
4583  if (queries_session_map_.count(query_session)) {
4584  if (queries_session_map_.at(query_session).count(submitted_time_str)) {
4585  queries_session_map_.at(query_session).erase(submitted_time_str);
4586  queries_session_map_.at(query_session)
4587  .emplace(submitted_time_str,
4588  QuerySessionStatus(query_session,
4589  executor_id,
4590  query_str,
4591  submitted_time_str,
4592  query_status));
4593  } else {
4594  queries_session_map_.at(query_session)
4595  .emplace(submitted_time_str,
4596  QuerySessionStatus(query_session,
4597  executor_id,
4598  query_str,
4599  submitted_time_str,
4600  query_status));
4601  }
4602  } else {
4603  std::map<std::string, QuerySessionStatus> executor_per_query_map;
4604  executor_per_query_map.emplace(
4605  submitted_time_str,
4607  query_session, executor_id, query_str, submitted_time_str, query_status));
4608  queries_session_map_.emplace(query_session, executor_per_query_map);
4609  }
4610  return queries_interrupt_flag_.emplace(query_session, false).second;
4611 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1394
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1392

+ Here is the caller graph for this function:

void Executor::addTransientStringLiterals ( const RelAlgExecutionUnit ra_exe_unit,
const std::shared_ptr< RowSetMemoryOwner > &  row_set_mem_owner 
)

Definition at line 2132 of file Execute.cpp.

References CHECK, getStringDictionaryProxy(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kMODE, kSAMPLE, kSINGLE_VALUE, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_union, and ScalarExprVisitor< T >::visit().

2134  {
2135  TransientDictIdVisitor dict_id_visitor;
2136 
2137  auto visit_expr =
2138  [this, &dict_id_visitor, &row_set_mem_owner](const Analyzer::Expr* expr) {
2139  if (!expr) {
2140  return;
2141  }
2142  const auto& dict_key = dict_id_visitor.visit(expr);
2143  if (dict_key.dict_id >= 0) {
2144  auto sdp = getStringDictionaryProxy(dict_key, row_set_mem_owner, true);
2145  CHECK(sdp);
2146  TransientStringLiteralsVisitor visitor(sdp, this);
2147  visitor.visit(expr);
2148  }
2149  };
2150 
2151  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2152  visit_expr(group_expr.get());
2153  }
2154 
2155  for (const auto& group_expr : ra_exe_unit.quals) {
2156  visit_expr(group_expr.get());
2157  }
2158 
2159  for (const auto& group_expr : ra_exe_unit.simple_quals) {
2160  visit_expr(group_expr.get());
2161  }
2162 
2163  const auto visit_target_expr = [&](const Analyzer::Expr* target_expr) {
2164  const auto& target_type = target_expr->get_type_info();
2165  if (!target_type.is_string() || target_type.get_compression() == kENCODING_DICT) {
2166  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2167  if (agg_expr) {
2168  // The following agg types require taking into account transient string values
2169  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kSINGLE_VALUE ||
2170  agg_expr->get_aggtype() == kSAMPLE || agg_expr->get_aggtype() == kMODE) {
2171  visit_expr(agg_expr->get_arg());
2172  }
2173  } else {
2174  visit_expr(target_expr);
2175  }
2176  }
2177  };
2178  const auto& target_exprs = ra_exe_unit.target_exprs;
2179  std::for_each(target_exprs.begin(), target_exprs.end(), visit_target_expr);
2180  const auto& target_exprs_union = ra_exe_unit.target_exprs_union;
2181  std::for_each(target_exprs_union.begin(), target_exprs_union.end(), visit_target_expr);
2182 }
std::vector< Analyzer::Expr * > target_exprs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
T visit(const Analyzer::Expr *expr) const
StringDictionaryProxy * getStringDictionaryProxy(const shared::StringDictKey &dict_key, const bool with_generation) const
Definition: Execute.h:526
std::vector< Analyzer::Expr * > target_exprs_union
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:83
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

void Executor::addUdfIrToModule ( const std::string &  udf_ir_filename,
const bool  is_cuda_ir 
)
static

Definition at line 1897 of file NativeCodegen.cpp.

Referenced by DBHandler::initialize().

1898  {
1902  udf_ir_filename;
1903 }
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:476

+ Here is the caller graph for this function:

llvm::Value * Executor::aggregateWindowStatePtr ( )
private

Definition at line 226 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.

226  {
228  const auto window_func_context =
230  const auto window_func = window_func_context->getWindowFunction();
231  const auto arg_ti = get_adjusted_window_type_info(window_func);
232  llvm::Type* aggregate_state_type =
233  arg_ti.get_type() == kFLOAT
234  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
235  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
236  const auto aggregate_state_i64 = cgen_state_->llInt(
237  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
238  return cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_i64,
239  aggregate_state_type);
240 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1292
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inlinestaticprivate

Definition at line 1283 of file Execute.h.

Referenced by serializeLiterals().

1283  {
1284  size_t off = off_in;
1285  if (off % alignment != 0) {
1286  off += (alignment - off % alignment);
1287  }
1288  return off;
1289  }

+ Here is the caller graph for this function:

CurrentQueryStatus Executor::attachExecutorToQuerySession ( const QuerySessionId query_session_id,
const std::string &  query_str,
const std::string &  query_submitted_time 
)

Definition at line 4473 of file Execute.cpp.

References executor_id_, executor_session_mutex_, updateQuerySessionExecutorAssignment(), and updateQuerySessionStatusWithLock().

4476  {
4477  if (!query_session_id.empty()) {
4478  // if session is valid, do update 1) the exact executor id and 2) query status
4481  query_session_id, query_submitted_time, executor_id_, write_lock);
4482  updateQuerySessionStatusWithLock(query_session_id,
4483  query_submitted_time,
4484  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR,
4485  write_lock);
4486  }
4487  return {query_session_id, query_str};
4488 }
heavyai::unique_lock< heavyai::shared_mutex > write_lock
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4613
const ExecutorId executor_id_
Definition: Execute.h:1291
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4639
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1388

+ Here is the call graph for this function:

void Executor::AutoTrackBuffersInRuntimeIR ( )
private

Definition at line 2240 of file NativeCodegen.cpp.

2240  {
2241  llvm::Module* M = cgen_state_->module_;
2242  if (M->getFunction("allocate_varlen_buffer") == nullptr)
2243  return;
2244 
2245  // read metadata
2246  bool should_track = false;
2247  auto* flag = M->getModuleFlag("manage_memory_buffer");
2248  if (auto* cnt = llvm::mdconst::extract_or_null<llvm::ConstantInt>(flag)) {
2249  if (cnt->getZExtValue() == 1) {
2250  should_track = true;
2251  }
2252  }
2253 
2254  if (!should_track) {
2255  // metadata is not present
2256  return;
2257  }
2258 
2259  LOG(INFO) << "Found 'manage_memory_buffer' metadata.";
2260  llvm::SmallVector<llvm::CallInst*, 4> calls_to_analyze;
2261 
2262  for (llvm::Function& F : *M) {
2263  for (llvm::BasicBlock& BB : F) {
2264  for (llvm::Instruction& I : BB) {
2265  if (llvm::CallInst* CI = llvm::dyn_cast<llvm::CallInst>(&I)) {
2266  // Keep track of calls to "allocate_varlen_buffer" for later processing
2267  llvm::Function* called = CI->getCalledFunction();
2268  if (called) {
2269  if (called->getName() == "allocate_varlen_buffer") {
2270  calls_to_analyze.push_back(CI);
2271  }
2272  }
2273  }
2274  }
2275  }
2276  }
2277 
2278  // for each call to "allocate_varlen_buffer", check if there's a corresponding
2279  // call to "register_buffer_with_executor_rsm". If not, add a call to it
2280  llvm::IRBuilder<> Builder(cgen_state_->context_);
2281  auto i64 = get_int_type(64, cgen_state_->context_);
2282  auto i8p = get_int_ptr_type(8, cgen_state_->context_);
2283  auto void_ = llvm::Type::getVoidTy(cgen_state_->context_);
2284  llvm::FunctionType* fnty = llvm::FunctionType::get(void_, {i64, i8p}, false);
2285  llvm::FunctionCallee register_buffer_fn =
2286  M->getOrInsertFunction("register_buffer_with_executor_rsm", fnty, {});
2287 
2288  int64_t executor_addr = reinterpret_cast<int64_t>(this);
2289  for (llvm::CallInst* CI : calls_to_analyze) {
2290  bool found = false;
2291  // for each user of the function, check if it's a callinst
2292  // and if the callinst is calling "register_buffer_with_executor_rsm"
2293  // if no such instruction exists, add one registering the buffer
2294  for (llvm::User* U : CI->users()) {
2295  if (llvm::CallInst* call = llvm::dyn_cast<llvm::CallInst>(U)) {
2296  if (call->getCalledFunction() and
2297  call->getCalledFunction()->getName() == "register_buffer_with_executor_rsm") {
2298  found = true;
2299  break;
2300  }
2301  }
2302  }
2303  if (!found) {
2304  Builder.SetInsertPoint(CI->getNextNode());
2305  Builder.CreateCall(register_buffer_fn,
2306  {ll_int(executor_addr, cgen_state_->context_), CI});
2307  }
2308  }
2309 }
#define LOG(tag)
Definition: Logger.h:285
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Type * get_int_ptr_type(const int width, llvm::LLVMContext &context)
unsigned Executor::blockSize ( ) const

Definition at line 3821 of file Execute.cpp.

References block_size_x_, CHECK, data_mgr_, CudaMgr_Namespace::CudaMgr::getAllDeviceProperties(), and Data_Namespace::DataMgr::getCudaMgr().

Referenced by collectAllDeviceShardedTopResults(), executePlanWithGroupBy(), executePlanWithoutGroupBy(), executeTableFunction(), executeWorkUnitImpl(), reduceMultiDeviceResults(), reduceMultiDeviceResultSets(), and resultsUnion().

3821  {
3822  CHECK(data_mgr_);
3823  const auto cuda_mgr = data_mgr_->getCudaMgr();
3824  if (!cuda_mgr) {
3825  return 0;
3826  }
3827  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
3828  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
3829 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:224
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1372
unsigned block_size_x_
Definition: Execute.h:1366
#define CHECK(condition)
Definition: Logger.h:291
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< HashJoin > Executor::buildCurrentLevelHashTable ( const JoinCondition current_level_join_conditions,
size_t  level_idx,
RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 960 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), AUTOMATIC_IR_METADATA, anonymous_namespace{IRCodegen.cpp}::check_valid_join_qual(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, RelAlgExecutionUnit::hash_table_build_plan_dag, IS_EQUIVALENCE, LEFT, OneToOne, JoinCondition::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::table_id_to_node_map, JoinCondition::type, and VLOG.

967  {
969  std::shared_ptr<HashJoin> current_level_hash_table;
970  auto handleNonHashtableQual = [&ra_exe_unit, &level_idx, this](
971  JoinType join_type,
972  std::shared_ptr<Analyzer::Expr> qual) {
973  if (join_type == JoinType::LEFT) {
974  plan_state_->addNonHashtableQualForLeftJoin(level_idx, qual);
975  } else {
976  add_qualifier_to_execution_unit(ra_exe_unit, qual);
977  }
978  };
979  for (const auto& join_qual : current_level_join_conditions.quals) {
980  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
981  if (current_level_hash_table || !qual_bin_oper ||
982  !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
983  handleNonHashtableQual(current_level_join_conditions.type, join_qual);
984  if (!current_level_hash_table) {
985  fail_reasons.emplace_back("No equijoin expression found");
986  }
987  continue;
988  }
989  check_valid_join_qual(qual_bin_oper);
990  JoinHashTableOrError hash_table_or_error;
991  if (!current_level_hash_table) {
992  hash_table_or_error = buildHashTableForQualifier(
993  qual_bin_oper,
994  query_infos,
997  current_level_join_conditions.type,
999  column_cache,
1000  ra_exe_unit.hash_table_build_plan_dag,
1001  ra_exe_unit.query_hint,
1002  ra_exe_unit.table_id_to_node_map);
1003  current_level_hash_table = hash_table_or_error.hash_table;
1004  }
1005  if (hash_table_or_error.hash_table) {
1006  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
1007  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
1008  } else {
1009  fail_reasons.push_back(hash_table_or_error.fail_reason);
1010  if (!current_level_hash_table) {
1011  VLOG(2) << "Building a hashtable based on a qual " << qual_bin_oper->toString()
1012  << " fails: " << hash_table_or_error.fail_reason;
1013  }
1014  handleNonHashtableQual(current_level_join_conditions.type, qual_bin_oper);
1015  }
1016  }
1017  return current_level_hash_table;
1018 }
JoinType
Definition: sqldefs.h:165
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:69
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
TableIdToNodeMap table_id_to_node_map
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1347
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
Definition: IRCodegen.cpp:474
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:3765
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define VLOG(n)
Definition: Logger.h:387
HashTableBuildDagMap hash_table_build_plan_dag
void check_valid_join_qual(std::shared_ptr< Analyzer::BinOper > &bin_oper)
Definition: IRCodegen.cpp:525

+ Here is the call graph for this function:

Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
ColumnCacheMap &  column_cache,
const HashTableBuildDagMap &  hashtable_build_dag_map,
const RegisteredQueryHint &  query_hint,
const TableIdToNodeMap &  table_id_to_node_map 
)
private

Definition at line 3765 of file Execute.cpp.

References deviceCountForMemoryLevel(), ERR_INTERRUPTED, g_enable_dynamic_watchdog, g_enable_overlaps_hashjoin, HashJoin::getInstance(), and interrupted_.

3774  {
3775  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
3776  return {nullptr, "Overlaps hash join disabled, attempting to fall back to loop join"};
3777  }
3778  if (g_enable_dynamic_watchdog && interrupted_.load()) {
3780  }
3781  try {
3782  auto tbl = HashJoin::getInstance(qual_bin_oper,
3783  query_infos,
3784  memory_level,
3785  join_type,
3786  preferred_hash_type,
3787  deviceCountForMemoryLevel(memory_level),
3788  column_cache,
3789  this,
3790  hashtable_build_dag_map,
3791  query_hint,
3792  table_id_to_node_map);
3793  return {tbl, ""};
3794  } catch (const HashJoinFail& e) {
3795  return {nullptr, e.what()};
3796  }
3797 }
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1436
std::atomic< bool > interrupted_
Definition: Execute.h:1357
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:80
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:1047
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
Definition: HashJoin.cpp:283

+ Here is the call graph for this function:

JoinLoop::HoistedFiltersCallback Executor::buildHoistLeftHandSideFiltersCb ( const RelAlgExecutionUnit ra_exe_unit,
const size_t  level_idx,
const shared::TableKey inner_table_key,
const CompilationOptions co 
)
private

Definition at line 792 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CodeGenerator::codegen(), g_enable_left_join_filter_hoisting, RelAlgExecutionUnit::join_quals, LEFT, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, CodeGenerator::toBool(), and VLOG.

796  {
798  return nullptr;
799  }
800 
801  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
802  if (level_idx == 0 && current_level_join_conditions.type == JoinType::LEFT) {
803  const auto& condition = current_level_join_conditions.quals.front();
804  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(condition.get());
805  CHECK(bin_oper) << condition->toString();
806  const auto rhs =
807  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_right_operand());
808  const auto lhs =
809  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_left_operand());
810  if (lhs && rhs && lhs->getTableKey() != rhs->getTableKey()) {
811  const Analyzer::ColumnVar* selected_lhs{nullptr};
812  // grab the left hand side column -- this is somewhat similar to normalize column
813  // pair, and a better solution may be to hoist that function out of the join
814  // framework and normalize columns at the top of build join loops
815  if (lhs->getTableKey() == inner_table_id) {
816  selected_lhs = rhs;
817  } else if (rhs->getTableKey() == inner_table_id) {
818  selected_lhs = lhs;
819  }
820  if (selected_lhs) {
821  std::list<std::shared_ptr<Analyzer::Expr>> hoisted_quals;
822  // get all LHS-only filters
823  auto should_hoist_qual = [&hoisted_quals](const auto& qual,
824  const shared::TableKey& table_key) {
825  CHECK(qual);
826 
827  ExprTableIdVisitor visitor;
828  const auto table_keys = visitor.visit(qual.get());
829  if (table_keys.size() == 1 && table_keys.find(table_key) != table_keys.end()) {
830  hoisted_quals.push_back(qual);
831  }
832  };
833  for (const auto& qual : ra_exe_unit.simple_quals) {
834  should_hoist_qual(qual, selected_lhs->getTableKey());
835  }
836  for (const auto& qual : ra_exe_unit.quals) {
837  should_hoist_qual(qual, selected_lhs->getTableKey());
838  }
839 
840  // build the filters callback and return it
841  if (!hoisted_quals.empty()) {
842  return [this, hoisted_quals, co](llvm::BasicBlock* true_bb,
843  llvm::BasicBlock* exit_bb,
844  const std::string& loop_name,
845  llvm::Function* parent_func,
846  CgenState* cgen_state) -> llvm::BasicBlock* {
847  // make sure we have quals to hoist
848  bool has_quals_to_hoist = false;
849  for (const auto& qual : hoisted_quals) {
850  // check to see if the filter was previously hoisted. if all filters were
851  // previously hoisted, this callback becomes a noop
852  if (plan_state_->hoisted_filters_.count(qual) == 0) {
853  has_quals_to_hoist = true;
854  break;
855  }
856  }
857 
858  if (!has_quals_to_hoist) {
859  return nullptr;
860  }
861 
862  AUTOMATIC_IR_METADATA(cgen_state);
863 
864  llvm::IRBuilder<>& builder = cgen_state->ir_builder_;
865  auto& context = builder.getContext();
866 
867  const auto filter_bb =
868  llvm::BasicBlock::Create(context,
869  "hoisted_left_join_filters_" + loop_name,
870  parent_func,
871  /*insert_before=*/true_bb);
872  builder.SetInsertPoint(filter_bb);
873 
874  llvm::Value* filter_lv = cgen_state_->llBool(true);
875  CodeGenerator code_generator(this);
877  for (const auto& qual : hoisted_quals) {
878  if (plan_state_->hoisted_filters_.insert(qual).second) {
879  // qual was inserted into the hoisted filters map, which means we have not
880  // seen this qual before. Generate filter.
881  VLOG(1) << "Generating code for hoisted left hand side qualifier "
882  << qual->toString();
883  auto cond = code_generator.toBool(
884  code_generator.codegen(qual.get(), true, co).front());
885  filter_lv = builder.CreateAnd(filter_lv, cond);
886  }
887  }
888  CHECK(filter_lv->getType()->isIntegerTy(1));
889 
890  builder.CreateCondBr(filter_lv, true_bb, exit_bb);
891  return filter_bb;
892  };
893  }
894  }
895  }
896  }
897  return nullptr;
898 }
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:100
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1347
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:291
#define VLOG(n)
Definition: Logger.h:387
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  level_idx,
const CompilationOptions &  co 
)
private

Definition at line 901 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, CodeGenerator::codegen(), CompilationOptions::filter_on_deleted_column, RelAlgExecutionUnit::input_descs, TABLE, and CodeGenerator::toBool().

// Builds the callback the join loop uses to ask whether the row of the input at
// level_idx + 1 that the current join iterator points to is marked as deleted.
// Returns nullptr (no callback) when co.filter_on_deleted_column is false, when that
// input is not a TABLE source, or when the table has no deleted column.
// NOTE(review): source line 904 is absent from this rendered listing.
903  {
905  if (!co.filter_on_deleted_column) {
906  return nullptr;
907  }
908  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
909  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
910  if (input_desc.getSourceType() != InputSourceType::TABLE) {
911  return nullptr;
912  }
913 
914  const auto deleted_cd = plan_state_->getDeletedColForTable(input_desc.getTableKey());
915  if (!deleted_cd) {
916  return nullptr;
917  }
918  CHECK(deleted_cd->columnType.is_boolean());
// Column expression for the deleted flag of this input, evaluated inside the callback.
919  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(
920  deleted_cd->columnType,
921  shared::ColumnKey{input_desc.getTableKey(), deleted_cd->columnId},
922  input_desc.getNestLevel());
// The callback captures co by reference and deleted_expr/level_idx by value.
923  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
924  llvm::Value* have_more_inner_rows) {
925  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
926  // Avoid fetching the deleted column from a position which is not valid.
927  // An invalid position can be returned by a one to one hash lookup (negative)
928  // or at the end of iteration over a set of matching values.
929  llvm::Value* is_valid_it{nullptr};
930  if (have_more_inner_rows) {
931  is_valid_it = have_more_inner_rows;
932  } else {
// Without a caller-supplied flag, a non-negative row index is treated as valid.
933  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
934  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
935  }
// Branch on validity: "it_valid" reads the deleted column, "it_not_valid" supplies a
// constant false, and both join in "row_is_deleted" through the PHI created below.
936  const auto it_valid_bb = llvm::BasicBlock::Create(
937  cgen_state_->context_, "it_valid", cgen_state_->current_func_);
938  const auto it_not_valid_bb = llvm::BasicBlock::Create(
939  cgen_state_->context_, "it_not_valid", cgen_state_->current_func_);
940  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
941  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
942  cgen_state_->context_, "row_is_deleted", cgen_state_->current_func_);
943  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
944  CodeGenerator code_generator(this);
945  const auto row_is_deleted = code_generator.toBool(
946  code_generator.codegen(deleted_expr.get(), true, co).front());
947  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
948  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
949  const auto row_is_deleted_default = cgen_state_->llBool(false);
950  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
951  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
952  auto row_is_deleted_or_default =
953  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
954  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
955  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
956  return row_is_deleted_or_default;
957  };
958 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1347
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:303
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1119
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const ExecutionOptions &  eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache 
)
private

Definition at line 544 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), INJECT_TIMER, CgenState::ir_builder_, RelAlgExecutionUnit::join_quals, LEFT, PlanState::left_join_non_hashtable_quals_, CgenState::llBool(), MultiSet, OneToOne, CgenState::outer_join_match_found_per_level_, CodeGenerator::plan_state_, Set, Singleton, JoinLoopDomain::slot_lookup_result, CodeGenerator::toBool(), and JoinLoopDomain::values_buffer.

// Builds one JoinLoop per entry of ra_exe_unit.join_quals. For every level it first
// tries to build a hash table; on success it emits a Singleton loop (OneToOne hash
// type), a MultiSet loop (RangeJoinHashTable) or a Set loop (any other hash table).
// When no hash table could be built it falls back to an UpperBound nested-loop join.
// NOTE(review): source lines 550, 551 and 692 are absent from this rendered listing.
549  {
552  std::vector<JoinLoop> join_loops;
553  for (size_t level_idx = 0, current_hash_table_idx = 0;
554  level_idx < ra_exe_unit.join_quals.size();
555  ++level_idx) {
556  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
557  std::vector<std::string> fail_reasons;
558  const auto current_level_hash_table =
559  buildCurrentLevelHashTable(current_level_join_conditions,
560  level_idx,
561  ra_exe_unit,
562  co,
563  query_infos,
564  column_cache,
565  fail_reasons);
// Stores the "found outer join matches" value for this level; the slot must still be
// unset when the callback runs.
566  const auto found_outer_join_matches_cb =
567  [this, level_idx](llvm::Value* found_outer_join_matches) {
568  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
569  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
570  cgen_state_->outer_join_match_found_per_level_[level_idx] =
571  found_outer_join_matches;
572  };
573  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
574  auto rem_left_join_quals_it =
575  plan_state_->left_join_non_hashtable_quals_.find(level_idx);
576  bool has_remaining_left_join_quals =
577  rem_left_join_quals_it != plan_state_->left_join_non_hashtable_quals_.end() &&
578  !rem_left_join_quals_it->second.empty();
// ANDs together the left join quals registered for this level in
// left_join_non_hashtable_quals_; yields constant true when none are registered.
579  const auto outer_join_condition_remaining_quals_cb =
580  [this, level_idx, &co](const std::vector<llvm::Value*>& prev_iters) {
581  // when we have multiple quals for the left join in the current join level
582  // we first try to build a hashtable by using one of the possible qual,
583  // and deal with remaining quals as extra join conditions
584  FetchCacheAnchor anchor(cgen_state_.get());
585  addJoinLoopIterator(prev_iters, level_idx + 1);
586  llvm::Value* left_join_cond = cgen_state_->llBool(true);
587  CodeGenerator code_generator(this);
588  auto it = plan_state_->left_join_non_hashtable_quals_.find(level_idx);
589  if (it != plan_state_->left_join_non_hashtable_quals_.end()) {
590  for (auto expr : it->second) {
591  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
592  left_join_cond,
593  code_generator.toBool(
594  code_generator.codegen(expr.get(), true, co).front()));
595  }
596  }
597  return left_join_cond;
598  };
599  if (current_level_hash_table) {
600  const auto hoisted_filters_cb = buildHoistLeftHandSideFiltersCb(
601  ra_exe_unit, level_idx, current_level_hash_table->getInnerTableId(), co);
602  if (current_level_hash_table->getHashType() == HashType::OneToOne) {
// One-to-one hash table: the iteration domain is a single slot lookup result.
603  join_loops.emplace_back(
604  /*kind=*/JoinLoopKind::Singleton,
605  /*type=*/current_level_join_conditions.type,
606  /*iteration_domain_codegen=*/
607  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
608  const std::vector<llvm::Value*>& prev_iters) {
609  addJoinLoopIterator(prev_iters, level_idx);
610  JoinLoopDomain domain{{0}};
611  domain.slot_lookup_result =
612  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
613  return domain;
614  },
615  /*outer_condition_match=*/
// Only this branch additionally requires has_remaining_left_join_quals before
// installing the remaining-quals callback; the MultiSet and Set branches below
// install it for every LEFT join.
616  current_level_join_conditions.type == JoinType::LEFT &&
617  has_remaining_left_join_quals
618  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
619  outer_join_condition_remaining_quals_cb)
620  : nullptr,
621  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
622  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
623  : nullptr,
624  /*hoisted_filters=*/hoisted_filters_cb,
625  /*is_deleted=*/is_deleted_cb,
626  /*nested_loop_join=*/false);
627  } else if (auto range_join_table =
628  dynamic_cast<RangeJoinHashTable*>(current_level_hash_table.get())) {
// Range join hash table: the matching set is generated relative to the most recent
// outer iterator (prev_iters.back()); hoisted filters are not wired up here.
629  join_loops.emplace_back(
630  /* kind= */ JoinLoopKind::MultiSet,
631  /* type= */ current_level_join_conditions.type,
632  /* iteration_domain_codegen= */
633  [this,
634  range_join_table,
635  current_hash_table_idx,
636  level_idx,
637  current_level_hash_table,
638  &co](const std::vector<llvm::Value*>& prev_iters) {
639  addJoinLoopIterator(prev_iters, level_idx);
640  JoinLoopDomain domain{{0}};
641  CHECK(!prev_iters.empty());
642  const auto matching_set = range_join_table->codegenMatchingSetWithOffset(
643  co, current_hash_table_idx, prev_iters.back());
644  domain.values_buffer = matching_set.elements;
645  domain.element_count = matching_set.count;
646  return domain;
647  },
648  /* outer_condition_match= */
649  current_level_join_conditions.type == JoinType::LEFT
650  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
651  outer_join_condition_remaining_quals_cb)
652  : nullptr,
653  /* found_outer_matches= */
654  current_level_join_conditions.type == JoinType::LEFT
655  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
656  : nullptr,
657  /* hoisted_filters= */ nullptr, // <<! TODO
658  /* is_deleted= */ is_deleted_cb,
659  /*nested_loop_join=*/false);
660  } else {
// Any other hash table: iterate over the matching set (elements buffer + count).
661  join_loops.emplace_back(
662  /*kind=*/JoinLoopKind::Set,
663  /*type=*/current_level_join_conditions.type,
664  /*iteration_domain_codegen=*/
665  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
666  const std::vector<llvm::Value*>& prev_iters) {
667  addJoinLoopIterator(prev_iters, level_idx);
668  JoinLoopDomain domain{{0}};
669  const auto matching_set = current_level_hash_table->codegenMatchingSet(
670  co, current_hash_table_idx);
671  domain.values_buffer = matching_set.elements;
672  domain.element_count = matching_set.count;
673  return domain;
674  },
675  /*outer_condition_match=*/
676  current_level_join_conditions.type == JoinType::LEFT
677  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
678  outer_join_condition_remaining_quals_cb)
679  : nullptr,
680  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
681  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
682  : nullptr,
683  /*hoisted_filters=*/hoisted_filters_cb,
684  /*is_deleted=*/is_deleted_cb,
685  /*nested_loop_join=*/false);
686  }
// The hash table index advances only for levels that actually produced a hash table.
687  ++current_hash_table_idx;
688  } else {
689  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
690  ? "No equijoin expression found"
691  : boost::algorithm::join(fail_reasons, " | ");
// NOTE(review): source line 692 (the start of the call whose arguments follow) is
// absent from this listing; presumably check_if_loop_join_is_allowed( — confirm.
693  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
694  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
695  // condition.
696  VLOG(1) << "Unable to build hash table, falling back to loop join: "
697  << fail_reasons_str;
698  const auto outer_join_condition_cb =
699  [this, level_idx, &co, &current_level_join_conditions](
700  const std::vector<llvm::Value*>& prev_iters) {
701  // The values generated for the match path don't dominate all uses
702  // since on the non-match path nulls are generated. Reset the cache
703  // once the condition is generated to avoid incorrect reuse.
704  FetchCacheAnchor anchor(cgen_state_.get());
705  addJoinLoopIterator(prev_iters, level_idx + 1);
706  llvm::Value* left_join_cond = cgen_state_->llBool(true);
707  CodeGenerator code_generator(this);
708  for (auto expr : current_level_join_conditions.quals) {
709  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
710  left_join_cond,
711  code_generator.toBool(
712  code_generator.codegen(expr.get(), true, co).front()));
713  }
714  return left_join_cond;
715  };
// Nested-loop fallback: the upper bound is loaded from element level_idx + 1 of the
// row function's "num_rows_per_scan" argument.
716  join_loops.emplace_back(
717  /*kind=*/JoinLoopKind::UpperBound,
718  /*type=*/current_level_join_conditions.type,
719  /*iteration_domain_codegen=*/
720  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
721  addJoinLoopIterator(prev_iters, level_idx);
722  JoinLoopDomain domain{{0}};
723  auto* arg = get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan");
724  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
725  arg->getType()->getScalarType()->getPointerElementType(),
726  arg,
727  cgen_state_->llInt(int32_t(level_idx + 1)));
728  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(
729  rows_per_scan_ptr->getType()->getPointerElementType(),
730  rows_per_scan_ptr,
731  "num_rows_per_scan");
732  return domain;
733  },
734  /*outer_condition_match=*/
735  current_level_join_conditions.type == JoinType::LEFT
736  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
737  outer_join_condition_cb)
738  : nullptr,
739  /*found_outer_matches=*/
740  current_level_join_conditions.type == JoinType::LEFT
741  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
742  : nullptr,
743  /*hoisted_filters=*/nullptr,
744  /*is_deleted=*/is_deleted_cb,
745  /*nested_loop_join=*/true);
746  }
747  }
748  return join_loops;
749 }
llvm::Value * values_buffer
Definition: JoinLoop.h:49
std::string join(T const &container, std::string const &delim)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
#define INJECT_TIMER(DESC)
Definition: measure.h:93
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1347
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * slot_lookup_result
Definition: JoinLoop.h:47
#define CHECK_LT(x, y)
Definition: Logger.h:303
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:960
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1119
#define CHECK(condition)
Definition: Logger.h:291
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
Definition: IRCodegen.cpp:484
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:544
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:901
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
Definition: IRCodegen.cpp:792
#define VLOG(n)
Definition: Logger.h:387

+ Here is the call graph for this function:

void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3234 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LT, getFragmentCount(), RelAlgExecutionUnit::input_descs, and plan_state_.

Referenced by fetchChunks().

// For every input descriptor (scan_idx), appends getFragmentCount(...) to
// selected_fragments_crossjoin and records, for each column in col_global_ids that
// belongs to that input (same table key and same nest level), the input's position in
// local_col_to_frag_pos, indexed by the column's local id.
3239  {
3240  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
3241  size_t frag_pos{0};
3242  const auto& input_descs = ra_exe_unit.input_descs;
3243  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3244  const auto& table_key = input_descs[scan_idx].getTableKey();
// selected_fragments is expected to be ordered like input_descs.
3245  CHECK_EQ(selected_fragments[scan_idx].table_key, table_key);
3246  selected_fragments_crossjoin.push_back(
3247  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
3248  for (const auto& col_id : col_global_ids) {
3249  CHECK(col_id);
3250  const auto& input_desc = col_id->getScanDesc();
// Skip columns that belong to a different input than the one being processed.
3251  if (input_desc.getTableKey() != table_key ||
3252  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
3253  continue;
3254  }
3255  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3256  CHECK(it != plan_state_->global_to_local_col_ids_.end());
// The local id must be a valid index into local_col_to_frag_pos (resized above).
3257  CHECK_LT(static_cast<size_t>(it->second),
3258  plan_state_->global_to_local_col_ids_.size());
3259  local_col_to_frag_pos[it->second] = frag_pos;
3260  }
3261  ++frag_pos;
3262  }
3263 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< InputDescriptor > input_descs
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1347
#define CHECK_LT(x, y)
Definition: Logger.h:303
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3220
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::buildSelectedFragsMappingForUnion ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3265 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs.

Referenced by fetchUnionChunks().

// Appends a single-element vector {1} to selected_fragments_crossjoin for every input
// descriptor whose table key equals the table key of selected_fragments[0].
// NOTE(review): selected_fragments[0] is read without a visible emptiness check —
// confirm callers always pass a non-empty list.
3268  {
3269  const auto& input_descs = ra_exe_unit.input_descs;
3270  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3271  // selected_fragments is set in assignFragsToKernelDispatch execution_kernel.fragments
3272  if (selected_fragments[0].table_key == input_descs[scan_idx].getTableKey()) {
3273  selected_fragments_crossjoin.push_back({size_t(1)});
3274  }
3275  }
3276 }
std::vector< InputDescriptor > input_descs

+ Here is the caller graph for this function:

FragmentSkipStatus Executor::canSkipFragmentForFpQual ( const Analyzer::BinOper comp_expr,
const Analyzer::ColumnVar lhs_col,
const Fragmenter_Namespace::FragmentInfo fragment,
const Analyzer::Constant rhs_const 
) const
private

Definition at line 4053 of file Execute.cpp.

References CHECK, shared::ColumnKey::column_id, extract_max_stat_fp_type(), extract_min_stat_fp_type(), Analyzer::Constant::get_constval(), Analyzer::BinOper::get_optype(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), Analyzer::ColumnVar::getColumnKey(), INVALID, kDOUBLE, kEQ, kFLOAT, kGE, kGT, kLE, kLT, NOT_SKIPPABLE, and SKIPPABLE.

Referenced by skipFragment().

// Compares the chunk min/max statistics of lhs_col in the given fragment with the
// floating point constant rhs_const, according to the operator of comp_expr
// (kGE, kGT, kLE, kLT, kEQ; other operators fall through the switch).
// NOTE(review): source lines 4061, 4069, 4084, 4089, 4094, 4099, 4104 and 4110 — the
// bodies of the if-branches and the final statement — are absent from this rendered
// listing. Presumably they return the FragmentSkipStatus values named in the
// References list (INVALID, NOT_SKIPPABLE, SKIPPABLE); confirm against Execute.cpp.
4057  {
4058  auto col_id = lhs_col->getColumnKey().column_id;
4059  auto chunk_meta_it = fragment.getChunkMetadataMap().find(col_id);
// No chunk metadata for this column in the fragment.
4060  if (chunk_meta_it == fragment.getChunkMetadataMap().end()) {
4062  }
4063  double chunk_min{0.};
4064  double chunk_max{0.};
4065  const auto& chunk_type = lhs_col->get_type_info();
4066  chunk_min = extract_min_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4067  chunk_max = extract_max_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
// Statistics with min > max are handled separately before any comparison.
4068  if (chunk_min > chunk_max) {
4070  }
4071 
4072  const auto datum_fp = rhs_const->get_constval();
4073  const auto rhs_type = rhs_const->get_type_info().get_type();
4074  CHECK(rhs_type == kFLOAT || rhs_type == kDOUBLE);
4075 
4076  // Do we need to codegen the constant like the integer path does?
4077  const auto rhs_val = rhs_type == kFLOAT ? datum_fp.floatval : datum_fp.doubleval;
4078 
4079  // Todo: dedup the following comparison code with the integer/timestamp path, it is
4080  // slightly tricky to do cleanly as we do not have rowid on this path
4081  switch (comp_expr->get_optype()) {
4082  case kGE:
// col >= rhs cannot hold anywhere in the chunk when even the maximum is below rhs.
4083  if (chunk_max < rhs_val) {
4085  }
4086  break;
4087  case kGT:
4088  if (chunk_max <= rhs_val) {
4090  }
4091  break;
4092  case kLE:
// col <= rhs cannot hold anywhere in the chunk when even the minimum is above rhs.
4093  if (chunk_min > rhs_val) {
4095  }
4096  break;
4097  case kLT:
4098  if (chunk_min >= rhs_val) {
4100  }
4101  break;
4102  case kEQ:
// col == rhs cannot hold when rhs lies outside [chunk_min, chunk_max].
4103  if (chunk_min > rhs_val || chunk_max < rhs_val) {
4105  }
4106  break;
4107  default:
4108  break;
4109  }
4111 }
double extract_max_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
Definition: sqldefs.h:34
Definition: sqldefs.h:35
Definition: sqldefs.h:29
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:381
SQLOps get_optype() const
Definition: Analyzer.h:452
double extract_min_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
const ChunkMetadataMap & getChunkMetadataMap() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
Definition: sqldefs.h:33
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198
Datum get_constval() const
Definition: Analyzer.h:348
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:32

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::castToFP ( llvm::Value *  value,
SQLTypeInfo const &  from_ti,
SQLTypeInfo const &  to_ti 
)
private

Definition at line 3856 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, exp_to_scale(), logger::FATAL, SQLTypeInfo::get_scale(), SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_number(), and LOG.

// Converts an integer-typed LLVM value that represents a SQL number to the floating
// point type described by to_ti (float for size 4, double for size 8). When from_ti
// carries a non-zero scale, the converted value is divided by exp_to_scale(scale).
// Values that do not satisfy the condition below are returned unchanged.
// NOTE(review): source line 3859 is absent from this rendered listing.
3858  {
3860  if (value->getType()->isIntegerTy() && from_ti.is_number() && to_ti.is_fp() &&
3861  (!from_ti.is_fp() || from_ti.get_size() != to_ti.get_size())) {
3862  llvm::Type* fp_type{nullptr};
3863  switch (to_ti.get_size()) {
3864  case 4:
3865  fp_type = llvm::Type::getFloatTy(cgen_state_->context_);
3866  break;
3867  case 8:
3868  fp_type = llvm::Type::getDoubleTy(cgen_state_->context_);
3869  break;
3870  default:
// Any other target size is reported through LOG(FATAL); fp_type stays nullptr here.
3871  LOG(FATAL) << "Unsupported FP size: " << to_ti.get_size();
3872  }
// Signed integer to floating point conversion.
3873  value = cgen_state_->ir_builder_.CreateSIToFP(value, fp_type);
3874  if (from_ti.get_scale()) {
3875  value = cgen_state_->ir_builder_.CreateFDiv(
3876  value,
3877  llvm::ConstantFP::get(value->getType(), exp_to_scale(from_ti.get_scale())));
3878  }
3879  }
3880  return value;
3881 }
#define LOG(tag)
Definition: Logger.h:285
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
#define AUTOMATIC_IR_METADATA(CGENSTATE)
uint64_t exp_to_scale(const unsigned exp)

+ Here is the call graph for this function:

llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 3883 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, CHECK, CHECK_LT, and get_int_type().

// Given a pointer value, returns it unchanged when the pointee width (the integer bit
// width, 32 for float, 64 for double) already equals bitWidth; otherwise bitcasts it to
// a pointer to an integer type of bitWidth bits.
// The member signature on this page names the second parameter bit_width, while the
// body below refers to it as bitWidth.
// NOTE(review): source line 3884 is absent from this rendered listing.
3883  {
3885  CHECK(val->getType()->isPointerTy());
3886 
3887  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
3888  const auto val_type = val_ptr_type->getPointerElementType();
3889  size_t val_width = 0;
3890  if (val_type->isIntegerTy()) {
3891  val_width = val_type->getIntegerBitWidth();
3892  } else {
3893  if (val_type->isFloatTy()) {
3894  val_width = 32;
3895  } else {
// Only integer, float and double pointees are accepted.
3896  CHECK(val_type->isDoubleTy());
3897  val_width = 64;
3898  }
3899  }
3900  CHECK_LT(size_t(0), val_width);
3901  if (bitWidth == val_width) {
3902  return val;
3903  }
3904  return cgen_state_->ir_builder_.CreateBitCast(
3905  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
3906 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

bool Executor::checkCurrentQuerySession ( const std::string &  candidate_query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4446 of file Execute.cpp.

References current_query_session_.

// Reports whether candidate_query_session is the session stored in
// current_query_session_. The read_lock parameter is not referenced in the visible body.
4448  {
4449  // True only when candidate_query_session is non-empty and equal to
4450  // current_query_session_; an empty candidate never matches.
4451  return !candidate_query_session.empty() &&
4452  (current_query_session_ == candidate_query_session);
4453 }
QuerySessionId current_query_session_
Definition: Execute.h:1390
bool Executor::checkIsQuerySessionEnrolled ( const QuerySessionId query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4720 of file Execute.cpp.

References queries_session_map_.

Referenced by executeWorkUnitImpl().

// Returns false for an empty session id; otherwise reports whether
// queries_session_map_ contains an entry for query_session.
4722  {
4723  if (query_session.empty()) {
4724  return false;
4725  }
// The emptiness test below is always true here because of the early return above.
4726  return !query_session.empty() && queries_session_map_.count(query_session);
4727 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1394

+ Here is the caller graph for this function:

bool Executor::checkIsQuerySessionInterrupted ( const std::string &  query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4709 of file Execute.cpp.

References queries_interrupt_flag_.

Referenced by executePlanWithGroupBy(), executePlanWithoutGroupBy(), fetchChunks(), and fetchUnionChunks().

// Returns false for an empty session id; otherwise returns true only when
// queries_interrupt_flag_ has an entry for query_session and that entry is set.
4711  {
4712  if (query_session.empty()) {
4713  return false;
4714  }
4715  auto flag_it = queries_interrupt_flag_.find(query_session);
// The emptiness test below is always true here because of the early return above.
4716  return !query_session.empty() && flag_it != queries_interrupt_flag_.end() &&
4717  flag_it->second;
4718 }
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1392

+ Here is the caller graph for this function:

bool Executor::checkNonKernelTimeInterrupted ( ) const

Definition at line 4797 of file Execute.cpp.

References current_query_session_, executor_id_, executor_session_mutex_, queries_interrupt_flag_, and UNITARY_EXECUTOR_ID.

// Reports whether the interrupt flag registered for current_query_session_ is set.
// NOTE(review): source lines 4801 and 4804 are absent from this rendered listing.
// Presumably 4801 is the condition guarding the early `return false` (the References
// list names executor_id_ and UNITARY_EXECUTOR_ID) and 4804 takes a lock on
// executor_session_mutex_ — confirm against Execute.cpp.
4797  {
4798  // this function should be called within an executor which is assigned
4799  // to the specific query thread (that indicates we already enroll the session)
4800  // check whether this is called from non unitary executor
4802  return false;
4803  };
4805  auto flag_it = queries_interrupt_flag_.find(current_query_session_);
// True only for a non-empty current session whose flag exists and is set.
4806  return !current_query_session_.empty() && flag_it != queries_interrupt_flag_.end() &&
4807  flag_it->second;
4808 }
QuerySessionId current_query_session_
Definition: Execute.h:1390
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1291
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1392
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1388
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:373
void Executor::checkPendingQueryStatus ( const QuerySessionId query_session)

Definition at line 4490 of file Execute.cpp.

References ERR_INTERRUPTED, executor_session_mutex_, queries_interrupt_flag_, queries_session_map_, and VLOG.

// Checks, before a pending query runs, whether its session has already been
// interrupted. Returns without action for an empty session id or when no interrupt
// flag is registered for the session (a VLOG message explains which case applies).
// NOTE(review): source lines 4493 and 4512 are absent from this rendered listing.
// Presumably 4493 takes a lock on executor_session_mutex_ and 4512 raises the
// ERR_INTERRUPTED error named in the References list — confirm against Execute.cpp.
4490  {
4491  // check whether we are okay to execute the "pending" query
4492  // i.e., before running the query check if this query session is "ALREADY" interrupted
4494  if (query_session.empty()) {
4495  return;
4496  }
4497  if (queries_interrupt_flag_.find(query_session) == queries_interrupt_flag_.end()) {
4498  // something goes wrong since we assume this is caller's responsibility
4499  // (call this function only for enrolled query session)
4500  if (!queries_session_map_.count(query_session)) {
4501  VLOG(1) << "Interrupting pending query is not available since the query session is "
4502  "not enrolled";
4503  } else {
4504  // here the query session is enrolled but the interrupt flag is not registered
4505  VLOG(1)
4506  << "Interrupting pending query is not available since its interrupt flag is "
4507  "not registered";
4508  }
4509  return;
4510  }
// The flag is known to exist at this point, so operator[] does not insert a new entry.
4511  if (queries_interrupt_flag_[query_session]) {
4513  }
4514 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1394
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1436
std::shared_lock< T > shared_lock
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1392
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1388
#define VLOG(n)
Definition: Logger.h:387
void Executor::clearCaches ( bool  runtime_only = false)
static void Executor::clearExternalCaches ( bool  for_update,
const TableDescriptor td,
const int  current_db_id 
)
inlinestatic

Definition at line 388 of file Execute.h.

References TableDescriptor::getTableChunkKey(), CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable().

Referenced by Parser::InsertIntoTableAsSelectStmt::execute(), Parser::DropTableStmt::execute(), Parser::TruncateTableStmt::execute(), Parser::DropColumnStmt::execute(), Parser::CopyTableStmt::execute(), RelAlgExecutor::executeDelete(), and RelAlgExecutor::executeUpdate().

// Invalidates caches either for a single table or globally. When td is non-null and
// yields a non-empty chunk key prefix for current_db_id, a table key is derived with
// boost::hash_value and only that table is targeted; otherwise clearEntireCache stays
// true and the global branch runs. Both branches distinguish for_update.
// NOTE(review): source lines 396, 398, 400, 406, 408 and 410 — the actual invalidation
// calls — are absent from this rendered listing. The References list names
// CacheInvalidator::invalidateCachesByTable() and CacheInvalidator::invalidateCaches();
// which call sits in which branch cannot be told from here.
390  {
391  bool clearEntireCache = true;
392  if (td) {
393  const auto& table_chunk_key_prefix = td->getTableChunkKey(current_db_id);
394  if (!table_chunk_key_prefix.empty()) {
395  auto table_key = boost::hash_value(table_chunk_key_prefix);
397  if (for_update) {
399  } else {
401  }
402  clearEntireCache = false;
403  }
404  }
405  if (clearEntireCache) {
407  if (for_update) {
409  } else {
411  }
412  }
413  }
static void invalidateCachesByTable(size_t table_key)
static void invalidateCaches()
std::vector< int > getTableChunkKey(const int getCurrentDBId) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 497 of file Execute.cpp.

References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, execute_mutex_, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by DBHandler::clear_cpu_memory(), DBHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

// Clears memory at the requested memory level; levels other than the handled cases
// throw std::runtime_error.
// NOTE(review): source lines 499-501, 509, 511 and 512 are absent from this rendered
// listing. Presumably they hold the CPU_LEVEL/GPU_LEVEL case labels, the start of the
// lock on execute_mutex_, the cache invalidation call and the DataMgr::clearMemory()
// call listed in the References — confirm against Execute.cpp.
497  {
498  switch (memory_level) {
502  execute_mutex_); // Don't flush memory while queries are running
503 
504  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
505  // The hash table cache uses CPU memory not managed by the buffer manager. In the
506  // future, we should manage these allocations with the buffer manager directly.
507  // For now, assume the user wants to purge the hash table cache when they clear
508  // CPU memory (currently used in ExecuteTest to lower memory pressure)
510  }
513  break;
514  }
515  default: {
516  throw std::runtime_error(
517  "Clearing memory levels other than the CPU level or GPU level is not "
518  "supported.");
519  }
520  }
521 }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1399
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:434
static void invalidateCaches()
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:234
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::unique_lock< T > unique_lock

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMetaInfoCache ( )
private

Definition at line 771 of file Execute.cpp.

References agg_col_range_cache_, TableGenerations::clear(), AggregatedColRange::clear(), InputTableInfoCache::clear(), input_table_info_cache_, and table_generations_.

// NOTE(review): the whole body (source lines 772-774) is absent from this rendered
// listing. Per the References list it presumably calls clear() on
// input_table_info_cache_, agg_col_range_cache_ and table_generations_ — confirm
// against Execute.cpp.
771  {
775 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1386
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1385
TableGenerations table_generations_
Definition: Execute.h:1387

+ Here is the call graph for this function:

void Executor::clearQuerySessionStatus ( const QuerySessionId query_session,
const std::string &  submitted_time_str 
)

Definition at line 4516 of file Execute.cpp.

References current_query_session_, executor_session_mutex_, invalidateRunningQuerySession(), removeFromQuerySessionList(), and resetInterrupt().

4517  {
4519  // clear the interrupt-related info for a finished query
4520  if (query_session.empty()) {
4521  return;
4522  }
4523  removeFromQuerySessionList(query_session, submitted_time_str, session_write_lock);
4524  if (query_session.compare(current_query_session_) == 0) {
4525  invalidateRunningQuerySession(session_write_lock);
4526  resetInterrupt();
4527  }
4528 }
QuerySessionId current_query_session_
Definition: Execute.h:1390
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4664
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1388
void resetInterrupt()
void invalidateRunningQuerySession(heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4468

+ Here is the call graph for this function:

llvm::Value * Executor::codegenAggregateWindowState ( )
private

Definition at line 1335 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.

1335  {
1337  const auto pi32_type =
1338  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1339  const auto pi64_type =
1340  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1341  const auto window_func_context =
1343  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
1344  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1345  const auto aggregate_state_type =
1346  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1347  auto aggregate_state = aggregateWindowStatePtr();
1348  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1349  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1350  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1351  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
1352  aggregate_state_count_i64, aggregate_state_type);
1353  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1354  switch (window_func_ti.get_type()) {
1355  case kFLOAT: {
1356  return cgen_state_->emitCall(
1357  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
1358  }
1359  case kDOUBLE: {
1360  return cgen_state_->emitCall(
1361  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
1362  }
1363  case kDECIMAL: {
1364  return cgen_state_->emitCall(
1365  "load_avg_decimal",
1366  {aggregate_state,
1367  aggregate_state_count,
1368  double_null_lv,
1369  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
1370  }
1371  default: {
1372  return cgen_state_->emitCall(
1373  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
1374  }
1375  }
1376  }
1377  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1378  return cgen_state_->ir_builder_.CreateLoad(
1379  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1380  }
1381  switch (window_func_ti.get_type()) {
1382  case kFLOAT: {
1383  return cgen_state_->emitCall("load_float", {aggregate_state});
1384  }
1385  case kDOUBLE: {
1386  return cgen_state_->emitCall("load_double", {aggregate_state});
1387  }
1388  default: {
1389  return cgen_state_->ir_builder_.CreateLoad(
1390  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1391  }
1392  }
1393 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2576
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenConditionalAggregateCondValSelector ( llvm::Value *  cond_lv,
SQLAgg const  aggKind,
CompilationOptions const &  co 
) const
private

Definition at line 1395 of file WindowFunctionIR.cpp.

References CHECK, and kSUM_IF.

// Normalizes the condition value of a conditional aggregate into an i8 value.
// Only kSUM_IF is handled; for every other aggKind the function returns nullptr.
// The `co` parameter is not referenced in this body.
1398  {
1399  llvm::Value* res_cond_lv{nullptr};
1400  switch (aggKind) {
1401  case kSUM_IF:
1402  if (cond_lv->getType()->isIntegerTy(1)) {
1403  // cond_expr returns i1 type val, just need to cast to i8 type
1404  // i.e., cond_expr IS NULL
1405  res_cond_lv = cgen_state_->castToTypeIn(cond_lv, 8);
1406  } else {
      // Anything that is not i1 is required to be i8 here.
1407  CHECK(cond_lv->getType()->isIntegerTy(8));
1408  // cond_expr may have null value instead of upcasted bool (i1-type) value
1409  // so we have to correctly set true condition
1410  // i.e., i8 @gt_int32_t_nullable_lhs(..., i64 -2147483648, i8 -128)
1411  // has one of the following i8-type values: 1, 0, -128
      // Only the exact value 1 selects 1; both 0 and -128 select 0.
1412  auto true_cond_lv =
1413  cgen_state_->ir_builder_.CreateICmpEQ(cond_lv, cgen_state_->llInt((int8_t)1));
1414  res_cond_lv = cgen_state_->ir_builder_.CreateSelect(
1415  true_cond_lv, cgen_state_->llInt((int8_t)1), cgen_state_->llInt((int8_t)0));
1416  }
1417  break;
1418  default:
1419  break;
1420  }
1421  return res_cond_lv;
1422 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
#define CHECK(condition)
Definition: Logger.h:291
llvm::Value * Executor::codegenCurrentPartitionIndex ( const WindowFunctionContext window_func_context,
llvm::Value *  current_row_pos_lv 
)
private

Definition at line 699 of file WindowFunctionIR.cpp.

References WindowFunctionContext::elementCount(), get_int_type(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFunction::isFrameNavigateWindowFunction(), WindowFunctionContext::partitionCount(), WindowFunctionContext::partitionNumCountBuf(), and WindowFunctionContext::payload().

// Emits a call to "compute_int64_t_lower_bound" with the partition count, a row
// position and a pointer to the partition-num-count buffer, and returns the call's
// result. For frame-navigation window functions the row position is first replaced
// by a 32-bit value loaded from the area following the payload entries
// (payload() + elementCount()), indexed by `current_row_pos_lv` and widened to 64 bits.
701  {
702  const auto pi64_type =
703  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
704  const auto pi32_type =
705  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
706  auto row_pos_lv = current_row_pos_lv;
707  if (window_func_context->getWindowFunction()->isFrameNavigateWindowFunction()) {
708  // `current_row_pos_lv` indicates the index of the current row, but to figure out its
709  // index of window partition it belongs to, we need a special approach especially for
710  // window framing navigation function for instance, when we have five rows having two
711  // columns pc and val such as (2,1), (2,2), (2,3), (1,1), (1,2), we build a OneToMany
712  // Perfect Hash Table as: offset: 0 2 / count: 2 3 / payload: i1, i2, i3, i4, i5 where
713  // i1 ~ i3 and i4 ~ i5 are rows for partition 1 (i.e., pc = 1) and 2 (i.e., pc = 2),
714  // respectively. But when processing the first row (2, 1), the original
715  // `current_row_pos_lv` stands for zero so computing which partitions it belongs to is
716  // hard unless hashing the value at runtime. Even if we do hash, we cannot know the
717  // exact hash slot unless we do binary + linear searches multiple times (via payload
718  // buffer and the ordered payload buffer) i.e., when the row (1,2) is assigned to the
719  // partition[4], we cannot find the hash slot index '4' by using `current_row_pos_lv`
720  // unless doing a costly operation like a linear search over the entire window
721  // partition. Instead, we collect a hash slot that each row is assigned to and keep
722  // this info at the payload buffer `hash_slot_idx_ptr_lv` and use it for computing
723  // window frame navigation functions
724  auto* const hash_slot_idx_ptr =
725  window_func_context->payload() + window_func_context->elementCount();
     // The buffer address is embedded into the IR as an integer constant and then
     // converted to an i32 pointer.
726  auto hash_slot_idx_buf_lv =
727  cgen_state_->llInt(reinterpret_cast<int64_t>(hash_slot_idx_ptr));
728  auto hash_slot_idx_ptr_lv =
729  cgen_state_->ir_builder_.CreateIntToPtr(hash_slot_idx_buf_lv, pi32_type);
730  auto hash_slot_idx_load_lv = cgen_state_->ir_builder_.CreateGEP(
731  hash_slot_idx_ptr_lv->getType()->getPointerElementType(),
732  hash_slot_idx_ptr_lv,
733  current_row_pos_lv);
734  row_pos_lv = cgen_state_->castToTypeIn(
735  cgen_state_->ir_builder_.CreateLoad(
736  hash_slot_idx_load_lv->getType()->getPointerElementType(),
737  hash_slot_idx_load_lv,
738  "cur_row_hash_slot_idx"),
739  64);
740  }
741  auto partition_count_lv = cgen_state_->llInt(window_func_context->partitionCount());
742  auto partition_num_count_buf_lv = cgen_state_->llInt(
743  reinterpret_cast<int64_t>(window_func_context->partitionNumCountBuf()));
744  auto partition_num_count_ptr_lv =
745  cgen_state_->ir_builder_.CreateIntToPtr(partition_num_count_buf_lv, pi64_type);
746  return cgen_state_->emitCall(
747  "compute_int64_t_lower_bound",
748  {partition_count_lv, row_pos_lv, partition_num_count_ptr_lv});
749 }
bool isFrameNavigateWindowFunction() const
Definition: Analyzer.h:2630
size_t elementCount() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
size_t partitionCount() const
const int64_t * partitionNumCountBuf() const
const Analyzer::WindowFunction * getWindowFunction() const
const int32_t * payload() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenFrameBound ( bool  for_start_bound,
bool  for_range_mode,
bool  for_window_frame_naviation,
const Analyzer::WindowFrame frame_bound,
bool  is_timestamp_type_frame,
llvm::Value *  order_key_null_val,
const WindowFrameBoundFuncArgs args 
)
private

Definition at line 567 of file WindowFunctionIR.cpp.

References CHECK, CURRENT_ROW, WindowFrameBoundFuncArgs::current_row_pos_lv, EXPR_FOLLOWING, EXPR_PRECEDING, WindowFrameBoundFuncArgs::frame_end_bound_expr_lv, WindowFrameBoundFuncArgs::frame_start_bound_expr_lv, Analyzer::WindowFrame::getBoundType(), WindowFrameBoundFuncArgs::int64_t_one_val_lv, WindowFrameBoundFuncArgs::int64_t_zero_val_lv, WindowFrameBoundFuncArgs::num_elem_current_partition_lv, WindowFrameBoundFuncArgs::order_type_col_name, UNBOUNDED_FOLLOWING, and UNBOUNDED_PRECEDING.

573  {
574  const auto bound_type = frame_bound->getBoundType();
575  auto adjust_frame_end_bound = [&](llvm::Value* target_bound_lv) {
576  return cgen_state_->ir_builder_.CreateSub(target_bound_lv, args.int64_t_one_val_lv);
577  };
579  CHECK(for_start_bound) << "frame end cannot be UNBOUNDED PRECEDING";
580  return args.int64_t_zero_val_lv;
581  } else if (bound_type == SqlWindowFrameBoundType::UNBOUNDED_FOLLOWING) {
582  CHECK(!for_start_bound) << "frame start cannot be UNBOUNDED FOLLOWING";
583  // adjust frame bound w.r.t the open frame interval if necessary
584  return for_window_frame_naviation
585  ? adjust_frame_end_bound(args.num_elem_current_partition_lv)
586  : args.num_elem_current_partition_lv;
587  }
588  std::vector<llvm::Value*> func_args;
589  std::string op_name =
590  bound_type == SqlWindowFrameBoundType::EXPR_FOLLOWING ? "add" : "sub";
591  if (!for_range_mode) {
592  llvm::Value* current_row_bound_expr_lv{nullptr};
593  if (for_window_frame_naviation) {
594  // we already know a current row's index in (ordered) window frame in this case
595  auto bound_expr =
596  for_start_bound ? args.frame_start_bound_expr_lv : args.frame_end_bound_expr_lv;
597  if (bound_type == SqlWindowFrameBoundType::EXPR_FOLLOWING) {
598  current_row_bound_expr_lv =
599  cgen_state_->ir_builder_.CreateAdd(args.current_row_pos_lv, bound_expr);
600  } else if (bound_type == SqlWindowFrameBoundType::EXPR_PRECEDING) {
601  current_row_bound_expr_lv =
602  cgen_state_->ir_builder_.CreateSub(args.current_row_pos_lv, bound_expr);
603  } else {
605  current_row_bound_expr_lv = args.current_row_pos_lv;
606  }
607  // adjust frame bound w.r.t the open frame interval
608  if (for_start_bound) {
609  return cgen_state_->ir_builder_.CreateSelect(
610  cgen_state_->ir_builder_.CreateICmpSLT(current_row_bound_expr_lv,
611  args.int64_t_zero_val_lv),
612  args.int64_t_zero_val_lv,
613  current_row_bound_expr_lv);
614  } else {
615  return cgen_state_->ir_builder_.CreateSelect(
616  cgen_state_->ir_builder_.CreateICmpSGE(current_row_bound_expr_lv,
618  adjust_frame_end_bound(args.num_elem_current_partition_lv),
619  current_row_bound_expr_lv);
620  }
621  } else {
622  std::string func_class = for_start_bound ? "start" : "end";
623  auto const func_name = "compute_row_mode_" + func_class + "_index_" + op_name;
624  func_args = prepareRowModeFuncArgs(for_start_bound, bound_type, args);
625  current_row_bound_expr_lv = cgen_state_->emitCall(func_name, func_args);
626  }
627  return current_row_bound_expr_lv;
628  } else {
629  std::string func_class = for_start_bound ? "lower" : "upper";
630  auto const func_name = getFramingFuncName(
631  func_class,
632  args.order_type_col_name,
633  op_name,
634  bound_type != SqlWindowFrameBoundType::CURRENT_ROW && is_timestamp_type_frame);
635  func_args = prepareRangeModeFuncArgs(
636  for_start_bound, frame_bound, is_timestamp_type_frame, order_key_null_val, args);
637  auto frame_bound_lv = cgen_state_->emitCall(func_name, func_args);
638  if (!for_start_bound && for_window_frame_naviation) {
639  // adjust frame end bound w.r.t the open frame interval
640  frame_bound_lv = cgen_state_->ir_builder_.CreateSelect(
641  cgen_state_->ir_builder_.CreateICmpSGE(frame_bound_lv,
643  adjust_frame_end_bound(args.num_elem_current_partition_lv),
644  frame_bound_lv);
645  }
646  return frame_bound_lv;
647  }
648 }
llvm::Value * num_elem_current_partition_lv
Definition: WindowContext.h:94
llvm::Value * current_row_pos_lv
Definition: WindowContext.h:89
llvm::Value * frame_end_bound_expr_lv
Definition: WindowContext.h:88
std::string getFramingFuncName(const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
SqlWindowFrameBoundType getBoundType() const
Definition: Analyzer.h:2488
std::vector< llvm::Value * > prepareRangeModeFuncArgs(bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
#define CHECK(condition)
Definition: Logger.h:291
llvm::Value * int64_t_zero_val_lv
Definition: WindowContext.h:92
llvm::Value * int64_t_one_val_lv
Definition: WindowContext.h:93
llvm::Value * frame_start_bound_expr_lv
Definition: WindowContext.h:87
std::string order_type_col_name
Definition: WindowContext.h:96
std::vector< llvm::Value * > prepareRowModeFuncArgs(bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenFrameBoundExpr ( const Analyzer::WindowFunction window_func,
const Analyzer::WindowFrame frame_bound,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 516 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegen(), EXPR_FOLLOWING, EXPR_PRECEDING, g_cluster, SQLTypeInfo::get_size(), Analyzer::Expr::get_type_info(), Analyzer::WindowFrame::getBoundExpr(), Analyzer::WindowFunction::getOrderKeys(), Analyzer::WindowFunction::hasRangeModeFraming(), kBIGINT, kINT, and kSMALLINT.

// Generates the LLVM value for the offset expression of one window frame bound.
// Only EXPR_FOLLOWING / EXPR_PRECEDING bounds have their expression code-generated;
// every other bound type yields the constant int64 -1. The returned value is
// CHECKed to be non-null.
// Throws std::runtime_error when the ordering column is a date, the window function
// uses range-mode framing, and g_cluster is set.
519  {
520  auto needs_bound_expr_codegen = [](const Analyzer::WindowFrame* window_frame) {
521  return window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_FOLLOWING ||
522  window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_PRECEDING;
523  };
524  const auto order_col_ti = window_func->getOrderKeys().front()->get_type_info();
     // Wraps the bound value in a call to "fixed_width_date_encode_noinline". When the
     // order column's comp_param is 16 the SMALLINT null (cast to 32 bits) is passed,
     // otherwise the INT null; the BIGINT null is passed in both cases.
525  auto encode_date_col_val = [&order_col_ti, this](llvm::Value* bound_expr_lv) {
526  if (order_col_ti.get_comp_param() == 16) {
527  return cgen_state_->emitCall(
528  "fixed_width_date_encode_noinline",
529  {bound_expr_lv,
530  cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(SQLTypeInfo(kSMALLINT)),
531  32),
532  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
533  } else {
534  return cgen_state_->emitCall("fixed_width_date_encode_noinline",
535  {bound_expr_lv,
536  cgen_state_->inlineIntNull(SQLTypeInfo(kINT)),
537  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
538  }
539  };
540  llvm::Value* bound_expr_lv{nullptr};
541  if (needs_bound_expr_codegen(frame_bound)) {
542  auto bound_expr = frame_bound->getBoundExpr();
543  if (auto dateadd_expr = dynamic_cast<const Analyzer::DateaddExpr*>(bound_expr)) {
544  if (dateadd_expr->get_datetime_expr()->get_type_info().is_encoded_timestamp()) {
545  dateadd_expr->set_fixed_encoding_null_val();
546  }
547  }
     // Only the first value produced by the code generator is used.
548  auto bound_expr_lvs = code_generator.codegen(bound_expr, true, co);
549  bound_expr_lv = bound_expr_lvs.front();
550  if (order_col_ti.is_date() && window_func->hasRangeModeFraming()) {
551  if (g_cluster) {
552  throw std::runtime_error(
553  "Range mode with date type ordering column is not supported yet.");
554  }
555  bound_expr_lv = encode_date_col_val(bound_expr_lv);
556  }
     // Bound expressions whose type is not 8 bytes wide are cast to 64 bits.
557  if (frame_bound->getBoundExpr()->get_type_info().get_size() != 8) {
558  bound_expr_lv = cgen_state_->castToTypeIn(bound_expr_lv, 64);
559  }
560  } else {
561  bound_expr_lv = cgen_state_->llInt((int64_t)-1);
562  }
563  CHECK(bound_expr_lv);
564  return bound_expr_lv;
565 }
bool hasRangeModeFraming() const
Definition: Analyzer.h:2610
HOST DEVICE int get_size() const
Definition: sqltypes.h:393
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2584
const Analyzer::Expr * getBoundExpr() const
Definition: Analyzer.h:2490
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK(condition)
Definition: Logger.h:291
bool g_cluster
Definition: sqltypes.h:62

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenFrameBoundRange ( const Analyzer::WindowFunction window_func,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 921 of file WindowFunctionIR.cpp.

References CHECK, Analyzer::WindowFunction::getFrameEndBound(), and Analyzer::WindowFunction::getFrameStartBound().

// Code-generates the bound expressions of the window function's frame start and
// frame end via codegenFrameBoundExpr() and returns them as a (start, end) pair.
// Both values are CHECKed to be non-null before returning.
924  {
925  const auto frame_start_bound = window_func->getFrameStartBound();
926  const auto frame_end_bound = window_func->getFrameEndBound();
927  auto frame_start_bound_expr_lv =
928  codegenFrameBoundExpr(window_func, frame_start_bound, code_generator, co);
929  auto frame_end_bound_expr_lv =
930  codegenFrameBoundExpr(window_func, frame_end_bound, code_generator, co);
931  CHECK(frame_start_bound_expr_lv);
932  CHECK(frame_end_bound_expr_lv);
933  return std::make_pair(frame_start_bound_expr_lv, frame_end_bound_expr_lv);
934 }
const Analyzer::WindowFrame * getFrameStartBound() const
Definition: Analyzer.h:2588
const Analyzer::WindowFrame * getFrameEndBound() const
Definition: Analyzer.h:2595
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenFrameNullRange ( WindowFunctionContext window_func_context,
llvm::Value *  partition_index_lv 
) const
private

Definition at line 808 of file WindowFunctionIR.cpp.

References get_int_type(), WindowFunctionContext::getNullValueEndPos(), and WindowFunctionContext::getNullValueStartPos().

// Emits loads of the entries at index `partition_index_lv` from the buffers returned
// by getNullValueStartPos() and getNullValueEndPos(), and returns the two loaded
// values as a (null_start_pos, null_end_pos) pair. Each buffer address is embedded
// into the IR as a 64-bit integer constant and converted to an i64 pointer.
810  {
811  const auto pi64_type =
812  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
     // Start position: address constant -> i64* -> GEP by partition index -> load.
813  const auto null_start_pos_buf = cgen_state_->llInt(
814  reinterpret_cast<int64_t>(window_func_context->getNullValueStartPos()));
815  const auto null_start_pos_buf_ptr =
816  cgen_state_->ir_builder_.CreateIntToPtr(null_start_pos_buf, pi64_type);
817  const auto null_start_pos_ptr =
818  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
819  null_start_pos_buf_ptr,
820  partition_index_lv);
821  auto null_start_pos_lv = cgen_state_->ir_builder_.CreateLoad(
822  null_start_pos_ptr->getType()->getPointerElementType(),
823  null_start_pos_ptr,
824  "null_start_pos");
     // End position: same sequence against the end-position buffer.
825  const auto null_end_pos_buf = cgen_state_->llInt(
826  reinterpret_cast<int64_t>(window_func_context->getNullValueEndPos()));
827  const auto null_end_pos_buf_ptr =
828  cgen_state_->ir_builder_.CreateIntToPtr(null_end_pos_buf, pi64_type);
829  const auto null_end_pos_ptr = cgen_state_->ir_builder_.CreateGEP(
830  get_int_type(64, cgen_state_->context_), null_end_pos_buf_ptr, partition_index_lv);
831  auto null_end_pos_lv = cgen_state_->ir_builder_.CreateLoad(
832  null_end_pos_ptr->getType()->getPointerElementType(),
833  null_end_pos_ptr,
834  "null_end_pos");
835  return std::make_pair(null_start_pos_lv, null_end_pos_lv);
836 }
int64_t * getNullValueEndPos() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
int64_t * getNullValueStartPos() const

+ Here is the call graph for this function:

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const ExecutionOptions eo 
)
private

Definition at line 1137 of file IRCodegen.cpp.

References ExecutionOptions::allow_runtime_query_interrupt, anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, JoinLoop::codegen(), CompilationOptions::device_type, JoinLoopDomain::element_count, get_int_array_type(), get_int_type(), INNER, MultiSet, CodeGenerator::posArg(), GroupByAndAggregate::query_infos_, query_mem_desc, Set, and ExecutionOptions::with_dynamic_watchdog.

1144  {
1146  const auto exit_bb =
1147  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->current_func_);
1148  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
1149  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1150  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1151  CodeGenerator code_generator(this);
1152 
1153  llvm::BasicBlock* loops_entry_bb{nullptr};
1154  auto has_range_join =
1155  std::any_of(join_loops.begin(), join_loops.end(), [](const auto& join_loop) {
1156  return join_loop.kind() == JoinLoopKind::MultiSet;
1157  });
1158  if (has_range_join) {
1159  CHECK_EQ(join_loops.size(), size_t(1));
1160  const auto element_count =
1161  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 9);
1162 
1163  auto compute_packed_offset = [](const int32_t x, const int32_t y) -> uint64_t {
1164  const uint64_t y_shifted = static_cast<uint64_t>(y) << 32;
1165  return y_shifted | static_cast<uint32_t>(x);
1166  };
1167 
1168  const auto values_arr = std::vector<llvm::Constant*>{
1169  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
1170  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1171  compute_packed_offset(0, 1)),
1172  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1173  compute_packed_offset(0, -1)),
1174  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1175  compute_packed_offset(1, 0)),
1176  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1177  compute_packed_offset(1, 1)),
1178  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1179  compute_packed_offset(1, -1)),
1180  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1181  compute_packed_offset(-1, 0)),
1182  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1183  compute_packed_offset(-1, 1)),
1184  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1185  compute_packed_offset(-1, -1))};
1186 
1187  const auto constant_values_array = llvm::ConstantArray::get(
1188  get_int_array_type(64, 9, cgen_state_->context_), values_arr);
1189  CHECK(cgen_state_->module_);
1190  const auto values =
1191  new llvm::GlobalVariable(*cgen_state_->module_,
1192  get_int_array_type(64, 9, cgen_state_->context_),
1193  true,
1194  llvm::GlobalValue::LinkageTypes::InternalLinkage,
1195  constant_values_array);
1196  JoinLoop join_loop(
1199  [element_count, values](const std::vector<llvm::Value*>& v) {
1200  JoinLoopDomain domain{{0}};
1201  domain.element_count = element_count;
1202  domain.values_buffer = values;
1203  return domain;
1204  },
1205  nullptr,
1206  nullptr,
1207  nullptr,
1208  nullptr,
1209  "range_key_loop");
1210 
1211  loops_entry_bb = JoinLoop::codegen(
1212  {join_loop},
1213  [this,
1214  query_func,
1215  &query_mem_desc,
1216  &co,
1217  &eo,
1218  &group_by_and_aggregate,
1219  &join_loops,
1220  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1221  auto& builder = cgen_state_->ir_builder_;
1222 
1223  auto body_exit_bb =
1224  llvm::BasicBlock::Create(cgen_state_->context_,
1225  "range_key_inner_body_exit",
1226  builder.GetInsertBlock()->getParent());
1227 
1228  auto range_key_body_bb =
1229  llvm::BasicBlock::Create(cgen_state_->context_,
1230  "range_key_loop_body",
1231  builder.GetInsertBlock()->getParent());
1232  builder.SetInsertPoint(range_key_body_bb);
1233 
1234  const auto body_loops_entry_bb = JoinLoop::codegen(
1235  join_loops,
1236  [this,
1237  query_func,
1238  &query_mem_desc,
1239  &co,
1240  &eo,
1241  &group_by_and_aggregate,
1242  &join_loops,
1243  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1244  addJoinLoopIterator(prev_iters, join_loops.size());
1245  auto& builder = cgen_state_->ir_builder_;
1246  const auto loop_body_bb =
1247  llvm::BasicBlock::Create(builder.getContext(),
1248  "loop_body",
1249  builder.GetInsertBlock()->getParent());
1250  builder.SetInsertPoint(loop_body_bb);
1251  const bool can_return_error =
1252  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1253  if (can_return_error || cgen_state_->needs_error_check_ ||
1254  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1255  createErrorCheckControlFlow(query_func,
1256  eo.with_dynamic_watchdog,
1257  eo.allow_runtime_query_interrupt,
1258  join_loops,
1259  co.device_type,
1260  group_by_and_aggregate.query_infos_);
1261  }
1262  return loop_body_bb;
1263  },
1264  prev_iters.back(),
1265  body_exit_bb,
1266  cgen_state_.get());
1267 
1268  builder.SetInsertPoint(range_key_body_bb);
1269  cgen_state_->ir_builder_.CreateBr(body_loops_entry_bb);
1270 
1271  builder.SetInsertPoint(body_exit_bb);
1272  return range_key_body_bb;
1273  },
1274  code_generator.posArg(nullptr),
1275  exit_bb,
1276  cgen_state_.get());
1277  } else {
1278  loops_entry_bb = JoinLoop::codegen(
1279  join_loops,
1280  /*body_codegen=*/
1281  [this,
1282  query_func,
1283  &query_mem_desc,
1284  &co,
1285  &eo,
1286  &group_by_and_aggregate,
1287  &join_loops,
1288  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1290  addJoinLoopIterator(prev_iters, join_loops.size());
1291  auto& builder = cgen_state_->ir_builder_;
1292  const auto loop_body_bb = llvm::BasicBlock::Create(
1293  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
1294  builder.SetInsertPoint(loop_body_bb);
1295  const bool can_return_error =
1296  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1297  if (can_return_error || cgen_state_->needs_error_check_ ||
1298  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1299  createErrorCheckControlFlow(query_func,
1300  eo.with_dynamic_watchdog,
1301  eo.allow_runtime_query_interrupt,
1302  join_loops,
1303  co.device_type,
1304  group_by_and_aggregate.query_infos_);
1305  }
1306  return loop_body_bb;
1307  },
1308  /*outer_iter=*/code_generator.posArg(nullptr),
1309  exit_bb,
1310  cgen_state_.get());
1311  }
1312  CHECK(loops_entry_bb);
1313  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1314  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
1315 }
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
llvm::Value * element_count
Definition: JoinLoop.h:46
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, CgenState *cgen_state)
Definition: JoinLoop.cpp:50
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1119
#define CHECK(condition)
Definition: Logger.h:291
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
llvm::ArrayType * get_int_array_type(int const width, int count, llvm::LLVMContext &context)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenLoadCurrentValueFromColBuf ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
WindowFrameBoundFuncArgs args 
) const
private

Definition at line 671 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegenWindowPosition(), WindowFrameBoundFuncArgs::current_row_pos_lv, get_fp_type(), get_int_type(), Analyzer::WindowFunction::getOrderKeys(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFunction::isFrameNavigateWindowFunction(), and WindowFrameBoundFuncArgs::order_key_buf_ptr_lv.

// Emits a load of one order-key value from `args.order_key_buf_ptr_lv` and returns
// the loaded value (named "current_col_value"). The element type is a floating-point
// or integer LLVM type chosen from the first order key's type info. The element
// index is the result of codegenWindowPosition() for ordinary window functions and
// `args.current_row_pos_lv` itself for frame-navigation window functions.
674  {
675  llvm::Value* current_col_value_ptr_lv{nullptr};
     // NOTE(review): despite its name this holds getOrderKeySize() * 8 and is passed as
     // the `width` argument of get_fp_type/get_int_type; presumably a bit width, confirm.
676  const auto order_key_size_in_byte = getOrderKeySize(window_func_context) * 8;
677  auto const order_key_ptr =
678  window_func_context->getWindowFunction()->getOrderKeys().front();
679  CHECK(order_key_ptr);
680  auto const order_col_ti = order_key_ptr->get_type_info();
681  auto const order_col_llvm_type =
682  order_col_ti.is_fp() ? get_fp_type(order_key_size_in_byte, cgen_state_->context_)
683  : get_int_type(order_key_size_in_byte, cgen_state_->context_);
684  if (!window_func_context->getWindowFunction()->isFrameNavigateWindowFunction()) {
685  auto rowid_in_partition_lv = code_generator.codegenWindowPosition(
686  window_func_context, args.current_row_pos_lv);
687  current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
688  order_col_llvm_type, args.order_key_buf_ptr_lv, rowid_in_partition_lv);
689  } else {
690  current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
691  order_col_llvm_type, args.order_key_buf_ptr_lv, args.current_row_pos_lv);
692  }
693  return cgen_state_->ir_builder_.CreateLoad(
694  current_col_value_ptr_lv->getType()->getPointerElementType(),
695  current_col_value_ptr_lv,
696  "current_col_value");
697 }
bool isFrameNavigateWindowFunction() const
Definition: Analyzer.h:2630
llvm::Value * current_row_pos_lv
Definition: WindowContext.h:89
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2584
llvm::Value * codegenWindowPosition(const WindowFunctionContext *window_func_context, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:227
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1292
llvm::Value * order_key_buf_ptr_lv
Definition: WindowContext.h:95
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const
size_t getOrderKeySize(WindowFunctionContext *window_func_context) const

+ Here is the call graph for this function:

std::pair< std::string, llvm::Value * > Executor::codegenLoadOrderKeyBufPtr ( WindowFunctionContext window_func_context) const
private

Definition at line 838 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getOrderKeyColumnBufferTypes(), Analyzer::WindowFunction::getOrderKeys(), and WindowFunctionContext::getWindowFunction().

839  {
840  auto const order_key_ti =
841  window_func_context->getWindowFunction()->getOrderKeys().front()->get_type_info();
842  auto const order_key_size = order_key_ti.get_size();
843  auto const order_col_type_name = get_col_type_name_by_size(
844  order_key_size,
845  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
846  size_t order_key_size_in_byte = order_key_size * 8;
847  auto const order_key_type =
848  order_key_ti.is_fp() ? get_fp_type(order_key_size_in_byte, cgen_state_->context_)
849  : get_int_type(order_key_size_in_byte, cgen_state_->context_);
850  auto const order_key_buf_type = llvm::PointerType::get(order_key_type, 0);
851  auto const order_key_buf = cgen_state_->llInt(
852  reinterpret_cast<int64_t>(window_func_context->getOrderKeyColumnBuffers().front()));
853  auto const order_key_buf_ptr_lv =
854  cgen_state_->ir_builder_.CreateIntToPtr(order_key_buf, order_key_buf_type);
855  return std::make_pair(order_col_type_name, order_key_buf_ptr_lv);
856 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
const std::vector< SQLTypeInfo > & getOrderKeyColumnBufferTypes() const
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2584
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1292
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

WindowPartitionBufferPtrs Executor::codegenLoadPartitionBuffers ( WindowFunctionContext window_func_context,
llvm::Value *  partition_index_lv 
) const
private

Definition at line 858 of file WindowFunctionIR.cpp.

References WindowFunctionContext::counts(), WindowPartitionBufferPtrs::current_partition_start_offset_lv, get_int_type(), WindowPartitionBufferPtrs::num_elem_current_partition_lv, WindowFunctionContext::partitionStartOffset(), WindowFunctionContext::payload(), WindowFunctionContext::sortedPartition(), WindowPartitionBufferPtrs::target_partition_rowid_ptr_lv, and WindowPartitionBufferPtrs::target_partition_sorted_rowid_ptr_lv.

860  {
861  WindowPartitionBufferPtrs bufferPtrs;
862  const auto pi64_type =
863  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
864  const auto pi32_type =
865  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
866 
867  // partial sum of # elems of partitions
868  auto partition_start_offset_buf_lv = cgen_state_->llInt(
869  reinterpret_cast<int64_t>(window_func_context->partitionStartOffset()));
870  auto partition_start_offset_ptr_lv =
871  cgen_state_->ir_builder_.CreateIntToPtr(partition_start_offset_buf_lv, pi64_type);
872 
873  // get start offset of the current partition
874  auto current_partition_start_offset_ptr_lv =
875  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
876  partition_start_offset_ptr_lv,
877  partition_index_lv);
878  bufferPtrs.current_partition_start_offset_lv = cgen_state_->ir_builder_.CreateLoad(
879  current_partition_start_offset_ptr_lv->getType()->getPointerElementType(),
880  current_partition_start_offset_ptr_lv);
881 
882  // row_id buf of the current partition
883  const auto partition_rowid_buf_lv =
884  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->payload()));
885  const auto partition_rowid_ptr_lv =
886  cgen_state_->ir_builder_.CreateIntToPtr(partition_rowid_buf_lv, pi32_type);
887  bufferPtrs.target_partition_rowid_ptr_lv =
888  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
889  partition_rowid_ptr_lv,
891 
892  // row_id buf of ordered current partition
893  const auto sorted_rowid_lv = cgen_state_->llInt(
894  reinterpret_cast<int64_t>(window_func_context->sortedPartition()));
895  const auto sorted_rowid_ptr_lv =
896  cgen_state_->ir_builder_.CreateIntToPtr(sorted_rowid_lv, pi64_type);
898  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
899  sorted_rowid_ptr_lv,
901 
902  // # elems per partition
903  const auto partition_count_buf =
904  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->counts()));
905  auto partition_count_buf_ptr_lv =
906  cgen_state_->ir_builder_.CreateIntToPtr(partition_count_buf, pi32_type);
907 
908  // # elems of the given partition
909  const auto num_elem_current_partition_ptr =
910  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
911  partition_count_buf_ptr_lv,
912  partition_index_lv);
913  bufferPtrs.num_elem_current_partition_lv = cgen_state_->castToTypeIn(
914  cgen_state_->ir_builder_.CreateLoad(
915  num_elem_current_partition_ptr->getType()->getPointerElementType(),
916  num_elem_current_partition_ptr),
917  64);
918  return bufferPtrs;
919 }
llvm::Value * current_partition_start_offset_lv
llvm::Value * num_elem_current_partition_lv
const int32_t * counts() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Value * target_partition_sorted_rowid_ptr_lv
llvm::Value * target_partition_rowid_ptr_lv
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const int64_t * partitionStartOffset() const
const int64_t * sortedPartition() const
const int32_t * payload() const

+ Here is the call graph for this function:

llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3218 of file NativeCodegen.cpp.

3220  {
3222  if (!co.filter_on_deleted_column) {
3223  return nullptr;
3224  }
3225  CHECK(!ra_exe_unit.input_descs.empty());
3226  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
3227  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
3228  return nullptr;
3229  }
3230  const auto& table_key = outer_input_desc.getTableKey();
3231  const auto deleted_cd = plan_state_->getDeletedColForTable(table_key);
3232  if (!deleted_cd) {
3233  return nullptr;
3234  }
3235  CHECK(deleted_cd->columnType.is_boolean());
3236  const auto deleted_expr =
3237  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
3238  shared::ColumnKey{table_key, deleted_cd->columnId},
3239  outer_input_desc.getNestLevel());
3240  CodeGenerator code_generator(this);
3241  const auto is_deleted =
3242  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
3243  const auto is_deleted_bb = llvm::BasicBlock::Create(
3244  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
3245  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
3246  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
3247  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
3248  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
3249  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3250  cgen_state_->ir_builder_.SetInsertPoint(bb);
3251  return bb;
3252 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1347
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291
void Executor::codegenWindowAvgEpilogue ( llvm::Value *  crt_val,
llvm::Value *  window_func_null_val 
)
private

Definition at line 1299 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

1300  {
1302  const auto window_func_context =
1304  const auto window_func = window_func_context->getWindowFunction();
1305  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1306  const auto pi32_type =
1307  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1308  const auto pi64_type =
1309  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1310  const auto aggregate_state_type =
1311  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1312  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1313  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1314  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
1315  aggregate_state_count_i64, aggregate_state_type);
1316  std::string agg_count_func_name = "agg_count";
1317  switch (window_func_ti.get_type()) {
1318  case kFLOAT: {
1319  agg_count_func_name += "_float";
1320  break;
1321  }
1322  case kDOUBLE: {
1323  agg_count_func_name += "_double";
1324  break;
1325  }
1326  default: {
1327  break;
1328  }
1329  }
1330  agg_count_func_name += "_skip_val";
1331  cgen_state_->emitCall(agg_count_func_name,
1332  {aggregate_state_count, crt_val, window_func_null_val});
1333 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenWindowFrameBounds ( WindowFunctionContext window_func_context,
const Analyzer::WindowFrame frame_start_bound,
const Analyzer::WindowFrame frame_end_bound,
llvm::Value *  order_key_col_null_val_lv,
WindowFrameBoundFuncArgs args,
CodeGenerator code_generator 
)
private

Definition at line 936 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, CHECK, WindowFrameBoundFuncArgs::current_col_value_lv, WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFrame::hasTimestampTypeFrameBound(), and WindowFrameBoundFuncArgs::order_type_col_name.

942  {
943  const auto window_func = window_func_context->getWindowFunction();
944  CHECK(window_func);
945  const auto is_timestamp_type_frame = frame_start_bound->hasTimestampTypeFrameBound() ||
946  frame_end_bound->hasTimestampTypeFrameBound();
947 
948  if (window_func->hasRangeModeFraming()) {
949  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1);
950  CHECK(window_func->getOrderKeys().size() == 1UL);
951  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1UL);
952  args.order_type_col_name = getOrderKeyTypeName(window_func_context);
953  args.current_col_value_lv =
954  codegenLoadCurrentValueFromColBuf(window_func_context, code_generator, args);
955  }
956 
957  auto get_order_key_null_val = [is_timestamp_type_frame,
958  &order_key_col_null_val_lv,
959  this](const Analyzer::WindowFrame* frame_bound) {
960  return is_timestamp_type_frame && !frame_bound->isCurrentRowBound()
961  ? cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64)
962  : order_key_col_null_val_lv;
963  };
964  auto frame_start_bound_lv =
965  codegenFrameBound(true,
966  window_func->hasRangeModeFraming(),
967  window_func->isFrameNavigateWindowFunction(),
968  frame_start_bound,
969  is_timestamp_type_frame,
970  get_order_key_null_val(frame_start_bound),
971  args);
972  auto frame_end_bound_lv =
973  codegenFrameBound(false,
974  window_func->hasRangeModeFraming(),
975  window_func->isFrameNavigateWindowFunction(),
976  frame_end_bound,
977  is_timestamp_type_frame,
978  get_order_key_null_val(frame_end_bound),
979  args);
980  CHECK(frame_start_bound_lv);
981  CHECK(frame_end_bound_lv);
982  return std::make_pair(frame_start_bound_lv, frame_end_bound_lv);
983 }
bool hasTimestampTypeFrameBound() const
Definition: Analyzer.h:2495
llvm::Value * current_col_value_lv
Definition: WindowContext.h:90
llvm::Value * codegenFrameBound(bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Value * codegenLoadCurrentValueFromColBuf(WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
const std::string getOrderKeyTypeName(WindowFunctionContext *window_func_context) const
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const
std::string order_type_col_name
Definition: WindowContext.h:96

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 21 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, COUNT, COUNT_IF, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAG_IN_FRAME, LAST_VALUE, LEAD, LEAD_IN_FRAME, LOG, MAX, MIN, NTH_VALUE, NTH_VALUE_IN_FRAME, NTILE, PERCENT_RANK, RANK, ROW_NUMBER, SUM, and SUM_IF.

22  {
24  CodeGenerator code_generator(this);
25 
26  const auto window_func_context =
28  target_index);
29  const auto window_func = window_func_context->getWindowFunction();
30  switch (window_func->getKind()) {
35  // they are always evaluated on the entire partition
36  return code_generator.codegenWindowPosition(window_func_context,
37  code_generator.posArg(nullptr));
38  }
41  // they are always evaluated on the entire partition
42  return cgen_state_->emitCall("percent_window_func",
43  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
44  window_func_context->output())),
45  code_generator.posArg(nullptr)});
46  }
52  // they are always evaluated on the current frame
54  const auto& args = window_func->getArgs();
55  CHECK(!args.empty());
56  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
57  CHECK_EQ(arg_lvs.size(), size_t(1));
58  return arg_lvs.front();
59  }
67  // they are always evaluated on the current frame
69  }
74  }
75  default: {
76  LOG(FATAL) << "Invalid window function kind";
77  }
78  }
79  return nullptr;
80 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define LOG(tag)
Definition: Logger.h:285
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
static const WindowProjectNodeContext * get(Executor *executor)
const WindowFunctionContext * activateWindowFunctionContext(Executor *executor, const size_t target_index) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * codegenWindowNavigationFunctionOnFrame(const CompilationOptions &co)
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregate ( const CompilationOptions co)
private

Definition at line 242 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, CHECK, WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().

242  {
244  const auto reset_state_false_bb = codegenWindowResetStateControlFlow();
245  auto aggregate_state = aggregateWindowStatePtr();
246  llvm::Value* aggregate_state_count = nullptr;
247  const auto window_func_context =
249  const auto window_func = window_func_context->getWindowFunction();
250  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
251  const auto aggregate_state_count_i64 = cgen_state_->llInt(
252  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
253  const auto pi64_type =
254  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
255  aggregate_state_count =
256  cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_count_i64, pi64_type);
257  }
258  codegenWindowFunctionStateInit(aggregate_state);
259  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
260  const auto count_zero = cgen_state_->llInt(int64_t(0));
261  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
262  }
263  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
264  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
266  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
267 }
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
static const WindowProjectNodeContext * get(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
#define CHECK(condition)
Definition: Logger.h:291
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
llvm::BasicBlock * codegenWindowResetStateControlFlow()

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 985 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, COUNT_IF, WindowFrameBoundFuncArgs::current_partition_start_offset_lv, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), inline_fixed_encoding_null_val(), kDATE, kDOUBLE, kENCODING_DATE_IN_DAYS, kENCODING_FIXED, kFLOAT, kSUM_IF, kTIME, kTIMESTAMP, kTINYINT, MAX, MIN, CodeGenerator::posArg(), SUM, SUM_IF, and window_function_conditional_aggregate().

986  {
988  const auto window_func_context =
990  const auto window_func = window_func_context->getWindowFunction();
991  const auto window_func_ti = get_adjusted_window_type_info(window_func);
992  const auto window_func_null_val =
993  window_func_ti.is_fp()
994  ? cgen_state_->inlineFpNull(window_func_ti)
995  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
996  if (window_func_context->elementCount() == 0) {
997  // we do not need to generate code for an empty input table
998  return window_func->getKind() == SqlWindowFunctionKind::AVG
999  ? cgen_state_->inlineFpNull(SQLTypeInfo(SQLTypes::kDOUBLE))
1000  : window_func_null_val;
1001  }
1002  const auto& args = window_func->getArgs();
1003  CodeGenerator code_generator(this);
1004  if (window_func_context->needsToBuildAggregateTree()) {
1005  // compute an aggregated value for each row of the window frame by using a segment tree;
1006  // when constructing a window context, we build the necessary segment tree (the so-called
1007  // `aggregate tree`) to query the aggregated value of the specific window frame
1008  const auto pi64_type =
1009  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1010  const auto ppi64_type = llvm::PointerType::get(
1011  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0), 0);
1012 
1013  auto [frame_start_bound_expr_lv, frame_end_bound_expr_lv] =
1014  codegenFrameBoundRange(window_func, code_generator, co);
1015 
1016  // compute aggregated value over the computed frame range
1017  auto current_row_pos_lv = code_generator.posArg(nullptr);
1018  auto partition_index_lv =
1019  codegenCurrentPartitionIndex(window_func_context, current_row_pos_lv);
1020 
1021  // ordering column buffer
1022  const auto target_col_ti = args.front()->get_type_info();
1023  const auto target_col_size = target_col_ti.get_size();
1024  const auto col_type_name =
1025  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
1026 
1027  const auto partition_buf_ptrs =
1028  codegenLoadPartitionBuffers(window_func_context, partition_index_lv);
1029 
1030  auto [order_col_type_name, order_key_buf_ptr_lv] =
1031  codegenLoadOrderKeyBufPtr(window_func_context);
1032 
1033  // null value of the ordering column
1034  const auto order_key_buf_ti =
1035  window_func_context->getOrderKeyColumnBufferTypes().front();
1036  auto const ordering_spec = window_func->getCollation().front();
1037  llvm::Value* order_key_col_null_val_lv{nullptr};
1038  switch (order_key_buf_ti.get_type()) {
1039  case kDATE:
1040  case kTIMESTAMP:
1041  case kTIME: {
1042  if (order_key_buf_ti.get_compression() == kENCODING_FIXED ||
1043  order_key_buf_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
1044  auto null_val = inline_fixed_encoding_null_val(order_key_buf_ti);
1045  order_key_col_null_val_lv = cgen_state_->llInt((int32_t)null_val);
1046  break;
1047  }
1048  }
1049  default: {
1050  order_key_col_null_val_lv = cgen_state_->inlineNull(order_key_buf_ti);
1051  break;
1052  }
1053  }
1054 
1055  auto [null_start_pos_lv, null_end_pos_lv] =
1056  codegenFrameNullRange(window_func_context, partition_index_lv);
1057  auto nulls_first_lv = cgen_state_->llBool(ordering_spec.nulls_first);
1058 
1060  frame_start_bound_expr_lv,
1061  frame_end_bound_expr_lv,
1062  current_row_pos_lv,
1063  nullptr,
1064  partition_buf_ptrs.current_partition_start_offset_lv,
1065  cgen_state_->llInt((int64_t)0),
1066  cgen_state_->llInt((int64_t)1),
1067  partition_buf_ptrs.num_elem_current_partition_lv,
1068  order_key_buf_ptr_lv,
1069  "",
1070  partition_buf_ptrs.target_partition_rowid_ptr_lv,
1071  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
1072  nulls_first_lv,
1073  null_start_pos_lv,
1074  null_end_pos_lv};
1075  auto [frame_start_bound_lv, frame_end_bound_lv] =
1076  codegenWindowFrameBounds(window_func_context,
1077  window_func->getFrameStartBound(),
1078  window_func->getFrameEndBound(),
1079  order_key_col_null_val_lv,
1081  code_generator);
1082 
1083  // codegen to send a query with frame bound to aggregate tree searcher
1084  llvm::Value* aggregation_trees_lv{nullptr};
1085  llvm::Value* invalid_val_lv{nullptr};
1086  llvm::Value* null_val_lv{nullptr};
1087  std::string aggregation_tree_search_func_name{"search_"};
1088  std::string aggregation_tree_getter_func_name{"get_"};
1089 
1090  // prepare null values and aggregate_tree getter and searcher depending on
1091  // a type of the ordering column
1092  auto agg_expr_ti = args.front()->get_type_info();
1093  if (agg_expr_ti.is_fp()) {
1094  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1095  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::max());
1096  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1097  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::lowest());
1098  } else {
1099  invalid_val_lv = cgen_state_->llFp((double)0);
1100  }
1101  null_val_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1102  aggregation_tree_search_func_name += "double";
1103  aggregation_tree_getter_func_name += "double";
1104  } else {
1105  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1106  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::max());
1107  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1108  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::lowest());
1109  } else {
1110  invalid_val_lv = cgen_state_->llInt((int64_t)0);
1111  }
1112  null_val_lv = cgen_state_->llInt(inline_int_null_value<int64_t>());
1113  aggregation_tree_search_func_name += "int64_t";
1114  aggregation_tree_getter_func_name += "integer";
1115  }
1116 
1117  // derived aggregation has a different code path
1118  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1119  aggregation_tree_search_func_name += "_derived";
1120  aggregation_tree_getter_func_name += "_derived";
1121  }
1122 
1123  // get a buffer holding aggregate trees for each partition
1124  if (agg_expr_ti.is_fp()) {
1125  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1126  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1127  window_func_context->getDerivedAggregationTreesForDoubleTypeWindowExpr()));
1128  } else {
1129  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1130  window_func_context->getAggregationTreesForDoubleTypeWindowExpr()));
1131  }
1132  } else {
1133  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1134  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1135  window_func_context->getDerivedAggregationTreesForIntegerTypeWindowExpr()));
1136  } else {
1137  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1138  window_func_context->getAggregationTreesForIntegerTypeWindowExpr()));
1139  }
1140  }
1141 
1142  CHECK(aggregation_trees_lv);
1143  CHECK(invalid_val_lv);
1144  aggregation_tree_search_func_name += "_aggregation_tree";
1145  aggregation_tree_getter_func_name += "_aggregation_tree";
1146 
1147  // get the aggregate tree of the current partition from a window context
1148  auto aggregation_trees_ptr =
1149  cgen_state_->ir_builder_.CreateIntToPtr(aggregation_trees_lv, ppi64_type);
1150  auto target_aggregation_tree_lv = cgen_state_->emitCall(
1151  aggregation_tree_getter_func_name, {aggregation_trees_ptr, partition_index_lv});
1152 
1153  // a depth of segment tree
1154  const auto tree_depth_buf = cgen_state_->llInt(
1155  reinterpret_cast<int64_t>(window_func_context->getAggregateTreeDepth()));
1156  const auto tree_depth_buf_ptr =
1157  cgen_state_->ir_builder_.CreateIntToPtr(tree_depth_buf, pi64_type);
1158  const auto current_partition_tree_depth_buf_ptr = cgen_state_->ir_builder_.CreateGEP(
1159  get_int_type(64, cgen_state_->context_), tree_depth_buf_ptr, partition_index_lv);
1160  const auto current_partition_tree_depth_lv = cgen_state_->ir_builder_.CreateLoad(
1161  current_partition_tree_depth_buf_ptr->getType()->getPointerElementType(),
1162  current_partition_tree_depth_buf_ptr);
1163 
1164  // a fanout of the current partition's segment tree
1165  const auto aggregation_tree_fanout_lv = cgen_state_->llInt(
1166  static_cast<int64_t>(window_func_context->getAggregateTreeFanout()));
1167 
1168  // agg_type
1169  const auto agg_type_lv =
1170  cgen_state_->llInt(static_cast<int32_t>(window_func->getKind()));
1171 
1172  // send a query to the aggregate tree with the frame range:
1173  // `frame_start_bound_lv` ~ `frame_end_bound_lv`
1174  auto res_lv =
1175  cgen_state_->emitCall(aggregation_tree_search_func_name,
1176  {target_aggregation_tree_lv,
1177  frame_start_bound_lv,
1178  frame_end_bound_lv,
1179  current_partition_tree_depth_lv,
1180  aggregation_tree_fanout_lv,
1181  cgen_state_->llBool(agg_expr_ti.is_decimal()),
1182  cgen_state_->llInt((int64_t)agg_expr_ti.get_scale()),
1183  invalid_val_lv,
1184  null_val_lv,
1185  agg_type_lv});
1186 
1187  // handling returned null value if exists
1188  std::string null_handler_func_name{"handle_null_val_"};
1189  std::vector<llvm::Value*> null_handler_args{res_lv, null_val_lv};
1190 
1191  // determine null_handling function's name
1192  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1193  // average aggregate function returns a value as a double
1194  // (and our search* function also returns a double)
1195  if (agg_expr_ti.is_fp()) {
1196  // fp type: double null value
1197  null_handler_func_name += "double_double";
1198  } else {
1199  // non-fp type: int64_t null type
1200  null_handler_func_name += "double_int64_t";
1201  }
1202  } else if (agg_expr_ti.is_fp()) {
1203  // fp type: double null value
1204  null_handler_func_name += "double_double";
1205  } else {
1206  // non-fp type: int64_t null type
1207  null_handler_func_name += "int64_t_int64_t";
1208  }
1209  null_handler_func_name += "_window_framing_agg";
1210 
1211  // prepare null_val
1212  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1213  if (agg_expr_ti.is_fp()) {
1214  null_handler_args.push_back(cgen_state_->llFp((double)0));
1215  } else {
1216  null_handler_args.push_back(cgen_state_->llInt((int64_t)0));
1217  }
1218  } else if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1219  null_handler_args.push_back(cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE)));
1220  } else {
1221  null_handler_args.push_back(cgen_state_->castToTypeIn(window_func_null_val, 64));
1222  }
1223  res_lv = cgen_state_->emitCall(null_handler_func_name, null_handler_args);
1224 
1225  // when AGG_TYPE is double, we get a double-typed return value, but we expect an
1226  // integer-typed value for the count aggregation
1227  if (window_func->getKind() == SqlWindowFunctionKind::COUNT && agg_expr_ti.is_fp()) {
1228  return cgen_state_->ir_builder_.CreateFPToSI(
1229  res_lv, get_int_type(64, cgen_state_->context_));
1230  }
1231  return res_lv;
1232  } else {
1233  auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
1234  Analyzer::Expr* arg_target_expr;
1235  std::vector<llvm::Value*> agg_func_args{aggregate_state};
1236  auto modified_window_func_null_val = window_func_null_val;
1237  if (args.empty() ||
1238  (window_func->getKind() == SqlWindowFunctionKind::COUNT &&
1239  dynamic_cast<Analyzer::Constant*>(args.front().get()) != nullptr)) {
1240  // a count aggregation without an expression: COUNT(1) or COUNT(*)
1241  agg_func_args.push_back(cgen_state_->llInt(int64_t(1)));
1242  } else {
1243  // we use #base_agg_func_name##_skip_val agg function
1244  // i.e., int64_t agg_sum_skip_val(int64_t* agg, int64_t val, int64_t skip_val)
1245  arg_target_expr = args.front().get();
1246  const auto arg_lvs = code_generator.codegen(arg_target_expr, true, co);
1247  CHECK_EQ(arg_lvs.size(), size_t(1));
1248  // handling current row's value
1249  auto crt_val = arg_lvs.front();
1250  if ((window_func->getKind() == SqlWindowFunctionKind::SUM ||
1251  window_func->getKind() == SqlWindowFunctionKind::SUM_IF) &&
1252  !window_func_ti.is_fp()) {
1253  crt_val = code_generator.codegenCastBetweenIntTypes(
1254  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
1255  }
1256  agg_func_args.push_back(window_func_ti.get_type() == kFLOAT
1257  ? crt_val
1258  : cgen_state_->castToTypeIn(crt_val, 64));
1259  // handle null value and conditional value for conditional aggregates if necessary
1260  llvm::Value* cond_lv{nullptr};
1261  if (window_function_conditional_aggregate(window_func->getKind())) {
1262  switch (window_func->getKind()) {
1264  // COUNT_IF has a single condition expr which is always bool type
1265  modified_window_func_null_val = cgen_state_->castToTypeIn(
1266  cgen_state_->inlineNull(SQLTypeInfo(kTINYINT)), 64);
1267  break;
1269  // FP type input col uses its own null value depending on the type
1270  // otherwise (integer type input col), we use 8-byte type
1271  if (args.front()->get_type_info().is_integer()) {
1272  agg_func_args[1] = cgen_state_->castToTypeIn(agg_func_args[1], 64);
1273  // keep the null value but cast its type to 8-byte
1274  modified_window_func_null_val =
1275  cgen_state_->castToTypeIn(window_func_null_val, 64);
1276  }
1277  auto cond_expr_lv = code_generator.codegen(args[1].get(), true, co).front();
1278  cond_lv =
1280  }
1281  default:
1282  break;
1283  }
1284  }
1285  agg_name += "_skip_val";
1286  agg_func_args.push_back(modified_window_func_null_val);
1287  if (cond_lv) {
1288  agg_func_args.push_back(cond_lv);
1289  }
1290  }
1291  cgen_state_->emitCall(agg_name, agg_func_args);
1292  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1293  codegenWindowAvgEpilogue(agg_func_args[1], window_func_null_val);
1294  }
1295  return codegenAggregateWindowState();
1296  }
1297 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr(WindowFunctionContext *window_func_context) const
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
Definition: sqltypes.h:66
bool window_function_conditional_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:59
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string get_window_agg_name(const SqlWindowFunctionKind kind, const SQLTypeInfo &window_func_ti)
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * current_partition_start_offset_lv
Definition: WindowContext.h:91
Definition: sqltypes.h:70
llvm::Value * codegenConditionalAggregateCondValSelector(llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange(const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val)
llvm::Value * codegenAggregateWindowState()
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
WindowPartitionBufferPtrs codegenLoadPartitionBuffers(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

void Executor::codegenWindowFunctionStateInit ( llvm::Value *  aggregate_state)
private

Definition at line 298 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, COUNT, COUNT_IF, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

// Body of Executor::codegenWindowFunctionStateInit: chooses an initial value for the
// window function under codegen and emits a call to agg_id / agg_id_double /
// agg_id_float with `aggregate_state` and that value as arguments.
// NOTE(review): listing lines 299 and 301 are absent from this extracted listing, so
// the initializer of `window_func_context` is not visible here. The References list
// for this function names AUTOMATIC_IR_METADATA and
// WindowProjectNodeContext::getActiveWindowFunctionContext() -- confirm against
// WindowFunctionIR.cpp.
298  {
300  const auto window_func_context =
302  const auto window_func = window_func_context->getWindowFunction();
303  const auto window_func_ti = get_adjusted_window_type_info(window_func);
// Null sentinel: FP result types use inlineFpNull(); every other type uses
// inlineIntNull() passed through castToTypeIn(..., 64).
304  const auto window_func_null_val =
305  window_func_ti.is_fp()
306  ? cgen_state_->inlineFpNull(window_func_ti)
307  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
308  llvm::Value* window_func_init_val;
309  const auto window_func_kind = window_func_context->getWindowFunction()->getKind();
// COUNT and COUNT_IF start from zero (float, double or int64_t constant, selected by
// window_func_ti); every other kind starts from the null sentinel computed above.
310  if (window_func_kind == SqlWindowFunctionKind::COUNT ||
311  window_func_kind == SqlWindowFunctionKind::COUNT_IF) {
312  switch (window_func_ti.get_type()) {
313  case kFLOAT: {
314  window_func_init_val = cgen_state_->llFp(float(0));
315  break;
316  }
317  case kDOUBLE: {
318  window_func_init_val = cgen_state_->llFp(double(0));
319  break;
320  }
321  default: {
322  window_func_init_val = cgen_state_->llInt(int64_t(0));
323  break;
324  }
325  }
326  } else {
327  window_func_init_val = window_func_null_val;
328  }
// Pointer-to-i32 type; only the kFLOAT case below uses it.
329  const auto pi32_type =
330  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
// Pick the agg_id variant from the adjusted result type.
331  switch (window_func_ti.get_type()) {
332  case kDOUBLE: {
333  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
334  break;
335  }
336  case kFLOAT: {
// The state pointer is bitcast to an i32 pointer before it is handed to agg_id_float.
337  aggregate_state =
338  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
339  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
340  break;
341  }
342  default: {
343  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
344  break;
345  }
346  }
347 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowNavigationFunctionOnFrame ( const CompilationOptions co)
private

Definition at line 349 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, WindowFrameBoundFuncArgs::current_partition_start_offset_lv, anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size_with_encoding(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kENCODING_DATE_IN_DAYS, kSecsPerDay, LAG_IN_FRAME, LEAD_IN_FRAME, NTH_VALUE_IN_FRAME, and UNREACHABLE.

// Body of Executor::codegenWindowNavigationFunctionOnFrame: generates IR that locates
// a row relative to the current row (or to the frame start) inside the window frame
// and emits a call to a get_<type>_value_<type>_type_in_frame runtime function to
// fetch the target column value at that position. Returns the resulting llvm::Value*.
// NOTE(review): listing lines 351, 353, 373, 438, 461, 469, 473 and 477 are absent
// from this extracted listing. As a result the initializers of `window_func_context`
// and `target_col_null_val_lv`, the declaration that precedes the brace-initializer
// list ending on line 455, one argument of the codegenWindowFrameBounds() call, and
// the three `case` labels of the switch on `window_func_kind` are not visible here.
// Confirm each against WindowFunctionIR.cpp before relying on this listing.
350  {
352  const auto window_func_context =
354  const auto window_func = window_func_context->getWindowFunction();
355  const auto window_func_kind = window_func->getKind();
356  const auto& args = window_func->getArgs();
// NOTE(review): this CHECK admits args.size() == 1, yet line 467 below reads args[1]
// unconditionally -- confirm that callers always supply the offset argument.
357  CHECK(args.size() >= 1 && args.size() <= 3);
358  CodeGenerator code_generator(this);
359 
// Two type names are derived: one from the first argument's type info and one from
// the window function's own type info. Both feed the runtime function name on
// lines 493-496.
360  const auto target_col_ti = args.front()->get_type_info();
361  const auto target_col_size = target_col_ti.get_size();
362  const auto target_col_type_name =
363  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
364  const auto target_col_logical_type_name = get_col_type_name_by_size(
365  window_func->get_type_info().get_size(), window_func->get_type_info().is_fp());
366 
367  // when target_column is fixed encoded, we store the actual column value by
368  // considering it, but our resultset analyzer only considers the type without encoding
369  // scheme so we handle them separately
370  auto logical_null_val_lv =
371  get_null_value_by_size(cgen_state_.get(), window_func->get_type_info());
372  auto target_col_null_val_lv =
374  if (window_func_context->elementCount() == 0) {
375  // we do not need to generate a code for an empty input table
376  return target_col_null_val_lv;
377  }
378 
379  auto [frame_start_bound_expr_lv, frame_end_bound_expr_lv] =
380  codegenFrameBoundRange(window_func, code_generator, co);
381 
382  auto current_row_pos_lv = code_generator.posArg(nullptr);
383  auto partition_index_lv =
384  codegenCurrentPartitionIndex(window_func_context, current_row_pos_lv);
385 
386  // load window function input expression; target_column
// The value below is target_col_size * 8 and is passed as the `width` argument of
// get_fp_type()/get_int_type(); despite the `_in_byte` suffix it is presumably a
// width in bits -- TODO confirm get_size() returns bytes.
387  size_t target_col_size_in_byte = target_col_size * 8;
388  llvm::Type* col_buf_ptr_type =
389  target_col_ti.is_fp()
390  ? get_fp_type(target_col_size_in_byte, cgen_state_->context_)
391  : get_int_type(target_col_size_in_byte, cgen_state_->context_);
392  auto col_buf_type = llvm::PointerType::get(col_buf_ptr_type, 0);
// The first column buffer's address is embedded into the IR as an int64_t constant
// and then converted to a typed pointer with CreateIntToPtr.
393  auto target_col_buf_ptr_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
394  window_func_context->getColumnBufferForWindowFunctionExpressions().front()));
395  auto target_col_buf_lv =
396  cgen_state_->ir_builder_.CreateIntToPtr(target_col_buf_ptr_lv, col_buf_type);
397 
398  // prepare various buffer ptrs related to the window partition
399  auto partition_buf_ptrs =
400  codegenLoadPartitionBuffers(window_func_context, partition_index_lv);
401 
402  // null value of the ordering column
// Only the first order-key buffer type and the first collation entry are consulted.
403  const auto order_key_buf_ti =
404  window_func_context->getOrderKeyColumnBufferTypes().front();
405  auto const ordering_spec = window_func->getCollation().front();
406  auto order_key_col_null_val_lv =
407  get_null_value_by_size_with_encoding(cgen_state_.get(), order_key_buf_ti);
408 
409  // load ordering column
410  auto [order_col_type_name, order_key_buf_ptr_lv] =
411  codegenLoadOrderKeyBufPtr(window_func_context);
412 
413  // null range
414  auto [null_start_pos_lv, null_end_pos_lv] =
415  codegenFrameNullRange(window_func_context, partition_index_lv);
416 
417  // compute a row index of the current row w.r.t the window frame it belongs to
// The runtime function name is assembled as
// "compute_" + <order column type name> + ("_greater_equal" for descending order,
// "_less_equal" otherwise) + "_current_row_idx_in_frame".
418  std::string row_idx_on_frame_func = "compute_";
419  row_idx_on_frame_func += order_col_type_name;
420  row_idx_on_frame_func += ordering_spec.is_desc ? "_greater_equal" : "_less_equal";
421  row_idx_on_frame_func += "_current_row_idx_in_frame";
422  auto int64_t_one_val_lv = cgen_state_->llInt((int64_t)1);
423  auto nulls_first_lv = cgen_state_->llBool(ordering_spec.nulls_first);
424  auto cur_row_idx_in_frame_lv =
425  cgen_state_->emitCall(row_idx_on_frame_func,
426  {partition_buf_ptrs.num_elem_current_partition_lv,
427  current_row_pos_lv,
428  order_key_buf_ptr_lv,
429  partition_buf_ptrs.target_partition_rowid_ptr_lv,
430  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
431  order_key_col_null_val_lv,
432  nulls_first_lv,
433  null_start_pos_lv,
434  null_end_pos_lv});
435 
436  // compute frame bound for the current row
437  auto const int64_t_zero_val_lv = cgen_state_->llInt((int64_t)0);
// Brace-initializer list for an object whose declaration (listing line 438) is not
// shown; presumably the WindowFrameBoundFuncArgs passed to codegenWindowFrameBounds()
// below -- TODO confirm. With range-mode framing the raw row position and a zero
// start offset are used instead of the in-frame row index and the partition start
// offset.
439  frame_start_bound_expr_lv,
440  frame_end_bound_expr_lv,
441  window_func->hasRangeModeFraming() ? current_row_pos_lv : cur_row_idx_in_frame_lv,
442  nullptr,
443  window_func->hasRangeModeFraming()
444  ? int64_t_zero_val_lv
445  : partition_buf_ptrs.current_partition_start_offset_lv,
446  int64_t_zero_val_lv,
447  int64_t_one_val_lv,
448  partition_buf_ptrs.num_elem_current_partition_lv,
449  order_key_buf_ptr_lv,
450  "",
451  partition_buf_ptrs.target_partition_rowid_ptr_lv,
452  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
453  nulls_first_lv,
454  null_start_pos_lv,
455  null_end_pos_lv};
456  auto [frame_start_bound_lv, frame_end_bound_lv] =
457  codegenWindowFrameBounds(window_func_context,
458  window_func->getFrameStartBound(),
459  window_func->getFrameEndBound(),
460  order_key_col_null_val_lv,
462  code_generator);
463 
464  // apply offset
465  llvm::Value* modified_cur_row_idx_in_frame_lv{nullptr};
// The offset expression is args[1], widened through castToTypeIn(..., 64).
466  auto const offset_lv =
467  cgen_state_->castToTypeIn(code_generator.codegen(args[1].get(), true, co)[0], 64);
// The three case labels are missing from this listing; the References list names
// LAG_IN_FRAME, LEAD_IN_FRAME and NTH_VALUE_IN_FRAME, presumably in that order
// (subtract offset / add offset / frame start + offset) -- TODO confirm.
468  switch (window_func_kind) {
470  modified_cur_row_idx_in_frame_lv =
471  cgen_state_->ir_builder_.CreateSub(cur_row_idx_in_frame_lv, offset_lv);
472  break;
474  modified_cur_row_idx_in_frame_lv =
475  cgen_state_->ir_builder_.CreateAdd(cur_row_idx_in_frame_lv, offset_lv);
476  break;
478  auto candidate_row_idx =
479  cgen_state_->ir_builder_.CreateAdd(frame_start_bound_lv, offset_lv);
480  auto out_of_frame_bound_lv =
481  cgen_state_->ir_builder_.CreateICmpSGT(candidate_row_idx, frame_end_bound_lv);
482  // return null if the candidate_row_idx is out of frame bounds
// -1 is selected as the row index when the candidate lies past the frame end.
483  modified_cur_row_idx_in_frame_lv = cgen_state_->ir_builder_.CreateSelect(
484  out_of_frame_bound_lv, cgen_state_->llInt((int64_t)-1), candidate_row_idx);
485  break;
486  }
487  default:
488  UNREACHABLE() << "Unsupported window function to navigate a window frame.";
489  }
490  CHECK(modified_cur_row_idx_in_frame_lv);
491 
492  // get the target column value in the frame w.r.t the offset
// Runtime function name:
// "get_" + <target column type name> + "_value_" + <logical type name> + "_type_in_frame".
493  std::string target_func_name = "get_";
494  target_func_name += target_col_type_name + "_value_";
495  target_func_name += target_col_logical_type_name + "_type_";
496  target_func_name += "in_frame";
497  auto res_lv =
498  cgen_state_->emitCall(target_func_name,
499  {modified_cur_row_idx_in_frame_lv,
500  frame_start_bound_lv,
501  frame_end_bound_lv,
502  target_col_buf_lv,
503  partition_buf_ptrs.target_partition_rowid_ptr_lv,
504  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
505  logical_null_val_lv,
506  target_col_null_val_lv});
// Columns compressed as kENCODING_DATE_IN_DAYS get the fetched value passed through
// the "encode_date" runtime function together with the logical null value and
// kSecsPerDay.
507  if (target_col_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
508  res_lv = cgen_state_->emitCall(
509  "encode_date",
510  {res_lv, logical_null_val_lv, cgen_state_->llInt((int64_t)kSecsPerDay)});
511  }
512  CHECK(res_lv);
513  return res_lv;
514 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
static constexpr int64_t kSecsPerDay
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr(WindowFunctionContext *window_func_context) const
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
#define UNREACHABLE()
Definition: Logger.h:337
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * current_partition_start_offset_lv
Definition: WindowContext.h:91
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange(const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1292
#define CHECK(condition)
Definition: Logger.h:291
llvm::Value * get_null_value_by_size(CgenState *cgen_state, SQLTypeInfo col_ti)
WindowPartitionBufferPtrs codegenLoadPartitionBuffers(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
llvm::Value * get_null_value_by_size_with_encoding(CgenState *cgen_state, SQLTypeInfo col_ti)

+ Here is the call graph for this function:

llvm::BasicBlock * Executor::codegenWindowResetStateControlFlow ( )
private

Definition at line 269 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, WindowProjectNodeContext::getActiveWindowFunctionContext(), CodeGenerator::posArg(), and CodeGenerator::toBool().

// Body of Executor::codegenWindowResetStateControlFlow: emits a "bit_is_set" call on
// the buffer returned by partitionStart(), converts the result to a boolean and
// branches on it into two new basic blocks. On return the IR builder is positioned at
// the start of "reset_state.true"; the function returns the "reset_state.false" block.
// NOTE(review): listing lines 270 and 272 are absent from this extracted listing, so
// the initializer of `window_func_context` is not visible here. The References list
// names AUTOMATIC_IR_METADATA and
// WindowProjectNodeContext::getActiveWindowFunctionContext() -- confirm against
// WindowFunctionIR.cpp.
269  {
271  const auto window_func_context =
// The address returned by partitionStart() is embedded in the IR as an int64_t constant.
273  const auto bitset = cgen_state_->llInt(
274  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
// Arguments handed to bit_is_set: bounds 0 and elementCount() - 1, plus the inline
// null values for int64_t and int8_t.
275  const auto min_val = cgen_state_->llInt(int64_t(0));
276  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
277  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
278  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
279  CodeGenerator code_generator(this);
// posArg(nullptr) supplies the position tested in the bitset (presumably the current
// row position -- TODO confirm).
280  const auto reset_state =
281  code_generator.toBool(cgen_state_->emitCall("bit_is_set",
282  {bitset,
283  code_generator.posArg(nullptr),
284  min_val,
285  max_val,
286  null_val,
287  null_bool_val}));
288  const auto reset_state_true_bb = llvm::BasicBlock::Create(
289  cgen_state_->context_, "reset_state.true", cgen_state_->current_func_);
290  const auto reset_state_false_bb = llvm::BasicBlock::Create(
291  cgen_state_->context_, "reset_state.false", cgen_state_->current_func_);
292  cgen_state_->ir_builder_.CreateCondBr(
293  reset_state, reset_state_true_bb, reset_state_false_bb);
// Subsequent codegen by the caller lands in the "true" block; the "false" block is
// handed back so the caller can wire it up.
294  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
295  return reset_state_false_bb;
296 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)

+ Here is the call graph for this function:

ResultSetPtr Executor::collectAllDeviceResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner 
)
private

Definition at line 2317 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), collectAllDeviceShardedTopResults(), DEBUG_TIMER, SharedKernelContext::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, reduceMultiDeviceResults(), reduceSpeculativeTopN(), GroupByAndAggregate::shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and use_speculative_top_n().

Referenced by executeWorkUnitImpl().

// Body of Executor::collectAllDeviceResults: selects how the per-device fragment
// results held by `shared_context` are combined into a single ResultSetPtr.
// NOTE(review): listing lines 2326, 2327 and 2340 are absent from this extracted
// listing. The condition starting on line 2325 is therefore cut off, and the GPU
// branch of the `shard_count` conditional is not visible. The References list names
// NonGroupedAggregate, build_row_for_empty_input() and
// GroupByAndAggregate::shard_count_for_top_groups() -- confirm against Execute.cpp.
2322  {
2323  auto timer = DEBUG_TIMER(__func__);
2324  auto& result_per_device = shared_context.getFragmentResults();
// Empty-input special case (the remainder of the condition and the start of the
// statement are missing from the listing).
2325  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
2328  ra_exe_unit.target_exprs, query_mem_desc, device_type);
2329  }
// Speculative top-n path: a std::bad_alloc raised by the reduction is rethrown as
// SpeculativeTopNFailed.
2330  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
2331  try {
2332  return reduceSpeculativeTopN(
2333  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2334  } catch (const std::bad_alloc&) {
2335  throw SpeculativeTopNFailed("Failed during multi-device reduction.");
2336  }
2337  }
// shard_count is 0 for any device type other than GPU.
2338  const auto shard_count =
2339  device_type == ExecutorDeviceType::GPU
2341  : 0;
2342 
// The sharded top-results path is taken only with a non-zero shard count and at least
// one per-device result; everything else goes through reduceMultiDeviceResults().
2343  if (shard_count && !result_per_device.empty()) {
2344  return collectAllDeviceShardedTopResults(shared_context, ra_exe_unit);
2345  }
2346  return reduceMultiDeviceResults(
2347  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2348 }
std::vector< Analyzer::Expr * > target_exprs
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1412
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1306
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
Definition: Execute.cpp:2432
QueryDescriptionType getQueryDescriptionType() const
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit)
ResultSetPtr build_row_for_empty_input(const std::vector< Analyzer::Expr * > &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2276
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define DEBUG_TIMER(name)
Definition: Logger.h:411

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ResultSetPtr Executor::collectAllDeviceShardedTopResults ( SharedKernelContext & shared_context,
const RelAlgExecutionUnit & ra_exe_unit 
) const
private

Definition at line 2432 of file Execute.cpp.

References blockSize(), CHECK, CHECK_EQ, CHECK_LE, SharedKernelContext::getFragmentResults(), gridSize(), SortInfo::limit, SortInfo::offset, SortInfo::order_entries, anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), run_benchmark_import::result, and RelAlgExecutionUnit::sort_info.

Referenced by collectAllDeviceResults().

// Body of Executor::collectAllDeviceShardedTopResults: sorts every per-device result
// set, sizes a new ResultSet to hold the sum of their row counts, and copies the rows
// of each input into it through permute_storage_columnar() or
// permute_storage_row_wise(), following each input's permutation buffer.
2434  {
2435  auto& result_per_device = shared_context.getFragmentResults();
// front() is taken without an emptiness check here; the call site visible in
// collectAllDeviceResults() (listing line 2343) only calls in when the results are
// non-empty.
2436  const auto first_result_set = result_per_device.front().first;
2437  CHECK(first_result_set);
// The output descriptor starts as a copy of the first result set's descriptor with
// its entry count reset to 0.
2438  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
2439  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
2440  const auto top_n = ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
2441  top_query_mem_desc.setEntryCount(0);
// First pass: sort each input with top_n = limit + offset and accumulate its
// rowCount() into the output entry count.
2442  for (auto& result : result_per_device) {
2443  const auto result_set = result.first;
2444  CHECK(result_set);
2445  result_set->sort(ra_exe_unit.sort_info.order_entries, top_n, this);
2446  size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
2447  top_query_mem_desc.setEntryCount(new_entry_cnt);
2448  }
// The output ResultSet takes target infos, device type and memory owner from the
// first input.
2449  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
2450  first_result_set->getDeviceType(),
2451  top_query_mem_desc,
2452  first_result_set->getRowSetMemOwner(),
2453  blockSize(),
2454  gridSize());
2455  auto top_storage = top_result_set->allocateStorage();
2456  size_t top_output_row_idx{0};
// Second pass: append each input's rows; top_output_row_idx carries the running
// output position returned by the permute helpers.
2457  for (auto& result : result_per_device) {
2458  const auto result_set = result.first;
2459  CHECK(result_set);
2460  const auto& top_permutation = result_set->getPermutationBuffer();
2461  CHECK_LE(top_permutation.size(), top_n);
2462  if (top_query_mem_desc.didOutputColumnar()) {
2463  top_output_row_idx = permute_storage_columnar(result_set->getStorage(),
2464  result_set->getQueryMemDesc(),
2465  top_storage,
2466  top_output_row_idx,
2467  top_query_mem_desc,
2468  top_permutation);
2469  } else {
2470  top_output_row_idx = permute_storage_row_wise(result_set->getStorage(),
2471  top_storage,
2472  top_output_row_idx,
2473  top_query_mem_desc,
2474  top_permutation);
2475  }
2476  }
// Every output entry must have been written exactly once.
2477  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
2478  return top_result_set;
2479 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const std::list< Analyzer::OrderEntry > order_entries
size_t permute_storage_row_wise(const ResultSetStorage *input_storage, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2411
const size_t limit
#define CHECK_LE(x, y)
Definition: Logger.h:304
unsigned gridSize() const
Definition: Execute.cpp:3807
size_t permute_storage_columnar(const ResultSetStorage *input_storage, const QueryMemoryDescriptor &input_query_mem_desc, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2361
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define CHECK(condition)
Definition: Logger.h:291
unsigned blockSize() const
Definition: Execute.cpp:3821
const size_t offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Executor::compileBody ( const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const GpuSharedMemoryContext gpu_smem_context = {} 
)
private

Definition at line 3254 of file NativeCodegen.cpp.

// Body of Executor::compileBody: generates the filter IR for the primary and deferred
// quals, delegates to group_by_and_aggregate.codegen(), and -- when a separate filter
// function is in use -- redirects code generation into it and afterwards emits the
// call to it from the row function. Returns the value produced by
// group_by_and_aggregate.codegen().
// NOTE(review): listing lines 3259 and 3339 are absent from this extracted listing.
// The cross-reference list after this listing names AUTOMATIC_IR_METADATA and
// redeclareFilterFunction(), which are not called in any visible line -- presumably
// those are the missing statements; confirm against NativeCodegen.cpp.
3258  {
3260 
3261  // Switch the code generation into a separate filter function if enabled.
3262  // Note that accesses to function arguments are still codegenned from the
3263  // row function's arguments, then later automatically forwarded and
3264  // remapped into filter function arguments by redeclareFilterFunction().
3265  cgen_state_->row_func_bb_ = cgen_state_->ir_builder_.GetInsertBlock();
3266  llvm::Value* loop_done{nullptr};
3267  std::unique_ptr<Executor::FetchCacheAnchor> fetch_cache_anchor;
3268  if (cgen_state_->filter_func_) {
// When the row function is currently inside a block named "loop_body", an i1
// `loop_done` slot is allocated at the top of the row function's entry block and set
// to true in the current block before switching over to the filter function.
3269  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3270  auto row_func_entry_bb = &cgen_state_->row_func_->getEntryBlock();
3271  cgen_state_->ir_builder_.SetInsertPoint(row_func_entry_bb,
3272  row_func_entry_bb->begin());
3273  loop_done = cgen_state_->ir_builder_.CreateAlloca(
3274  get_int_type(1, cgen_state_->context_), nullptr, "loop_done");
3275  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3276  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(true), loop_done);
3277  }
3278  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->filter_func_bb_);
3279  cgen_state_->current_func_ = cgen_state_->filter_func_;
3280  fetch_cache_anchor = std::make_unique<Executor::FetchCacheAnchor>(cgen_state_.get());
3281  }
3282 
3283  // generate the code for the filter
3284  std::vector<Analyzer::Expr*> primary_quals;
3285  std::vector<Analyzer::Expr*> deferred_quals;
3286  bool short_circuited = CodeGenerator::prioritizeQuals(
3287  ra_exe_unit, primary_quals, deferred_quals, plan_state_->hoisted_filters_);
3288  if (short_circuited) {
3289  VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
3290  << "short-circuited and deferred " << std::to_string(deferred_quals.size())
3291  << " quals";
3292  }
// filter_lv starts as constant true and is AND-ed with each primary qual.
3293  llvm::Value* filter_lv = cgen_state_->llBool(true);
3294  CodeGenerator code_generator(this);
3295  for (auto expr : primary_quals) {
3296  // Generate the filter for primary quals
3297  auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
3298  filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
3299  }
3300  CHECK(filter_lv->getType()->isIntegerTy(1));
3301  llvm::BasicBlock* sc_false{nullptr};
// With deferred quals present, the primary filter result branches to "sc_true" /
// "sc_false"; deferred quals are then generated inside "sc_true" with filter_lv reset
// to true. "sc_false" receives a `ret i32 0` here only when there are no join quals;
// otherwise it is left unterminated at this point and handed to
// group_by_and_aggregate.codegen() below.
3302  if (!deferred_quals.empty()) {
3303  auto sc_true = llvm::BasicBlock::Create(
3304  cgen_state_->context_, "sc_true", cgen_state_->current_func_);
3305  sc_false = llvm::BasicBlock::Create(
3306  cgen_state_->context_, "sc_false", cgen_state_->current_func_);
3307  cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
3308  cgen_state_->ir_builder_.SetInsertPoint(sc_false);
3309  if (ra_exe_unit.join_quals.empty()) {
3310  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
3311  }
3312  cgen_state_->ir_builder_.SetInsertPoint(sc_true);
3313  filter_lv = cgen_state_->llBool(true);
3314  }
3315  for (auto expr : deferred_quals) {
3316  filter_lv = cgen_state_->ir_builder_.CreateAnd(
3317  filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
3318  }
3319 
3320  CHECK(filter_lv->getType()->isIntegerTy(1));
3321  auto ret = group_by_and_aggregate.codegen(
3322  filter_lv, sc_false, query_mem_desc, co, gpu_smem_context);
3323 
3324  // Switch the code generation back to the row function if a filter
3325  // function was enabled.
3326  if (cgen_state_->filter_func_) {
// Still emitting into the filter function here: in the "loop_body" case it stores
// false into loop_done and returns 0.
3327  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3328  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(false), loop_done);
3329  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3330  }
3331 
// Back in the row function: emit the filter-function call with an empty argument
// list; the comment on lines 3337-3338 calls this a placeholder call.
3332  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3333  cgen_state_->current_func_ = cgen_state_->row_func_;
3334  cgen_state_->filter_func_call_ =
3335  cgen_state_->ir_builder_.CreateCall(cgen_state_->filter_func_, {});
3336 
3337  // Create real filter function declaration after placeholder call
3338  // is emitted.
3340 
// In the "loop_body" case the row function reloads loop_done after the call: if it is
// still true the filter function's result is returned immediately, otherwise codegen
// continues in "loop_done_false". Outside a loop body the call result is returned
// directly.
// NOTE(review): getPointerElementType() depends on typed pointers; LLVM's
// opaque-pointer migration removes it -- confirm the LLVM version this targets.
3341  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3342  auto loop_done_true = llvm::BasicBlock::Create(
3343  cgen_state_->context_, "loop_done_true", cgen_state_->row_func_);
3344  auto loop_done_false = llvm::BasicBlock::Create(
3345  cgen_state_->context_, "loop_done_false", cgen_state_->row_func_);
3346  auto loop_done_flag = cgen_state_->ir_builder_.CreateLoad(
3347  loop_done->getType()->getPointerElementType(), loop_done);
3348  cgen_state_->ir_builder_.CreateCondBr(
3349  loop_done_flag, loop_done_true, loop_done_false);
3350  cgen_state_->ir_builder_.SetInsertPoint(loop_done_true);
3351  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3352  cgen_state_->ir_builder_.SetInsertPoint(loop_done_false);
3353  } else {
3354  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3355  }
3356  }
3357  return ret;
3358 }
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1347
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr * > &primary_quals, std::vector< Analyzer::Expr * > &deferred_quals, const PlanState::HoistedFiltersSet &hoisted_quals)
Definition: LogicalIR.cpp:157
#define CHECK(condition)
Definition: Logger.h:291
void redeclareFilterFunction()
Definition: IRCodegen.cpp:1020
#define VLOG(n)
Definition: Logger.h:387
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const PlanState::DeletedColumnsMap deleted_cols_map,
const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const CudaMgr_Namespace::CudaMgr cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap &  column_cache,
RenderInfo *  render_info = nullptr 
)
private

Definition at line 2708 of file NativeCodegen.cpp.

2720  {
2721  auto timer = DEBUG_TIMER(__func__);
2722 
2724  if (!cuda_mgr) {
2725  throw QueryMustRunOnCpu();
2726  }
2727  }
2728 
2729 #ifndef NDEBUG
2730  static std::uint64_t counter = 0;
2731  ++counter;
2732  VLOG(1) << "CODEGEN #" << counter << ":";
2733  LOG(IR) << "CODEGEN #" << counter << ":";
2734  LOG(PTX) << "CODEGEN #" << counter << ":";
2735  LOG(ASM) << "CODEGEN #" << counter << ":";
2736 #endif
2737 
2738  // cgenstate_manager uses the RAII pattern to manage the lifetime of
2739  // CgenState instances.
2740  Executor::CgenStateManager cgenstate_manager(*this,
2741  allow_lazy_fetch,
2742  query_infos,
2743  deleted_cols_map,
2744  &ra_exe_unit); // locks compilation_mutex
2745 
2746  addTransientStringLiterals(ra_exe_unit, row_set_mem_owner);
2747 
2748  GroupByAndAggregate group_by_and_aggregate(
2749  this,
2750  co.device_type,
2751  ra_exe_unit,
2752  query_infos,
2753  row_set_mem_owner,
2754  has_cardinality_estimation ? std::optional<int64_t>(max_groups_buffer_entry_guess)
2755  : std::nullopt);
2756  auto query_mem_desc =
2757  group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
2758  max_groups_buffer_entry_guess,
2759  crt_min_byte_width,
2760  render_info,
2762 
2763  if (query_mem_desc->getQueryDescriptionType() ==
2765  !has_cardinality_estimation && (!render_info || !render_info->isInSitu()) &&
2766  !eo.just_explain) {
2767  const auto col_range_info = group_by_and_aggregate.getColRangeInfo();
2768  throw CardinalityEstimationRequired(col_range_info.max - col_range_info.min);
2769  }
2770 
2771  const bool output_columnar = query_mem_desc->didOutputColumnar();
2772  const bool gpu_shared_mem_optimization =
2774  ra_exe_unit,
2775  cuda_mgr,
2776  co.device_type,
2777  cuda_mgr ? this->blockSize() : 1,
2778  cuda_mgr ? this->numBlocksPerMP() : 1);
2779  if (gpu_shared_mem_optimization) {
2780  // disable interleaved bins optimization on the GPU
2781  query_mem_desc->setHasInterleavedBinsOnGpu(false);
2782  LOG(DEBUG1) << "GPU shared memory is used for the " +
2783  query_mem_desc->queryDescTypeToString() + " query(" +
2784  std::to_string(get_shared_memory_size(gpu_shared_mem_optimization,
2785  query_mem_desc.get())) +
2786  " out of " + std::to_string(g_gpu_smem_threshold) + " bytes).";
2787  }
2788 
2789  const GpuSharedMemoryContext gpu_smem_context(
2790  get_shared_memory_size(gpu_shared_mem_optimization, query_mem_desc.get()));
2791 
2793  const size_t num_count_distinct_descs =
2794  query_mem_desc->getCountDistinctDescriptorsSize();
2795  for (size_t i = 0; i < num_count_distinct_descs; i++) {
2796  const auto& count_distinct_descriptor =
2797  query_mem_desc->getCountDistinctDescriptor(i);
2798  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::UnorderedSet ||
2799  (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
2800  !co.hoist_literals)) {
2801  throw QueryMustRunOnCpu();
2802  }
2803  }
2804 
2805  // We currently do not support varlen projection based on baseline groupby when
2806  // 1) the target table is multi-fragmented and 2) multiple GPUs are involved in query
2807  // processing. In this case, we punt the query to CPU to avoid a server crash.
2808  for (const auto expr : ra_exe_unit.target_exprs) {
2809  if (auto gby_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
2810  bool has_multiple_gpus = cuda_mgr ? cuda_mgr->getDeviceCount() > 1 : false;
2811  if (gby_expr->get_aggtype() == SQLAgg::kSAMPLE && has_multiple_gpus &&
2812  !g_leaf_count) {
2813  std::set<const Analyzer::ColumnVar*,
2814  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
2815  colvar_set(