OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Executor Class Reference

#include <Execute.h>

+ Collaboration diagram for Executor:

Classes

class  CgenStateManager
 
struct  ExecutorMutexHolder
 
class  FetchCacheAnchor
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

enum  ExtModuleKinds {
  ExtModuleKinds::template_module, ExtModuleKinds::udf_cpu_module, ExtModuleKinds::udf_gpu_module, ExtModuleKinds::rt_udf_cpu_module,
  ExtModuleKinds::rt_udf_gpu_module, ExtModuleKinds::rt_geos_module, ExtModuleKinds::rt_libdevice_module
}
 
using ExecutorId = size_t
 
using CachedCardinality = std::pair< bool, size_t >
 

Public Member Functions

 Executor (const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
 
void clearCaches (bool runtime_only=false)
 
std::string dumpCache () const
 
void reset (bool discard_runtime_modules_only=false)
 
const std::unique_ptr
< llvm::Module > & 
get_rt_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_rt_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_geos_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_libdevice_module () const
 
bool has_rt_module () const
 
bool has_udf_module (bool is_gpu=false) const
 
bool has_rt_udf_module (bool is_gpu=false) const
 
bool has_geos_module () const
 
bool has_libdevice_module () const
 
const TemporaryTables * getTemporaryTables ()
 
StringDictionaryProxy * getStringDictionaryProxy (const int dict_id, const bool with_generation) const
 
StringDictionaryProxy * getStringDictionaryProxy (const int dictId, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap * 
getStringProxyTranslationMap (const int source_dict_id, const int dest_dict_id, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap * 
getJoinIntersectionStringProxyTranslationMap (const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
 
const
StringDictionaryProxy::TranslationMap
< Datum > * 
getStringProxyNumericTranslationMap (const int source_dict_id, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
const Catalog_Namespace::Catalog * getCatalog () const
 
void setCatalog (const Catalog_Namespace::Catalog *catalog)
 
Data_Namespace::DataMgr * getDataMgr () const
 
const std::shared_ptr
< RowSetMemoryOwner > 
getRowSetMemoryOwner () const
 
const TemporaryTablesgetTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const int table_id) const
 
const TableGeneration & getTableGeneration (const int table_id) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow (const std::set< int > &table_ids_to_fetch) const
 
bool hasLazyFetchColumns (const std::vector< Analyzer::Expr * > &target_exprs) const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void interrupt (const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
 
void resetInterrupt ()
 
void enableRuntimeQueryInterrupt (const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
 
int8_t warpSize () const
 
unsigned gridSize () const
 
void setGridSize (unsigned grid_size)
 
void resetGridSize ()
 
unsigned numBlocksPerMP () const
 
unsigned blockSize () const
 
void setBlockSize (unsigned block_size)
 
void resetBlockSize ()
 
size_t maxGpuSlabSize () const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
TableUpdateMetadata executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
 
void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
 
int deviceCount (const ExecutorDeviceType) const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
 
void setColRangeCache (const AggregatedColRange &aggregated_col_range)
 
ExecutorId getExecutorId () const
 
QuerySessionId & getCurrentQuerySession (heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
QuerySessionStatus::QueryStatus getQuerySessionStatus (const QuerySessionId &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkCurrentQuerySession (const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
void invalidateRunningQuerySession (heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool addToQuerySessionList (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool removeFromQuerySessionList (const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
void setQuerySessionAsInterrupted (const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool checkIsQuerySessionInterrupted (const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkIsQuerySessionEnrolled (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool updateQuerySessionStatusWithLock (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool updateQuerySessionExecutorAssignment (const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
std::vector< QuerySessionStatus > getQuerySessionInfo (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
heavyai::shared_mutex & getSessionLock ()
 
CurrentQueryStatus attachExecutorToQuerySession (const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
 
void checkPendingQueryStatus (const QuerySessionId &query_session)
 
void clearQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str)
 
void updateQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
 
void enrollQuerySession (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
 
size_t getNumCurentSessionsEnrolled () const
 
const std::vector< size_t > getExecutorIdsRunningQuery (const QuerySessionId &interrupt_session) const
 
bool checkNonKernelTimeInterrupted () const
 
void registerExtractedQueryPlanDag (const QueryPlanDAG &query_plan_dag)
 
const QueryPlanDAG getLatestQueryPlanDagExtracted () const
 
void addToCardinalityCache (const std::string &cache_key, const size_t cache_value)
 
CachedCardinality getCachedCardinality (const std::string &cache_key)
 
heavyai::shared_mutex & getDataRecyclerLock ()
 
QueryPlanDagCache & getQueryPlanDagCache ()
 
ResultSetRecyclerHolder & getRecultSetRecyclerHolder ()
 
CgenState * getCgenStatePtr () const
 
PlanState * getPlanStatePtr () const
 
llvm::LLVMContext & getContext ()
 
void update_extension_modules (bool update_runtime_modules_only=false)
 

Static Public Member Functions

static void clearExternalCaches (bool for_update, const TableDescriptor *td, const int current_db_id)
 
template<typename F >
static void registerExtensionFunctions (F register_extension_functions)
 
static std::shared_ptr< Executor > getExecutor (const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static size_t getArenaBlockSize ()
 
static void addUdfIrToModule (const std::string &udf_ir_filename, const bool is_cuda_ir)
 
static void initialize_extension_module_sources ()
 
static void registerActiveModule (void *module, const int device_id)
 
static void unregisterActiveModule (const int device_id)
 
static std::pair< int64_t,
int32_t > 
reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void update_after_registration (bool update_runtime_modules_only=false)
 
static size_t getBaselineThreshold (bool for_count_distinct, ExecutorDeviceType device_type)
 

Public Attributes

std::mutex compilation_mutex_
 

Static Public Attributes

static const ExecutorId UNITARY_EXECUTOR_ID = 0
 
static const ExecutorId INVALID_EXECUTOR_ID = SIZE_MAX
 
static std::map
< ExtModuleKinds, std::string > 
extension_module_sources
 
static const size_t high_scan_limit
 
static const int32_t ERR_DIV_BY_ZERO {1}
 
static const int32_t ERR_OUT_OF_GPU_MEM {2}
 
static const int32_t ERR_OUT_OF_SLOTS {3}
 
static const int32_t ERR_UNSUPPORTED_SELF_JOIN {4}
 
static const int32_t ERR_OUT_OF_RENDER_MEM {5}
 
static const int32_t ERR_OUT_OF_CPU_MEM {6}
 
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW {7}
 
static const int32_t ERR_OUT_OF_TIME {9}
 
static const int32_t ERR_INTERRUPTED {10}
 
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED {11}
 
static const int32_t ERR_TOO_MANY_LITERALS {12}
 
static const int32_t ERR_STRING_CONST_IN_RESULTSET {13}
 
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY {14}
 
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES {15}
 
static const int32_t ERR_GEOS {16}
 
static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT {17}
 
static std::mutex register_runtime_extension_functions_mutex_
 
static std::mutex kernel_mutex_
 

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenConditionalAggregateCondValSelector (llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
 
llvm::Value * codegenWindowFunctionAggregate (const CompilationOptions &co)
 
llvm::BasicBlock * codegenWindowResetStateControlFlow ()
 
void codegenWindowFunctionStateInit (llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
llvm::Value * codegenWindowNavigationFunctionOnFrame (const CompilationOptions &co)
 
llvm::Value * codegenCurrentPartitionIndex (const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
 
llvm::Value * codegenFrameBoundExpr (const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
 
llvm::Value * codegenFrameBound (bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
 
std::pair< std::string,
llvm::Value * > 
codegenLoadOrderKeyBufPtr (WindowFunctionContext *window_func_context) const
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenFrameNullRange (WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
 
WindowPartitionBufferPtrs codegenLoadPartitionBuffers (WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenWindowFrameBounds (WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenFrameBoundRange (const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
 
std::vector< llvm::Value * > prepareRowModeFuncArgs (bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const
 
std::vector< llvm::Value * > prepareRangeModeFuncArgs (bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
 
const std::string getOrderKeyTypeName (WindowFunctionContext *window_func_context) const
 
llvm::Value * codegenLoadCurrentValueFromColBuf (WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
 
size_t getOrderKeySize (WindowFunctionContext *window_func_context) const
 
const SQLTypeInfo getFirstOrderColTypeInfo (WindowFunctionContext *window_func_context) const
 
std::string getFramingFuncName (const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
 
void codegenWindowAvgEpilogue (llvm::Value *crt_val, llvm::Value *window_func_null_val)
 
llvm::Value * codegenAggregateWindowState ()
 
llvm::Value * aggregateWindowStatePtr ()
 
CudaMgr_Namespace::CudaMgr * cudaMgr () const
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
bool needLinearizeAllFragments (const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
 Compiles and dispatches a work unit per fragment processing results with the per fragment callback. Currently used for computing metrics over fragments (metadata). More...
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
 Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps. More...
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
 
std::unordered_map< int, const
Analyzer::BinOper * > 
getInnerTabIdToJoinCond () const
 
std::vector< std::unique_ptr
< ExecutionKernel > > 
createKernels (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
 
void launchKernels (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
FetchResult fetchUnionChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
std::pair< std::vector
< std::vector< int64_t >
>, std::vector< std::vector
< uint64_t > > > 
getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
void buildSelectedFragsMappingForUnion (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int outer_table_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
 
ResultSetPtr resultsUnion (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< int8_t * > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
void AutoTrackBuffersInRuntimeIR ()
 
std::tuple< CompilationResult,
std::unique_ptr
< QueryMemoryDescriptor > > 
compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoop > buildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const int inner_table_id, const CompilationOptions &co)
 
std::function< llvm::Value
*(const std::vector
< llvm::Value * >
&, llvm::Value *)> 
buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< HashJoin > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
void redeclareFilterFunction ()
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
 
void insertErrorCodeChecker (llvm::Function *query_func, bool hoist_literals, bool allow_runtime_query_interrupt)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
 
std::shared_ptr
< CompilationContext > 
optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::shared_ptr
< CompilationContext > 
optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
std::tuple
< RelAlgExecutionUnit,
PlanState::DeletedColumnsMap > 
addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
bool isFragmentFullyDeleted (const int table_id, const Fragmenter_Namespace::FragmentInfo &fragment)
 
FragmentSkipStatus canSkipFragmentForFpQual (const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (std::unordered_set< int > phys_table_ids)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
const std::unique_ptr
< llvm::Module > & 
get_extension_module (ExtModuleKinds kind) const
 
bool has_extension_module (ExtModuleKinds kind) const
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 
ExecutorMutexHolder acquireExecuteMutex ()
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

const ExecutorId executor_id_
 
std::unique_ptr
< llvm::LLVMContext > 
context_
 
std::unique_ptr< CgenState > cgen_state_
 
std::map< ExtModuleKinds,
std::unique_ptr< llvm::Module > > 
extension_modules_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr
< RowSetMemoryOwner > 
row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::atomic< bool > interrupted_
 
std::mutex str_dict_mutex_
 
std::unique_ptr
< llvm::TargetMachine > 
nvptx_target_machine_
 
unsigned block_size_x_
 
unsigned grid_size_x_
 
const size_t max_gpu_slab_size_
 
const std::string debug_dir_
 
const std::string debug_file_
 
const Catalog_Namespace::Catalog * catalog_
 
Data_Namespace::DataMgr * data_mgr_
 
const TemporaryTables * temporary_tables_
 
TableIdToNodeMap table_id_to_node_map_
 
int64_t kernel_queue_time_ms_ = 0
 
int64_t compilation_queue_time_ms_ = 0
 
std::unique_ptr
< WindowProjectNodeContext > 
window_project_node_context_owned_
 
WindowFunctionContext * active_window_function_ {nullptr}
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
TableGenerations table_generations_
 
QuerySessionId current_query_session_
 

Static Private Attributes

static const int max_gpu_count {16}
 
static std::mutex gpu_active_modules_mutex_
 
static uint32_t gpu_active_modules_device_mask_ {0x0}
 
static void * gpu_active_modules_ [max_gpu_count]
 
static const size_t baseline_threshold
 
static heavyai::shared_mutex executor_session_mutex_
 
static InterruptFlagMap queries_interrupt_flag_
 
static QuerySessionMap queries_session_map_
 
static std::map< int,
std::shared_ptr< Executor > > 
executors_
 
static heavyai::shared_mutex execute_mutex_
 
static heavyai::shared_mutex executors_cache_mutex_
 
static QueryPlanDagCache query_plan_dag_cache_
 
static heavyai::shared_mutex recycler_mutex_
 
static std::unordered_map
< std::string, size_t > 
cardinality_cache_
 
static ResultSetRecyclerHolder resultset_recycler_holder_
 
static QueryPlanDAG latest_query_plan_extracted_ {EMPTY_QUERY_PLAN}
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
struct DiamondCodegen
 
class ExecutionKernel
 
class KernelSubtask
 
class HashJoin
 
class OverlapsJoinHashTable
 
class RangeJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class StringDictionaryTranslationMgr
 
class LeafAggregator
 
class PerfectJoinHashTable
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
class WindowProjectNodeContext
 

Detailed Description

Definition at line 368 of file Execute.h.

Member Typedef Documentation

using Executor::CachedCardinality = std::pair<bool, size_t>

Definition at line 1257 of file Execute.h.

using Executor::ExecutorId = size_t

Definition at line 375 of file Execute.h.

Definition at line 785 of file Execute.h.

Member Enumeration Documentation

Enumerator
template_module 
udf_cpu_module 
udf_gpu_module 
rt_udf_cpu_module 
rt_udf_gpu_module 
rt_geos_module 
rt_libdevice_module 

Definition at line 469 of file Execute.h.

469  {
470  template_module, // RuntimeFunctions.bc
471  udf_cpu_module, // Load-time UDFs for CPU execution
472  udf_gpu_module, // Load-time UDFs for GPU execution
473  rt_udf_cpu_module, // Run-time UDF/UDTFs for CPU execution
474  rt_udf_gpu_module, // Run-time UDF/UDTFs for GPU execution
475  rt_geos_module, // geos functions
476  rt_libdevice_module // math library functions for GPU execution
477  };
std::unique_ptr< llvm::Module > udf_gpu_module
std::unique_ptr< llvm::Module > udf_cpu_module

Constructor & Destructor Documentation

Executor::Executor ( const ExecutorId  id,
Data_Namespace::DataMgr *  data_mgr,
const size_t  block_size_x,
const size_t  grid_size_x,
const size_t  max_gpu_slab_size,
const std::string &  debug_dir,
const std::string &  debug_file 
)

Definition at line 245 of file Execute.cpp.

252  : executor_id_(executor_id)
253  , context_(new llvm::LLVMContext())
254  , cgen_state_(new CgenState({}, false, this))
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
const ExecutorId executor_id_
Definition: Execute.h:1298
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1299

Member Function Documentation

ExecutorMutexHolder Executor::acquireExecuteMutex ( )
inlineprivate

Definition at line 1413 of file Execute.h.

References execute_mutex_, executor_id_, Executor::ExecutorMutexHolder::shared_lock, Executor::ExecutorMutexHolder::unique_lock, and UNITARY_EXECUTOR_ID.

1413  {
1414  ExecutorMutexHolder ret;
1415  if (executor_id_ == Executor::UNITARY_EXECUTOR_ID) {
1416  // Only one unitary executor can run at a time
1417  ret.unique_lock = heavyai::unique_lock<heavyai::shared_mutex>(execute_mutex_);
1418  } else {
1419  ret.shared_lock = heavyai::shared_lock<heavyai::shared_mutex>(execute_mutex_);
1420  }
1421  return ret;
1422  }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1407
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1298
std::unique_lock< T > unique_lock
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:376
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > Executor::addDeletedColumn ( const RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co 
)
private

Definition at line 3961 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::add_deleted_col_to_map(), catalog_, CHECK, CompilationOptions::filter_on_deleted_column, Catalog_Namespace::Catalog::getDeletedColumnIfRowsDeleted(), Catalog_Namespace::Catalog::getMetadataForTable(), and TABLE.

Referenced by executeWorkUnitImpl(), and executeWorkUnitPerFragment().

3963  {
3964  if (!co.filter_on_deleted_column) {
3965  return std::make_tuple(ra_exe_unit, PlanState::DeletedColumnsMap{});
3966  }
3967  auto ra_exe_unit_with_deleted = ra_exe_unit;
3968  PlanState::DeletedColumnsMap deleted_cols_map;
3969  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
3970  if (input_table.getSourceType() != InputSourceType::TABLE) {
3971  continue;
3972  }
3973  const auto td = catalog_->getMetadataForTable(input_table.getTableId());
3974  CHECK(td);
3975  const auto deleted_cd = catalog_->getDeletedColumnIfRowsDeleted(td);
3976  if (!deleted_cd) {
3977  continue;
3978  }
3979  CHECK(deleted_cd->columnType.is_boolean());
3980  // check deleted column is not already present
3981  bool found = false;
3982  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
3983  if (input_col.get()->getColId() == deleted_cd->columnId &&
3984  input_col.get()->getScanDesc().getTableId() == deleted_cd->tableId &&
3985  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
3986  found = true;
3987  add_deleted_col_to_map(deleted_cols_map, deleted_cd);
3988  break;
3989  }
3990  }
3991  if (!found) {
3992  // add deleted column
3993  ra_exe_unit_with_deleted.input_col_descs.emplace_back(new InputColDescriptor(
3994  deleted_cd->columnId, deleted_cd->tableId, input_table.getNestLevel()));
3995  add_deleted_col_to_map(deleted_cols_map, deleted_cd);
3996  }
3997  }
3998  return std::make_tuple(ra_exe_unit_with_deleted, deleted_cols_map);
3999 }
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1379
const ColumnDescriptor * getDeletedColumnIfRowsDeleted(const TableDescriptor *td) const
Definition: Catalog.cpp:3705
std::unordered_map< TableId, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
#define CHECK(condition)
Definition: Logger.h:289
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
void add_deleted_col_to_map(PlanState::DeletedColumnsMap &deleted_cols_map, const ColumnDescriptor *deleted_cd)
Definition: Execute.cpp:3949

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 1115 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

1116  {
1118  // Iterators are added for loop-outer joins when the head of the loop is generated,
1119  // then once again when the body if generated. Allow this instead of special handling
1120  // of call sites.
1121  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
1122  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
1123  return it->second;
1124  }
1125  CHECK(!prev_iters.empty());
1126  llvm::Value* matching_row_index = prev_iters.back();
1127  const auto it_ok =
1128  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
1129  CHECK(it_ok.second);
1130  return matching_row_index;
1131 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:289
void Executor::addToCardinalityCache ( const std::string &  cache_key,
const size_t  cache_value 
)

Definition at line 4763 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, recycler_mutex_, and VLOG.

4764  {
4765  if (g_use_estimator_result_cache) {
4766  heavyai::unique_lock<heavyai::shared_mutex> lock(recycler_mutex_);
4767  cardinality_cache_[cache_key] = cache_value;
4768  VLOG(1) << "Put estimated cardinality to the cache";
4769  }
4770 }
static std::unordered_map< std::string, size_t > cardinality_cache_
Definition: Execute.h:1428
std::unique_lock< T > unique_lock
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1427
bool g_use_estimator_result_cache
Definition: Execute.cpp:127
#define VLOG(n)
Definition: Logger.h:383
bool Executor::addToQuerySessionList ( const QuerySessionId &  query_session,
const std::string &  query_str,
const std::string &  submitted,
const size_t  executor_id,
const QuerySessionStatus::QueryStatus  query_status,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 4594 of file Execute.cpp.

References queries_interrupt_flag_, and queries_session_map_.

Referenced by enrollQuerySession().

4600  {
4601  // an internal API that enrolls the query session into the Executor's session map
4602  if (queries_session_map_.count(query_session)) {
4603  if (queries_session_map_.at(query_session).count(submitted_time_str)) {
4604  queries_session_map_.at(query_session).erase(submitted_time_str);
4605  queries_session_map_.at(query_session)
4606  .emplace(submitted_time_str,
4607  QuerySessionStatus(query_session,
4608  executor_id,
4609  query_str,
4610  submitted_time_str,
4611  query_status));
4612  } else {
4613  queries_session_map_.at(query_session)
4614  .emplace(submitted_time_str,
4615  QuerySessionStatus(query_session,
4616  executor_id,
4617  query_str,
4618  submitted_time_str,
4619  query_status));
4620  }
4621  } else {
4622  std::map<std::string, QuerySessionStatus> executor_per_query_map;
4623  executor_per_query_map.emplace(
4624  submitted_time_str,
4626  query_session, executor_id, query_str, submitted_time_str, query_status));
4627  queries_session_map_.emplace(query_session, executor_per_query_map);
4628  }
4629  return queries_interrupt_flag_.emplace(query_session, false).second;
4630 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1402
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1400

+ Here is the caller graph for this function:

void Executor::addTransientStringLiterals ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::shared_ptr< RowSetMemoryOwner > &  row_set_mem_owner 
)

Definition at line 2157 of file Execute.cpp.

References CHECK, getStringDictionaryProxy(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, kSINGLE_VALUE, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_union, and ScalarExprVisitor< T >::visit().

2159  {
2160  TransientDictIdVisitor dict_id_visitor;
2161 
2162  auto visit_expr =
2163  [this, &dict_id_visitor, &row_set_mem_owner](const Analyzer::Expr* expr) {
2164  if (!expr) {
2165  return;
2166  }
2167  const auto dict_id = dict_id_visitor.visit(expr);
2168  if (dict_id >= 0) {
2169  auto sdp = getStringDictionaryProxy(dict_id, row_set_mem_owner, true);
2170  CHECK(sdp);
2171  TransientStringLiteralsVisitor visitor(sdp, this);
2172  visitor.visit(expr);
2173  }
2174  };
2175 
2176  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2177  visit_expr(group_expr.get());
2178  }
2179 
2180  for (const auto& group_expr : ra_exe_unit.quals) {
2181  visit_expr(group_expr.get());
2182  }
2183 
2184  for (const auto& group_expr : ra_exe_unit.simple_quals) {
2185  visit_expr(group_expr.get());
2186  }
2187 
2188  const auto visit_target_expr = [&](const Analyzer::Expr* target_expr) {
2189  const auto& target_type = target_expr->get_type_info();
2190  if (!target_type.is_string() || target_type.get_compression() == kENCODING_DICT) {
2191  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2192  if (agg_expr) {
2193  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
2194  agg_expr->get_aggtype() == kSAMPLE) {
2195  visit_expr(agg_expr->get_arg());
2196  }
2197  } else {
2198  visit_expr(target_expr);
2199  }
2200  }
2201  };
2202  const auto& target_exprs = ra_exe_unit.target_exprs;
2203  std::for_each(target_exprs.begin(), target_exprs.end(), visit_target_expr);
2204  const auto& target_exprs_union = ra_exe_unit.target_exprs_union;
2205  std::for_each(target_exprs_union.begin(), target_exprs_union.end(), visit_target_expr);
2206 }
std::vector< Analyzer::Expr * > target_exprs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
T visit(const Analyzer::Expr *expr) const
std::vector< Analyzer::Expr * > target_exprs_union
StringDictionaryProxy * getStringDictionaryProxy(const int dict_id, const bool with_generation) const
Definition: Execute.h:529
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:289
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

void Executor::addUdfIrToModule ( const std::string &  udf_ir_filename,
const bool  is_cuda_ir 
)
static

Definition at line 1897 of file NativeCodegen.cpp.

Referenced by DBHandler::initialize().

1898  {
1902  udf_ir_filename;
1903 }
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:479

+ Here is the caller graph for this function:

llvm::Value * Executor::aggregateWindowStatePtr ( )
private

Definition at line 225 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and kFLOAT.

225  {
226  AUTOMATIC_IR_METADATA(cgen_state_.get());
227  const auto window_func_context =
228  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
229  const auto window_func = window_func_context->getWindowFunction();
230  const auto arg_ti = get_adjusted_window_type_info(window_func);
231  llvm::Type* aggregate_state_type =
232  arg_ti.get_type() == kFLOAT
233  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
234  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
235  const auto aggregate_state_i64 = cgen_state_->llInt(
236  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
237  return cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_i64,
238  aggregate_state_type);
239 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1299
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inlinestaticprivate

Definition at line 1290 of file Execute.h.

Referenced by serializeLiterals().

1290  {
1291  size_t off = off_in;
1292  if (off % alignment != 0) {
1293  off += (alignment - off % alignment);
1294  }
1295  return off;
1296  }

+ Here is the caller graph for this function:

CurrentQueryStatus Executor::attachExecutorToQuerySession ( const QuerySessionId &  query_session_id,
const std::string &  query_str,
const std::string &  query_submitted_time 
)

Definition at line 4492 of file Execute.cpp.

References executor_id_, executor_session_mutex_, updateQuerySessionExecutorAssignment(), and updateQuerySessionStatusWithLock().

4495  {
4496  if (!query_session_id.empty()) {
4497  // if session is valid, do update 1) the exact executor id and 2) query status
4498  heavyai::unique_lock<heavyai::shared_mutex> write_lock(executor_session_mutex_);
4499  updateQuerySessionExecutorAssignment(
4500  query_session_id, query_submitted_time, executor_id_, write_lock);
4501  updateQuerySessionStatusWithLock(query_session_id,
4502  query_submitted_time,
4503  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR,
4504  write_lock);
4505  }
4506  return {query_session_id, query_str};
4507 }
heavyai::unique_lock< heavyai::shared_mutex > write_lock
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4632
const ExecutorId executor_id_
Definition: Execute.h:1298
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4658
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1396

+ Here is the call graph for this function:

void Executor::AutoTrackBuffersInRuntimeIR ( )
private

Definition at line 2240 of file NativeCodegen.cpp.

2240  {
2241  llvm::Module* M = cgen_state_->module_;
2242  if (M->getFunction("allocate_varlen_buffer") == nullptr)
2243  return;
2244 
2245  // read metadata
2246  bool should_track = false;
2247  auto* flag = M->getModuleFlag("manage_memory_buffer");
2248  if (auto* cnt = llvm::mdconst::extract_or_null<llvm::ConstantInt>(flag)) {
2249  if (cnt->getZExtValue() == 1) {
2250  should_track = true;
2251  }
2252  }
2253 
2254  if (!should_track) {
2255  // metadata is not present
2256  return;
2257  }
2258 
2259  LOG(INFO) << "Found 'manage_memory_buffer' metadata.";
2260  llvm::SmallVector<llvm::CallInst*, 4> calls_to_analyze;
2261 
2262  for (llvm::Function& F : *M) {
2263  for (llvm::BasicBlock& BB : F) {
2264  for (llvm::Instruction& I : BB) {
2265  if (llvm::CallInst* CI = llvm::dyn_cast<llvm::CallInst>(&I)) {
2266  // Keep track of calls to "allocate_varlen_buffer" for later processing
2267  llvm::Function* called = CI->getCalledFunction();
2268  if (called) {
2269  if (called->getName() == "allocate_varlen_buffer") {
2270  calls_to_analyze.push_back(CI);
2271  }
2272  }
2273  }
2274  }
2275  }
2276  }
2277 
2278  // for each call to "allocate_varlen_buffer", check if there's a corresponding
2279  // call to "register_buffer_with_executor_rsm". If not, add a call to it
2280  llvm::IRBuilder<> Builder(cgen_state_->context_);
2281  auto i64 = get_int_type(64, cgen_state_->context_);
2282  auto i8p = get_int_ptr_type(8, cgen_state_->context_);
2283  auto void_ = llvm::Type::getVoidTy(cgen_state_->context_);
2284  llvm::FunctionType* fnty = llvm::FunctionType::get(void_, {i64, i8p}, false);
2285  llvm::FunctionCallee register_buffer_fn =
2286  M->getOrInsertFunction("register_buffer_with_executor_rsm", fnty, {});
2287 
2288  int64_t executor_addr = reinterpret_cast<int64_t>(this);
2289  for (llvm::CallInst* CI : calls_to_analyze) {
2290  bool found = false;
2291  // for each user of the function, check if its a callinst
2292  // and if the callinst is calling "register_buffer_with_executor_rsm"
2293  // if no such instruction exist, add one registering the buffer
2294  for (llvm::User* U : CI->users()) {
2295  if (llvm::CallInst* call = llvm::dyn_cast<llvm::CallInst>(U)) {
2296  if (call->getCalledFunction() and
2297  call->getCalledFunction()->getName() == "register_buffer_with_executor_rsm") {
2298  found = true;
2299  break;
2300  }
2301  }
2302  }
2303  if (!found) {
2304  Builder.SetInsertPoint(CI->getNextNode());
2305  Builder.CreateCall(register_buffer_fn,
2306  {ll_int(executor_addr, cgen_state_->context_), CI});
2307  }
2308  }
2309 }
#define LOG(tag)
Definition: Logger.h:283
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Type * get_int_ptr_type(const int width, llvm::LLVMContext &context)
unsigned Executor::blockSize ( ) const

Definition at line 3852 of file Execute.cpp.

References block_size_x_, CHECK, data_mgr_, CudaMgr_Namespace::CudaMgr::getAllDeviceProperties(), and Data_Namespace::DataMgr::getCudaMgr().

Referenced by collectAllDeviceShardedTopResults(), executePlanWithGroupBy(), executePlanWithoutGroupBy(), executeTableFunction(), executeWorkUnitImpl(), reduceMultiDeviceResults(), reduceMultiDeviceResultSets(), and resultsUnion().

3852  {
3853  CHECK(data_mgr_);
3854  const auto cuda_mgr = data_mgr_->getCudaMgr();
3855  if (!cuda_mgr) {
3856  return 0;
3857  }
3858  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
3859  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
3860 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:224
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1380
unsigned block_size_x_
Definition: Execute.h:1373
#define CHECK(condition)
Definition: Logger.h:289
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< HashJoin > Executor::buildCurrentLevelHashTable ( const JoinCondition &  current_level_join_conditions,
size_t  level_idx,
RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap &  column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 956 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), AUTOMATIC_IR_METADATA, anonymous_namespace{IRCodegen.cpp}::check_valid_join_qual(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, RelAlgExecutionUnit::hash_table_build_plan_dag, IS_EQUIVALENCE, LEFT, OneToOne, JoinCondition::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::table_id_to_node_map, JoinCondition::type, and VLOG.

963  {
964  AUTOMATIC_IR_METADATA(cgen_state_.get());
965  std::shared_ptr<HashJoin> current_level_hash_table;
966  auto handleNonHashtableQual = [&ra_exe_unit, &level_idx, this](
967  JoinType join_type,
968  std::shared_ptr<Analyzer::Expr> qual) {
969  if (join_type == JoinType::LEFT) {
970  plan_state_->addNonHashtableQualForLeftJoin(level_idx, qual);
971  } else {
972  add_qualifier_to_execution_unit(ra_exe_unit, qual);
973  }
974  };
975  for (const auto& join_qual : current_level_join_conditions.quals) {
976  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
977  if (current_level_hash_table || !qual_bin_oper ||
978  !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
979  handleNonHashtableQual(current_level_join_conditions.type, join_qual);
980  if (!current_level_hash_table) {
981  fail_reasons.emplace_back("No equijoin expression found");
982  }
983  continue;
984  }
985  check_valid_join_qual(qual_bin_oper);
986  JoinHashTableOrError hash_table_or_error;
987  if (!current_level_hash_table) {
988  hash_table_or_error = buildHashTableForQualifier(
989  qual_bin_oper,
990  query_infos,
993  current_level_join_conditions.type,
995  column_cache,
996  ra_exe_unit.hash_table_build_plan_dag,
997  ra_exe_unit.query_hint,
998  ra_exe_unit.table_id_to_node_map);
999  current_level_hash_table = hash_table_or_error.hash_table;
1000  }
1001  if (hash_table_or_error.hash_table) {
1002  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
1003  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
1004  } else {
1005  fail_reasons.push_back(hash_table_or_error.fail_reason);
1006  if (!current_level_hash_table) {
1007  VLOG(2) << "Building a hashtable based on a qual " << qual_bin_oper->toString()
1008  << " fails: " << hash_table_or_error.fail_reason;
1009  }
1010  handleNonHashtableQual(current_level_join_conditions.type, qual_bin_oper);
1011  }
1012  }
1013  return current_level_hash_table;
1014 }
JoinType
Definition: sqldefs.h:164
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:69
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
TableIdToNodeMap table_id_to_node_map
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1354
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
Definition: IRCodegen.cpp:474
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:3796
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define VLOG(n)
Definition: Logger.h:383
HashTableBuildDagMap hash_table_build_plan_dag
void check_valid_join_qual(std::shared_ptr< Analyzer::BinOper > &bin_oper)
Definition: IRCodegen.cpp:525

+ Here is the call graph for this function:

Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
ColumnCacheMap &  column_cache,
const HashTableBuildDagMap &  hashtable_build_dag_map,
const RegisteredQueryHint &  query_hint,
const TableIdToNodeMap &  table_id_to_node_map 
)
private

Definition at line 3796 of file Execute.cpp.

References deviceCountForMemoryLevel(), ERR_INTERRUPTED, g_enable_dynamic_watchdog, g_enable_overlaps_hashjoin, HashJoin::getInstance(), and interrupted_.

3805  {
3806  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
3807  return {nullptr, "Overlaps hash join disabled, attempting to fall back to loop join"};
3808  }
3809  if (g_enable_dynamic_watchdog && interrupted_.load()) {
3810  throw QueryExecutionError(ERR_INTERRUPTED);
3811  }
3812  try {
3813  auto tbl = HashJoin::getInstance(qual_bin_oper,
3814  query_infos,
3815  memory_level,
3816  join_type,
3817  preferred_hash_type,
3818  deviceCountForMemoryLevel(memory_level),
3819  column_cache,
3820  this,
3821  hashtable_build_dag_map,
3822  query_hint,
3823  table_id_to_node_map);
3824  return {tbl, ""};
3825  } catch (const HashJoinFail& e) {
3826  return {nullptr, e.what()};
3827  }
3828 }
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1444
std::atomic< bool > interrupted_
Definition: Execute.h:1364
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:80
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:1062
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query&#39;s parse tree etc.
Definition: HashJoin.cpp:283

+ Here is the call graph for this function:

JoinLoop::HoistedFiltersCallback Executor::buildHoistLeftHandSideFiltersCb ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  level_idx,
const int  inner_table_id,
const CompilationOptions &  co 
)
private

Definition at line 789 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CodeGenerator::codegen(), g_enable_left_join_filter_hoisting, RelAlgExecutionUnit::join_quals, LEFT, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, CodeGenerator::toBool(), and VLOG.

793  {
794  if (!g_enable_left_join_filter_hoisting) {
795  return nullptr;
796  }
797 
798  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
799  if (level_idx == 0 && current_level_join_conditions.type == JoinType::LEFT) {
800  const auto& condition = current_level_join_conditions.quals.front();
801  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(condition.get());
802  CHECK(bin_oper) << condition->toString();
803  const auto rhs =
804  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_right_operand());
805  const auto lhs =
806  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_left_operand());
807  if (lhs && rhs && lhs->get_table_id() != rhs->get_table_id()) {
808  const Analyzer::ColumnVar* selected_lhs{nullptr};
809  // grab the left hand side column -- this is somewhat similar to normalize column
810  // pair, and a better solution may be to hoist that function out of the join
811  // framework and normalize columns at the top of build join loops
812  if (lhs->get_table_id() == inner_table_id) {
813  selected_lhs = rhs;
814  } else if (rhs->get_table_id() == inner_table_id) {
815  selected_lhs = lhs;
816  }
817  if (selected_lhs) {
818  std::list<std::shared_ptr<Analyzer::Expr>> hoisted_quals;
819  // get all LHS-only filters
820  auto should_hoist_qual = [&hoisted_quals](const auto& qual, const int table_id) {
821  CHECK(qual);
822 
823  ExprTableIdVisitor visitor;
824  const auto table_ids = visitor.visit(qual.get());
825  if (table_ids.size() == 1 && table_ids.find(table_id) != table_ids.end()) {
826  hoisted_quals.push_back(qual);
827  }
828  };
829  for (const auto& qual : ra_exe_unit.simple_quals) {
830  should_hoist_qual(qual, selected_lhs->get_table_id());
831  }
832  for (const auto& qual : ra_exe_unit.quals) {
833  should_hoist_qual(qual, selected_lhs->get_table_id());
834  }
835 
836  // build the filters callback and return it
837  if (!hoisted_quals.empty()) {
838  return [this, hoisted_quals, co](llvm::BasicBlock* true_bb,
839  llvm::BasicBlock* exit_bb,
840  const std::string& loop_name,
841  llvm::Function* parent_func,
842  CgenState* cgen_state) -> llvm::BasicBlock* {
843  // make sure we have quals to hoist
844  bool has_quals_to_hoist = false;
845  for (const auto& qual : hoisted_quals) {
846  // check to see if the filter was previously hoisted. if all filters were
847  // previously hoisted, this callback becomes a noop
848  if (plan_state_->hoisted_filters_.count(qual) == 0) {
849  has_quals_to_hoist = true;
850  break;
851  }
852  }
853 
854  if (!has_quals_to_hoist) {
855  return nullptr;
856  }
857 
858  AUTOMATIC_IR_METADATA(cgen_state);
859 
860  llvm::IRBuilder<>& builder = cgen_state->ir_builder_;
861  auto& context = builder.getContext();
862 
863  const auto filter_bb =
864  llvm::BasicBlock::Create(context,
865  "hoisted_left_join_filters_" + loop_name,
866  parent_func,
867  /*insert_before=*/true_bb);
868  builder.SetInsertPoint(filter_bb);
869 
870  llvm::Value* filter_lv = cgen_state_->llBool(true);
871  CodeGenerator code_generator(this);
873  for (const auto& qual : hoisted_quals) {
874  if (plan_state_->hoisted_filters_.insert(qual).second) {
875  // qual was inserted into the hoisted filters map, which means we have not
876  // seen this qual before. Generate filter.
877  VLOG(1) << "Generating code for hoisted left hand side qualifier "
878  << qual->toString();
879  auto cond = code_generator.toBool(
880  code_generator.codegen(qual.get(), true, co).front());
881  filter_lv = builder.CreateAnd(filter_lv, cond);
882  }
883  }
884  CHECK(filter_lv->getType()->isIntegerTy(1));
885 
886  builder.CreateCondBr(filter_lv, true_bb, exit_bb);
887  return filter_bb;
888  };
889  }
890  }
891  }
892  }
893  return nullptr;
894 }
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:100
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1354
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:289
#define VLOG(n)
Definition: Logger.h:383
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  level_idx,
const CompilationOptions &  co 
)
private

Definition at line 897 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, CodeGenerator::codegen(), CompilationOptions::filter_on_deleted_column, RelAlgExecutionUnit::input_descs, TABLE, and CodeGenerator::toBool().

// Builds a per-join-level callback that emits IR to test whether the inner-table
// row at the current iteration position (nesting level level_idx + 1) is marked
// deleted. Returns an empty callback (nullptr) when the deleted-column filter is
// disabled, the input is not a TABLE, or the table has no deleted column.
// NOTE(review): source line 900 was dropped by the docs extractor (hyperlinked
// token stripped) — verify the full body against IRCodegen.cpp:897.
899  {
901  if (!co.filter_on_deleted_column) {
902  return nullptr;
903  }
904  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
905  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
906  if (input_desc.getSourceType() != InputSourceType::TABLE) {
907  return nullptr;
908  }
909 
910  const auto deleted_cd = plan_state_->getDeletedColForTable(input_desc.getTableId());
911  if (!deleted_cd) {
912  return nullptr;
913  }
914  CHECK(deleted_cd->columnType.is_boolean());
// Synthesize a ColumnVar for the (boolean) deleted column so the regular
// expression codegen path can be reused below.
915  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
916  input_desc.getTableId(),
917  deleted_cd->columnId,
918  input_desc.getNestLevel());
919  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
920  llvm::Value* have_more_inner_rows) {
921  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
922  // Avoid fetching the deleted column from a position which is not valid.
923  // An invalid position can be returned by a one to one hash lookup (negative)
924  // or at the end of iteration over a set of matching values.
925  llvm::Value* is_valid_it{nullptr};
926  if (have_more_inner_rows) {
927  is_valid_it = have_more_inner_rows;
928  } else {
929  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
930  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
931  }
// Diamond CFG: only the valid-iterator branch loads/evaluates the deleted
// column; the invalid branch contributes a constant false via the phi node.
932  const auto it_valid_bb = llvm::BasicBlock::Create(
933  cgen_state_->context_, "it_valid", cgen_state_->current_func_);
934  const auto it_not_valid_bb = llvm::BasicBlock::Create(
935  cgen_state_->context_, "it_not_valid", cgen_state_->current_func_);
936  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
937  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
938  cgen_state_->context_, "row_is_deleted", cgen_state_->current_func_);
939  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
940  CodeGenerator code_generator(this);
941  const auto row_is_deleted = code_generator.toBool(
942  code_generator.codegen(deleted_expr.get(), true, co).front());
943  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
944  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
945  const auto row_is_deleted_default = cgen_state_->llBool(false);
946  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
947  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
// Merge the two paths: deleted flag from the valid branch, false otherwise.
948  auto row_is_deleted_or_default =
949  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
950  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
951  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
952  return row_is_deleted_or_default;
953  };
954 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1354
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:299
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1115
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache 
)
private

Definition at line 544 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), INJECT_TIMER, CgenState::ir_builder_, RelAlgExecutionUnit::join_quals, LEFT, PlanState::left_join_non_hashtable_quals_, CgenState::llBool(), MultiSet, OneToOne, CgenState::outer_join_match_found_per_level_, CodeGenerator::plan_state_, Set, Singleton, JoinLoopDomain::slot_lookup_result, CodeGenerator::toBool(), and JoinLoopDomain::values_buffer.

// Builds one JoinLoop descriptor per join nesting level. For each level it first
// tries to build a hash table; on success it emits a Singleton (one-to-one),
// MultiSet (range join) or Set loop, otherwise it falls back to an UpperBound
// nested-loop join. LEFT joins additionally get callbacks to record outer-match
// flags and to evaluate residual (non-hashtable) quals.
// NOTE(review): source lines 550-551 were dropped by the docs extractor
// (likely AUTOMATIC_IR_METADATA / INJECT_TIMER) — verify against IRCodegen.cpp:544.
549  {
552  std::vector<JoinLoop> join_loops;
553  for (size_t level_idx = 0, current_hash_table_idx = 0;
554  level_idx < ra_exe_unit.join_quals.size();
555  ++level_idx) {
556  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
557  std::vector<std::string> fail_reasons;
558  const auto current_level_hash_table =
559  buildCurrentLevelHashTable(current_level_join_conditions,
560  level_idx,
561  ra_exe_unit,
562  co,
563  query_infos,
564  column_cache,
565  fail_reasons);
// Invoked by the JoinLoop framework (LEFT joins only) with the LLVM value that
// tracks whether any inner match was found at this level.
566  const auto found_outer_join_matches_cb =
567  [this, level_idx](llvm::Value* found_outer_join_matches) {
568  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
569  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
570  cgen_state_->outer_join_match_found_per_level_[level_idx] =
571  found_outer_join_matches;
572  };
573  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
574  auto rem_left_join_quals_it =
575  plan_state_->left_join_non_hashtable_quals_.find(level_idx);
576  bool has_remaining_left_join_quals =
577  rem_left_join_quals_it != plan_state_->left_join_non_hashtable_quals_.end() &&
578  !rem_left_join_quals_it->second.empty();
579  const auto outer_join_condition_remaining_quals_cb =
580  [this, level_idx, &co](const std::vector<llvm::Value*>& prev_iters) {
581  // when we have multiple quals for the left join in the current join level
582  // we first try to build a hashtable by using one of the possible qual,
583  // and deal with remaining quals as extra join conditions
584  FetchCacheAnchor anchor(cgen_state_.get());
585  addJoinLoopIterator(prev_iters, level_idx + 1);
586  llvm::Value* left_join_cond = cgen_state_->llBool(true);
587  CodeGenerator code_generator(this);
588  auto it = plan_state_->left_join_non_hashtable_quals_.find(level_idx);
589  if (it != plan_state_->left_join_non_hashtable_quals_.end()) {
// AND together all residual quals that were not absorbed into the hash table.
590  for (auto expr : it->second) {
591  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
592  left_join_cond,
593  code_generator.toBool(
594  code_generator.codegen(expr.get(), true, co).front()));
595  }
596  }
597  return left_join_cond;
598  };
599  if (current_level_hash_table) {
// Hash join path: choose the loop kind from the hash table's layout.
600  const auto hoisted_filters_cb = buildHoistLeftHandSideFiltersCb(
601  ra_exe_unit, level_idx, current_level_hash_table->getInnerTableId(), co);
602  if (current_level_hash_table->getHashType() == HashType::OneToOne) {
603  join_loops.emplace_back(
604  /*kind=*/JoinLoopKind::Singleton,
605  /*type=*/current_level_join_conditions.type,
606  /*iteration_domain_codegen=*/
607  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
608  const std::vector<llvm::Value*>& prev_iters) {
609  addJoinLoopIterator(prev_iters, level_idx);
610  JoinLoopDomain domain{{0}};
611  domain.slot_lookup_result =
612  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
613  return domain;
614  },
615  /*outer_condition_match=*/
616  current_level_join_conditions.type == JoinType::LEFT &&
617  has_remaining_left_join_quals
618  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
619  outer_join_condition_remaining_quals_cb)
620  : nullptr,
621  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
622  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
623  : nullptr,
624  /*hoisted_filters=*/hoisted_filters_cb,
625  /*is_deleted=*/is_deleted_cb,
626  /*nested_loop_join=*/false);
627  } else if (auto range_join_table =
628  dynamic_cast<RangeJoinHashTable*>(current_level_hash_table.get())) {
629  join_loops.emplace_back(
630  /* kind= */ JoinLoopKind::MultiSet,
631  /* type= */ current_level_join_conditions.type,
632  /* iteration_domain_codegen= */
633  [this,
634  range_join_table,
635  current_hash_table_idx,
636  level_idx,
637  current_level_hash_table,
638  &co](const std::vector<llvm::Value*>& prev_iters) {
639  addJoinLoopIterator(prev_iters, level_idx);
640  JoinLoopDomain domain{{0}};
641  CHECK(!prev_iters.empty());
642  const auto matching_set = range_join_table->codegenMatchingSetWithOffset(
643  co, current_hash_table_idx, prev_iters.back());
644  domain.values_buffer = matching_set.elements;
645  domain.element_count = matching_set.count;
646  return domain;
647  },
648  /* outer_condition_match= */
649  current_level_join_conditions.type == JoinType::LEFT
650  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
651  outer_join_condition_remaining_quals_cb)
652  : nullptr,
653  /* found_outer_matches= */
654  current_level_join_conditions.type == JoinType::LEFT
655  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
656  : nullptr,
657  /* hoisted_filters= */ nullptr, // <<! TODO
658  /* is_deleted= */ is_deleted_cb,
659  /*nested_loop_join=*/false);
660  } else {
// One-to-many hash table: iterate the matching set for each outer row.
661  join_loops.emplace_back(
662  /*kind=*/JoinLoopKind::Set,
663  /*type=*/current_level_join_conditions.type,
664  /*iteration_domain_codegen=*/
665  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
666  const std::vector<llvm::Value*>& prev_iters) {
667  addJoinLoopIterator(prev_iters, level_idx);
668  JoinLoopDomain domain{{0}};
669  const auto matching_set = current_level_hash_table->codegenMatchingSet(
670  co, current_hash_table_idx);
671  domain.values_buffer = matching_set.elements;
672  domain.element_count = matching_set.count;
673  return domain;
674  },
675  /*outer_condition_match=*/
676  current_level_join_conditions.type == JoinType::LEFT
677  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
678  outer_join_condition_remaining_quals_cb)
679  : nullptr,
680  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
681  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
682  : nullptr,
683  /*hoisted_filters=*/hoisted_filters_cb,
684  /*is_deleted=*/is_deleted_cb,
685  /*nested_loop_join=*/false);
686  }
687  ++current_hash_table_idx;
688  } else {
// No hash table could be built: fall back to a nested-loop join, subject to
// the loop-join policy check below.
689  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
690  ? "No equijoin expression found"
691  : boost::algorithm::join(fail_reasons, " | ");
// NOTE(review): source line 692 (the call to check_if_loop_join_is_allowed,
// whose argument list follows) was dropped by the docs extractor.
693  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
694  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
695  // condition.
696  VLOG(1) << "Unable to build hash table, falling back to loop join: "
697  << fail_reasons_str;
698  const auto outer_join_condition_cb =
699  [this, level_idx, &co, &current_level_join_conditions](
700  const std::vector<llvm::Value*>& prev_iters) {
701  // The values generated for the match path don't dominate all uses
702  // since on the non-match path nulls are generated. Reset the cache
703  // once the condition is generated to avoid incorrect reuse.
704  FetchCacheAnchor anchor(cgen_state_.get());
705  addJoinLoopIterator(prev_iters, level_idx + 1);
706  llvm::Value* left_join_cond = cgen_state_->llBool(true);
707  CodeGenerator code_generator(this);
708  for (auto expr : current_level_join_conditions.quals) {
709  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
710  left_join_cond,
711  code_generator.toBool(
712  code_generator.codegen(expr.get(), true, co).front()));
713  }
714  return left_join_cond;
715  };
716  join_loops.emplace_back(
717  /*kind=*/JoinLoopKind::UpperBound,
718  /*type=*/current_level_join_conditions.type,
719  /*iteration_domain_codegen=*/
720  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
721  addJoinLoopIterator(prev_iters, level_idx);
722  JoinLoopDomain domain{{0}};
// Loop bound comes from the runtime num_rows_per_scan argument of the row
// function, indexed by the inner table's nesting level.
723  auto* arg = get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan");
724  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
725  arg->getType()->getScalarType()->getPointerElementType(),
726  arg,
727  cgen_state_->llInt(int32_t(level_idx + 1)));
728  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(
729  rows_per_scan_ptr->getType()->getPointerElementType(),
730  rows_per_scan_ptr,
731  "num_rows_per_scan");
732  return domain;
733  },
734  /*outer_condition_match=*/
735  current_level_join_conditions.type == JoinType::LEFT
736  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
737  outer_join_condition_cb)
738  : nullptr,
739  /*found_outer_matches=*/
740  current_level_join_conditions.type == JoinType::LEFT
741  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
742  : nullptr,
743  /*hoisted_filters=*/nullptr,
744  /*is_deleted=*/is_deleted_cb,
745  /*nested_loop_join=*/true);
746  }
747  }
748  return join_loops;
749 }
llvm::Value * values_buffer
Definition: JoinLoop.h:49
std::string join(T const &container, std::string const &delim)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
#define INJECT_TIMER(DESC)
Definition: measure.h:93
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1354
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * slot_lookup_result
Definition: JoinLoop.h:47
#define CHECK_LT(x, y)
Definition: Logger.h:299
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:956
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1115
#define CHECK(condition)
Definition: Logger.h:289
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
Definition: IRCodegen.cpp:484
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const int inner_table_id, const CompilationOptions &co)
Definition: IRCodegen.cpp:789
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:544
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:897
#define VLOG(n)
Definition: Logger.h:383

+ Here is the call graph for this function:

void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3265 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LT, getFragmentCount(), RelAlgExecutionUnit::input_descs, and plan_state_.

Referenced by fetchChunks().

// Builds, for each scan (input descriptor), the list of fragment counts used for
// the fragment cross join, and records for every referenced column the position
// (frag_pos) of the scan it belongs to in local_col_to_frag_pos.
3270  {
3271  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
3272  size_t frag_pos{0};
3273  const auto& input_descs = ra_exe_unit.input_descs;
3274  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3275  const int table_id = input_descs[scan_idx].getTableId();
// selected_fragments must be aligned with input_descs, one entry per scan.
3276  CHECK_EQ(selected_fragments[scan_idx].table_id, table_id);
3277  selected_fragments_crossjoin.push_back(
3278  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
3279  for (const auto& col_id : col_global_ids) {
3280  CHECK(col_id);
3281  const auto& input_desc = col_id->getScanDesc();
// Only columns belonging to this table at this nesting level map to frag_pos.
3282  if (input_desc.getTableId() != table_id ||
3283  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
3284  continue;
3285  }
3286  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3287  CHECK(it != plan_state_->global_to_local_col_ids_.end());
3288  CHECK_LT(static_cast<size_t>(it->second),
3289  plan_state_->global_to_local_col_ids_.size());
3290  local_col_to_frag_pos[it->second] = frag_pos;
3291  }
3292  ++frag_pos;
3293  }
3294 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
std::vector< InputDescriptor > input_descs
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1354
#define CHECK_LT(x, y)
Definition: Logger.h:299
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3251
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::buildSelectedFragsMappingForUnion ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3296 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs.

Referenced by fetchUnionChunks().

// UNION variant of buildSelectedFragsMapping: each kernel processes fragments of
// a single input table, so push a singleton entry for the scan whose table
// matches the (single-table) selected fragments list.
3299  {
3300  const auto& input_descs = ra_exe_unit.input_descs;
3301  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3302  // selected_fragments is set in assignFragsToKernelDispatch (execution_kernel.fragments)
3303  if (selected_fragments[0].table_id == input_descs[scan_idx].getTableId()) {
3304  selected_fragments_crossjoin.push_back({size_t(1)});
3305  }
3306  }
3307 }
std::vector< InputDescriptor > input_descs

+ Here is the caller graph for this function:

FragmentSkipStatus Executor::canSkipFragmentForFpQual ( const Analyzer::BinOper comp_expr,
const Analyzer::ColumnVar lhs_col,
const Fragmenter_Namespace::FragmentInfo fragment,
const Analyzer::Constant rhs_const 
) const
private

Definition at line 4073 of file Execute.cpp.

References CHECK, extract_max_stat_fp_type(), extract_min_stat_fp_type(), Analyzer::ColumnVar::get_column_id(), Analyzer::Constant::get_constval(), Analyzer::BinOper::get_optype(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), INVALID, kDOUBLE, kEQ, kFLOAT, kGE, kGT, kLE, kLT, NOT_SKIPPABLE, and SKIPPABLE.

Referenced by skipFragment().

// Decides from fragment chunk metadata (min/max stats of an FP column) whether a
// fragment can be skipped for a comparison qual of the form `lhs_col <op> const`.
// NOTE(review): the docs extractor stripped every hyperlinked return statement
// in this body (source lines 4081, 4089, 4104, 4109, 4114, 4119, 4124, 4130) —
// presumably `return FragmentSkipStatus::INVALID/NOT_SKIPPABLE/SKIPPABLE;` per
// the reference list above. Verify against Execute.cpp:4073.
4077  {
4078  const int col_id = lhs_col->get_column_id();
4079  auto chunk_meta_it = fragment.getChunkMetadataMap().find(col_id);
// No metadata for the column: skipping cannot be decided (return stripped).
4080  if (chunk_meta_it == fragment.getChunkMetadataMap().end()) {
4082  }
4083  double chunk_min{0.};
4084  double chunk_max{0.};
4085  const auto& chunk_type = lhs_col->get_type_info();
4086  chunk_min = extract_min_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4087  chunk_max = extract_max_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
// min > max indicates invalid/uninitialized stats (return stripped).
4088  if (chunk_min > chunk_max) {
4090  }
4091 
4092  const auto datum_fp = rhs_const->get_constval();
4093  const auto rhs_type = rhs_const->get_type_info().get_type();
4094  CHECK(rhs_type == kFLOAT || rhs_type == kDOUBLE);
4095 
4096  // Do we need to codegen the constant like the integer path does?
4097  const auto rhs_val = rhs_type == kFLOAT ? datum_fp.floatval : datum_fp.doubleval;
4098 
4099  // Todo: dedup the following comparison code with the integer/timestamp path, it is
4100  // slightly tricky to do cleanly as we do not have rowid on this path
// For each operator, if the stats range provably excludes all matches the
// fragment is skippable (the stripped statements in the empty if-bodies).
4101  switch (comp_expr->get_optype()) {
4102  case kGE:
4103  if (chunk_max < rhs_val) {
4105  }
4106  break;
4107  case kGT:
4108  if (chunk_max <= rhs_val) {
4110  }
4111  break;
4112  case kLE:
4113  if (chunk_min > rhs_val) {
4115  }
4116  break;
4117  case kLT:
4118  if (chunk_min >= rhs_val) {
4120  }
4121  break;
4122  case kEQ:
4123  if (chunk_min > rhs_val || chunk_max < rhs_val) {
4125  }
4126  break;
4127  default:
4128  break;
4129  }
4131 }
double extract_max_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
Definition: sqldefs.h:34
Definition: sqldefs.h:35
Definition: sqldefs.h:29
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
SQLOps get_optype() const
Definition: Analyzer.h:448
double extract_min_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
const ChunkMetadataMap & getChunkMetadataMap() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:83
Definition: sqldefs.h:33
Datum get_constval() const
Definition: Analyzer.h:344
#define CHECK(condition)
Definition: Logger.h:289
Definition: sqldefs.h:32
int get_column_id() const
Definition: Analyzer.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * Executor::castToFP ( llvm::Value *  value,
SQLTypeInfo const &  from_ti,
SQLTypeInfo const &  to_ti 
)
private

Definition at line 3887 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, exp_to_scale(), logger::FATAL, SQLTypeInfo::get_scale(), SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_number(), and LOG.

// Casts an integer LLVM value holding a numeric SQL value to float/double per
// to_ti, dividing by 10^scale when the source is a scaled decimal. Non-matching
// inputs are returned unchanged.
// NOTE(review): source line 3890 was dropped by the docs extractor (likely
// AUTOMATIC_IR_METADATA) — verify against Execute.cpp:3887.
3889  {
3891  if (value->getType()->isIntegerTy() && from_ti.is_number() && to_ti.is_fp() &&
3892  (!from_ti.is_fp() || from_ti.get_size() != to_ti.get_size())) {
3893  llvm::Type* fp_type{nullptr};
3894  switch (to_ti.get_size()) {
3895  case 4:
3896  fp_type = llvm::Type::getFloatTy(cgen_state_->context_);
3897  break;
3898  case 8:
3899  fp_type = llvm::Type::getDoubleTy(cgen_state_->context_);
3900  break;
3901  default:
3902  LOG(FATAL) << "Unsupported FP size: " << to_ti.get_size();
3903  }
3904  value = cgen_state_->ir_builder_.CreateSIToFP(value, fp_type);
// Scaled decimals are stored as integers scaled by 10^scale; undo the scaling.
3905  if (from_ti.get_scale()) {
3906  value = cgen_state_->ir_builder_.CreateFDiv(
3907  value,
3908  llvm::ConstantFP::get(value->getType(), exp_to_scale(from_ti.get_scale())));
3909  }
3910  }
3911  return value;
3912 }
#define LOG(tag)
Definition: Logger.h:283
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
#define AUTOMATIC_IR_METADATA(CGENSTATE)
uint64_t exp_to_scale(const unsigned exp)

+ Here is the call graph for this function:

llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bitWidth 
)
private

Definition at line 3914 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, CHECK, CHECK_LT, and get_int_type().

// Bit-casts a pointer value to an integer-pointer type of width bitWidth, unless
// the pointee already has that width (int of bitWidth bits, float for 32, or
// double for 64), in which case the pointer is returned unchanged.
// NOTE(review): source line 3915 was dropped by the docs extractor (likely
// AUTOMATIC_IR_METADATA) — verify against Execute.cpp:3914. The parameter is
// rendered `bit_width` in the signature above but is `bitWidth` in the body.
3914  {
3916  CHECK(val->getType()->isPointerTy());
3917 
3918  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
3919  const auto val_type = val_ptr_type->getPointerElementType();
// Determine the bit width of the current pointee type.
3920  size_t val_width = 0;
3921  if (val_type->isIntegerTy()) {
3922  val_width = val_type->getIntegerBitWidth();
3923  } else {
3924  if (val_type->isFloatTy()) {
3925  val_width = 32;
3926  } else {
3927  CHECK(val_type->isDoubleTy());
3928  val_width = 64;
3929  }
3930  }
3931  CHECK_LT(size_t(0), val_width);
3932  if (bitWidth == val_width) {
3933  return val;
3934  }
3935  return cgen_state_->ir_builder_.CreateBitCast(
3936  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
3937 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:299
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

bool Executor::checkCurrentQuerySession ( const std::string &  candidate_query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4465 of file Execute.cpp.

References current_query_session_.

// Returns true iff the candidate session id is non-empty and equals the
// executor's currently-running query session (the caller holds read_lock on
// the session mutex while this is evaluated).
4467  {
4468  // An empty candidate never matches; otherwise compare against the session
4469  // currently registered as running on this executor.
4470  return !candidate_query_session.empty() &&
4471  (current_query_session_ == candidate_query_session);
4472 }
QuerySessionId current_query_session_
Definition: Execute.h:1398
bool Executor::checkIsQuerySessionEnrolled ( const QuerySessionId query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4739 of file Execute.cpp.

References queries_session_map_.

Referenced by executeWorkUnitImpl().

// Returns true iff the (non-empty) session id is enrolled in the static
// queries_session_map_; the caller holds read_lock on the session mutex.
4741  {
4742  if (query_session.empty()) {
4743  return false;
4744  }
// NOTE(review): the !query_session.empty() re-check below is redundant — the
// early return above already guarantees it.
4745  return !query_session.empty() && queries_session_map_.count(query_session);
4746 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1402

+ Here is the caller graph for this function:

bool Executor::checkIsQuerySessionInterrupted ( const std::string &  query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4728 of file Execute.cpp.

References queries_interrupt_flag_.

Referenced by executePlanWithGroupBy(), executePlanWithoutGroupBy(), fetchChunks(), and fetchUnionChunks().

// Returns true iff the (non-empty) session id has a registered interrupt flag
// and that flag is set; the caller holds read_lock on the session mutex.
4730  {
4731  if (query_session.empty()) {
4732  return false;
4733  }
4734  auto flag_it = queries_interrupt_flag_.find(query_session);
// NOTE(review): the !query_session.empty() re-check below is redundant — the
// early return above already guarantees it.
4735  return !query_session.empty() && flag_it != queries_interrupt_flag_.end() &&
4736  flag_it->second;
4737 }
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1400

+ Here is the caller graph for this function:

bool Executor::checkNonKernelTimeInterrupted ( ) const

Definition at line 4816 of file Execute.cpp.

References current_query_session_, executor_id_, executor_session_mutex_, queries_interrupt_flag_, and UNITARY_EXECUTOR_ID.

// Checks, outside of kernel execution, whether the current query session of
// this executor has been interrupted.
// NOTE(review): the docs extractor stripped source lines 4820 and 4823 — per
// the reference list (executor_id_, UNITARY_EXECUTOR_ID, executor_session_mutex_,
// shared_lock) these are presumably the unitary-executor guard condition for the
// early return below and a shared_lock acquisition. Verify against Execute.cpp:4816.
4816  {
4817  // this function should be called within an executor which is assigned
4818  // to the specific query thread (that indicates we already enroll the session)
4819  // check whether this is called from non unitary executor
4821  return false;
4822  };
4824  auto flag_it = queries_interrupt_flag_.find(current_query_session_);
4825  return !current_query_session_.empty() && flag_it != queries_interrupt_flag_.end() &&
4826  flag_it->second;
4827 }
QuerySessionId current_query_session_
Definition: Execute.h:1398
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1298
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1400
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1396
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:376
void Executor::checkPendingQueryStatus ( const QuerySessionId query_session)

Definition at line 4509 of file Execute.cpp.

References ERR_INTERRUPTED, executor_session_mutex_, queries_interrupt_flag_, queries_session_map_, and VLOG.

// Before running a pending query, checks whether its session was already
// interrupted while queued; logs (VLOG) when the session is missing from the
// enrollment map or has no interrupt flag registered.
// NOTE(review): the docs extractor stripped source lines 4512 and 4531 — per
// the reference list (executor_session_mutex_, shared_lock, ERR_INTERRUPTED)
// these are presumably a shared_lock acquisition and a throw of an
// ERR_INTERRUPTED error when the flag is set. Verify against Execute.cpp:4509.
4509  {
4510  // check whether we are okay to execute the "pending" query
4511  // i.e., before running the query check if this query session is "ALREADY" interrupted
4513  if (query_session.empty()) {
4514  return;
4515  }
4516  if (queries_interrupt_flag_.find(query_session) == queries_interrupt_flag_.end()) {
4517  // something goes wrong since we assume this is caller's responsibility
4518  // (call this function only for enrolled query session)
4519  if (!queries_session_map_.count(query_session)) {
4520  VLOG(1) << "Interrupting pending query is not available since the query session is "
4521  "not enrolled";
4522  } else {
4523  // here the query session is enrolled but the interrupt flag is not registered
4524  VLOG(1)
4525  << "Interrupting pending query is not available since its interrupt flag is "
4526  "not registered";
4527  }
4528  return;
4529  }
// Flag set while pending: abort before execution (stripped statement at 4531).
4530  if (queries_interrupt_flag_[query_session]) {
4532  }
4533 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1402
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1444
std::shared_lock< T > shared_lock
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1400
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1396
#define VLOG(n)
Definition: Logger.h:383
void Executor::clearCaches ( bool  runtime_only = false)
static void Executor::clearExternalCaches ( bool  for_update,
const TableDescriptor td,
const int  current_db_id 
)
inlinestatic

Definition at line 391 of file Execute.h.

References TableDescriptor::getTableChunkKey(), CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable().

Referenced by Parser::InsertIntoTableAsSelectStmt::execute(), Parser::DropTableStmt::execute(), Parser::TruncateTableStmt::execute(), Parser::DropColumnStmt::execute(), Parser::CopyTableStmt::execute(), RelAlgExecutor::executeDelete(), and RelAlgExecutor::executeUpdate().

// Invalidates external (hash-table/result-set) caches, either for a single
// table — when td yields a non-empty chunk-key prefix — or entirely.
// NOTE(review): the docs extractor stripped the hyperlinked call lines
// (source lines 399, 401, 403, 409, 411, 413) — per the reference list these are
// presumably CacheInvalidator<...>::invalidateCachesByTable(table_key) /
// invalidateCaches() calls, selected by for_update. Verify against Execute.h:391.
393  {
394  bool clearEntireCache = true;
395  if (td) {
396  const auto& table_chunk_key_prefix = td->getTableChunkKey(current_db_id);
397  if (!table_chunk_key_prefix.empty()) {
398  auto table_key = boost::hash_value(table_chunk_key_prefix);
// Per-table invalidation (stripped calls), keyed by the chunk-key hash.
400  if (for_update) {
402  } else {
404  }
405  clearEntireCache = false;
406  }
407  }
408  if (clearEntireCache) {
// Whole-cache invalidation (stripped calls).
410  if (for_update) {
412  } else {
414  }
415  }
416  }
static void invalidateCachesByTable(size_t table_key)
static void invalidateCaches()
std::vector< int > getTableChunkKey(const int getCurrentDBId) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 499 of file Execute.cpp.

References Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, execute_mutex_, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by DBHandler::clear_cpu_memory(), DBHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

// Clears buffer-pool memory at the given level (CPU or GPU); any other level
// throws. Takes the global execute mutex so no query runs during the flush.
// NOTE(review): the docs extractor stripped source lines 501-503, 511, 513-514 —
// per the reference list these presumably include the CPU_LEVEL/GPU_LEVEL case
// labels, a unique_lock on execute_mutex_, the hash-table-cache
// invalidateCaches() call, and the DataMgr clearMemory(memory_level) call.
// Verify against Execute.cpp:499.
499  {
500  switch (memory_level) {
504  execute_mutex_); // Don't flush memory while queries are running
505 
506  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
507  // The hash table cache uses CPU memory not managed by the buffer manager. In the
508  // future, we should manage these allocations with the buffer manager directly.
509  // For now, assume the user wants to purge the hash table cache when they clear
510  // CPU memory (currently used in ExecuteTest to lower memory pressure)
512  }
515  break;
516  }
517  default: {
518  throw std::runtime_error(
519  "Clearing memory levels other than the CPU level or GPU level is not "
520  "supported.");
521  }
522  }
523 }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1407
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:434
static void invalidateCaches()
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:232
static SysCatalog & instance()
Definition: SysCatalog.h:341
std::unique_lock< T > unique_lock

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMetaInfoCache ( )
private

Definition at line 786 of file Execute.cpp.

References agg_col_range_cache_, TableGenerations::clear(), AggregatedColRange::clear(), InputTableInfoCache::clear(), input_table_info_cache_, and table_generations_.

// Clears the executor's cached metadata.
// NOTE(review): the entire body (source lines 787-789) was stripped by the docs
// extractor — per the reference list above it presumably calls clear() on
// input_table_info_cache_, agg_col_range_cache_, and table_generations_.
// Verify against Execute.cpp:786.
786  {
790 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1394
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1393
TableGenerations table_generations_
Definition: Execute.h:1395

+ Here is the call graph for this function:

void Executor::clearQuerySessionStatus ( const QuerySessionId query_session,
const std::string &  submitted_time_str 
)

Definition at line 4535 of file Execute.cpp.

References current_query_session_, executor_session_mutex_, invalidateRunningQuerySession(), removeFromQuerySessionList(), and resetInterrupt().

// Removes a finished query's session entry and, when it is the currently
// running session, invalidates it and resets the interrupt state.
// NOTE(review): the docs extractor stripped source line 4537 — per the
// reference list (executor_session_mutex_, unique_lock) it is presumably the
// unique_lock acquisition that produces `session_write_lock` used below.
// Verify against Execute.cpp:4535.
4536  {
4538  // clear the interrupt-related info for a finished query
4539  if (query_session.empty()) {
4540  return;
4541  }
4542  removeFromQuerySessionList(query_session, submitted_time_str, session_write_lock);
4543  if (query_session.compare(current_query_session_) == 0) {
4544  invalidateRunningQuerySession(session_write_lock);
4545  resetInterrupt();
4546  }
4547 }
QuerySessionId current_query_session_
Definition: Execute.h:1398
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4683
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1396
void resetInterrupt()
void invalidateRunningQuerySession(heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4487

+ Here is the call graph for this function:

llvm::Value * Executor::codegenAggregateWindowState ( )
private

Definition at line 1323 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, COUNT, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, and kFLOAT.

1323  {
1325  const auto pi32_type =
1326  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1327  const auto pi64_type =
1328  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1329  const auto window_func_context =
1331  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
1332  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1333  const auto aggregate_state_type =
1334  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1335  auto aggregate_state = aggregateWindowStatePtr();
1336  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1337  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1338  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1339  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
1340  aggregate_state_count_i64, aggregate_state_type);
1341  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1342  switch (window_func_ti.get_type()) {
1343  case kFLOAT: {
1344  return cgen_state_->emitCall(
1345  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
1346  }
1347  case kDOUBLE: {
1348  return cgen_state_->emitCall(
1349  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
1350  }
1351  case kDECIMAL: {
1352  return cgen_state_->emitCall(
1353  "load_avg_decimal",
1354  {aggregate_state,
1355  aggregate_state_count,
1356  double_null_lv,
1357  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
1358  }
1359  default: {
1360  return cgen_state_->emitCall(
1361  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
1362  }
1363  }
1364  }
1365  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1366  return cgen_state_->ir_builder_.CreateLoad(
1367  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1368  }
1369  switch (window_func_ti.get_type()) {
1370  case kFLOAT: {
1371  return cgen_state_->emitCall("load_float", {aggregate_state});
1372  }
1373  case kDOUBLE: {
1374  return cgen_state_->emitCall("load_double", {aggregate_state});
1375  }
1376  default: {
1377  return cgen_state_->ir_builder_.CreateLoad(
1378  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1379  }
1380  }
1381 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2570
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenConditionalAggregateCondValSelector ( llvm::Value *  cond_lv,
SQLAgg const  aggKind,
CompilationOptions const &  co 
) const
private

Definition at line 1383 of file WindowFunctionIR.cpp.

References CHECK, and kSUM_IF.

1386  {
1387  llvm::Value* res_cond_lv{nullptr};
1388  switch (aggKind) {
1389  case kSUM_IF:
1390  if (cond_lv->getType()->isIntegerTy(1)) {
1391  // cond_expr returns i1 type val, just need to cast to i8 type
1392  // i.e., cond_expr IS NULL
1393  res_cond_lv = cgen_state_->castToTypeIn(cond_lv, 8);
1394  } else {
1395  CHECK(cond_lv->getType()->isIntegerTy(8));
1396  // cond_expr may have null value instead of upcasted bool (i1-type) value
1397  // so we have to correctly set true condition
1398  // i.e., i8 @gt_int32_t_nullable_lhs(..., i64 -2147483648, i8 -128)
1399  // has one of the following i8-type values: 1, 0, -128
1400  auto true_cond_lv =
1401  cgen_state_->ir_builder_.CreateICmpEQ(cond_lv, cgen_state_->llInt((int8_t)1));
1402  res_cond_lv = cgen_state_->ir_builder_.CreateSelect(
1403  true_cond_lv, cgen_state_->llInt((int8_t)1), cgen_state_->llInt((int8_t)0));
1404  }
1405  break;
1406  default:
1407  break;
1408  }
1409  return res_cond_lv;
1410 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
#define CHECK(condition)
Definition: Logger.h:289
llvm::Value * Executor::codegenCurrentPartitionIndex ( const WindowFunctionContext window_func_context,
llvm::Value *  current_row_pos_lv 
)
private

Definition at line 689 of file WindowFunctionIR.cpp.

References WindowFunctionContext::elementCount(), get_int_type(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFunction::isFrameNavigateWindowFunction(), WindowFunctionContext::partitionCount(), WindowFunctionContext::partitionNumCountBuf(), and WindowFunctionContext::payload().

691  {
692  const auto pi64_type =
693  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
694  const auto pi32_type =
695  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
696  auto row_pos_lv = current_row_pos_lv;
697  if (window_func_context->getWindowFunction()->isFrameNavigateWindowFunction()) {
698  // `current_row_pos_lv` indicates the index of the current row, but to figure out the
699  // index of the window partition it belongs to, we need a special approach, especially for
700  // window framing navigation function for instance, when we have five rows having two
701  // columns pc and val such as (2,1), (2,2), (2,3), (1,1), (1,2), we build a OneToMany
702  // Perfect Hash Table as: offset: 0 2 / count: 2 3 / payload: i1, i2, i3, i4, i5 where
703  // i1 ~ i3 and i4 ~ i5 are rows for partition 1 (i.e., pc = 1) and 2 (i.e., pc = 2),
704  // respectively. But when processing the first row (2, 1), the original
705  // `current_row_pos_lv` stands for zero so computing which partition it belongs to is
706  // hard unless hashing the value at runtime. Even if we do hash, we cannot know the
707  // exact hash slot unless we do binary + linear searches multiple times (via the payload
708  // buffer and the ordered payload buffer), i.e., when the row (1,2) is assigned to
709  // partition[4], we cannot find the hash slot index '4' by using `current_row_pos_lv`
710  // unless doing a costly operation like a linear search over the entire window
711  // partition. Instead, we collect the hash slot that each row is assigned to and keep
712  // this info in the payload buffer `hash_slot_idx_ptr_lv` and use it for computing
713  // window frame navigation functions
714  auto* const hash_slot_idx_ptr =
715  window_func_context->payload() + window_func_context->elementCount();
716  auto hash_slot_idx_buf_lv =
717  cgen_state_->llInt(reinterpret_cast<int64_t>(hash_slot_idx_ptr));
718  auto hash_slot_idx_ptr_lv =
719  cgen_state_->ir_builder_.CreateIntToPtr(hash_slot_idx_buf_lv, pi32_type);
720  auto hash_slot_idx_load_lv = cgen_state_->ir_builder_.CreateGEP(
721  hash_slot_idx_ptr_lv->getType()->getPointerElementType(),
722  hash_slot_idx_ptr_lv,
723  current_row_pos_lv);
724  row_pos_lv = cgen_state_->castToTypeIn(
725  cgen_state_->ir_builder_.CreateLoad(
726  hash_slot_idx_load_lv->getType()->getPointerElementType(),
727  hash_slot_idx_load_lv,
728  "cur_row_hash_slot_idx"),
729  64);
730  }
731  auto partition_count_lv = cgen_state_->llInt(window_func_context->partitionCount());
732  auto partition_num_count_buf_lv = cgen_state_->llInt(
733  reinterpret_cast<int64_t>(window_func_context->partitionNumCountBuf()));
734  auto partition_num_count_ptr_lv =
735  cgen_state_->ir_builder_.CreateIntToPtr(partition_num_count_buf_lv, pi64_type);
736  return cgen_state_->emitCall(
737  "compute_int64_t_lower_bound",
738  {partition_count_lv, row_pos_lv, partition_num_count_ptr_lv});
739 }
bool isFrameNavigateWindowFunction() const
Definition: Analyzer.h:2624
size_t elementCount() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
size_t partitionCount() const
const int64_t * partitionNumCountBuf() const
const Analyzer::WindowFunction * getWindowFunction() const
const int32_t * payload() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenFrameBound ( bool  for_start_bound,
bool  for_range_mode,
bool  for_window_frame_naviation,
const Analyzer::WindowFrame frame_bound,
bool  is_timestamp_type_frame,
llvm::Value *  order_key_null_val,
const WindowFrameBoundFuncArgs args 
)
private

Definition at line 557 of file WindowFunctionIR.cpp.

References CHECK, CURRENT_ROW, WindowFrameBoundFuncArgs::current_row_pos_lv, EXPR_FOLLOWING, EXPR_PRECEDING, WindowFrameBoundFuncArgs::frame_end_bound_expr_lv, WindowFrameBoundFuncArgs::frame_start_bound_expr_lv, Analyzer::WindowFrame::getBoundType(), WindowFrameBoundFuncArgs::int64_t_one_val_lv, WindowFrameBoundFuncArgs::int64_t_zero_val_lv, WindowFrameBoundFuncArgs::num_elem_current_partition_lv, WindowFrameBoundFuncArgs::order_type_col_name, UNBOUNDED_FOLLOWING, and UNBOUNDED_PRECEDING.

563  {
564  const auto bound_type = frame_bound->getBoundType();
565  auto adjust_frame_end_bound = [&](llvm::Value* target_bound_lv) {
566  return cgen_state_->ir_builder_.CreateSub(target_bound_lv, args.int64_t_one_val_lv);
567  };
569  CHECK(for_start_bound) << "frame end cannot be UNBOUNDED PRECEDING";
570  return args.int64_t_zero_val_lv;
571  } else if (bound_type == SqlWindowFrameBoundType::UNBOUNDED_FOLLOWING) {
572  CHECK(!for_start_bound) << "frame start cannot be UNBOUNDED FOLLOWING";
573  // adjust frame bound w.r.t the open frame interval if necessary
574  return for_window_frame_naviation
575  ? adjust_frame_end_bound(args.num_elem_current_partition_lv)
576  : args.num_elem_current_partition_lv;
577  }
578  std::vector<llvm::Value*> func_args;
579  std::string op_name =
580  bound_type == SqlWindowFrameBoundType::EXPR_FOLLOWING ? "add" : "sub";
581  if (!for_range_mode) {
582  llvm::Value* current_row_bound_expr_lv{nullptr};
583  if (for_window_frame_naviation) {
584  // we already know the current row's index in the (ordered) window frame in this case
585  auto bound_expr =
586  for_start_bound ? args.frame_start_bound_expr_lv : args.frame_end_bound_expr_lv;
587  if (bound_type == SqlWindowFrameBoundType::EXPR_FOLLOWING) {
588  current_row_bound_expr_lv =
589  cgen_state_->ir_builder_.CreateAdd(args.current_row_pos_lv, bound_expr);
590  } else if (bound_type == SqlWindowFrameBoundType::EXPR_PRECEDING) {
591  current_row_bound_expr_lv =
592  cgen_state_->ir_builder_.CreateSub(args.current_row_pos_lv, bound_expr);
593  } else {
595  current_row_bound_expr_lv = args.current_row_pos_lv;
596  }
597  // adjust frame bound w.r.t the open frame interval
598  if (for_start_bound) {
599  return cgen_state_->ir_builder_.CreateSelect(
600  cgen_state_->ir_builder_.CreateICmpSLT(current_row_bound_expr_lv,
601  args.int64_t_zero_val_lv),
602  args.int64_t_zero_val_lv,
603  current_row_bound_expr_lv);
604  } else {
605  return cgen_state_->ir_builder_.CreateSelect(
606  cgen_state_->ir_builder_.CreateICmpSGE(current_row_bound_expr_lv,
608  adjust_frame_end_bound(args.num_elem_current_partition_lv),
609  current_row_bound_expr_lv);
610  }
611  } else {
612  std::string func_class = for_start_bound ? "start" : "end";
613  auto const func_name = "compute_row_mode_" + func_class + "_index_" + op_name;
614  func_args = prepareRowModeFuncArgs(for_start_bound, bound_type, args);
615  current_row_bound_expr_lv = cgen_state_->emitCall(func_name, func_args);
616  }
617  return current_row_bound_expr_lv;
618  } else {
619  std::string func_class = for_start_bound ? "lower" : "upper";
620  auto const func_name = getFramingFuncName(
621  func_class,
622  args.order_type_col_name,
623  op_name,
624  bound_type != SqlWindowFrameBoundType::CURRENT_ROW && is_timestamp_type_frame);
625  func_args = prepareRangeModeFuncArgs(
626  for_start_bound, frame_bound, is_timestamp_type_frame, order_key_null_val, args);
627  auto frame_bound_lv = cgen_state_->emitCall(func_name, func_args);
628  if (!for_start_bound && for_window_frame_naviation) {
629  // adjust frame end bound w.r.t the open frame interval
630  frame_bound_lv = cgen_state_->ir_builder_.CreateSelect(
631  cgen_state_->ir_builder_.CreateICmpSGE(frame_bound_lv,
633  adjust_frame_end_bound(args.num_elem_current_partition_lv),
634  frame_bound_lv);
635  }
636  return frame_bound_lv;
637  }
638 }
llvm::Value * num_elem_current_partition_lv
Definition: WindowContext.h:94
llvm::Value * current_row_pos_lv
Definition: WindowContext.h:89
llvm::Value * frame_end_bound_expr_lv
Definition: WindowContext.h:88
std::string getFramingFuncName(const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
SqlWindowFrameBoundType getBoundType() const
Definition: Analyzer.h:2484
std::vector< llvm::Value * > prepareRangeModeFuncArgs(bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
#define CHECK(condition)
Definition: Logger.h:289
llvm::Value * int64_t_zero_val_lv
Definition: WindowContext.h:92
llvm::Value * int64_t_one_val_lv
Definition: WindowContext.h:93
llvm::Value * frame_start_bound_expr_lv
Definition: WindowContext.h:87
std::string order_type_col_name
Definition: WindowContext.h:96
std::vector< llvm::Value * > prepareRowModeFuncArgs(bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenFrameBoundExpr ( const Analyzer::WindowFunction window_func,
const Analyzer::WindowFrame frame_bound,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 506 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegen(), EXPR_FOLLOWING, EXPR_PRECEDING, g_cluster, SQLTypeInfo::get_size(), Analyzer::Expr::get_type_info(), Analyzer::WindowFrame::getBoundExpr(), Analyzer::WindowFunction::getOrderKeys(), Analyzer::WindowFunction::hasRangeModeFraming(), kBIGINT, kINT, and kSMALLINT.

509  {
510  auto needs_bound_expr_codegen = [](const Analyzer::WindowFrame* window_frame) {
511  return window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_FOLLOWING ||
512  window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_PRECEDING;
513  };
514  const auto order_col_ti = window_func->getOrderKeys().front()->get_type_info();
515  auto encode_date_col_val = [&order_col_ti, this](llvm::Value* bound_expr_lv) {
516  if (order_col_ti.get_comp_param() == 16) {
517  return cgen_state_->emitCall(
518  "fixed_width_date_encode_noinline",
519  {bound_expr_lv,
520  cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(SQLTypeInfo(kSMALLINT)),
521  32),
522  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
523  } else {
524  return cgen_state_->emitCall("fixed_width_date_encode_noinline",
525  {bound_expr_lv,
526  cgen_state_->inlineIntNull(SQLTypeInfo(kINT)),
527  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
528  }
529  };
530  llvm::Value* bound_expr_lv{nullptr};
531  if (needs_bound_expr_codegen(frame_bound)) {
532  auto bound_expr = frame_bound->getBoundExpr();
533  if (auto dateadd_expr = dynamic_cast<const Analyzer::DateaddExpr*>(bound_expr)) {
534  if (dateadd_expr->get_datetime_expr()->get_type_info().is_encoded_timestamp()) {
535  dateadd_expr->set_fixed_encoding_null_val();
536  }
537  }
538  auto bound_expr_lvs = code_generator.codegen(bound_expr, true, co);
539  bound_expr_lv = bound_expr_lvs.front();
540  if (order_col_ti.is_date() && window_func->hasRangeModeFraming()) {
541  if (g_cluster) {
542  throw std::runtime_error(
543  "Range mode with date type ordering column is not supported yet.");
544  }
545  bound_expr_lv = encode_date_col_val(bound_expr_lv);
546  }
547  if (frame_bound->getBoundExpr()->get_type_info().get_size() != 8) {
548  bound_expr_lv = cgen_state_->castToTypeIn(bound_expr_lv, 64);
549  }
550  } else {
551  bound_expr_lv = cgen_state_->llInt((int64_t)-1);
552  }
553  CHECK(bound_expr_lv);
554  return bound_expr_lv;
555 }
bool hasRangeModeFraming() const
Definition: Analyzer.h:2604
HOST DEVICE int get_size() const
Definition: sqltypes.h:390
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2578
const Analyzer::Expr * getBoundExpr() const
Definition: Analyzer.h:2486
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:83
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK(condition)
Definition: Logger.h:289
bool g_cluster
Definition: sqltypes.h:60

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenFrameBoundRange ( const Analyzer::WindowFunction window_func,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 911 of file WindowFunctionIR.cpp.

References CHECK, Analyzer::WindowFunction::getFrameEndBound(), and Analyzer::WindowFunction::getFrameStartBound().

914  {
915  const auto frame_start_bound = window_func->getFrameStartBound();
916  const auto frame_end_bound = window_func->getFrameEndBound();
917  auto frame_start_bound_expr_lv =
918  codegenFrameBoundExpr(window_func, frame_start_bound, code_generator, co);
919  auto frame_end_bound_expr_lv =
920  codegenFrameBoundExpr(window_func, frame_end_bound, code_generator, co);
921  CHECK(frame_start_bound_expr_lv);
922  CHECK(frame_end_bound_expr_lv);
923  return std::make_pair(frame_start_bound_expr_lv, frame_end_bound_expr_lv);
924 }
const Analyzer::WindowFrame * getFrameStartBound() const
Definition: Analyzer.h:2582
const Analyzer::WindowFrame * getFrameEndBound() const
Definition: Analyzer.h:2589
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenFrameNullRange ( WindowFunctionContext window_func_context,
llvm::Value *  partition_index_lv 
) const
private

Definition at line 798 of file WindowFunctionIR.cpp.

References get_int_type(), WindowFunctionContext::getNullValueEndPos(), and WindowFunctionContext::getNullValueStartPos().

800  {
801  const auto pi64_type =
802  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
803  const auto null_start_pos_buf = cgen_state_->llInt(
804  reinterpret_cast<int64_t>(window_func_context->getNullValueStartPos()));
805  const auto null_start_pos_buf_ptr =
806  cgen_state_->ir_builder_.CreateIntToPtr(null_start_pos_buf, pi64_type);
807  const auto null_start_pos_ptr =
808  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
809  null_start_pos_buf_ptr,
810  partition_index_lv);
811  auto null_start_pos_lv = cgen_state_->ir_builder_.CreateLoad(
812  null_start_pos_ptr->getType()->getPointerElementType(),
813  null_start_pos_ptr,
814  "null_start_pos");
815  const auto null_end_pos_buf = cgen_state_->llInt(
816  reinterpret_cast<int64_t>(window_func_context->getNullValueEndPos()));
817  const auto null_end_pos_buf_ptr =
818  cgen_state_->ir_builder_.CreateIntToPtr(null_end_pos_buf, pi64_type);
819  const auto null_end_pos_ptr = cgen_state_->ir_builder_.CreateGEP(
820  get_int_type(64, cgen_state_->context_), null_end_pos_buf_ptr, partition_index_lv);
821  auto null_end_pos_lv = cgen_state_->ir_builder_.CreateLoad(
822  null_end_pos_ptr->getType()->getPointerElementType(),
823  null_end_pos_ptr,
824  "null_end_pos");
825  return std::make_pair(null_start_pos_lv, null_end_pos_lv);
826 }
int64_t * getNullValueEndPos() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
int64_t * getNullValueStartPos() const

+ Here is the call graph for this function:

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const ExecutionOptions eo 
)
private

Definition at line 1133 of file IRCodegen.cpp.

References ExecutionOptions::allow_runtime_query_interrupt, anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, JoinLoop::codegen(), CompilationOptions::device_type, JoinLoopDomain::element_count, get_int_array_type(), get_int_type(), INNER, MultiSet, CodeGenerator::posArg(), GroupByAndAggregate::query_infos_, query_mem_desc, Set, and ExecutionOptions::with_dynamic_watchdog.

1140  {
1142  const auto exit_bb =
1143  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->current_func_);
1144  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
1145  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1146  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1147  CodeGenerator code_generator(this);
1148 
1149  llvm::BasicBlock* loops_entry_bb{nullptr};
1150  auto has_range_join =
1151  std::any_of(join_loops.begin(), join_loops.end(), [](const auto& join_loop) {
1152  return join_loop.kind() == JoinLoopKind::MultiSet;
1153  });
1154  if (has_range_join) {
1155  CHECK_EQ(join_loops.size(), size_t(1));
1156  const auto element_count =
1157  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 9);
1158 
1159  auto compute_packed_offset = [](const int32_t x, const int32_t y) -> uint64_t {
1160  const uint64_t y_shifted = static_cast<uint64_t>(y) << 32;
1161  return y_shifted | static_cast<uint32_t>(x);
1162  };
1163 
1164  const auto values_arr = std::vector<llvm::Constant*>{
1165  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
1166  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1167  compute_packed_offset(0, 1)),
1168  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1169  compute_packed_offset(0, -1)),
1170  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1171  compute_packed_offset(1, 0)),
1172  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1173  compute_packed_offset(1, 1)),
1174  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1175  compute_packed_offset(1, -1)),
1176  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1177  compute_packed_offset(-1, 0)),
1178  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1179  compute_packed_offset(-1, 1)),
1180  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1181  compute_packed_offset(-1, -1))};
1182 
1183  const auto constant_values_array = llvm::ConstantArray::get(
1184  get_int_array_type(64, 9, cgen_state_->context_), values_arr);
1185  CHECK(cgen_state_->module_);
1186  const auto values =
1187  new llvm::GlobalVariable(*cgen_state_->module_,
1188  get_int_array_type(64, 9, cgen_state_->context_),
1189  true,
1190  llvm::GlobalValue::LinkageTypes::InternalLinkage,
1191  constant_values_array);
1192  JoinLoop join_loop(
1195  [element_count, values](const std::vector<llvm::Value*>& v) {
1196  JoinLoopDomain domain{{0}};
1197  domain.element_count = element_count;
1198  domain.values_buffer = values;
1199  return domain;
1200  },
1201  nullptr,
1202  nullptr,
1203  nullptr,
1204  nullptr,
1205  "range_key_loop");
1206 
1207  loops_entry_bb = JoinLoop::codegen(
1208  {join_loop},
1209  [this,
1210  query_func,
1211  &query_mem_desc,
1212  &co,
1213  &eo,
1214  &group_by_and_aggregate,
1215  &join_loops,
1216  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1217  auto& builder = cgen_state_->ir_builder_;
1218 
1219  auto body_exit_bb =
1220  llvm::BasicBlock::Create(cgen_state_->context_,
1221  "range_key_inner_body_exit",
1222  builder.GetInsertBlock()->getParent());
1223 
1224  auto range_key_body_bb =
1225  llvm::BasicBlock::Create(cgen_state_->context_,
1226  "range_key_loop_body",
1227  builder.GetInsertBlock()->getParent());
1228  builder.SetInsertPoint(range_key_body_bb);
1229 
1230  const auto body_loops_entry_bb = JoinLoop::codegen(
1231  join_loops,
1232  [this,
1233  query_func,
1234  &query_mem_desc,
1235  &co,
1236  &eo,
1237  &group_by_and_aggregate,
1238  &join_loops,
1239  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1240  addJoinLoopIterator(prev_iters, join_loops.size());
1241  auto& builder = cgen_state_->ir_builder_;
1242  const auto loop_body_bb =
1243  llvm::BasicBlock::Create(builder.getContext(),
1244  "loop_body",
1245  builder.GetInsertBlock()->getParent());
1246  builder.SetInsertPoint(loop_body_bb);
1247  const bool can_return_error =
1248  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1249  if (can_return_error || cgen_state_->needs_error_check_ ||
1250  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1251  createErrorCheckControlFlow(query_func,
1252  eo.with_dynamic_watchdog,
1253  eo.allow_runtime_query_interrupt,
1254  join_loops,
1255  co.device_type,
1256  group_by_and_aggregate.query_infos_);
1257  }
1258  return loop_body_bb;
1259  },
1260  prev_iters.back(),
1261  body_exit_bb,
1262  cgen_state_.get());
1263 
1264  builder.SetInsertPoint(range_key_body_bb);
1265  cgen_state_->ir_builder_.CreateBr(body_loops_entry_bb);
1266 
1267  builder.SetInsertPoint(body_exit_bb);
1268  return range_key_body_bb;
1269  },
1270  code_generator.posArg(nullptr),
1271  exit_bb,
1272  cgen_state_.get());
1273  } else {
1274  loops_entry_bb = JoinLoop::codegen(
1275  join_loops,
1276  /*body_codegen=*/
1277  [this,
1278  query_func,
1279  &query_mem_desc,
1280  &co,
1281  &eo,
1282  &group_by_and_aggregate,
1283  &join_loops,
1284  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1286  addJoinLoopIterator(prev_iters, join_loops.size());
1287  auto& builder = cgen_state_->ir_builder_;
1288  const auto loop_body_bb = llvm::BasicBlock::Create(
1289  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
1290  builder.SetInsertPoint(loop_body_bb);
1291  const bool can_return_error =
1292  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1293  if (can_return_error || cgen_state_->needs_error_check_ ||
1294  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1295  createErrorCheckControlFlow(query_func,
1296  eo.with_dynamic_watchdog,
1297  eo.allow_runtime_query_interrupt,
1298  join_loops,
1299  co.device_type,
1300  group_by_and_aggregate.query_infos_);
1301  }
1302  return loop_body_bb;
1303  },
1304  /*outer_iter=*/code_generator.posArg(nullptr),
1305  exit_bb,
1306  cgen_state_.get());
1307  }
1308  CHECK(loops_entry_bb);
1309  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1310  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
1311 }
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
#define CHECK_EQ(x, y)
Definition: Logger.h:297
llvm::Value * element_count
Definition: JoinLoop.h:46
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, CgenState *cgen_state)
Definition: JoinLoop.cpp:50
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1115
#define CHECK(condition)
Definition: Logger.h:289
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
llvm::ArrayType * get_int_array_type(int const width, int count, llvm::LLVMContext &context)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenLoadCurrentValueFromColBuf ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
WindowFrameBoundFuncArgs args 
) const
private

Definition at line 661 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegenWindowPosition(), WindowFrameBoundFuncArgs::current_row_pos_lv, get_fp_type(), get_int_type(), Analyzer::WindowFunction::getOrderKeys(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFunction::isFrameNavigateWindowFunction(), and WindowFrameBoundFuncArgs::order_key_buf_ptr_lv.

664  {
665  llvm::Value* current_col_value_ptr_lv{nullptr};
666  const auto order_key_size_in_byte = getOrderKeySize(window_func_context) * 8;
667  auto const order_key_ptr =
668  window_func_context->getWindowFunction()->getOrderKeys().front();
669  CHECK(order_key_ptr);
670  auto const order_col_ti = order_key_ptr->get_type_info();
671  auto const order_col_llvm_type =
672  order_col_ti.is_fp() ? get_fp_type(order_key_size_in_byte, cgen_state_->context_)
673  : get_int_type(order_key_size_in_byte, cgen_state_->context_);
674  if (!window_func_context->getWindowFunction()->isFrameNavigateWindowFunction()) {
675  auto rowid_in_partition_lv = code_generator.codegenWindowPosition(
676  window_func_context, args.current_row_pos_lv);
677  current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
678  order_col_llvm_type, args.order_key_buf_ptr_lv, rowid_in_partition_lv);
679  } else {
680  current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
681  order_col_llvm_type, args.order_key_buf_ptr_lv, args.current_row_pos_lv);
682  }
683  return cgen_state_->ir_builder_.CreateLoad(
684  current_col_value_ptr_lv->getType()->getPointerElementType(),
685  current_col_value_ptr_lv,
686  "current_col_value");
687 }
bool isFrameNavigateWindowFunction() const
Definition: Analyzer.h:2624
llvm::Value * current_row_pos_lv
Definition: WindowContext.h:89
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2578
llvm::Value * codegenWindowPosition(const WindowFunctionContext *window_func_context, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:230
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1299
llvm::Value * order_key_buf_ptr_lv
Definition: WindowContext.h:95
#define CHECK(condition)
Definition: Logger.h:289
const Analyzer::WindowFunction * getWindowFunction() const
size_t getOrderKeySize(WindowFunctionContext *window_func_context) const

+ Here is the call graph for this function:

std::pair< std::string, llvm::Value * > Executor::codegenLoadOrderKeyBufPtr ( WindowFunctionContext window_func_context) const
private

Definition at line 828 of file WindowFunctionIR.cpp.

References anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getOrderKeyColumnBufferTypes(), Analyzer::WindowFunction::getOrderKeys(), and WindowFunctionContext::getWindowFunction().

829  {
830  auto const order_key_ti =
831  window_func_context->getWindowFunction()->getOrderKeys().front()->get_type_info();
832  auto const order_key_size = order_key_ti.get_size();
833  auto const order_col_type_name = get_col_type_name_by_size(
834  order_key_size,
835  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
836  size_t order_key_size_in_byte = order_key_size * 8;
837  auto const order_key_type =
838  order_key_ti.is_fp() ? get_fp_type(order_key_size_in_byte, cgen_state_->context_)
839  : get_int_type(order_key_size_in_byte, cgen_state_->context_);
840  auto const order_key_buf_type = llvm::PointerType::get(order_key_type, 0);
841  auto const order_key_buf = cgen_state_->llInt(
842  reinterpret_cast<int64_t>(window_func_context->getOrderKeyColumnBuffers().front()));
843  auto const order_key_buf_ptr_lv =
844  cgen_state_->ir_builder_.CreateIntToPtr(order_key_buf, order_key_buf_type);
845  return std::make_pair(order_col_type_name, order_key_buf_ptr_lv);
846 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
const std::vector< SQLTypeInfo > & getOrderKeyColumnBufferTypes() const
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2578
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1299
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

WindowPartitionBufferPtrs Executor::codegenLoadPartitionBuffers ( WindowFunctionContext window_func_context,
llvm::Value *  partition_index_lv 
) const
private

Definition at line 848 of file WindowFunctionIR.cpp.

References WindowFunctionContext::counts(), WindowPartitionBufferPtrs::current_partition_start_offset_lv, get_int_type(), WindowPartitionBufferPtrs::num_elem_current_partition_lv, WindowFunctionContext::partitionStartOffset(), WindowFunctionContext::payload(), WindowFunctionContext::sortedPartition(), WindowPartitionBufferPtrs::target_partition_rowid_ptr_lv, and WindowPartitionBufferPtrs::target_partition_sorted_rowid_ptr_lv.

850  {
851  WindowPartitionBufferPtrs bufferPtrs;
852  const auto pi64_type =
853  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
854  const auto pi32_type =
855  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
856 
857  // partial sum of # elems of partitions
858  auto partition_start_offset_buf_lv = cgen_state_->llInt(
859  reinterpret_cast<int64_t>(window_func_context->partitionStartOffset()));
860  auto partition_start_offset_ptr_lv =
861  cgen_state_->ir_builder_.CreateIntToPtr(partition_start_offset_buf_lv, pi64_type);
862 
863  // get start offset of the current partition
864  auto current_partition_start_offset_ptr_lv =
865  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
866  partition_start_offset_ptr_lv,
867  partition_index_lv);
868  bufferPtrs.current_partition_start_offset_lv = cgen_state_->ir_builder_.CreateLoad(
869  current_partition_start_offset_ptr_lv->getType()->getPointerElementType(),
870  current_partition_start_offset_ptr_lv);
871 
872  // row_id buf of the current partition
873  const auto partition_rowid_buf_lv =
874  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->payload()));
875  const auto partition_rowid_ptr_lv =
876  cgen_state_->ir_builder_.CreateIntToPtr(partition_rowid_buf_lv, pi32_type);
877  bufferPtrs.target_partition_rowid_ptr_lv =
878  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
879  partition_rowid_ptr_lv,
881 
882  // row_id buf of ordered current partition
883  const auto sorted_rowid_lv = cgen_state_->llInt(
884  reinterpret_cast<int64_t>(window_func_context->sortedPartition()));
885  const auto sorted_rowid_ptr_lv =
886  cgen_state_->ir_builder_.CreateIntToPtr(sorted_rowid_lv, pi64_type);
888  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
889  sorted_rowid_ptr_lv,
891 
892  // # elems per partition
893  const auto partition_count_buf =
894  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->counts()));
895  auto partition_count_buf_ptr_lv =
896  cgen_state_->ir_builder_.CreateIntToPtr(partition_count_buf, pi32_type);
897 
898  // # elems of the given partition
899  const auto num_elem_current_partition_ptr =
900  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
901  partition_count_buf_ptr_lv,
902  partition_index_lv);
903  bufferPtrs.num_elem_current_partition_lv = cgen_state_->castToTypeIn(
904  cgen_state_->ir_builder_.CreateLoad(
905  num_elem_current_partition_ptr->getType()->getPointerElementType(),
906  num_elem_current_partition_ptr),
907  64);
908  return bufferPtrs;
909 }
llvm::Value * current_partition_start_offset_lv
llvm::Value * num_elem_current_partition_lv
const int32_t * counts() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Value * target_partition_sorted_rowid_ptr_lv
llvm::Value * target_partition_rowid_ptr_lv
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const int64_t * partitionStartOffset() const
const int64_t * sortedPartition() const
const int32_t * payload() const

+ Here is the call graph for this function:

llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3218 of file NativeCodegen.cpp.

3220  {
3222  if (!co.filter_on_deleted_column) {
3223  return nullptr;
3224  }
3225  CHECK(!ra_exe_unit.input_descs.empty());
3226  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
3227  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
3228  return nullptr;
3229  }
3230  const auto deleted_cd =
3231  plan_state_->getDeletedColForTable(outer_input_desc.getTableId());
3232  if (!deleted_cd) {
3233  return nullptr;
3234  }
3235  CHECK(deleted_cd->columnType.is_boolean());
3236  const auto deleted_expr =
3237  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
3238  outer_input_desc.getTableId(),
3239  deleted_cd->columnId,
3240  outer_input_desc.getNestLevel());
3241  CodeGenerator code_generator(this);
3242  const auto is_deleted =
3243  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
3244  const auto is_deleted_bb = llvm::BasicBlock::Create(
3245  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
3246  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
3247  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
3248  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
3249  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
3250  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3251  cgen_state_->ir_builder_.SetInsertPoint(bb);
3252  return bb;
3253 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1354
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:289
void Executor::codegenWindowAvgEpilogue ( llvm::Value *  crt_val,
llvm::Value *  window_func_null_val 
)
private

Definition at line 1287 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

1288  {
1290  const auto window_func_context =
1292  const auto window_func = window_func_context->getWindowFunction();
1293  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1294  const auto pi32_type =
1295  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1296  const auto pi64_type =
1297  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1298  const auto aggregate_state_type =
1299  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1300  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1301  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1302  auto aggregate_state_count = cgen_state_->ir_builder_.CreateIntToPtr(
1303  aggregate_state_count_i64, aggregate_state_type);
1304  std::string agg_count_func_name = "agg_count";
1305  switch (window_func_ti.get_type()) {
1306  case kFLOAT: {
1307  agg_count_func_name += "_float";
1308  break;
1309  }
1310  case kDOUBLE: {
1311  agg_count_func_name += "_double";
1312  break;
1313  }
1314  default: {
1315  break;
1316  }
1317  }
1318  agg_count_func_name += "_skip_val";
1319  cgen_state_->emitCall(agg_count_func_name,
1320  {aggregate_state_count, crt_val, window_func_null_val});
1321 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenWindowFrameBounds ( WindowFunctionContext window_func_context,
const Analyzer::WindowFrame frame_start_bound,
const Analyzer::WindowFrame frame_end_bound,
llvm::Value *  order_key_col_null_val_lv,
WindowFrameBoundFuncArgs args,
CodeGenerator code_generator 
)
private

Definition at line 926 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, CHECK, WindowFrameBoundFuncArgs::current_col_value_lv, WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFrame::hasTimestampTypeFrameBound(), and WindowFrameBoundFuncArgs::order_type_col_name.

932  {
933  const auto window_func = window_func_context->getWindowFunction();
934  CHECK(window_func);
935  const auto is_timestamp_type_frame = frame_start_bound->hasTimestampTypeFrameBound() ||
936  frame_end_bound->hasTimestampTypeFrameBound();
937 
938  if (window_func->hasRangeModeFraming()) {
939  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1);
940  CHECK(window_func->getOrderKeys().size() == 1UL);
941  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1UL);
942  args.order_type_col_name = getOrderKeyTypeName(window_func_context);
943  args.current_col_value_lv =
944  codegenLoadCurrentValueFromColBuf(window_func_context, code_generator, args);
945  }
946 
947  auto get_order_key_null_val = [is_timestamp_type_frame,
948  &order_key_col_null_val_lv,
949  this](const Analyzer::WindowFrame* frame_bound) {
950  return is_timestamp_type_frame && !frame_bound->isCurrentRowBound()
951  ? cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64)
952  : order_key_col_null_val_lv;
953  };
954  auto frame_start_bound_lv =
955  codegenFrameBound(true,
956  window_func->hasRangeModeFraming(),
957  window_func->isFrameNavigateWindowFunction(),
958  frame_start_bound,
959  is_timestamp_type_frame,
960  get_order_key_null_val(frame_start_bound),
961  args);
962  auto frame_end_bound_lv =
963  codegenFrameBound(false,
964  window_func->hasRangeModeFraming(),
965  window_func->isFrameNavigateWindowFunction(),
966  frame_end_bound,
967  is_timestamp_type_frame,
968  get_order_key_null_val(frame_end_bound),
969  args);
970  CHECK(frame_start_bound_lv);
971  CHECK(frame_end_bound_lv);
972  return std::make_pair(frame_start_bound_lv, frame_end_bound_lv);
973 }
bool hasTimestampTypeFrameBound() const
Definition: Analyzer.h:2491
llvm::Value * current_col_value_lv
Definition: WindowContext.h:90
llvm::Value * codegenFrameBound(bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Value * codegenLoadCurrentValueFromColBuf(WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
const std::string getOrderKeyTypeName(WindowFunctionContext *window_func_context) const
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
#define CHECK(condition)
Definition: Logger.h:289
const Analyzer::WindowFunction * getWindowFunction() const
std::string order_type_col_name
Definition: WindowContext.h:96

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 21 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, COUNT, COUNT_IF, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAG_IN_FRAME, LAST_VALUE, LEAD, LEAD_IN_FRAME, LOG, MAX, MIN, NTH_VALUE, NTILE, PERCENT_RANK, RANK, ROW_NUMBER, SUM, and SUM_IF.

22  {
24  CodeGenerator code_generator(this);
25 
26  const auto window_func_context =
28  target_index);
29  const auto window_func = window_func_context->getWindowFunction();
30  switch (window_func->getKind()) {
35  // they are always evaluated on the entire partition
36  return code_generator.codegenWindowPosition(window_func_context,
37  code_generator.posArg(nullptr));
38  }
41  // they are always evaluated on the entire partition
42  return cgen_state_->emitCall("percent_window_func",
43  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
44  window_func_context->output())),
45  code_generator.posArg(nullptr)});
46  }
52  // they are always evaluated on the current frame
54  const auto& args = window_func->getArgs();
55  CHECK(!args.empty());
56  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
57  CHECK_EQ(arg_lvs.size(), size_t(1));
58  return arg_lvs.front();
59  }
67  // they are always evaluated on the current frame
69  }
73  }
74  default: {
75  LOG(FATAL) << "Invalid window function kind";
76  }
77  }
78  return nullptr;
79 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
#define LOG(tag)
Definition: Logger.h:283
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
static const WindowProjectNodeContext * get(Executor *executor)
const WindowFunctionContext * activateWindowFunctionContext(Executor *executor, const size_t target_index) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * codegenWindowNavigationFunctionOnFrame(const CompilationOptions &co)
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:289
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregate ( const CompilationOptions co)
private

Definition at line 241 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, CHECK, WindowProjectNodeContext::get(), get_int_type(), and WindowProjectNodeContext::getActiveWindowFunctionContext().

241  {
243  const auto reset_state_false_bb = codegenWindowResetStateControlFlow();
244  auto aggregate_state = aggregateWindowStatePtr();
245  llvm::Value* aggregate_state_count = nullptr;
246  const auto window_func_context =
248  const auto window_func = window_func_context->getWindowFunction();
249  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
250  const auto aggregate_state_count_i64 = cgen_state_->llInt(
251  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
252  const auto pi64_type =
253  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
254  aggregate_state_count =
255  cgen_state_->ir_builder_.CreateIntToPtr(aggregate_state_count_i64, pi64_type);
256  }
257  codegenWindowFunctionStateInit(aggregate_state);
258  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
259  const auto count_zero = cgen_state_->llInt(int64_t(0));
260  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
261  }
262  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
263  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
265  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
266 }
llvm::Value * aggregateWindowStatePtr()
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
static const WindowProjectNodeContext * get(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
#define CHECK(condition)
Definition: Logger.h:289
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
llvm::BasicBlock * codegenWindowResetStateControlFlow()

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 975 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, COUNT_IF, WindowFrameBoundFuncArgs::current_partition_start_offset_lv, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), inline_fixed_encoding_null_val(), kDATE, kDOUBLE, kENCODING_DATE_IN_DAYS, kENCODING_FIXED, kFLOAT, kSUM_IF, kTIME, kTIMESTAMP, kTINYINT, MAX, MIN, CodeGenerator::posArg(), SUM, SUM_IF, and window_function_conditional_aggregate().

976  {
978  const auto window_func_context =
980  const auto window_func = window_func_context->getWindowFunction();
981  const auto window_func_ti = get_adjusted_window_type_info(window_func);
982  const auto window_func_null_val =
983  window_func_ti.is_fp()
984  ? cgen_state_->inlineFpNull(window_func_ti)
985  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
986  if (window_func_context->elementCount() == 0) {
987  // we do not need to generate a code for an empty input table
988  return window_func_null_val;
989  }
990  const auto& args = window_func->getArgs();
991  CodeGenerator code_generator(this);
992  if (window_func_context->needsToBuildAggregateTree()) {
993  // compute an aggregated value for each row of the window frame by using segment tree
994  // when constructing a window context, we build a necessary segment tree (so called
995  // `aggregate tree`) to query the aggregated value of the specific window frame
996  const auto pi64_type =
997  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
998  const auto ppi64_type = llvm::PointerType::get(
999  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0), 0);
1000 
1001  auto [frame_start_bound_expr_lv, frame_end_bound_expr_lv] =
1002  codegenFrameBoundRange(window_func, code_generator, co);
1003 
1004  // compute aggregated value over the computed frame range
1005  auto current_row_pos_lv = code_generator.posArg(nullptr);
1006  auto partition_index_lv =
1007  codegenCurrentPartitionIndex(window_func_context, current_row_pos_lv);
1008 
1009  // ordering column buffer
1010  const auto target_col_ti = args.front()->get_type_info();
1011  const auto target_col_size = target_col_ti.get_size();
1012  const auto col_type_name =
1013  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
1014 
1015  const auto partition_buf_ptrs =
1016  codegenLoadPartitionBuffers(window_func_context, partition_index_lv);
1017 
1018  auto [order_col_type_name, order_key_buf_ptr_lv] =
1019  codegenLoadOrderKeyBufPtr(window_func_context);
1020 
1021  // null value of the ordering column
1022  const auto order_key_buf_ti =
1023  window_func_context->getOrderKeyColumnBufferTypes().front();
1024  auto const ordering_spec = window_func->getCollation().front();
1025  llvm::Value* order_key_col_null_val_lv{nullptr};
1026  switch (order_key_buf_ti.get_type()) {
1027  case kDATE:
1028  case kTIMESTAMP:
1029  case kTIME: {
1030  if (order_key_buf_ti.get_compression() == kENCODING_FIXED ||
1031  order_key_buf_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
1032  auto null_val = inline_fixed_encoding_null_val(order_key_buf_ti);
1033  order_key_col_null_val_lv = cgen_state_->llInt((int32_t)null_val);
1034  break;
1035  }
1036  }
1037  default: {
1038  order_key_col_null_val_lv = cgen_state_->inlineNull(order_key_buf_ti);
1039  break;
1040  }
1041  }
1042 
1043  auto [null_start_pos_lv, null_end_pos_lv] =
1044  codegenFrameNullRange(window_func_context, partition_index_lv);
1045  auto nulls_first_lv = cgen_state_->llBool(ordering_spec.nulls_first);
1046 
1048  frame_start_bound_expr_lv,
1049  frame_end_bound_expr_lv,
1050  current_row_pos_lv,
1051  nullptr,
1052  partition_buf_ptrs.current_partition_start_offset_lv,
1053  cgen_state_->llInt((int64_t)0),
1054  cgen_state_->llInt((int64_t)1),
1055  partition_buf_ptrs.num_elem_current_partition_lv,
1056  order_key_buf_ptr_lv,
1057  "",
1058  partition_buf_ptrs.target_partition_rowid_ptr_lv,
1059  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
1060  nulls_first_lv,
1061  null_start_pos_lv,
1062  null_end_pos_lv};
1063  auto [frame_start_bound_lv, frame_end_bound_lv] =
1064  codegenWindowFrameBounds(window_func_context,
1065  window_func->getFrameStartBound(),
1066  window_func->getFrameEndBound(),
1067  order_key_col_null_val_lv,
1069  code_generator);
1070 
1071  // codegen to send a query with frame bound to aggregate tree searcher
1072  llvm::Value* aggregation_trees_lv{nullptr};
1073  llvm::Value* invalid_val_lv{nullptr};
1074  llvm::Value* null_val_lv{nullptr};
1075  std::string aggregation_tree_search_func_name{"search_"};
1076  std::string aggregation_tree_getter_func_name{"get_"};
1077 
1078  // prepare null values and aggregate_tree getter and searcher depending on
1079  // a type of the ordering column
1080  auto agg_expr_ti = args.front()->get_type_info();
1081  if (agg_expr_ti.is_fp()) {
1082  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1083  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::max());
1084  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1085  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::lowest());
1086  } else {
1087  invalid_val_lv = cgen_state_->llFp((double)0);
1088  }
1089  null_val_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1090  aggregation_tree_search_func_name += "double";
1091  aggregation_tree_getter_func_name += "double";
1092  } else {
1093  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1094  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::max());
1095  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1096  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::lowest());
1097  } else {
1098  invalid_val_lv = cgen_state_->llInt((int64_t)0);
1099  }
1100  null_val_lv = cgen_state_->llInt(inline_int_null_value<int64_t>());
1101  aggregation_tree_search_func_name += "int64_t";
1102  aggregation_tree_getter_func_name += "integer";
1103  }
1104 
1105  // derived aggregation has a different code path
1106  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1107  aggregation_tree_search_func_name += "_derived";
1108  aggregation_tree_getter_func_name += "_derived";
1109  }
1110 
1111  // get a buffer holding aggregate trees for each partition
1112  if (agg_expr_ti.is_fp()) {
1113  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1114  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1115  window_func_context->getDerivedAggregationTreesForDoubleTypeWindowExpr()));
1116  } else {
1117  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1118  window_func_context->getAggregationTreesForDoubleTypeWindowExpr()));
1119  }
1120  } else {
1121  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1122  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1123  window_func_context->getDerivedAggregationTreesForIntegerTypeWindowExpr()));
1124  } else {
1125  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1126  window_func_context->getAggregationTreesForIntegerTypeWindowExpr()));
1127  }
1128  }
1129 
1130  CHECK(aggregation_trees_lv);
1131  CHECK(invalid_val_lv);
1132  aggregation_tree_search_func_name += "_aggregation_tree";
1133  aggregation_tree_getter_func_name += "_aggregation_tree";
1134 
1135  // get the aggregate tree of the current partition from a window context
1136  auto aggregation_trees_ptr =
1137  cgen_state_->ir_builder_.CreateIntToPtr(aggregation_trees_lv, ppi64_type);
1138  auto target_aggregation_tree_lv = cgen_state_->emitCall(
1139  aggregation_tree_getter_func_name, {aggregation_trees_ptr, partition_index_lv});
1140 
1141  // a depth of segment tree
1142  const auto tree_depth_buf = cgen_state_->llInt(
1143  reinterpret_cast<int64_t>(window_func_context->getAggregateTreeDepth()));
1144  const auto tree_depth_buf_ptr =
1145  cgen_state_->ir_builder_.CreateIntToPtr(tree_depth_buf, pi64_type);
1146  const auto current_partition_tree_depth_buf_ptr = cgen_state_->ir_builder_.CreateGEP(
1147  get_int_type(64, cgen_state_->context_), tree_depth_buf_ptr, partition_index_lv);
1148  const auto current_partition_tree_depth_lv = cgen_state_->ir_builder_.CreateLoad(
1149  current_partition_tree_depth_buf_ptr->getType()->getPointerElementType(),
1150  current_partition_tree_depth_buf_ptr);
1151 
1152  // a fanout of the current partition's segment tree
1153  const auto aggregation_tree_fanout_lv = cgen_state_->llInt(
1154  static_cast<int64_t>(window_func_context->getAggregateTreeFanout()));
1155 
1156  // agg_type
1157  const auto agg_type_lv =
1158  cgen_state_->llInt(static_cast<int32_t>(window_func->getKind()));
1159 
1160  // send a query to the aggregate tree with the frame range:
1161  // `frame_start_bound_lv` ~ `frame_end_bound_lv`
1162  auto res_lv =
1163  cgen_state_->emitCall(aggregation_tree_search_func_name,
1164  {target_aggregation_tree_lv,
1165  frame_start_bound_lv,
1166  frame_end_bound_lv,
1167  current_partition_tree_depth_lv,
1168  aggregation_tree_fanout_lv,
1169  cgen_state_->llBool(agg_expr_ti.is_decimal()),
1170  cgen_state_->llInt((int64_t)agg_expr_ti.get_scale()),
1171  invalid_val_lv,
1172  null_val_lv,
1173  agg_type_lv});
1174 
1175  // handling returned null value if exists
1176  std::string null_handler_func_name{"handle_null_val_"};
1177  std::vector<llvm::Value*> null_handler_args{res_lv, null_val_lv};
1178 
1179  // determine null_handling function's name
1180  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1181  // average aggregate function returns a value as a double
1182  // (and our search* function also returns a double)
1183  if (agg_expr_ti.is_fp()) {
1184  // fp type: double null value
1185  null_handler_func_name += "double_double";
1186  } else {
1187  // non-fp type: int64_t null type
1188  null_handler_func_name += "double_int64_t";
1189  }
1190  } else if (agg_expr_ti.is_fp()) {
1191  // fp type: double null value
1192  null_handler_func_name += "double_double";
1193  } else {
1194  // non-fp type: int64_t null type
1195  null_handler_func_name += "int64_t_int64_t";
1196  }
1197  null_handler_func_name += "_window_framing_agg";
1198 
1199  // prepare null_val
1200  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1201  if (agg_expr_ti.is_fp()) {
1202  null_handler_args.push_back(cgen_state_->llFp((double)0));
1203  } else {
1204  null_handler_args.push_back(cgen_state_->llInt((int64_t)0));
1205  }
1206  } else if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1207  null_handler_args.push_back(cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE)));
1208  } else {
1209  null_handler_args.push_back(cgen_state_->castToTypeIn(window_func_null_val, 64));
1210  }
1211  res_lv = cgen_state_->emitCall(null_handler_func_name, null_handler_args);
1212 
1213  // when AGG_TYPE is double, we get a double type return value we expect an integer
1214  // type value for the count aggregation
1215  if (window_func->getKind() == SqlWindowFunctionKind::COUNT && agg_expr_ti.is_fp()) {
1216  return cgen_state_->ir_builder_.CreateFPToSI(
1217  res_lv, get_int_type(64, cgen_state_->context_));
1218  }
1219  return res_lv;
1220  } else {
1221  auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
1222  Analyzer::Expr* arg_target_expr;
1223  std::vector<llvm::Value*> agg_func_args{aggregate_state};
1224  auto modified_window_func_null_val = window_func_null_val;
1225  if (args.empty() ||
1226  (window_func->getKind() == SqlWindowFunctionKind::COUNT &&
1227  dynamic_cast<Analyzer::Constant*>(args.front().get()) != nullptr)) {
1228  // a count aggregation without an expression: COUNT(1) or COUNT(*)
1229  agg_func_args.push_back(cgen_state_->llInt(int64_t(1)));
1230  } else {
1231  // we use #base_agg_func_name##_skip_val agg function
1232  // i.e.,int64_t agg_sum_skip_val(int64_t* agg, int64_t val, int64_t skip_val)
1233  arg_target_expr = args.front().get();
1234  const auto arg_lvs = code_generator.codegen(arg_target_expr, true, co);
1235  CHECK_EQ(arg_lvs.size(), size_t(1));
1236  // handling current row's value
1237  auto crt_val = arg_lvs.front();
1238  if ((window_func->getKind() == SqlWindowFunctionKind::SUM ||
1239  window_func->getKind() == SqlWindowFunctionKind::SUM_IF) &&
1240  !window_func_ti.is_fp()) {
1241  crt_val = code_generator.codegenCastBetweenIntTypes(
1242  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
1243  }
1244  agg_func_args.push_back(window_func_ti.get_type() == kFLOAT
1245  ? crt_val
1246  : cgen_state_->castToTypeIn(crt_val, 64));
1247  // handle null value and conditional value for conditional aggregates if necessary
1248  llvm::Value* cond_lv{nullptr};
1249  if (window_function_conditional_aggregate(window_func->getKind())) {
1250  switch (window_func->getKind()) {
1252  // COUNT_IF has a single condition expr which is always bool type
1253  modified_window_func_null_val = cgen_state_->castToTypeIn(
1254  cgen_state_->inlineNull(SQLTypeInfo(kTINYINT)), 64);
1255  break;
1257  // FP type input col uses its own null value depending on the type
1258  // otherwise (integer type input col), we use 8-byte type
1259  if (args.front()->get_type_info().is_integer()) {
1260  agg_func_args[1] = cgen_state_->castToTypeIn(agg_func_args[1], 64);
1261  // keep the null value but casting its type to 8-byte
1262  modified_window_func_null_val =
1263  cgen_state_->castToTypeIn(window_func_null_val, 64);
1264  }
1265  auto cond_expr_lv = code_generator.codegen(args[1].get(), true, co).front();
1266  cond_lv =
1268  }
1269  default:
1270  break;
1271  }
1272  }
1273  agg_name += "_skip_val";
1274  agg_func_args.push_back(modified_window_func_null_val);
1275  if (cond_lv) {
1276  agg_func_args.push_back(cond_lv);
1277  }
1278  }
1279  cgen_state_->emitCall(agg_name, agg_func_args);
1280  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1281  codegenWindowAvgEpilogue(agg_func_args[1], window_func_null_val);
1282  }
1283  return codegenAggregateWindowState();
1284  }
1285 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
#define CHECK_EQ(x, y)
Definition: Logger.h:297
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr(WindowFunctionContext *window_func_context) const
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
Definition: sqltypes.h:64
bool window_function_conditional_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:59
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string get_window_agg_name(const SqlWindowFunctionKind kind, const SQLTypeInfo &window_func_ti)
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * current_partition_start_offset_lv
Definition: WindowContext.h:91
Definition: sqltypes.h:68
llvm::Value * codegenConditionalAggregateCondValSelector(llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange(const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val)
llvm::Value * codegenAggregateWindowState()
#define CHECK(condition)
Definition: Logger.h:289
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
WindowPartitionBufferPtrs codegenLoadPartitionBuffers(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

void Executor::codegenWindowFunctionStateInit ( llvm::Value *  aggregate_state)
private

Definition at line 297 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, COUNT, COUNT_IF, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

// Executor::codegenWindowFunctionStateInit(llvm::Value* aggregate_state)
// Emits LLVM IR that initializes the aggregate-state slot for a window
// function: COUNT / COUNT_IF start from zero (in the matching float/double/
// int64 width), every other kind starts from the type-appropriate null value.
// The initial value is stored via agg_id_double / agg_id_float / agg_id
// depending on the window function's adjusted type.
// NOTE(review): Doxygen-rendered listing — the extractor dropped original
// lines 298 and 300 (per the reference list above, presumably the
// AUTOMATIC_IR_METADATA macro and the
// WindowProjectNodeContext::getActiveWindowFunctionContext() call — confirm
// against WindowFunctionIR.cpp). Not compilable as shown.
297  {
299  const auto window_func_context =
301  const auto window_func = window_func_context->getWindowFunction();
302  const auto window_func_ti = get_adjusted_window_type_info(window_func);
// Null value in the window function's type: FP null for FP types, otherwise
// the integer null widened to 64 bits.
303  const auto window_func_null_val =
304  window_func_ti.is_fp()
305  ? cgen_state_->inlineFpNull(window_func_ti)
306  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
307  llvm::Value* window_func_init_val;
308  const auto window_func_kind = window_func_context->getWindowFunction()->getKind();
// COUNT / COUNT_IF accumulate from zero; all other kinds start at null.
309  if (window_func_kind == SqlWindowFunctionKind::COUNT ||
310  window_func_kind == SqlWindowFunctionKind::COUNT_IF) {
311  switch (window_func_ti.get_type()) {
312  case kFLOAT: {
313  window_func_init_val = cgen_state_->llFp(float(0));
314  break;
315  }
316  case kDOUBLE: {
317  window_func_init_val = cgen_state_->llFp(double(0));
318  break;
319  }
320  default: {
321  window_func_init_val = cgen_state_->llInt(int64_t(0));
322  break;
323  }
324  }
325  } else {
326  window_func_init_val = window_func_null_val;
327  }
328  const auto pi32_type =
329  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
// Store the initial value through the type-specific agg_id runtime function;
// the kFLOAT case first bitcasts the state pointer to i32*.
330  switch (window_func_ti.get_type()) {
331  case kDOUBLE: {
332  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
333  break;
334  }
335  case kFLOAT: {
336  aggregate_state =
337  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
338  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
339  break;
340  }
341  default: {
342  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
343  break;
344  }
345  }
346 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowNavigationFunctionOnFrame ( const CompilationOptions co)
private

Definition at line 348 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, WindowFrameBoundFuncArgs::current_partition_start_offset_lv, anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size_with_encoding(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kENCODING_DATE_IN_DAYS, kSecsPerDay, LAG_IN_FRAME, and LEAD_IN_FRAME.

// Executor::codegenWindowNavigationFunctionOnFrame(const CompilationOptions& co)
// Generates IR for the framed navigation functions LEAD_IN_FRAME /
// LAG_IN_FRAME: it computes the current row's index within its frame,
// applies the lag/lead offset (arg[1]), computes the frame bounds, and calls
// a runtime "get_<type>_value_<logical>_type_in_frame" function to fetch the
// target column value; DATE_IN_DAYS-compressed targets are re-encoded via
// "encode_date". Returns the resulting llvm::Value*.
// NOTE(review): Doxygen-rendered listing — the extractor dropped original
// lines 350, 352, 378, 443 and 466 (per the reference list, line 443 is
// presumably the WindowFrameBoundFuncArgs aggregate that lines 444-460
// initialize, and 466 the args argument passed to codegenWindowFrameBounds —
// confirm against WindowFunctionIR.cpp). Not compilable as shown.
349  {
351  const auto window_func_context =
353  const auto window_func = window_func_context->getWindowFunction();
354  const auto window_func_kind = window_func->getKind();
355  // currently, we only support below two window frame navigation functions
356  bool const is_lag_or_lead_in_frame =
357  window_func_kind == SqlWindowFunctionKind::LEAD_IN_FRAME ||
358  window_func_kind == SqlWindowFunctionKind::LAG_IN_FRAME;
359  CHECK(is_lag_or_lead_in_frame);
360  bool const is_lag_in_frame = window_func_kind == SqlWindowFunctionKind::LAG_IN_FRAME;
361  const auto& args = window_func->getArgs();
362  CHECK(args.size() >= 1 && args.size() <= 3);
363  CodeGenerator code_generator(this);
364 
// Type names select the runtime helper overloads below; the physical
// (storage) type and the logical (result) type are handled separately.
365  const auto target_col_ti = args.front()->get_type_info();
366  const auto target_col_size = target_col_ti.get_size();
367  const auto target_col_type_name =
368  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
369  const auto target_col_logical_type_name = get_col_type_name_by_size(
370  window_func->get_type_info().get_size(), window_func->get_type_info().is_fp());
371 
372  // when target_column is fixed encoded, we store the actual column value by
373  // considering it, but our resultset analyzer only considers the type without encoding
374  // scheme so we handle them separately
375  auto logical_null_val_lv =
376  get_null_value_by_size(cgen_state_.get(), window_func->get_type_info());
377  auto target_col_null_val_lv =
379  if (window_func_context->elementCount() == 0) {
380  // we do not need to generate a code for an empty input table
381  return target_col_null_val_lv;
382  }
383 
384  auto [frame_start_bound_expr_lv, frame_end_bound_expr_lv] =
385  codegenFrameBoundRange(window_func, code_generator, co);
386 
387  auto current_row_pos_lv = code_generator.posArg(nullptr);
388  auto partition_index_lv =
389  codegenCurrentPartitionIndex(window_func_context, current_row_pos_lv);
390 
391  // load window function input expression; target_column
392  size_t target_col_size_in_byte = target_col_size * 8;
393  llvm::Type* col_buf_ptr_type =
394  target_col_ti.is_fp()
395  ? get_fp_type(target_col_size_in_byte, cgen_state_->context_)
396  : get_int_type(target_col_size_in_byte, cgen_state_->context_);
397  auto col_buf_type = llvm::PointerType::get(col_buf_ptr_type, 0);
398  auto target_col_buf_ptr_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
399  window_func_context->getColumnBufferForWindowFunctionExpressions().front()));
400  auto target_col_buf_lv =
401  cgen_state_->ir_builder_.CreateIntToPtr(target_col_buf_ptr_lv, col_buf_type);
402 
403  // prepare various buffer ptrs related to the window partition
404  auto partition_buf_ptrs =
405  codegenLoadPartitionBuffers(window_func_context, partition_index_lv);
406 
407  // null value of the ordering column
408  const auto order_key_buf_ti =
409  window_func_context->getOrderKeyColumnBufferTypes().front();
410  auto const ordering_spec = window_func->getCollation().front();
411  auto order_key_col_null_val_lv =
412  get_null_value_by_size_with_encoding(cgen_state_.get(), order_key_buf_ti);
413 
414  // load ordering column
415  auto [order_col_type_name, order_key_buf_ptr_lv] =
416  codegenLoadOrderKeyBufPtr(window_func_context);
417 
418  // null range
419  auto [null_start_pos_lv, null_end_pos_lv] =
420  codegenFrameNullRange(window_func_context, partition_index_lv);
421 
422  // compute a row index of the current row w.r.t the window frame it belongs to
// The runtime helper name encodes the order-key type and sort direction,
// e.g. "compute_<type>_less_equal_current_row_idx_in_frame".
423  std::string row_idx_on_frame_func = "compute_";
424  row_idx_on_frame_func += order_col_type_name;
425  row_idx_on_frame_func += ordering_spec.is_desc ? "_greater_equal" : "_less_equal";
426  row_idx_on_frame_func += "_current_row_idx_in_frame";
427  auto int64_t_one_val_lv = cgen_state_->llInt((int64_t)1);
428  auto nulls_first_lv = cgen_state_->llBool(ordering_spec.nulls_first);
429  auto cur_row_idx_in_frame_lv =
430  cgen_state_->emitCall(row_idx_on_frame_func,
431  {partition_buf_ptrs.num_elem_current_partition_lv,
432  current_row_pos_lv,
433  order_key_buf_ptr_lv,
434  partition_buf_ptrs.target_partition_rowid_ptr_lv,
435  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
436  order_key_col_null_val_lv,
437  nulls_first_lv,
438  null_start_pos_lv,
439  null_end_pos_lv});
440 
441  // compute frame bound for the current row
442  auto const int64_t_zero_val_lv = cgen_state_->llInt((int64_t)0);
444  frame_start_bound_expr_lv,
445  frame_end_bound_expr_lv,
446  window_func->hasRangeModeFraming() ? current_row_pos_lv : cur_row_idx_in_frame_lv,
447  nullptr,
448  window_func->hasRangeModeFraming()
449  ? int64_t_zero_val_lv
450  : partition_buf_ptrs.current_partition_start_offset_lv,
451  int64_t_zero_val_lv,
452  int64_t_one_val_lv,
453  partition_buf_ptrs.num_elem_current_partition_lv,
454  order_key_buf_ptr_lv,
455  "",
456  partition_buf_ptrs.target_partition_rowid_ptr_lv,
457  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
458  nulls_first_lv,
459  null_start_pos_lv,
460  null_end_pos_lv};
461  auto [frame_start_bound_lv, frame_end_bound_lv] =
462  codegenWindowFrameBounds(window_func_context,
463  window_func->getFrameStartBound(),
464  window_func->getFrameEndBound(),
465  order_key_col_null_val_lv,
467  code_generator);
468 
469  // apply offset
// LAG looks backwards (subtract the offset), LEAD forwards (add it).
470  llvm::Value* modified_cur_row_idx_in_frame_lv{nullptr};
471  auto const offset_lv =
472  cgen_state_->castToTypeIn(code_generator.codegen(args[1].get(), true, co)[0], 64);
473  if (is_lag_in_frame) {
474  modified_cur_row_idx_in_frame_lv =
475  cgen_state_->ir_builder_.CreateSub(cur_row_idx_in_frame_lv, offset_lv);
476  } else {
477  modified_cur_row_idx_in_frame_lv =
478  cgen_state_->ir_builder_.CreateAdd(cur_row_idx_in_frame_lv, offset_lv);
479  }
480  CHECK(modified_cur_row_idx_in_frame_lv);
481 
482  // get the target column value in the frame w.r.t the offset
483  std::string target_func_name = "get_";
484  target_func_name += target_col_type_name + "_value_";
485  target_func_name += target_col_logical_type_name + "_type_";
486  target_func_name += "in_frame";
487  auto res_lv =
488  cgen_state_->emitCall(target_func_name,
489  {modified_cur_row_idx_in_frame_lv,
490  frame_start_bound_lv,
491  frame_end_bound_lv,
492  target_col_buf_lv,
493  partition_buf_ptrs.target_partition_rowid_ptr_lv,
494  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
495  logical_null_val_lv,
496  target_col_null_val_lv});
// Day-encoded dates are stored as days; re-encode to seconds for the result.
497  if (target_col_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
498  res_lv = cgen_state_->emitCall(
499  "encode_date",
500  {res_lv, logical_null_val_lv, cgen_state_->llInt((int64_t)kSecsPerDay)});
501  }
502  CHECK(res_lv);
503  return res_lv;
504 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
static constexpr int64_t kSecsPerDay
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr(WindowFunctionContext *window_func_context) const
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * current_partition_start_offset_lv
Definition: WindowContext.h:91
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange(const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1299
#define CHECK(condition)
Definition: Logger.h:289
llvm::Value * get_null_value_by_size(CgenState *cgen_state, SQLTypeInfo col_ti)
WindowPartitionBufferPtrs codegenLoadPartitionBuffers(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
llvm::Value * get_null_value_by_size_with_encoding(CgenState *cgen_state, SQLTypeInfo col_ti)

+ Here is the call graph for this function:

llvm::BasicBlock * Executor::codegenWindowResetStateControlFlow ( )
private

Definition at line 268 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, WindowProjectNodeContext::getActiveWindowFunctionContext(), CodeGenerator::posArg(), and CodeGenerator::toBool().

// Executor::codegenWindowResetStateControlFlow()
// Emits a branch on the "partition start" bitset: when the current row
// (posArg) starts a new partition, control falls into reset_state.true so
// the caller can emit state-reset code; the function leaves the insert point
// at reset_state.true and returns the reset_state.false block for the caller
// to continue into.
// NOTE(review): Doxygen-rendered listing — the extractor dropped original
// lines 269 and 271 (per the reference list, presumably AUTOMATIC_IR_METADATA
// and the getActiveWindowFunctionContext() call — confirm against
// WindowFunctionIR.cpp). Not compilable as shown.
268  {
270  const auto window_func_context =
// The partition-start bitset lives in host memory; its address is baked into
// the generated code as an int64 constant.
272  const auto bitset = cgen_state_->llInt(
273  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
274  const auto min_val = cgen_state_->llInt(int64_t(0));
275  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
276  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
277  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
278  CodeGenerator code_generator(this);
279  const auto reset_state =
280  code_generator.toBool(cgen_state_->emitCall("bit_is_set",
281  {bitset,
282  code_generator.posArg(nullptr),
283  min_val,
284  max_val,
285  null_val,
286  null_bool_val}));
287  const auto reset_state_true_bb = llvm::BasicBlock::Create(
288  cgen_state_->context_, "reset_state.true", cgen_state_->current_func_);
289  const auto reset_state_false_bb = llvm::BasicBlock::Create(
290  cgen_state_->context_, "reset_state.false", cgen_state_->current_func_);
291  cgen_state_->ir_builder_.CreateCondBr(
292  reset_state, reset_state_true_bb, reset_state_false_bb);
293  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
294  return reset_state_false_bb;
295 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)

+ Here is the call graph for this function:

ResultSetPtr Executor::collectAllDeviceResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
)
private

Definition at line 2342 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), catalog_, collectAllDeviceShardedTopResults(), DEBUG_TIMER, SharedKernelContext::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, NonGroupedAggregate, reduceMultiDeviceResults(), reduceSpeculativeTopN(), GroupByAndAggregate::shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and use_speculative_top_n().

Referenced by executeWorkUnitImpl().

// Executor::collectAllDeviceResults(...)
// Reduces per-device fragment results into a single ResultSetPtr. Order of
// handling: (1) empty results — per the reference list above, a row for
// empty NonGroupedAggregate input is built (lines 2351-2352 dropped by the
// extractor); (2) speculative top-n reduction, translating bad_alloc into
// SpeculativeTopNFailed; (3) sharded top results on GPU when a shard count
// applies; (4) general multi-device reduction.
// NOTE(review): Doxygen-rendered listing — original lines 2351-2352 and 2365
// are missing (2365 is presumably the
// GroupByAndAggregate::shard_count_for_top_groups(...) call feeding the
// ternary — confirm against Execute.cpp). Not compilable as shown.
2347  {
2348  auto timer = DEBUG_TIMER(__func__);
2349  auto& result_per_device = shared_context.getFragmentResults();
2350  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
2353  ra_exe_unit.target_exprs, query_mem_desc, device_type);
2354  }
// Speculative top-n: attempt the specialized reduction, surfacing allocation
// failure as a typed exception the caller can retry on.
2355  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
2356  try {
2357  return reduceSpeculativeTopN(
2358  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2359  } catch (const std::bad_alloc&) {
2360  throw SpeculativeTopNFailed("Failed during multi-device reduction.");
2361  }
2362  }
2363  const auto shard_count =
2364  device_type == ExecutorDeviceType::GPU
2366  : 0;
2367 
2368  if (shard_count && !result_per_device.empty()) {
2369  return collectAllDeviceShardedTopResults(shared_context, ra_exe_unit);
2370  }
2371  return reduceMultiDeviceResults(
2372  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2373 }
std::vector< Analyzer::Expr * > target_exprs
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1430
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1379
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1322
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
Definition: Execute.cpp:2457
QueryDescriptionType getQueryDescriptionType() const
ResultSetPtr build_row_for_empty_input(const std::vector< Analyzer::Expr * > &target_exprs_in, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2300
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define DEBUG_TIMER(name)
Definition: Logger.h:407
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ResultSetPtr Executor::collectAllDeviceShardedTopResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit 
) const
private

Definition at line 2457 of file Execute.cpp.

References blockSize(), catalog_, CHECK, CHECK_EQ, CHECK_LE, SharedKernelContext::getFragmentResults(), gridSize(), SortInfo::limit, SortInfo::offset, SortInfo::order_entries, anonymous_namespace{Execute.cpp}::permute_storage_columnar(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), run_benchmark_import::result, and RelAlgExecutionUnit::sort_info.

Referenced by collectAllDeviceResults().

// Executor::collectAllDeviceShardedTopResults(shared_context, ra_exe_unit)
// Merges per-device sharded top-n results into one ResultSet: first pass
// sorts each device's result by the query's order entries (top_n =
// limit + offset) and accumulates the total entry count; second pass copies
// each device's permuted rows into the merged storage, using the columnar or
// row-wise permute helper depending on the output layout. Asserts that the
// number of rows written equals the precomputed entry count.
// NOTE(review): Doxygen-rendered listing with embedded source line numbers;
// copied here verbatim from the generated documentation.
2459  {
2460  auto& result_per_device = shared_context.getFragmentResults();
2461  const auto first_result_set = result_per_device.front().first;
2462  CHECK(first_result_set);
// The merged descriptor is cloned from the first device's result and its
// entry count rebuilt from scratch below.
2463  auto top_query_mem_desc = first_result_set->getQueryMemDesc();
2464  CHECK(!top_query_mem_desc.hasInterleavedBinsOnGpu());
2465  const auto top_n = ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
2466  top_query_mem_desc.setEntryCount(0);
2467  for (auto& result : result_per_device) {
2468  const auto result_set = result.first;
2469  CHECK(result_set);
2470  result_set->sort(ra_exe_unit.sort_info.order_entries, top_n, this);
2471  size_t new_entry_cnt = top_query_mem_desc.getEntryCount() + result_set->rowCount();
2472  top_query_mem_desc.setEntryCount(new_entry_cnt);
2473  }
2474  auto top_result_set = std::make_shared<ResultSet>(first_result_set->getTargetInfos(),
2475  first_result_set->getDeviceType(),
2476  top_query_mem_desc,
2477  first_result_set->getRowSetMemOwner(),
2478  catalog_,
2479  blockSize(),
2480  gridSize());
2481  auto top_storage = top_result_set->allocateStorage();
2482  size_t top_output_row_idx{0};
// Second pass: append each device's sorted permutation into the merged
// storage; the helpers return the next free output row index.
2483  for (auto& result : result_per_device) {
2484  const auto result_set = result.first;
2485  CHECK(result_set);
2486  const auto& top_permutation = result_set->getPermutationBuffer();
2487  CHECK_LE(top_permutation.size(), top_n);
2488  if (top_query_mem_desc.didOutputColumnar()) {
2489  top_output_row_idx = permute_storage_columnar(result_set->getStorage(),
2490  result_set->getQueryMemDesc(),
2491  top_storage,
2492  top_output_row_idx,
2493  top_query_mem_desc,
2494  top_permutation);
2495  } else {
2496  top_output_row_idx = permute_storage_row_wise(result_set->getStorage(),
2497  top_storage,
2498  top_output_row_idx,
2499  top_query_mem_desc,
2500  top_permutation);
2501  }
2502  }
2503  CHECK_EQ(top_output_row_idx, top_query_mem_desc.getEntryCount());
2504  return top_result_set;
2505 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
const std::list< Analyzer::OrderEntry > order_entries
size_t permute_storage_row_wise(const ResultSetStorage *input_storage, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2436
const size_t limit
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1379
#define CHECK_LE(x, y)
Definition: Logger.h:300
unsigned gridSize() const
Definition: Execute.cpp:3838
size_t permute_storage_columnar(const ResultSetStorage *input_storage, const QueryMemoryDescriptor &input_query_mem_desc, const ResultSetStorage *output_storage, size_t output_row_index, const QueryMemoryDescriptor &output_query_mem_desc, const std::vector< uint32_t > &top_permutation)
Definition: Execute.cpp:2386
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > & getFragmentResults()
#define CHECK(condition)
Definition: Logger.h:289
unsigned blockSize() const
Definition: Execute.cpp:3852
const size_t offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Executor::compileBody ( const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const GpuSharedMemoryContext gpu_smem_context = {} 
)
private

Definition at line 3255 of file NativeCodegen.cpp.

// Executor::compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc,
//                       co, gpu_smem_context)
// Generates the body of the query: prioritizes quals into primary/deferred
// sets, emits the AND-ed filter (with a short-circuit sc_true/sc_false split
// for deferred quals), then delegates output generation to
// group_by_and_aggregate.codegen(). When a separate filter function is
// enabled, codegen is redirected into it and then stitched back into the row
// function via a placeholder call, with loop_body handling using a
// stack-allocated i1 "loop_done" flag.
// NOTE(review): Doxygen-rendered listing — the extractor dropped original
// lines 3260 and 3340 (per the reference list and the comment at 3338-3339,
// presumably AUTOMATIC_IR_METADATA and the redeclareFilterFunction() call —
// confirm against NativeCodegen.cpp). Not compilable as shown.
3259  {
3261 
3262  // Switch the code generation into a separate filter function if enabled.
3263  // Note that accesses to function arguments are still codegenned from the
3264  // row function's arguments, then later automatically forwarded and
3265  // remapped into filter function arguments by redeclareFilterFunction().
3266  cgen_state_->row_func_bb_ = cgen_state_->ir_builder_.GetInsertBlock();
3267  llvm::Value* loop_done{nullptr};
3268  std::unique_ptr<Executor::FetchCacheAnchor> fetch_cache_anchor;
3269  if (cgen_state_->filter_func_) {
3270  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
// The loop_done flag is alloca'd in the row function's entry block and set
// true before entering the filter function; it is cleared again on the
// loop_body path after codegen (line 3329) so the row function can tell
// whether the filter function returned from inside the loop.
3271  auto row_func_entry_bb = &cgen_state_->row_func_->getEntryBlock();
3272  cgen_state_->ir_builder_.SetInsertPoint(row_func_entry_bb,
3273  row_func_entry_bb->begin());
3274  loop_done = cgen_state_->ir_builder_.CreateAlloca(
3275  get_int_type(1, cgen_state_->context_), nullptr, "loop_done");
3276  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3277  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(true), loop_done);
3278  }
3279  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->filter_func_bb_);
3280  cgen_state_->current_func_ = cgen_state_->filter_func_;
3281  fetch_cache_anchor = std::make_unique<Executor::FetchCacheAnchor>(cgen_state_.get());
3282  }
3283 
3284  // generate the code for the filter
3285  std::vector<Analyzer::Expr*> primary_quals;
3286  std::vector<Analyzer::Expr*> deferred_quals;
3287  bool short_circuited = CodeGenerator::prioritizeQuals(
3288  ra_exe_unit, primary_quals, deferred_quals, plan_state_->hoisted_filters_);
3289  if (short_circuited) {
3290  VLOG(1) << "Prioritized " << std::to_string(primary_quals.size()) << " quals, "
3291  << "short-circuited and deferred " << std::to_string(deferred_quals.size())
3292  << " quals";
3293  }
3294  llvm::Value* filter_lv = cgen_state_->llBool(true);
3295  CodeGenerator code_generator(this);
3296  for (auto expr : primary_quals) {
3297  // Generate the filter for primary quals
3298  auto cond = code_generator.toBool(code_generator.codegen(expr, true, co).front());
3299  filter_lv = cgen_state_->ir_builder_.CreateAnd(filter_lv, cond);
3300  }
3301  CHECK(filter_lv->getType()->isIntegerTy(1));
3302  llvm::BasicBlock* sc_false{nullptr};
// Deferred quals are only evaluated when the primary filter passed: branch
// to sc_true and re-seed filter_lv, returning 0 early on sc_false when there
// are no join quals.
3303  if (!deferred_quals.empty()) {
3304  auto sc_true = llvm::BasicBlock::Create(
3305  cgen_state_->context_, "sc_true", cgen_state_->current_func_);
3306  sc_false = llvm::BasicBlock::Create(
3307  cgen_state_->context_, "sc_false", cgen_state_->current_func_);
3308  cgen_state_->ir_builder_.CreateCondBr(filter_lv, sc_true, sc_false);
3309  cgen_state_->ir_builder_.SetInsertPoint(sc_false);
3310  if (ra_exe_unit.join_quals.empty()) {
3311  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt(int32_t(0)));
3312  }
3313  cgen_state_->ir_builder_.SetInsertPoint(sc_true);
3314  filter_lv = cgen_state_->llBool(true);
3315  }
3316  for (auto expr : deferred_quals) {
3317  filter_lv = cgen_state_->ir_builder_.CreateAnd(
3318  filter_lv, code_generator.toBool(code_generator.codegen(expr, true, co).front()));
3319  }
3320 
3321  CHECK(filter_lv->getType()->isIntegerTy(1));
3322  auto ret = group_by_and_aggregate.codegen(
3323  filter_lv, sc_false, query_mem_desc, co, gpu_smem_context);
3324 
3325  // Switch the code generation back to the row function if a filter
3326  // function was enabled.
3327  if (cgen_state_->filter_func_) {
3328  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3329  cgen_state_->ir_builder_.CreateStore(cgen_state_->llBool(false), loop_done);
3330  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3331  }
3332 
3333  cgen_state_->ir_builder_.SetInsertPoint(cgen_state_->row_func_bb_);
3334  cgen_state_->current_func_ = cgen_state_->row_func_;
3335  cgen_state_->filter_func_call_ =
3336  cgen_state_->ir_builder_.CreateCall(cgen_state_->filter_func_, {});
3337 
3338  // Create real filter function declaration after placeholder call
3339  // is emitted.
3341 
3342  if (cgen_state_->row_func_bb_->getName() == "loop_body") {
3343  auto loop_done_true = llvm::BasicBlock::Create(
3344  cgen_state_->context_, "loop_done_true", cgen_state_->row_func_);
3345  auto loop_done_false = llvm::BasicBlock::Create(
3346  cgen_state_->context_, "loop_done_false", cgen_state_->row_func_);
3347  auto loop_done_flag = cgen_state_->ir_builder_.CreateLoad(
3348  loop_done->getType()->getPointerElementType(), loop_done);
3349  cgen_state_->ir_builder_.CreateCondBr(
3350  loop_done_flag, loop_done_true, loop_done_false);
3351  cgen_state_->ir_builder_.SetInsertPoint(loop_done_true);
3352  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3353  cgen_state_->ir_builder_.SetInsertPoint(loop_done_false);
3354  } else {
3355  cgen_state_->ir_builder_.CreateRet(cgen_state_->filter_func_call_);
3356  }
3357  }
3358  return ret;
3359 }
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1324
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1354
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static bool prioritizeQuals(const RelAlgExecutionUnit &ra_exe_unit, std::vector< Analyzer::Expr * > &primary_quals, std::vector< Analyzer::Expr * > &deferred_quals, const PlanState::HoistedFiltersSet &hoisted_quals)
Definition: LogicalIR.cpp:157
#define CHECK(condition)
Definition: Logger.h:289
void redeclareFilterFunction()
Definition: IRCodegen.cpp:1016
#define VLOG(n)
Definition: Logger.h:383
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > Executor::compileWorkUnit ( const std::vector< InputTableInfo > &  query_infos,
const PlanState::DeletedColumnsMap &  deleted_cols_map,
const RelAlgExecutionUnit &  ra_exe_unit,
const CompilationOptions &  co,
const ExecutionOptions &  eo,
const CudaMgr_Namespace::CudaMgr *  cuda_mgr,
const bool  allow_lazy_fetch,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  has_cardinality_estimation,
ColumnCacheMap &  column_cache,
RenderInfo *  render_info = nullptr 
)
private

Definition at line 2708 of file NativeCodegen.cpp.

2720  {
2721  auto timer = DEBUG_TIMER(__func__);
2722 
2724  if (!cuda_mgr) {
2725  throw QueryMustRunOnCpu();
2726  }
2727  }
2728 
2729 #ifndef NDEBUG
2730  static std::uint64_t counter = 0;
2731  ++counter;
2732  VLOG(1) << "CODEGEN #" << counter << ":";
2733  LOG(IR) << "CODEGEN #" << counter << ":";
2734  LOG(PTX) << "CODEGEN #" << counter << ":";
2735  LOG(ASM) << "CODEGEN #" << counter << ":";
2736 #endif
2737 
2738  // cgenstate_manager uses RAII pattern to manage the live time of
2739  // CgenState instances.
2740  Executor::CgenStateManager cgenstate_manager(*this,
2741  allow_lazy_fetch,
2742  query_infos,
2743  deleted_cols_map,
2744  &ra_exe_unit); // locks compilation_mutex
2745 
2746  addTransientStringLiterals(ra_exe_unit, row_set_mem_owner);
2747 
2748  GroupByAndAggregate group_by_and_aggregate(
2749  this,
2750  co.device_type,
2751  ra_exe_unit,
2752  query_infos,
2753  row_set_mem_owner,
2754  has_cardinality_estimation ? std::optional<int64_t>(max_groups_buffer_entry_guess)
2755  : std::nullopt);
2756  auto query_mem_desc =
2757  group_by_and_aggregate.initQueryMemoryDescriptor(eo.allow_multifrag,
2758  max_groups_buffer_entry_guess,
2759  crt_min_byte_width,
2760  render_info,
2762 
2763  if (query_mem_desc->getQueryDescriptionType() ==
2765  !has_cardinality_estimation && (!render_info || !render_info->isInSitu()) &&
2766  !eo.just_explain) {
2767  const auto col_range_info = group_by_and_aggregate.getColRangeInfo();
2768  throw CardinalityEstimationRequired(col_range_info.max - col_range_info.min);
2769  }
2770 
2771  const bool output_columnar = query_mem_desc->didOutputColumnar();
2772  const bool gpu_shared_mem_optimization =
2774  ra_exe_unit,
2775  cuda_mgr,
2776  co.device_type,
2777  cuda_mgr ? this->blockSize() : 1,
2778  cuda_mgr ? this->numBlocksPerMP() : 1);
2779  if (gpu_shared_mem_optimization) {
2780  // disable interleaved bins optimization on the GPU
2781  query_mem_desc->setHasInterleavedBinsOnGpu(false);
2782  LOG(DEBUG1) << "GPU shared memory is used for the " +
2783  query_mem_desc->queryDescTypeToString() + " query(" +
2784  std::to_string(get_shared_memory_size(gpu_shared_mem_optimization,
2785  query_mem_desc.get())) +
2786  " out of " + std::to_string(g_gpu_smem_threshold) + " bytes).";
2787  }
2788 
2789  const GpuSharedMemoryContext gpu_smem_context(
2790  get_shared_memory_size(gpu_shared_mem_optimization, query_mem_desc.get()));
2791 
2793  const size_t num_count_distinct_descs =
2794  query_mem_desc->getCountDistinctDescriptorsSize();
2795  for (size_t i = 0; i < num_count_distinct_descs; i++) {
2796  const auto& count_distinct_descriptor =
2797  query_mem_desc->getCountDistinctDescriptor(i);
2798  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::UnorderedSet ||
2799  (count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid &&
2800  !co.hoist_literals)) {
2801  throw QueryMustRunOnCpu();
2802  }
2803  }
2804 
2805  // we currently do not support varlen projection based on baseline groupby when
2806  // 1) target table is multi-fragmented and 2) multiple gpus are involved for query
2807  // processing in this case, we punt the query to cpu to avoid server crash
2808  for (const auto expr : ra_exe_unit.target_exprs) {
2809  if (auto gby_expr = dynamic_cast<Analyzer::AggExpr*>(expr)) {
2810  bool has_multiple_gpus = cuda_mgr ? cuda_mgr->getDeviceCount() > 1 : false;
2811  if (gby_expr->get_aggtype() == SQLAgg::kSAMPLE && has_multiple_gpus &&
2812  !g_leaf_count) {
2813  std::set<const Analyzer::ColumnVar*,
2814  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
2816  gby_expr->collect_column_var(colvar_set, true);
2817  for (const auto cv : colvar_set) {
2818  if (cv->get_type_info().is_varlen()) {
2819  const auto tbl_id = cv->get_table_id();
2820  std::for_each(query_infos.begin(),
2821  query_infos.end(),
2822  [tbl_id](const InputTableInfo& input_table_info) {
2823  if (input_table_info.table_id == tbl_id &&
2824  input_table_info.info.fragments.size() > 1) {
2825  throw QueryMustRunOnCpu();
2826  }
2827  });
2828  }
2829  }
2830  }
2831  }
2832  }
2833  }
2834 
2835  // Read the module template and target either CPU or GPU
2836  // by binding the stream position functions to the right implementation:
2837  // stride access for GPU, contiguous for CPU
2838  CHECK(cgen_state_->module_ == nullptr);
2839  cgen_state_->set_module_shallow_copy(get_rt_module(), /*always_clone=*/true);
2840 
2841  auto is_gpu = co.device_type == ExecutorDeviceType::GPU;
2842  if (is_gpu) {
2843  cgen_sta