OmniSciDB  a5dc49c757
Executor Class Reference

#include <Execute.h>


Classes

class  CgenStateManager
 
struct  ExecutorMutexHolder
 
class  FetchCacheAnchor
 
struct  GroupColLLVMValue
 
struct  JoinHashTableOrError
 

Public Types

enum  ExtModuleKinds {
  ExtModuleKinds::template_module, ExtModuleKinds::udf_cpu_module, ExtModuleKinds::udf_gpu_module, ExtModuleKinds::rt_udf_cpu_module,
  ExtModuleKinds::rt_udf_gpu_module, ExtModuleKinds::rt_geos_module, ExtModuleKinds::rt_libdevice_module
}
 
using ExecutorId = size_t
 
using CachedCardinality = std::pair< bool, size_t >
 

Public Member Functions

 Executor (const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
 
void clearCaches (bool runtime_only=false)
 
std::string dumpCache () const
 
void reset (bool discard_runtime_modules_only=false)
 
const std::unique_ptr
< llvm::Module > & 
get_rt_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_rt_udf_module (bool is_gpu=false) const
 
const std::unique_ptr
< llvm::Module > & 
get_geos_module () const
 
const std::unique_ptr
< llvm::Module > & 
get_libdevice_module () const
 
bool has_rt_module () const
 
bool has_udf_module (bool is_gpu=false) const
 
bool has_rt_udf_module (bool is_gpu=false) const
 
bool has_geos_module () const
 
bool has_libdevice_module () const
 
const TemporaryTables * getTemporaryTables ()
 
StringDictionaryProxy * getStringDictionaryProxy (const shared::StringDictKey &dict_key, const bool with_generation) const
 
StringDictionaryProxy * getStringDictionaryProxy (const shared::StringDictKey &dict_key, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap * 
getStringProxyTranslationMap (const shared::StringDictKey &source_dict_key, const shared::StringDictKey &dest_dict_key, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
const
StringDictionaryProxy::IdMap * 
getJoinIntersectionStringProxyTranslationMap (const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
 
const
StringDictionaryProxy::TranslationMap
< Datum > * 
getStringProxyNumericTranslationMap (const shared::StringDictKey &source_dict_key, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
 
bool isCPUOnly () const
 
bool isArchMaxwell (const ExecutorDeviceType dt) const
 
bool containsLeftDeepOuterJoin () const
 
const ColumnDescriptor * getColumnDescriptor (const Analyzer::ColumnVar *) const
 
const ColumnDescriptor * getPhysicalColumnDescriptor (const Analyzer::ColumnVar *, int) const
 
Data_Namespace::DataMgr * getDataMgr () const
 
const std::shared_ptr
< RowSetMemoryOwner > 
getRowSetMemoryOwner () const
 
const TemporaryTables * getTemporaryTables () const
 
Fragmenter_Namespace::TableInfo getTableInfo (const shared::TableKey &table_key) const
 
const TableGeneration & getTableGeneration (const shared::TableKey &table_key) const
 
ExpressionRange getColRange (const PhysicalInput &) const
 
size_t getNumBytesForFetchedRow (const std::set< shared::TableKey > &table_keys_to_fetch) const
 
std::map< shared::ColumnKey,
size_t > 
getColumnByteWidthMap (const std::set< shared::TableKey > &table_ids_to_fetch, const bool include_lazy_fetched_cols) const
 
size_t getNumBytesForFetchedRow (const std::set< int > &table_ids_to_fetch) const
 
ExecutorResourceMgr_Namespace::ChunkRequestInfo getChunkRequestInfo (const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos, const std::vector< std::pair< int32_t, FragmentsList >> &device_fragment_lists) const
 Determines a unique list of chunks and their associated byte sizes for a given query plan. More...
 
bool hasLazyFetchColumns (const std::vector< Analyzer::Expr * > &target_exprs) const
 
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo (const std::vector< Analyzer::Expr * > &target_exprs) const
 
void interrupt (const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
 
void resetInterrupt ()
 
void enableRuntimeQueryInterrupt (const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
 
int8_t warpSize () const
 
unsigned gridSize () const
 
void setGridSize (unsigned grid_size)
 
void resetGridSize ()
 
unsigned numBlocksPerMP () const
 
unsigned blockSize () const
 
void setBlockSize (unsigned block_size)
 
void resetBlockSize ()
 
size_t maxGpuSlabSize () const
 
ResultSetPtr executeWorkUnit (size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
TableUpdateMetadata executeUpdate (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
 
void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
 
int deviceCount (const ExecutorDeviceType) const
 
void logSystemCPUMemoryStatus (std::string const &tag, size_t const thread_idx) const
 
void logSystemGPUMemoryStatus (std::string const &tag, size_t const thread_idx) const
 
void setupCaching (const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< shared::TableKey > &phys_table_keys)
 
void setColRangeCache (const AggregatedColRange &aggregated_col_range)
 
ExecutorId getExecutorId () const
 
QuerySessionId & getCurrentQuerySession (heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
QuerySessionStatus::QueryStatus getQuerySessionStatus (const QuerySessionId &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkCurrentQuerySession (const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
void invalidateRunningQuerySession (heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool addToQuerySessionList (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool removeFromQuerySessionList (const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
void setQuerySessionAsInterrupted (const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool checkIsQuerySessionInterrupted (const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool checkIsQuerySessionEnrolled (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
bool updateQuerySessionStatusWithLock (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
bool updateQuerySessionExecutorAssignment (const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
 
std::vector< QuerySessionStatusgetQuerySessionInfo (const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
 
heavyai::shared_mutex & getSessionLock ()
 
CurrentQueryStatus attachExecutorToQuerySession (const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
 
void checkPendingQueryStatus (const QuerySessionId &query_session)
 
void clearQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str)
 
void updateQuerySessionStatus (const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
 
void enrollQuerySession (const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
 
size_t getNumCurentSessionsEnrolled () const
 
const std::vector< size_t > getExecutorIdsRunningQuery (const QuerySessionId &interrupt_session) const
 
bool checkNonKernelTimeInterrupted () const
 
void registerExtractedQueryPlanDag (const QueryPlanDAG &query_plan_dag)
 
const QueryPlanDAG getLatestQueryPlanDagExtracted () const
 
void addToCardinalityCache (const CardinalityCacheKey &cache_key, const size_t cache_value)
 
CachedCardinality getCachedCardinality (const CardinalityCacheKey &cache_key)
 
heavyai::shared_mutex & getDataRecyclerLock ()
 
QueryPlanDagCache & getQueryPlanDagCache ()
 
ResultSetRecyclerHolder & getResultSetRecyclerHolder ()
 
CgenState * getCgenStatePtr () const
 
PlanState * getPlanStatePtr () const
 
llvm::LLVMContext & getContext ()
 
void update_extension_modules (bool update_runtime_modules_only=false)
 

Static Public Member Functions

static void clearExternalCaches (bool for_update, const TableDescriptor *td, const int current_db_id)
 
template<typename F >
static void registerExtensionFunctions (F register_extension_functions)
 
static std::shared_ptr< Executor > getExecutor (const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
 
static void nukeCacheOfExecutors ()
 
static void clearMemory (const Data_Namespace::MemoryLevel memory_level)
 
static size_t getArenaBlockSize ()
 
static void addUdfIrToModule (const std::string &udf_ir_filename, const bool is_cuda_ir)
 
static void initialize_extension_module_sources ()
 
static void registerActiveModule (void *module, const int device_id)
 
static void unregisterActiveModule (const int device_id)
 
static std::pair< int64_t,
int32_t > 
reduceResults (const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
 
static void clearCardinalityCache ()
 
static void invalidateCardinalityCacheForTable (const shared::TableKey &table_key)
 
static void update_after_registration (bool update_runtime_modules_only=false)
 
static void init_resource_mgr (const size_t num_cpu_slots, const size_t num_gpu_slots, const size_t cpu_result_mem, const size_t cpu_buffer_pool_mem, const size_t gpu_buffer_pool_mem, const double per_query_max_cpu_slots_ratio, const double per_query_max_cpu_result_mem_ratio, const bool allow_cpu_kernel_concurrency, const bool allow_cpu_gpu_kernel_concurrency, const bool allow_cpu_slot_oversubscription_concurrency, const bool allow_cpu_result_mem_oversubscription, const double max_available_resource_use_ratio)
 
static void pause_executor_queue ()
 
static void resume_executor_queue ()
 
static size_t get_executor_resource_pool_total_resource_quantity (const ExecutorResourceMgr_Namespace::ResourceType resource_type)
 
static
ExecutorResourceMgr_Namespace::ResourcePoolInfo 
get_executor_resource_pool_info ()
 
static void set_executor_resource_pool_resource (const ExecutorResourceMgr_Namespace::ResourceType resource_type, const size_t resource_quantity)
 
static size_t getBaselineThreshold (bool for_count_distinct, ExecutorDeviceType device_type)
 
static const
ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy 
get_concurrent_resource_grant_policy (const ExecutorResourceMgr_Namespace::ResourceType resource_type)
 
static void set_concurrent_resource_grant_policy (const ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy &concurrent_resource_grant_policy)
 

Public Attributes

std::mutex compilation_mutex_
 

Static Public Attributes

static constexpr ExecutorId UNITARY_EXECUTOR_ID = 0
 
static constexpr ExecutorId INVALID_EXECUTOR_ID = SIZE_MAX
 
static std::map
< ExtModuleKinds, std::string > 
extension_module_sources
 
static std::mutex register_runtime_extension_functions_mutex_
 
static std::mutex kernel_mutex_
 
static const size_t auto_cpu_mem_bytes {size_t(0)}
 
static std::shared_ptr
< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > 
executor_resource_mgr_ = nullptr
 

Private Types

using PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>
 

Private Member Functions

void clearMetaInfoCache ()
 
int deviceCountForMemoryLevel (const Data_Namespace::MemoryLevel memory_level) const
 
llvm::Value * codegenWindowFunction (const size_t target_index, const CompilationOptions &co)
 
llvm::Value * codegenConditionalAggregateCondValSelector (llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
 
llvm::Value * codegenWindowFunctionAggregate (CodeGenerator *code_generator, const CompilationOptions &co)
 
std::pair< llvm::BasicBlock
*, llvm::Value * > 
codegenWindowResetStateControlFlow (CodeGenerator *code_generator, const CompilationOptions &co)
 
void codegenWindowFunctionStateInit (CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
 
llvm::Value * codegenWindowFunctionAggregateCalls (llvm::Value *aggregate_state, const CompilationOptions &co)
 
llvm::Value * codegenWindowNavigationFunctionOnFrame (const CompilationOptions &co)
 
llvm::Value * codegenCurrentPartitionIndex (const WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *current_row_pos_lv)
 
llvm::Value * codegenFrameBoundExpr (const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
 
llvm::Value * codegenFrameBound (bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
 
std::pair< std::string,
llvm::Value * > 
codegenLoadOrderKeyBufPtr (WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co) const
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenFrameNullRange (WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
 
WindowPartitionBufferPtrs codegenLoadPartitionBuffers (WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenWindowFrameBounds (WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
 
std::pair< llvm::Value
*, llvm::Value * > 
codegenFrameBoundRange (const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
 
std::vector< llvm::Value * > prepareRowModeFuncArgs (bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const
 
std::vector< llvm::Value * > prepareRangeModeFuncArgs (bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
 
const std::string getOrderKeyTypeName (WindowFunctionContext *window_func_context) const
 
llvm::Value * codegenLoadCurrentValueFromColBuf (WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
 
size_t getOrderKeySize (WindowFunctionContext *window_func_context) const
 
const SQLTypeInfo getFirstOrderColTypeInfo (WindowFunctionContext *window_func_context) const
 
std::string getFramingFuncName (const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
 
void codegenWindowAvgEpilogue (CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *crt_val, llvm::Value *window_func_null_val)
 
llvm::Value * codegenAggregateWindowState (CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
 
llvm::Value * aggregateWindowStatePtr (CodeGenerator *code_generator, const CompilationOptions &co)
 
CudaMgr_Namespace::CudaMgr * cudaMgr () const
 
bool isArchPascalOrLater (const ExecutorDeviceType dt) const
 
bool needFetchAllFragments (const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
 
bool needLinearizeAllFragments (const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
 
void executeWorkUnitPerFragment (const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
 Compiles and dispatches a work unit per fragment, processing results with the per-fragment callback. Currently used for computing metrics over fragments (metadata). More...
 
ResultSetPtr executeExplain (const QueryCompilationDescriptor &)
 
ResultSetPtr executeTableFunction (const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo)
 Compiles and dispatches a table function; that is, a function that takes as input one or more columns and returns a ResultSet, which can be parsed by subsequent execution steps. More...
 
ExecutorDeviceType getDeviceTypeForTargets (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
 
ResultSetPtr collectAllDeviceResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
ResultSetPtr collectAllDeviceShardedTopResults (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type) const
 
std::unordered_map
< shared::TableKey, const
Analyzer::BinOper * > 
getInnerTabIdToJoinCond () const
 
std::vector< std::unique_ptr
< ExecutionKernel > > 
createKernels (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
 
void launchKernelsImpl (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const size_t requested_num_threads)
 
void launchKernelsLocked (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
 
void launchKernelsViaResourceMgr (SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const QueryMemoryDescriptor &query_mem_desc)
 Launches a vector of kernels for a given query step, gated/scheduled by ExecutorResourceMgr. More...
 
std::vector< size_t > getTableFragmentIndices (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< shared::TableKey, const TableFragments * > &selected_tables_fragments, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
 
bool skipFragmentPair (const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
 
FetchResult fetchChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
FetchResult fetchUnionChunks (const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
 
std::pair< std::vector
< std::vector< int64_t >
>, std::vector< std::vector
< uint64_t > > > 
getRowCountAndOffsetForAllFrags (const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments)
 
void buildSelectedFragsMapping (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
void buildSelectedFragsMappingForUnion (std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< size_t > getFragmentCount (const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
 
int32_t executePlanWithGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const shared::TableKey &outer_table_key, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
 
int32_t executePlanWithoutGroupBy (const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
 
ResultSetPtr resultsUnion (SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
 
std::vector< int8_t * > getJoinHashTablePtrs (const ExecutorDeviceType device_type, const int device_id)
 
ResultSetPtr reduceMultiDeviceResults (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
std::vector< std::pair
< ResultSetPtr, std::vector
< size_t > > > 
getUniqueThreadSharedResultSets (const std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device) const
 
ResultSetPtr reduceMultiDeviceResultSets (std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr reduceSpeculativeTopN (const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
 
ResultSetPtr executeWorkUnitImpl (size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
 
std::vector< llvm::Value * > inlineHoistedLiterals ()
 
void AutoTrackBuffersInRuntimeIR ()
 
std::tuple< CompilationResult,
std::unique_ptr
< QueryMemoryDescriptor > > 
compileWorkUnit (const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
 
llvm::BasicBlock * codegenSkipDeletedOuterTableRow (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
std::vector< JoinLoop > buildJoinLoops (RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
 
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
 
std::function< llvm::Value
*(const std::vector
< llvm::Value * >
&, llvm::Value *)> 
buildIsDeletedCb (const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
 
std::shared_ptr< HashJoin > buildCurrentLevelHashTable (const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
 
void redeclareFilterFunction ()
 
llvm::Value * addJoinLoopIterator (const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
 
void codegenJoinLoops (const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
 
bool compileBody (const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
 
void createErrorCheckControlFlow (llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
 
void insertErrorCodeChecker (llvm::Function *query_func, unsigned const error_code_idx, bool hoist_literals, bool allow_runtime_query_interrupt)
 
void preloadFragOffsets (const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
 
JoinHashTableOrError buildHashTableForQualifier (const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 
void nukeOldState (const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
 
std::shared_ptr
< CompilationContext > 
optimizeAndCodegenCPU (llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
 
std::shared_ptr
< CompilationContext > 
optimizeAndCodegenGPU (llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
 
std::string generatePTX (const std::string &) const
 
void initializeNVPTXBackend () const
 
int64_t deviceCycles (int milliseconds) const
 
GroupColLLVMValue groupByColumnCodegen (Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
 
llvm::Value * castToFP (llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
 
llvm::Value * castToIntPtrTyIn (llvm::Value *val, const size_t bit_width)
 
std::tuple
< RelAlgExecutionUnit,
PlanState::DeletedColumnsMap > 
addDeletedColumn (const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
 
bool isFragmentFullyDeleted (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &fragment)
 
FragmentSkipStatus canSkipFragmentForFpQual (const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
 
std::pair< bool, int64_t > skipFragment (const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
std::pair< bool, int64_t > skipFragmentInnerJoins (const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
 
AggregatedColRange computeColRangesCache (const std::unordered_set< PhysicalInput > &phys_inputs)
 
StringDictionaryGenerations computeStringDictionaryGenerations (const std::unordered_set< PhysicalInput > &phys_inputs)
 
TableGenerations computeTableGenerations (const std::unordered_set< shared::TableKey > &phys_table_keys)
 
std::vector< int8_t > serializeLiterals (const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
 
const std::unique_ptr
< llvm::Module > & 
get_extension_module (ExtModuleKinds kind) const
 
bool has_extension_module (ExtModuleKinds kind) const
 
llvm::Value * spillDoubleElement (llvm::Value *elem_val, llvm::Type *elem_ty)
 
ExecutorMutexHolder acquireExecuteMutex ()
 

Static Private Member Functions

static size_t align (const size_t off_in, const size_t alignment)
 

Private Attributes

const ExecutorId executor_id_
 
std::unique_ptr
< llvm::LLVMContext > 
context_
 
std::unique_ptr< CgenState > cgen_state_
 
std::map< ExtModuleKinds,
std::unique_ptr< llvm::Module > > 
extension_modules_
 
std::unique_ptr< PlanState > plan_state_
 
std::shared_ptr
< RowSetMemoryOwner > 
row_set_mem_owner_
 
std::mutex gpu_exec_mutex_ [max_gpu_count]
 
std::atomic< bool > interrupted_ {false}
 
std::mutex str_dict_mutex_
 
std::unique_ptr
< llvm::TargetMachine > 
nvptx_target_machine_
 
unsigned block_size_x_
 
unsigned grid_size_x_
 
const size_t max_gpu_slab_size_
 
const std::string debug_dir_
 
const std::string debug_file_
 
Data_Namespace::DataMgr * data_mgr_
 
const TemporaryTables * temporary_tables_
 
TableIdToNodeMap table_id_to_node_map_
 
int64_t kernel_queue_time_ms_ = 0
 
int64_t compilation_queue_time_ms_ = 0
 
std::unique_ptr
< WindowProjectNodeContext > 
window_project_node_context_owned_
 
WindowFunctionContext * active_window_function_ {nullptr}
 
InputTableInfoCache input_table_info_cache_
 
AggregatedColRange agg_col_range_cache_
 
TableGenerations table_generations_
 
QuerySessionId current_query_session_
 

Static Private Attributes

static const int max_gpu_count
 
static const size_t auto_num_threads {size_t(0)}
 
static std::mutex gpu_active_modules_mutex_
 
static uint32_t gpu_active_modules_device_mask_ {0x0}
 
static void * gpu_active_modules_ [max_gpu_count]
 
static const size_t baseline_threshold
 
static heavyai::shared_mutex executor_session_mutex_
 
static InterruptFlagMap queries_interrupt_flag_
 
static QuerySessionMap queries_session_map_
 
static std::map< int,
std::shared_ptr< Executor > > 
executors_
 
static heavyai::shared_mutex execute_mutex_
 
static heavyai::shared_mutex executors_cache_mutex_
 
static QueryPlanDagCache query_plan_dag_cache_
 
static heavyai::shared_mutex recycler_mutex_
 
static std::unordered_map
< CardinalityCacheKey, size_t > 
cardinality_cache_
 
static ResultSetRecyclerHolder resultset_recycler_holder_
 
static QueryPlanDAG latest_query_plan_extracted_ {EMPTY_QUERY_PLAN}
 

Friends

class BaselineJoinHashTable
 
class CodeGenerator
 
class ColumnFetcher
 
struct DiamondCodegen
 
class ExecutionKernel
 
class KernelSubtask
 
class HashJoin
 
class BoundingBoxIntersectJoinHashTable
 
class RangeJoinHashTable
 
class GroupByAndAggregate
 
class QueryCompilationDescriptor
 
class QueryMemoryDescriptor
 
class QueryMemoryInitializer
 
class QueryFragmentDescriptor
 
class QueryExecutionContext
 
class ResultSet
 
class InValuesBitmap
 
class StringDictionaryTranslationMgr
 
class LeafAggregator
 
class PerfectJoinHashTable
 
class QueryRewriter
 
class PendingExecutionClosure
 
class RelAlgExecutor
 
class TableOptimizer
 
class TableFunctionCompilationContext
 
class TableFunctionExecutionContext
 
struct TargetExprCodegenBuilder
 
struct TargetExprCodegen
 
class WindowProjectNodeContext
 

Detailed Description

Definition at line 415 of file Execute.h.

Member Typedef Documentation

using Executor::CachedCardinality = std::pair<bool, size_t>

Definition at line 1403 of file Execute.h.

using Executor::ExecutorId = size_t

Definition at line 422 of file Execute.h.

using Executor::PerFragmentCallBack = std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)>

Definition at line 890 of file Execute.h.

Member Enumeration Documentation

enum Executor::ExtModuleKinds

Enumerator
template_module 
udf_cpu_module 
udf_gpu_module 
rt_udf_cpu_module 
rt_udf_gpu_module 
rt_geos_module 
rt_libdevice_module 

Definition at line 518 of file Execute.h.

518  {
519  template_module, // RuntimeFunctions.bc
520  udf_cpu_module, // Load-time UDFs for CPU execution
521  udf_gpu_module, // Load-time UDFs for GPU execution
522  rt_udf_cpu_module, // Run-time UDF/UDTFs for CPU execution
523  rt_udf_gpu_module, // Run-time UDF/UDTFs for GPU execution
524  rt_geos_module, // geos functions
525  rt_libdevice_module // math library functions for GPU execution
526  };
std::unique_ptr< llvm::Module > udf_gpu_module
std::unique_ptr< llvm::Module > udf_cpu_module
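A minimal sketch of how the module kinds map onto the public accessors above (the mapping is inferred from the enum comments and accessor names, not quoted from Execute.h):

  // Each ExtModuleKinds entry keys one LLVM module owned by the Executor:
  //   template_module     -> get_rt_module()
  //   udf_cpu_module      -> get_udf_module(/*is_gpu=*/false)
  //   udf_gpu_module      -> get_udf_module(/*is_gpu=*/true)
  //   rt_udf_cpu_module   -> get_rt_udf_module(/*is_gpu=*/false)
  //   rt_udf_gpu_module   -> get_rt_udf_module(/*is_gpu=*/true)
  //   rt_geos_module      -> get_geos_module()
  //   rt_libdevice_module -> get_libdevice_module()
  if (executor->has_udf_module(/*is_gpu=*/false)) {
    const std::unique_ptr<llvm::Module>& udf_module =
        executor->get_udf_module(/*is_gpu=*/false);
    // ... link udf_module into the query module before compilation ...
  }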

Constructor & Destructor Documentation

Executor::Executor ( const ExecutorId  id,
Data_Namespace::DataMgr data_mgr,
const size_t  block_size_x,
const size_t  grid_size_x,
const size_t  max_gpu_slab_size,
const std::string &  debug_dir,
const std::string &  debug_file 
)

Definition at line 276 of file Execute.cpp.

283  : executor_id_(executor_id)
284  , context_(new llvm::LLVMContext())
285  , cgen_state_(new CgenState({}, false, this))
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
const ExecutorId executor_id_
Definition: Execute.h:1476
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477

Member Function Documentation

ExecutorMutexHolder Executor::acquireExecuteMutex ( )
inline private

Definition at line 1591 of file Execute.h.

References execute_mutex_, executor_id_, Executor::ExecutorMutexHolder::shared_lock, Executor::ExecutorMutexHolder::unique_lock, and UNITARY_EXECUTOR_ID.

1591  {
1592  ExecutorMutexHolder ret;
1593  if (executor_id_ == Executor::UNITARY_EXECUTOR_ID) {
1594  // Only one unitary executor can run at a time
1595  ret.unique_lock = heavyai::unique_lock<heavyai::shared_mutex>(execute_mutex_);
1596  } else {
1597  ret.shared_lock = heavyai::shared_lock<heavyai::shared_mutex>(execute_mutex_);
1598  }
1599  return ret;
1600  }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1585
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1476
std::unique_lock< T > unique_lock
static constexpr ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:423
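A hedged usage sketch: the holder is returned by value, so whichever lock it acquired is released when it goes out of scope. The block below is illustrative only and would live inside an Executor member function, since acquireExecuteMutex() is private.

  {
    auto mutex_holder = acquireExecuteMutex();
    // executor_id_ == UNITARY_EXECUTOR_ID -> unique_lock on execute_mutex_ (exclusive)
    // any other executor id               -> shared_lock on execute_mutex_
    // ... run the guarded query step ...
  }  // lock released when mutex_holder is destroyed
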
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > Executor::addDeletedColumn ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 4475 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::add_deleted_col_to_map(), CHECK, CompilationOptions::filter_on_deleted_column, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and TABLE.

Referenced by executeWorkUnitImpl(), and executeWorkUnitPerFragment().

4477  {
4478  if (!co.filter_on_deleted_column) {
4479  return std::make_tuple(ra_exe_unit, PlanState::DeletedColumnsMap{});
4480  }
4481  auto ra_exe_unit_with_deleted = ra_exe_unit;
4482  PlanState::DeletedColumnsMap deleted_cols_map;
4483  for (const auto& input_table : ra_exe_unit_with_deleted.input_descs) {
4484  if (input_table.getSourceType() != InputSourceType::TABLE) {
4485  continue;
4486  }
4487  const auto& table_key = input_table.getTableKey();
4488  const auto catalog =
4489  Catalog_Namespace::SysCatalog::instance().getCatalog(table_key.db_id);
4490  CHECK(catalog);
4491  const auto td = catalog->getMetadataForTable(table_key.table_id);
4492  CHECK(td);
4493  const auto deleted_cd = catalog->getDeletedColumnIfRowsDeleted(td);
4494  if (!deleted_cd) {
4495  continue;
4496  }
4497  CHECK(deleted_cd->columnType.is_boolean());
4498  // check deleted column is not already present
4499  bool found = false;
4500  for (const auto& input_col : ra_exe_unit_with_deleted.input_col_descs) {
4501  if (input_col.get()->getColId() == deleted_cd->columnId &&
4502  input_col.get()->getScanDesc().getTableKey() == table_key &&
4503  input_col.get()->getScanDesc().getNestLevel() == input_table.getNestLevel()) {
4504  found = true;
4505  add_deleted_col_to_map(deleted_cols_map, deleted_cd, table_key);
4506  break;
4507  }
4508  }
4509  if (!found) {
4510  // add deleted column
4511  ra_exe_unit_with_deleted.input_col_descs.emplace_back(
4512  new InputColDescriptor(deleted_cd->columnId,
4513  deleted_cd->tableId,
4514  table_key.db_id,
4515  input_table.getNestLevel()));
4516  add_deleted_col_to_map(deleted_cols_map, deleted_cd, table_key);
4517  }
4518  }
4519  return std::make_tuple(ra_exe_unit_with_deleted, deleted_cols_map);
4520 }
std::unordered_map< shared::TableKey, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
static SysCatalog & instance()
Definition: SysCatalog.h:343
void add_deleted_col_to_map(PlanState::DeletedColumnsMap &deleted_cols_map, const ColumnDescriptor *deleted_cd, const shared::TableKey &table_key)
Definition: Execute.cpp:4463
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291
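Callers unpack the returned tuple; a minimal sketch of the pattern, grounded only in the signature above (variable names are illustrative):

  const auto [ra_exe_unit_with_deleted, deleted_cols_map] =
      addDeletedColumn(ra_exe_unit, co);
  // With co.filter_on_deleted_column == false the unit comes back unchanged and the map
  // is empty; otherwise each input table's deleted column is appended to input_col_descs
  // and recorded in deleted_cols_map.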


llvm::Value * Executor::addJoinLoopIterator ( const std::vector< llvm::Value * > &  prev_iters,
const size_t  level_idx 
)
private

Definition at line 1186 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

1187  {
1188  AUTOMATIC_IR_METADATA(cgen_state_.get());
1189  // Iterators are added for loop-outer joins when the head of the loop is generated,
1190  // then once again when the body is generated. Allow this instead of special handling
1191  // of call sites.
1192  const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
1193  if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
1194  return it->second;
1195  }
1196  CHECK(!prev_iters.empty());
1197  llvm::Value* matching_row_index = prev_iters.back();
1198  const auto it_ok =
1199  cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
1200  CHECK(it_ok.second);
1201  return matching_row_index;
1202 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291
void Executor::addToCardinalityCache ( const CardinalityCacheKey cache_key,
const size_t  cache_value 
)

Definition at line 5289 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, recycler_mutex_, and VLOG.

5290  {
5291  if (g_use_estimator_result_cache) {
5292  heavyai::unique_lock<heavyai::shared_mutex> lock(recycler_mutex_);
5293  cardinality_cache_[cache_key] = cache_value;
5294  VLOG(1) << "Put estimated cardinality to the cache";
5295  }
5296 }
std::unique_lock< T > unique_lock
static std::unordered_map< CardinalityCacheKey, size_t > cardinality_cache_
Definition: Execute.h:1607
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1605
bool g_use_estimator_result_cache
Definition: Execute.cpp:139
#define VLOG(n)
Definition: Logger.h:388
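Together with getCachedCardinality(), this forms a simple put/get cache keyed by CardinalityCacheKey. A hedged sketch (assumes g_use_estimator_result_cache is enabled; variable names are illustrative):

  // Store an estimated cardinality for a query plan ...
  executor->addToCardinalityCache(cache_key, estimated_num_rows);

  // ... and look it up later. CachedCardinality is std::pair<bool, size_t>:
  // .first says whether the key was found, .second holds the cached value.
  const Executor::CachedCardinality cached = executor->getCachedCardinality(cache_key);
  if (cached.first) {
    max_groups_buffer_entry_guess = cached.second;
  }
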
bool Executor::addToQuerySessionList ( const QuerySessionId query_session,
const std::string &  query_str,
const std::string &  submitted,
const size_t  executor_id,
const QuerySessionStatus::QueryStatus  query_status,
heavyai::unique_lock< heavyai::shared_mutex > &  write_lock 
)

Definition at line 5120 of file Execute.cpp.

References queries_interrupt_flag_, and queries_session_map_.

Referenced by enrollQuerySession().

5126  {
5127  // an internal API that enrolls the query session into the Executor's session map
5128  if (queries_session_map_.count(query_session)) {
5129  if (queries_session_map_.at(query_session).count(submitted_time_str)) {
5130  queries_session_map_.at(query_session).erase(submitted_time_str);
5131  queries_session_map_.at(query_session)
5132  .emplace(submitted_time_str,
5133  QuerySessionStatus(query_session,
5134  executor_id,
5135  query_str,
5136  submitted_time_str,
5137  query_status));
5138  } else {
5139  queries_session_map_.at(query_session)
5140  .emplace(submitted_time_str,
5141  QuerySessionStatus(query_session,
5142  executor_id,
5143  query_str,
5144  submitted_time_str,
5145  query_status));
5146  }
5147  } else {
5148  std::map<std::string, QuerySessionStatus> executor_per_query_map;
5149  executor_per_query_map.emplace(
5150  submitted_time_str,
5151  QuerySessionStatus(
5152  query_session, executor_id, query_str, submitted_time_str, query_status));
5153  queries_session_map_.emplace(query_session, executor_per_query_map);
5154  }
5155  return queries_interrupt_flag_.emplace(query_session, false).second;
5156 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578


void Executor::addTransientStringLiterals ( const RelAlgExecutionUnit ra_exe_unit,
const std::shared_ptr< RowSetMemoryOwner > &  row_set_mem_owner 
)

Definition at line 2523 of file Execute.cpp.

References CHECK, getStringDictionaryProxy(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kMODE, kSAMPLE, kSINGLE_VALUE, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::target_exprs_union, and ScalarExprVisitor< T >::visit().

2525  {
2526  TransientDictIdVisitor dict_id_visitor;
2527 
2528  auto visit_expr =
2529  [this, &dict_id_visitor, &row_set_mem_owner](const Analyzer::Expr* expr) {
2530  if (!expr) {
2531  return;
2532  }
2533  const auto& dict_key = dict_id_visitor.visit(expr);
2534  if (dict_key.dict_id >= 0) {
2535  auto sdp = getStringDictionaryProxy(dict_key, row_set_mem_owner, true);
2536  CHECK(sdp);
2537  TransientStringLiteralsVisitor visitor(sdp, this);
2538  visitor.visit(expr);
2539  }
2540  };
2541 
2542  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2543  visit_expr(group_expr.get());
2544  }
2545 
2546  for (const auto& group_expr : ra_exe_unit.quals) {
2547  visit_expr(group_expr.get());
2548  }
2549 
2550  for (const auto& group_expr : ra_exe_unit.simple_quals) {
2551  visit_expr(group_expr.get());
2552  }
2553 
2554  const auto visit_target_expr = [&](const Analyzer::Expr* target_expr) {
2555  const auto& target_type = target_expr->get_type_info();
2556  if (!target_type.is_string() || target_type.get_compression() == kENCODING_DICT) {
2557  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2558  if (agg_expr) {
2559  // The following agg types require taking into account transient string values
2560  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kSINGLE_VALUE ||
2561  agg_expr->get_aggtype() == kSAMPLE || agg_expr->get_aggtype() == kMODE) {
2562  visit_expr(agg_expr->get_arg());
2563  }
2564  } else {
2565  visit_expr(target_expr);
2566  }
2567  }
2568  };
2569  const auto& target_exprs = ra_exe_unit.target_exprs;
2570  std::for_each(target_exprs.begin(), target_exprs.end(), visit_target_expr);
2571  const auto& target_exprs_union = ra_exe_unit.target_exprs_union;
2572  std::for_each(target_exprs_union.begin(), target_exprs_union.end(), visit_target_expr);
2573 }
std::vector< Analyzer::Expr * > target_exprs
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
T visit(const Analyzer::Expr *expr) const
StringDictionaryProxy * getStringDictionaryProxy(const shared::StringDictKey &dict_key, const bool with_generation) const
Definition: Execute.h:578
std::vector< Analyzer::Expr * > target_exprs_union
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:86
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals


void Executor::addUdfIrToModule ( const std::string &  udf_ir_filename,
const bool  is_cuda_ir 
)
static

Definition at line 1956 of file NativeCodegen.cpp.

Referenced by DBHandler::initialize().

1957  {
1958  extension_module_sources[is_cuda_ir
1959  ? Executor::ExtModuleKinds::udf_gpu_module
1960  : Executor::ExtModuleKinds::udf_cpu_module] =
1961  udf_ir_filename;
1962 }
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:528
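A sketch of load-time UDF registration as a server entry point such as DBHandler::initialize() might perform it; the file paths are placeholders:

  // Register previously generated LLVM IR for load-time UDFs. The flag selects the slot:
  // false -> ExtModuleKinds::udf_cpu_module, true -> ExtModuleKinds::udf_gpu_module.
  Executor::addUdfIrToModule("/path/to/udf.bc", /*is_cuda_ir=*/false);
  Executor::addUdfIrToModule("/path/to/udf_cuda.bc", /*is_cuda_ir=*/true);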


llvm::Value * Executor::aggregateWindowStatePtr ( CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 242 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, CodegenUtil::createPtrWithHoistedMemoryAddr(), anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kFLOAT, and WindowFunctionContext::NUM_EXECUTION_DEVICES.

243  {
244  AUTOMATIC_IR_METADATA(cgen_state_.get());
245  const auto window_func_context =
246  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
247  const auto window_func = window_func_context->getWindowFunction();
248  const auto arg_ti = get_adjusted_window_type_info(window_func);
249  llvm::Type* aggregate_state_type =
250  arg_ti.get_type() == kFLOAT
251  ? llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0)
252  : llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
253  const auto aggregate_state_i64 = cgen_state_->llInt(
254  reinterpret_cast<const int64_t>(window_func_context->aggregateState()));
255  return CodegenUtil::createPtrWithHoistedMemoryAddr(
256  cgen_state_.get(),
257  code_generator,
258  co,
259  aggregate_state_i64,
260  aggregate_state_type,
261  WindowFunctionContext::NUM_EXECUTION_DEVICES)
262  .front();
263 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)


static size_t Executor::align ( const size_t  off_in,
const size_t  alignment 
)
inline static private

Definition at line 1468 of file Execute.h.

Referenced by serializeLiterals().

1468  {
1469  size_t off = off_in;
1470  if (off % alignment != 0) {
1471  off += (alignment - off % alignment);
1472  }
1473  return off;
1474  }
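The helper rounds an offset up to the next multiple of the alignment; serializeLiterals() uses it when laying out literal values in the serialized buffer. An equivalent standalone sketch of the behavior (illustrative, since align() itself is private):

  size_t round_up(const size_t off_in, const size_t alignment) {
    return off_in % alignment == 0 ? off_in : off_in + (alignment - off_in % alignment);
  }
  // round_up(13, 8) == 16, round_up(16, 8) == 16, round_up(0, 8) == 0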


CurrentQueryStatus Executor::attachExecutorToQuerySession ( const QuerySessionId query_session_id,
const std::string &  query_str,
const std::string &  query_submitted_time 
)

Definition at line 5018 of file Execute.cpp.

References executor_id_, executor_session_mutex_, updateQuerySessionExecutorAssignment(), and updateQuerySessionStatusWithLock().

5021  {
5022  if (!query_session_id.empty()) {
5023  // if session is valid, do update 1) the exact executor id and 2) query status
5024  heavyai::unique_lock<heavyai::shared_mutex> write_lock(executor_session_mutex_);
5025  updateQuerySessionExecutorAssignment(
5026  query_session_id, query_submitted_time, executor_id_, write_lock);
5027  updateQuerySessionStatusWithLock(query_session_id,
5028  query_submitted_time,
5029  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR,
5030  write_lock);
5031  }
5032  return {query_session_id, query_str};
5033 }
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5158
const ExecutorId executor_id_
Definition: Execute.h:1476
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5184
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
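A hedged sketch of the surrounding session lifecycle, using only members declared above (the control flow is illustrative, not quoted from the engine):

  // 1. Bind this executor to the session and mark the query PENDING_EXECUTOR.
  const auto current_query_status = executor->attachExecutorToQuerySession(
      query_session_id, query_str, query_submitted_time);

  // 2. Bail out early if the session was interrupted while queued.
  executor->checkPendingQueryStatus(query_session_id);

  // 3. ... execute the query ...

  // 4. Remove the session entry once the query finishes or fails.
  executor->clearQuerySessionStatus(query_session_id, query_submitted_time);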


void Executor::AutoTrackBuffersInRuntimeIR ( )
private

Definition at line 2303 of file NativeCodegen.cpp.

2303  {
2304  llvm::Module* M = cgen_state_->module_;
2305  if (M->getFunction("allocate_varlen_buffer") == nullptr) {
2306  return;
2307  }
2308 
2309  // read metadata
2310  bool should_track = false;
2311  auto* flag = M->getModuleFlag("manage_memory_buffer");
2312  if (auto* cnt = llvm::mdconst::extract_or_null<llvm::ConstantInt>(flag)) {
2313  if (cnt->getZExtValue() == 1) {
2314  should_track = true;
2315  }
2316  }
2317 
2318  if (!should_track) {
2319  // metadata is not present
2320  return;
2321  }
2322 
2323  LOG(INFO) << "Found 'manage_memory_buffer' metadata.";
2324  llvm::SmallVector<llvm::CallInst*, 4> calls_to_analyze;
2325 
2326  for (llvm::Function& F : *M) {
2327  for (llvm::BasicBlock& BB : F) {
2328  for (llvm::Instruction& I : BB) {
2329  if (llvm::CallInst* CI = llvm::dyn_cast<llvm::CallInst>(&I)) {
2330  // Keep track of calls to "allocate_varlen_buffer" for later processing
2331  auto const called_func_name = CodegenUtil::getCalledFunctionName(*CI);
2332  if (called_func_name && *called_func_name == "allocate_varlen_buffer") {
2333  calls_to_analyze.push_back(CI);
2334  }
2335  }
2336  }
2337  }
2338  }
2339 
2340  // for each call to "allocate_varlen_buffer", check if there's a corresponding
2341  // call to "register_buffer_with_executor_rsm". If not, add a call to it
2342  llvm::IRBuilder<> Builder(cgen_state_->context_);
2343  auto i64 = get_int_type(64, cgen_state_->context_);
2344  auto i8p = get_int_ptr_type(8, cgen_state_->context_);
2345  auto void_ = llvm::Type::getVoidTy(cgen_state_->context_);
2346  llvm::FunctionType* fnty = llvm::FunctionType::get(void_, {i64, i8p}, false);
2347  llvm::FunctionCallee register_buffer_fn =
2348  M->getOrInsertFunction("register_buffer_with_executor_rsm", fnty, {});
2349 
2350  int64_t executor_addr = reinterpret_cast<int64_t>(this);
2351  for (llvm::CallInst* CI : calls_to_analyze) {
2352  bool found = false;
2353  // for each user of the function, check if its a callinst
2354  // and if the callinst is calling "register_buffer_with_executor_rsm"
2355  // if no such instruction exist, add one registering the buffer
2356  for (llvm::User* U : CI->users()) {
2357  if (llvm::CallInst* call = llvm::dyn_cast<llvm::CallInst>(U)) {
2358  auto const func_name = CodegenUtil::getCalledFunctionName(*call);
2359  if (func_name && *func_name == "register_buffer_with_executor_rsm") {
2360  found = true;
2361  break;
2362  }
2363  }
2364  }
2365  if (!found) {
2366  Builder.SetInsertPoint(CI->getNextNode());
2367  Builder.CreateCall(register_buffer_fn,
2368  {ll_int(executor_addr, cgen_state_->context_), CI});
2369  }
2370  }
2371 }
std::optional< std::string_view > getCalledFunctionName(llvm::CallInst &call_inst)
#define LOG(tag)
Definition: Logger.h:285
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Type * get_int_ptr_type(const int width, llvm::LLVMContext &context)
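The pass is a no-op unless the module carries the "manage_memory_buffer" flag; a hedged sketch of how a generated module might opt in, using the standard llvm::Module API (shown for illustration, not taken from this codebase):

  // Mark a module so AutoTrackBuffersInRuntimeIR() pairs every allocate_varlen_buffer
  // call with a register_buffer_with_executor_rsm call.
  module->addModuleFlag(llvm::Module::ModFlagBehavior::Error, "manage_memory_buffer", 1);
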
unsigned Executor::blockSize ( ) const

Definition at line 4366 of file Execute.cpp.

References block_size_x_, CHECK, data_mgr_, CudaMgr_Namespace::CudaMgr::getAllDeviceProperties(), and Data_Namespace::DataMgr::getCudaMgr().

Referenced by collectAllDeviceShardedTopResults(), executePlanWithGroupBy(), executePlanWithoutGroupBy(), executeTableFunction(), executeWorkUnitImpl(), reduceMultiDeviceResults(), reduceMultiDeviceResultSets(), and resultsUnion().

4366  {
4367  CHECK(data_mgr_);
4368  const auto cuda_mgr = data_mgr_->getCudaMgr();
4369  if (!cuda_mgr) {
4370  return 0;
4371  }
4372  const auto& dev_props = cuda_mgr->getAllDeviceProperties();
4373  return block_size_x_ ? block_size_x_ : dev_props.front().maxThreadsPerBlock;
4374 }
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:177
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
unsigned block_size_x_
Definition: Execute.h:1552
#define CHECK(condition)
Definition: Logger.h:291
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:134
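A hedged sketch of how callers combine this with gridSize() when sizing GPU work (blockSize() returns 0 on CPU-only configurations, so that case needs handling):

  const unsigned threads_per_block = executor->blockSize();  // 0 => no CUDA devices
  const unsigned grid_blocks = executor->gridSize();
  if (threads_per_block == 0 || grid_blocks == 0) {
    // fall back to CPU execution
  }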


std::shared_ptr< HashJoin > Executor::buildCurrentLevelHashTable ( const JoinCondition current_level_join_conditions,
size_t  level_idx,
RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache,
std::vector< std::string > &  fail_reasons 
)
private

Definition at line 1027 of file IRCodegen.cpp.

References anonymous_namespace{IRCodegen.cpp}::add_qualifier_to_execution_unit(), AUTOMATIC_IR_METADATA, anonymous_namespace{IRCodegen.cpp}::check_valid_join_qual(), Data_Namespace::CPU_LEVEL, CompilationOptions::device_type, Executor::JoinHashTableOrError::fail_reason, GPU, Data_Namespace::GPU_LEVEL, Executor::JoinHashTableOrError::hash_table, RelAlgExecutionUnit::hash_table_build_plan_dag, IS_EQUIVALENCE, LEFT, OneToOne, JoinCondition::quals, RelAlgExecutionUnit::query_hint, RelAlgExecutionUnit::table_id_to_node_map, JoinCondition::type, and VLOG.

1034  {
1035  AUTOMATIC_IR_METADATA(cgen_state_.get());
1036  std::shared_ptr<HashJoin> current_level_hash_table;
1037  auto handleNonHashtableQual = [&ra_exe_unit, &level_idx, this](
1038  JoinType join_type,
1039  std::shared_ptr<Analyzer::Expr> qual) {
1040  if (join_type == JoinType::LEFT) {
1041  plan_state_->addNonHashtableQualForLeftJoin(level_idx, qual);
1042  } else {
1043  add_qualifier_to_execution_unit(ra_exe_unit, qual);
1044  }
1045  };
1046  for (const auto& join_qual : current_level_join_conditions.quals) {
1047  auto qual_bin_oper = std::dynamic_pointer_cast<Analyzer::BinOper>(join_qual);
1048  if (current_level_hash_table || !qual_bin_oper ||
1049  !IS_EQUIVALENCE(qual_bin_oper->get_optype())) {
1050  handleNonHashtableQual(current_level_join_conditions.type, join_qual);
1051  if (!current_level_hash_table) {
1052  fail_reasons.emplace_back("No equijoin expression found");
1053  }
1054  continue;
1055  }
1056  check_valid_join_qual(qual_bin_oper);
1057  JoinHashTableOrError hash_table_or_error;
1058  if (!current_level_hash_table) {
1059  hash_table_or_error = buildHashTableForQualifier(
1060  qual_bin_oper,
1061  query_infos,
1062  co.device_type == ExecutorDeviceType::GPU ? Data_Namespace::GPU_LEVEL
1063  : Data_Namespace::CPU_LEVEL,
1064  current_level_join_conditions.type,
1065  HashType::OneToOne,
1066  column_cache,
1067  ra_exe_unit.hash_table_build_plan_dag,
1068  ra_exe_unit.query_hint,
1069  ra_exe_unit.table_id_to_node_map);
1070  current_level_hash_table = hash_table_or_error.hash_table;
1071  }
1072  if (hash_table_or_error.hash_table) {
1073  plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.hash_table);
1074  plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
1075  } else {
1076  fail_reasons.push_back(hash_table_or_error.fail_reason);
1077  if (!current_level_hash_table) {
1078  VLOG(2) << "Building a hashtable based on a qual " << qual_bin_oper->toString()
1079  << " fails: " << hash_table_or_error.fail_reason;
1080  }
1081  handleNonHashtableQual(current_level_join_conditions.type, qual_bin_oper);
1082  }
1083  }
1084  return current_level_hash_table;
1085 }
JoinType
Definition: sqldefs.h:238
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:72
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
TableIdToNodeMap table_id_to_node_map
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
Definition: IRCodegen.cpp:535
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:4309
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define VLOG(n)
Definition: Logger.h:388
HashTableBuildDagMap hash_table_build_plan_dag
void check_valid_join_qual(std::shared_ptr< Analyzer::BinOper > &bin_oper)
Definition: IRCodegen.cpp:586

+ Here is the call graph for this function:
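
In short, the loop above promotes at most one equivalence qualifier per nesting level into a hash table and routes every other qualifier either into the LEFT-join residual list or back into the execution unit's filters. A hedged sketch of just that partitioning decision, with simplified stand-ins instead of the real Analyzer::BinOper machinery:

#include <string>
#include <vector>

// Illustrative stand-in; the real code inspects Analyzer::BinOper optypes.
struct Qual {
  bool is_equivalence_bin_oper;
  std::string text;
};

struct PartitionedQuals {
  const Qual* hash_join_candidate{nullptr};  // at most one qual feeds the hash table
  std::vector<const Qual*> residual;         // handled as extra join conditions
};

PartitionedQuals partition_quals(const std::vector<Qual>& quals) {
  PartitionedQuals out;
  for (const auto& q : quals) {
    if (!out.hash_join_candidate && q.is_equivalence_bin_oper) {
      out.hash_join_candidate = &q;  // first equivalence qual builds the hash table
    } else {
      out.residual.push_back(&q);    // everything else becomes a residual condition
    }
  }
  return out;
}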

Executor::JoinHashTableOrError Executor::buildHashTableForQualifier ( const std::shared_ptr< Analyzer::BinOper > &  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
ColumnCacheMap column_cache,
const HashTableBuildDagMap hashtable_build_dag_map,
const RegisteredQueryHint query_hint,
const TableIdToNodeMap table_id_to_node_map 
)
private

Definition at line 4309 of file Execute.cpp.

References deviceCountForMemoryLevel(), g_enable_bbox_intersect_hashjoin, g_enable_dynamic_watchdog, HashJoin::getInstance(), and interrupted_.

4318  {
4319  if (!g_enable_bbox_intersect_hashjoin && qual_bin_oper->is_bbox_intersect_oper()) {
4320  return {nullptr,
4321  "Bounding box intersection disabled, attempting to fall back to loop join"};
4322  }
4323  if (g_enable_dynamic_watchdog && interrupted_.load()) {
4324  throw QueryExecutionError(ErrorCode::INTERRUPTED);
4325  }
4326  try {
4327  auto tbl = HashJoin::getInstance(qual_bin_oper,
4328  query_infos,
4329  memory_level,
4330  join_type,
4331  preferred_hash_type,
4332  deviceCountForMemoryLevel(memory_level),
4333  column_cache,
4334  this,
4335  hashtable_build_dag_map,
4336  query_hint,
4337  table_id_to_node_map);
4338  return {tbl, ""};
4339  } catch (const HashJoinFail& e) {
4340  return {nullptr, e.what()};
4341  }
4342 }
std::atomic< bool > interrupted_
Definition: Execute.h:1543
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
bool g_enable_bbox_intersect_hashjoin
Definition: Execute.cpp:109
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:1330
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query&#39;s parse tree etc.
Definition: HashJoin.cpp:285

+ Here is the call graph for this function:
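
Rather than propagating HashJoinFail to the caller, the method converts it into a value-or-reason pair so that codegen can record the failure string and fall back to a loop join. A small self-contained sketch of that pattern (try_build and simulate_failure are hypothetical names; the real code calls HashJoin::getInstance):

#include <memory>
#include <stdexcept>
#include <string>

struct HashTable {};  // stand-in for a built hash join table

struct HashTableOrError {
  std::shared_ptr<HashTable> table;
  std::string fail_reason;  // non-empty only when table is null
};

HashTableOrError try_build(bool simulate_failure) {
  try {
    if (simulate_failure) {
      throw std::runtime_error("Could not build a 1-to-1 correspondence for columns");
    }
    return {std::make_shared<HashTable>(), ""};
  } catch (const std::exception& e) {
    return {nullptr, e.what()};  // caller logs the reason and falls back to a loop join
  }
}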

JoinLoop::HoistedFiltersCallback Executor::buildHoistLeftHandSideFiltersCb ( const RelAlgExecutionUnit ra_exe_unit,
const size_t  level_idx,
const shared::TableKey inner_table_key,
const CompilationOptions co 
)
private

Definition at line 859 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CodeGenerator::codegen(), g_enable_left_join_filter_hoisting, RelAlgExecutionUnit::join_quals, LEFT, RelAlgExecutionUnit::quals, RelAlgExecutionUnit::simple_quals, CodeGenerator::toBool(), and VLOG.

863  {
864  if (!g_enable_left_join_filter_hoisting) {
865  return nullptr;
866  }
867 
868  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
869  if (level_idx == 0 && current_level_join_conditions.type == JoinType::LEFT) {
870  const auto& condition = current_level_join_conditions.quals.front();
871  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(condition.get());
872  CHECK(bin_oper) << condition->toString();
873  const auto rhs =
874  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_right_operand());
875  const auto lhs =
876  dynamic_cast<const Analyzer::ColumnVar*>(bin_oper->get_left_operand());
877  if (lhs && rhs && lhs->getTableKey() != rhs->getTableKey()) {
878  const Analyzer::ColumnVar* selected_lhs{nullptr};
879  // grab the left hand side column -- this is somewhat similar to normalize column
880  // pair, and a better solution may be to hoist that function out of the join
881  // framework and normalize columns at the top of build join loops
882  if (lhs->getTableKey() == inner_table_id) {
883  selected_lhs = rhs;
884  } else if (rhs->getTableKey() == inner_table_id) {
885  selected_lhs = lhs;
886  }
887  if (selected_lhs) {
888  std::list<std::shared_ptr<Analyzer::Expr>> hoisted_quals;
889  // get all LHS-only filters
890  auto should_hoist_qual = [&hoisted_quals](const auto& qual,
891  const shared::TableKey& table_key) {
892  CHECK(qual);
893 
894  ExprTableIdVisitor visitor;
895  const auto table_keys = visitor.visit(qual.get());
896  if (table_keys.size() == 1 && table_keys.find(table_key) != table_keys.end()) {
897  hoisted_quals.push_back(qual);
898  }
899  };
900  for (const auto& qual : ra_exe_unit.simple_quals) {
901  should_hoist_qual(qual, selected_lhs->getTableKey());
902  }
903  for (const auto& qual : ra_exe_unit.quals) {
904  should_hoist_qual(qual, selected_lhs->getTableKey());
905  }
906 
907  // build the filters callback and return it
908  if (!hoisted_quals.empty()) {
909  return [this, hoisted_quals, co](llvm::BasicBlock* true_bb,
910  llvm::BasicBlock* exit_bb,
911  const std::string& loop_name,
912  llvm::Function* parent_func,
913  CgenState* cgen_state) -> llvm::BasicBlock* {
914  // make sure we have quals to hoist
915  bool has_quals_to_hoist = false;
916  for (const auto& qual : hoisted_quals) {
917  // check to see if the filter was previously hoisted. if all filters were
918  // previously hoisted, this callback becomes a noop
919  if (plan_state_->hoisted_filters_.count(qual) == 0) {
920  has_quals_to_hoist = true;
921  break;
922  }
923  }
924 
925  if (!has_quals_to_hoist) {
926  return nullptr;
927  }
928 
929  AUTOMATIC_IR_METADATA(cgen_state);
930 
931  llvm::IRBuilder<>& builder = cgen_state->ir_builder_;
932  auto& context = builder.getContext();
933 
934  const auto filter_bb =
935  llvm::BasicBlock::Create(context,
936  "hoisted_left_join_filters_" + loop_name,
937  parent_func,
938  /*insert_before=*/true_bb);
939  builder.SetInsertPoint(filter_bb);
940 
941  llvm::Value* filter_lv = cgen_state_->llBool(true);
942  CodeGenerator code_generator(this);
944  for (const auto& qual : hoisted_quals) {
945  if (plan_state_->hoisted_filters_.insert(qual).second) {
946  // qual was inserted into the hoisted filters map, which means we have not
947  // seen this qual before. Generate filter.
948  VLOG(1) << "Generating code for hoisted left hand side qualifier "
949  << qual->toString();
950  auto cond = code_generator.toBool(
951  code_generator.codegen(qual.get(), true, co).front());
952  filter_lv = builder.CreateAnd(filter_lv, cond);
953  }
954  }
955  CHECK(filter_lv->getType()->isIntegerTy(1));
956 
957  builder.CreateCondBr(filter_lv, true_bb, exit_bb);
958  return filter_bb;
959  };
960  }
961  }
962  }
963  }
964  return nullptr;
965 }
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:107
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:291
#define VLOG(n)
Definition: Logger.h:388
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:
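
The hoisting test in should_hoist_qual is: a filter is safe to evaluate before the left join loop only when it references exactly one table and that table is the selected left-hand side. A hedged sketch of that check with simplified types (the real code derives the referenced tables via ExprTableIdVisitor):

#include <set>
#include <vector>

using TableKey = int;  // stand-in for shared::TableKey

struct Filter {
  std::set<TableKey> referenced_tables;  // would come from ExprTableIdVisitor::visit()
};

// Collect the filters that touch only `lhs_table`; only those may be hoisted
// above the left join loop.
std::vector<const Filter*> collect_hoistable(const std::vector<Filter>& filters,
                                             TableKey lhs_table) {
  std::vector<const Filter*> hoisted;
  for (const auto& f : filters) {
    if (f.referenced_tables.size() == 1 && f.referenced_tables.count(lhs_table)) {
      hoisted.push_back(&f);
    }
  }
  return hoisted;
}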

std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> Executor::buildIsDeletedCb ( const RelAlgExecutionUnit ra_exe_unit,
const size_t  level_idx,
const CompilationOptions co 
)
private

Definition at line 968 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, CodeGenerator::codegen(), CompilationOptions::filter_on_deleted_column, RelAlgExecutionUnit::input_descs, TABLE, and CodeGenerator::toBool().

970  {
972  if (!co.filter_on_deleted_column) {
973  return nullptr;
974  }
975  CHECK_LT(level_idx + 1, ra_exe_unit.input_descs.size());
976  const auto input_desc = ra_exe_unit.input_descs[level_idx + 1];
977  if (input_desc.getSourceType() != InputSourceType::TABLE) {
978  return nullptr;
979  }
980 
981  const auto deleted_cd = plan_state_->getDeletedColForTable(input_desc.getTableKey());
982  if (!deleted_cd) {
983  return nullptr;
984  }
985  CHECK(deleted_cd->columnType.is_boolean());
986  const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(
987  deleted_cd->columnType,
988  shared::ColumnKey{input_desc.getTableKey(), deleted_cd->columnId},
989  input_desc.getNestLevel());
990  return [this, deleted_expr, level_idx, &co](const std::vector<llvm::Value*>& prev_iters,
991  llvm::Value* have_more_inner_rows) {
992  const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
993  // Avoid fetching the deleted column from a position which is not valid.
994  // An invalid position can be returned by a one to one hash lookup (negative)
995  // or at the end of iteration over a set of matching values.
996  llvm::Value* is_valid_it{nullptr};
997  if (have_more_inner_rows) {
998  is_valid_it = have_more_inner_rows;
999  } else {
1000  is_valid_it = cgen_state_->ir_builder_.CreateICmp(
1001  llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
1002  }
1003  const auto it_valid_bb = llvm::BasicBlock::Create(
1004  cgen_state_->context_, "it_valid", cgen_state_->current_func_);
1005  const auto it_not_valid_bb = llvm::BasicBlock::Create(
1006  cgen_state_->context_, "it_not_valid", cgen_state_->current_func_);
1007  cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
1008  const auto row_is_deleted_bb = llvm::BasicBlock::Create(
1009  cgen_state_->context_, "row_is_deleted", cgen_state_->current_func_);
1010  cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
1011  CodeGenerator code_generator(this);
1012  const auto row_is_deleted = code_generator.toBool(
1013  code_generator.codegen(deleted_expr.get(), true, co).front());
1014  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
1015  cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
1016  const auto row_is_deleted_default = cgen_state_->llBool(false);
1017  cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
1018  cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
1019  auto row_is_deleted_or_default =
1020  cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
1021  row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
1022  row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
1023  return row_is_deleted_or_default;
1024  };
1025 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:303
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1186
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:
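
Stripped of the IR plumbing, the callback emits the equivalent of: if the inner-row position is invalid, treat the row as not deleted; otherwise load the deleted column and use that. A scalar sketch of the control flow the PHI node merges (names are illustrative, not the generated symbol names):

#include <cstdint>
#include <functional>

// Scalar analogue of the emitted IR: the PHI node merges the flag loaded on the
// valid-iterator path with a constant false on the invalid-iterator path.
bool row_is_deleted_or_default(int64_t matching_row_index,
                               bool have_more_inner_rows,
                               const std::function<bool(int64_t)>& load_deleted_flag) {
  const bool is_valid_it = have_more_inner_rows || matching_row_index >= 0;
  if (!is_valid_it) {
    return false;  // it_not_valid_bb: default to "not deleted"
  }
  return load_deleted_flag(matching_row_index);  // it_valid_bb: codegen of deleted_expr
}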

std::vector< JoinLoop > Executor::buildJoinLoops ( RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co,
const ExecutionOptions eo,
const std::vector< InputTableInfo > &  query_infos,
ColumnCacheMap column_cache 
)
private

Definition at line 610 of file IRCodegen.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_LT, CodeGenerator::codegen(), INJECT_TIMER, CgenState::ir_builder_, RelAlgExecutionUnit::join_quals, LEFT, PlanState::left_join_non_hashtable_quals_, CgenState::llBool(), MultiSet, OneToOne, CgenState::outer_join_match_found_per_level_, CodeGenerator::plan_state_, Set, Singleton, JoinLoopDomain::slot_lookup_result, CodeGenerator::toBool(), and JoinLoopDomain::values_buffer.

615  {
618  std::vector<JoinLoop> join_loops;
619  for (size_t level_idx = 0, current_hash_table_idx = 0;
620  level_idx < ra_exe_unit.join_quals.size();
621  ++level_idx) {
622  const auto& current_level_join_conditions = ra_exe_unit.join_quals[level_idx];
623  std::vector<std::string> fail_reasons;
624  const auto current_level_hash_table =
625  buildCurrentLevelHashTable(current_level_join_conditions,
626  level_idx,
627  ra_exe_unit,
628  co,
629  query_infos,
630  column_cache,
631  fail_reasons);
632  const auto found_outer_join_matches_cb =
633  [this, level_idx](llvm::Value* found_outer_join_matches) {
634  CHECK_LT(level_idx, cgen_state_->outer_join_match_found_per_level_.size());
635  CHECK(!cgen_state_->outer_join_match_found_per_level_[level_idx]);
636  cgen_state_->outer_join_match_found_per_level_[level_idx] =
637  found_outer_join_matches;
638  };
639  const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
640  auto rem_left_join_quals_it =
641  plan_state_->left_join_non_hashtable_quals_.find(level_idx);
642  bool has_remaining_left_join_quals =
643  rem_left_join_quals_it != plan_state_->left_join_non_hashtable_quals_.end() &&
644  !rem_left_join_quals_it->second.empty();
645  const auto outer_join_condition_remaining_quals_cb =
646  [this, level_idx, &co](const std::vector<llvm::Value*>& prev_iters) {
647  // when we have multiple quals for the left join in the current join level
648  // we first try to build a hashtable by using one of the possible quals,
649  // and deal with remaining quals as extra join conditions
650  FetchCacheAnchor anchor(cgen_state_.get());
651  addJoinLoopIterator(prev_iters, level_idx + 1);
652  llvm::Value* left_join_cond = cgen_state_->llBool(true);
653  CodeGenerator code_generator(this);
654  auto it = plan_state_->left_join_non_hashtable_quals_.find(level_idx);
655  if (it != plan_state_->left_join_non_hashtable_quals_.end()) {
656  for (auto expr : it->second) {
657  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
658  left_join_cond,
659  code_generator.toBool(
660  code_generator.codegen(expr.get(), true, co).front()));
661  }
662  }
663  return left_join_cond;
664  };
665  if (current_level_hash_table) {
666  const auto hoisted_filters_cb = buildHoistLeftHandSideFiltersCb(
667  ra_exe_unit, level_idx, current_level_hash_table->getInnerTableId(), co);
668  if (current_level_hash_table->getHashType() == HashType::OneToOne) {
669  join_loops.emplace_back(
670  /*kind=*/JoinLoopKind::Singleton,
671  /*type=*/current_level_join_conditions.type,
672  /*iteration_domain_codegen=*/
673  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
674  const std::vector<llvm::Value*>& prev_iters) {
675  addJoinLoopIterator(prev_iters, level_idx);
676  JoinLoopDomain domain{{0}};
677  domain.slot_lookup_result =
678  current_level_hash_table->codegenSlot(co, current_hash_table_idx);
679  return domain;
680  },
681  /*outer_condition_match=*/
682  current_level_join_conditions.type == JoinType::LEFT &&
683  has_remaining_left_join_quals
684  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
685  outer_join_condition_remaining_quals_cb)
686  : nullptr,
687  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
688  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
689  : nullptr,
690  /*hoisted_filters=*/hoisted_filters_cb,
691  /*is_deleted=*/is_deleted_cb,
692  /*nested_loop_join=*/false);
693  } else if (auto range_join_table =
694  dynamic_cast<RangeJoinHashTable*>(current_level_hash_table.get())) {
695  join_loops.emplace_back(
696  /* kind= */ JoinLoopKind::MultiSet,
697  /* type= */ current_level_join_conditions.type,
698  /* iteration_domain_codegen= */
699  [this,
700  range_join_table,
701  current_hash_table_idx,
702  level_idx,
703  current_level_hash_table,
704  &co](const std::vector<llvm::Value*>& prev_iters) {
705  addJoinLoopIterator(prev_iters, level_idx);
706  JoinLoopDomain domain{{0}};
707  CHECK(!prev_iters.empty());
708  const auto matching_set = range_join_table->codegenMatchingSetWithOffset(
709  co, current_hash_table_idx, prev_iters.back());
710  domain.values_buffer = matching_set.elements;
711  domain.element_count = matching_set.count;
712  return domain;
713  },
714  /* outer_condition_match= */
715  current_level_join_conditions.type == JoinType::LEFT
716  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
717  outer_join_condition_remaining_quals_cb)
718  : nullptr,
719  /* found_outer_matches= */
720  current_level_join_conditions.type == JoinType::LEFT
721  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
722  : nullptr,
723  /* hoisted_filters= */ nullptr, // <<! TODO
724  /* is_deleted= */ is_deleted_cb,
725  /*nested_loop_join=*/false);
726  } else {
727  join_loops.emplace_back(
728  /*kind=*/JoinLoopKind::Set,
729  /*type=*/current_level_join_conditions.type,
730  /*iteration_domain_codegen=*/
731  [this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
732  const std::vector<llvm::Value*>& prev_iters) {
733  addJoinLoopIterator(prev_iters, level_idx);
734  JoinLoopDomain domain{{0}};
735  const auto matching_set = current_level_hash_table->codegenMatchingSet(
736  co, current_hash_table_idx);
737  domain.values_buffer = matching_set.elements;
738  domain.element_count = matching_set.count;
739  domain.error_code = matching_set.error_code;
740  return domain;
741  },
742  /*outer_condition_match=*/
743  current_level_join_conditions.type == JoinType::LEFT
744  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
745  outer_join_condition_remaining_quals_cb)
746  : nullptr,
747  /*found_outer_matches=*/current_level_join_conditions.type == JoinType::LEFT
748  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
749  : nullptr,
750  /*hoisted_filters=*/hoisted_filters_cb,
751  /*is_deleted=*/is_deleted_cb,
752  /*nested_loop_join=*/false);
753  }
754  ++current_hash_table_idx;
755  } else {
756  const auto fail_reasons_str = current_level_join_conditions.quals.empty()
757  ? "No equijoin expression found"
758  : boost::algorithm::join(fail_reasons, " | ");
759  check_if_loop_join_is_allowed(
760  ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
761  // Callback provided to the `JoinLoop` framework to evaluate the (outer) join
762  // condition.
763  VLOG(1) << "Unable to build hash table, falling back to loop join: "
764  << fail_reasons_str;
765  const auto outer_join_condition_cb =
766  [this, level_idx, &co, &current_level_join_conditions](
767  const std::vector<llvm::Value*>& prev_iters) {
768  // The values generated for the match path don't dominate all uses
769  // since on the non-match path nulls are generated. Reset the cache
770  // once the condition is generated to avoid incorrect reuse.
771  FetchCacheAnchor anchor(cgen_state_.get());
772  addJoinLoopIterator(prev_iters, level_idx + 1);
773  llvm::Value* left_join_cond = cgen_state_->llBool(true);
774  CodeGenerator code_generator(this);
775  for (auto expr : current_level_join_conditions.quals) {
776  left_join_cond = cgen_state_->ir_builder_.CreateAnd(
777  left_join_cond,
778  code_generator.toBool(
779  code_generator.codegen(expr.get(), true, co).front()));
780  }
781  return left_join_cond;
782  };
783  join_loops.emplace_back(
784  /*kind=*/JoinLoopKind::UpperBound,
785  /*type=*/current_level_join_conditions.type,
786  /*iteration_domain_codegen=*/
787  [this, level_idx](const std::vector<llvm::Value*>& prev_iters) {
788  addJoinLoopIterator(prev_iters, level_idx);
789  JoinLoopDomain domain{{0}};
790  auto* arg = get_arg_by_name(cgen_state_->row_func_, "num_rows_per_scan");
791  const auto rows_per_scan_ptr = cgen_state_->ir_builder_.CreateGEP(
792  arg->getType()->getScalarType()->getPointerElementType(),
793  arg,
794  cgen_state_->llInt(int32_t(level_idx + 1)));
795  domain.upper_bound = cgen_state_->ir_builder_.CreateLoad(
796  rows_per_scan_ptr->getType()->getPointerElementType(),
797  rows_per_scan_ptr,
798  "num_rows_per_scan");
799  return domain;
800  },
801  /*outer_condition_match=*/
802  current_level_join_conditions.type == JoinType::LEFT
803  ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
804  outer_join_condition_cb)
805  : nullptr,
806  /*found_outer_matches=*/
807  current_level_join_conditions.type == JoinType::LEFT
808  ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
809  : nullptr,
810  /*hoisted_filters=*/nullptr,
811  /*is_deleted=*/is_deleted_cb,
812  /*nested_loop_join=*/true);
813  }
814  }
815  return join_loops;
816 }
llvm::Value * values_buffer
Definition: JoinLoop.h:49
std::string join(T const &container, std::string const &delim)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
#define INJECT_TIMER(DESC)
Definition: measure.h:122
const JoinQualsPerNestingLevel join_quals
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * slot_lookup_result
Definition: JoinLoop.h:47
#define CHECK_LT(x, y)
Definition: Logger.h:303
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:1027
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1186
#define CHECK(condition)
Definition: Logger.h:291
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
Definition: IRCodegen.cpp:545
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:610
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:968
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
Definition: IRCodegen.cpp:859
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:
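
The per-level structure above reduces to a dispatch on the available hash table: a OneToOne table yields a Singleton loop, a range-join table a MultiSet loop, any other hash table a Set loop, and no hash table at all an UpperBound loop over num_rows_per_scan (a nested loop join). A compact sketch of that dispatch alone, with stand-in enums; the real code additionally wires up the iteration-domain, outer-condition, hoisted-filter and is-deleted callbacks for each loop:

enum class HashKind { None, OneToOne, OneToMany, Range };
enum class LoopKind { Singleton, Set, MultiSet, UpperBound };

// Simplified mirror of the loop-kind decision in buildJoinLoops.
LoopKind pick_loop_kind(HashKind hash_kind) {
  switch (hash_kind) {
    case HashKind::OneToOne:
      return LoopKind::Singleton;   // single matching row per probe
    case HashKind::Range:
      return LoopKind::MultiSet;    // matching set computed with an offset
    case HashKind::OneToMany:
      return LoopKind::Set;         // matching set per probe
    case HashKind::None:
    default:
      return LoopKind::UpperBound;  // fall back to a nested loop join
  }
}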

void Executor::buildSelectedFragsMapping ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
std::vector< size_t > &  local_col_to_frag_pos,
const std::list< std::shared_ptr< const InputColDescriptor >> &  col_global_ids,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3774 of file Execute.cpp.

References CHECK, CHECK_EQ, CHECK_LT, getFragmentCount(), RelAlgExecutionUnit::input_descs, and plan_state_.

Referenced by fetchChunks().

3779  {
3780  local_col_to_frag_pos.resize(plan_state_->global_to_local_col_ids_.size());
3781  size_t frag_pos{0};
3782  const auto& input_descs = ra_exe_unit.input_descs;
3783  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3784  const auto& table_key = input_descs[scan_idx].getTableKey();
3785  CHECK_EQ(selected_fragments[scan_idx].table_key, table_key);
3786  selected_fragments_crossjoin.push_back(
3787  getFragmentCount(selected_fragments, scan_idx, ra_exe_unit));
3788  for (const auto& col_id : col_global_ids) {
3789  CHECK(col_id);
3790  const auto& input_desc = col_id->getScanDesc();
3791  if (input_desc.getTableKey() != table_key ||
3792  input_desc.getNestLevel() != static_cast<int>(scan_idx)) {
3793  continue;
3794  }
3795  auto it = plan_state_->global_to_local_col_ids_.find(*col_id);
3796  CHECK(it != plan_state_->global_to_local_col_ids_.end());
3797  CHECK_LT(static_cast<size_t>(it->second),
3798  plan_state_->global_to_local_col_ids_.size());
3799  local_col_to_frag_pos[it->second] = frag_pos;
3800  }
3801  ++frag_pos;
3802  }
3803 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< InputDescriptor > input_descs
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define CHECK_LT(x, y)
Definition: Logger.h:303
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3760
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::buildSelectedFragsMappingForUnion ( std::vector< std::vector< size_t >> &  selected_fragments_crossjoin,
const FragmentsList selected_fragments,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 3805 of file Execute.cpp.

References RelAlgExecutionUnit::input_descs.

Referenced by fetchUnionChunks().

3808  {
3809  const auto& input_descs = ra_exe_unit.input_descs;
3810  for (size_t scan_idx = 0; scan_idx < input_descs.size(); ++scan_idx) {
3811  // selected_fragments is set in assignFragsToKernelDispatch execution_kernel.fragments
3812  if (selected_fragments[0].table_key == input_descs[scan_idx].getTableKey()) {
3813  selected_fragments_crossjoin.push_back({size_t(1)});
3814  }
3815  }
3816 }
std::vector< InputDescriptor > input_descs

+ Here is the caller graph for this function:

FragmentSkipStatus Executor::canSkipFragmentForFpQual ( const Analyzer::BinOper comp_expr,
const Analyzer::ColumnVar lhs_col,
const Fragmenter_Namespace::FragmentInfo fragment,
const Analyzer::Constant rhs_const 
) const
private

Definition at line 4598 of file Execute.cpp.

References CHECK, shared::ColumnKey::column_id, extract_max_stat_fp_type(), extract_min_stat_fp_type(), Analyzer::Constant::get_constval(), Analyzer::BinOper::get_optype(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap(), Analyzer::ColumnVar::getColumnKey(), INVALID, kDOUBLE, kEQ, kFLOAT, kGE, kGT, kLE, kLT, NOT_SKIPPABLE, and SKIPPABLE.

Referenced by skipFragment().

4602  {
4603  auto col_id = lhs_col->getColumnKey().column_id;
4604  auto chunk_meta_it = fragment.getChunkMetadataMap().find(col_id);
4605  if (chunk_meta_it == fragment.getChunkMetadataMap().end()) {
4606  return FragmentSkipStatus::INVALID;
4607  }
4608  double chunk_min{0.};
4609  double chunk_max{0.};
4610  const auto& chunk_type = lhs_col->get_type_info();
4611  chunk_min = extract_min_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4612  chunk_max = extract_max_stat_fp_type(chunk_meta_it->second->chunkStats, chunk_type);
4613  if (chunk_min > chunk_max) {
4614  return FragmentSkipStatus::INVALID;
4615  }
4616 
4617  const auto datum_fp = rhs_const->get_constval();
4618  const auto rhs_type = rhs_const->get_type_info().get_type();
4619  CHECK(rhs_type == kFLOAT || rhs_type == kDOUBLE);
4620 
4621  // Do we need to codegen the constant like the integer path does?
4622  const auto rhs_val = rhs_type == kFLOAT ? datum_fp.floatval : datum_fp.doubleval;
4623 
4624  // Todo: dedup the following comparison code with the integer/timestamp path; it is
4625  // slightly tricky to do cleanly as we do not have rowid on this path
4626  switch (comp_expr->get_optype()) {
4627  case kGE:
4628  if (chunk_max < rhs_val) {
4629  return FragmentSkipStatus::SKIPPABLE;
4630  }
4631  break;
4632  case kGT:
4633  if (chunk_max <= rhs_val) {
4634  return FragmentSkipStatus::SKIPPABLE;
4635  }
4636  break;
4637  case kLE:
4638  if (chunk_min > rhs_val) {
4639  return FragmentSkipStatus::SKIPPABLE;
4640  }
4641  break;
4642  case kLT:
4643  if (chunk_min >= rhs_val) {
4644  return FragmentSkipStatus::SKIPPABLE;
4645  }
4646  break;
4647  case kEQ:
4648  if (chunk_min > rhs_val || chunk_max < rhs_val) {
4649  return FragmentSkipStatus::SKIPPABLE;
4650  }
4651  break;
4652  default:
4653  break;
4654  }
4655  return FragmentSkipStatus::NOT_SKIPPABLE;
4656 }
double extract_max_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
Definition: sqldefs.h:37
Definition: sqldefs.h:38
Definition: sqldefs.h:32
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
SQLOps get_optype() const
Definition: Analyzer.h:452
double extract_min_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
const ChunkMetadataMap & getChunkMetadataMap() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
Definition: sqldefs.h:36
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198
Datum get_constval() const
Definition: Analyzer.h:348
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:35

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
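
The decision uses nothing but the fragment's per-chunk min/max statistics: a fragment is skippable when the constant lies entirely outside [chunk_min, chunk_max] for the given comparison operator, and invalid statistics make the fragment non-skippable by default. A self-contained sketch of the same decision table (the enums here are local stand-ins for FragmentSkipStatus and the SQL comparison ops):

enum class SkipStatus { Skippable, NotSkippable, Invalid };
enum class CmpOp { GE, GT, LE, LT, EQ };

// Decide from chunk statistics whether `col <op> rhs` can hold for any row of the
// fragment; if it cannot, the whole fragment may be skipped.
SkipStatus can_skip_fragment(CmpOp op, double chunk_min, double chunk_max, double rhs) {
  if (chunk_min > chunk_max) {
    return SkipStatus::Invalid;  // missing or inconsistent statistics
  }
  switch (op) {
    case CmpOp::GE:
      return chunk_max < rhs ? SkipStatus::Skippable : SkipStatus::NotSkippable;
    case CmpOp::GT:
      return chunk_max <= rhs ? SkipStatus::Skippable : SkipStatus::NotSkippable;
    case CmpOp::LE:
      return chunk_min > rhs ? SkipStatus::Skippable : SkipStatus::NotSkippable;
    case CmpOp::LT:
      return chunk_min >= rhs ? SkipStatus::Skippable : SkipStatus::NotSkippable;
    case CmpOp::EQ:
      return (chunk_min > rhs || chunk_max < rhs) ? SkipStatus::Skippable
                                                  : SkipStatus::NotSkippable;
  }
  return SkipStatus::NotSkippable;
}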

llvm::Value * Executor::castToFP ( llvm::Value *  value,
SQLTypeInfo const &  from_ti,
SQLTypeInfo const &  to_ti 
)
private

Definition at line 4401 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, exp_to_scale(), logger::FATAL, SQLTypeInfo::get_scale(), SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), SQLTypeInfo::is_number(), and LOG.

4403  {
4405  if (value->getType()->isIntegerTy() && from_ti.is_number() && to_ti.is_fp() &&
4406  (!from_ti.is_fp() || from_ti.get_size() != to_ti.get_size())) {
4407  llvm::Type* fp_type{nullptr};
4408  switch (to_ti.get_size()) {
4409  case 4:
4410  fp_type = llvm::Type::getFloatTy(cgen_state_->context_);
4411  break;
4412  case 8:
4413  fp_type = llvm::Type::getDoubleTy(cgen_state_->context_);
4414  break;
4415  default:
4416  LOG(FATAL) << "Unsupported FP size: " << to_ti.get_size();
4417  }
4418  value = cgen_state_->ir_builder_.CreateSIToFP(value, fp_type);
4419  if (from_ti.get_scale()) {
4420  value = cgen_state_->ir_builder_.CreateFDiv(
4421  value,
4422  llvm::ConstantFP::get(value->getType(), exp_to_scale(from_ti.get_scale())));
4423  }
4424  }
4425  return value;
4426 }
#define LOG(tag)
Definition: Logger.h:285
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
#define AUTOMATIC_IR_METADATA(CGENSTATE)
uint64_t exp_to_scale(const unsigned exp)

+ Here is the call graph for this function:
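
Functionally the emitted IR is a signed integer-to-float conversion followed, for scaled (decimal) sources, by a division by 10^scale. A scalar sketch of the same computation; pow10 here plays the role of exp_to_scale and is written as a plain loop for illustration:

#include <cstdint>

// Same role as exp_to_scale(): 10 raised to the decimal scale.
uint64_t pow10(unsigned exp) {
  uint64_t result = 1;
  while (exp--) {
    result *= 10;
  }
  return result;
}

// Scalar analogue of castToFP: e.g. a DECIMAL(10,2) raw value of 12345 becomes 123.45.
double cast_scaled_int_to_fp(int64_t value, unsigned scale) {
  double fp = static_cast<double>(value);     // CreateSIToFP
  if (scale) {
    fp /= static_cast<double>(pow10(scale));  // CreateFDiv by 10^scale
  }
  return fp;
}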

llvm::Value * Executor::castToIntPtrTyIn ( llvm::Value *  val,
const size_t  bit_width 
)
private

Definition at line 4428 of file Execute.cpp.

References AUTOMATIC_IR_METADATA, cgen_state_, CHECK, CHECK_LT, and get_int_type().

4428  {
4430  CHECK(val->getType()->isPointerTy());
4431 
4432  const auto val_ptr_type = static_cast<llvm::PointerType*>(val->getType());
4433  const auto val_type = val_ptr_type->getPointerElementType();
4434  size_t val_width = 0;
4435  if (val_type->isIntegerTy()) {
4436  val_width = val_type->getIntegerBitWidth();
4437  } else {
4438  if (val_type->isFloatTy()) {
4439  val_width = 32;
4440  } else {
4441  CHECK(val_type->isDoubleTy());
4442  val_width = 64;
4443  }
4444  }
4445  CHECK_LT(size_t(0), val_width);
4446  if (bitWidth == val_width) {
4447  return val;
4448  }
4449  return cgen_state_->ir_builder_.CreateBitCast(
4450  val, llvm::PointerType::get(get_int_type(bitWidth, cgen_state_->context_), 0));
4451 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

bool Executor::checkCurrentQuerySession ( const std::string &  candidate_query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 4991 of file Execute.cpp.

References current_query_session_.

4993  {
4994  // the check passes only when the candidate_query_session is non-empty
4995  // and is equal to the current query session
4996  return !candidate_query_session.empty() &&
4997  (current_query_session_ == candidate_query_session);
4998 }
QuerySessionId current_query_session_
Definition: Execute.h:1576
bool Executor::checkIsQuerySessionEnrolled ( const QuerySessionId query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 5265 of file Execute.cpp.

References queries_session_map_.

Referenced by executeWorkUnitImpl().

5267  {
5268  if (query_session.empty()) {
5269  return false;
5270  }
5271  return !query_session.empty() && queries_session_map_.count(query_session);
5272 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580

+ Here is the caller graph for this function:

bool Executor::checkIsQuerySessionInterrupted ( const std::string &  query_session,
heavyai::shared_lock< heavyai::shared_mutex > &  read_lock 
)

Definition at line 5254 of file Execute.cpp.

References queries_interrupt_flag_.

Referenced by executePlanWithGroupBy(), executePlanWithoutGroupBy(), fetchChunks(), and fetchUnionChunks().

5256  {
5257  if (query_session.empty()) {
5258  return false;
5259  }
5260  auto flag_it = queries_interrupt_flag_.find(query_session);
5261  return !query_session.empty() && flag_it != queries_interrupt_flag_.end() &&
5262  flag_it->second;
5263 }
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578

+ Here is the caller graph for this function:
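
Both this check and checkNonKernelTimeInterrupted() below follow the same lookup: an empty session id never counts as interrupted, otherwise the per-session flag decides. A minimal sketch with a plain std::unordered_map standing in for the InterruptFlagMap:

#include <string>
#include <unordered_map>

using InterruptFlagMap = std::unordered_map<std::string, bool>;  // session id -> flag

bool is_session_interrupted(const InterruptFlagMap& flags, const std::string& session) {
  if (session.empty()) {
    return false;  // sessions without an id cannot be interrupted
  }
  const auto it = flags.find(session);
  return it != flags.end() && it->second;
}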

bool Executor::checkNonKernelTimeInterrupted ( ) const

Definition at line 5363 of file Execute.cpp.

References current_query_session_, executor_id_, executor_session_mutex_, queries_interrupt_flag_, and UNITARY_EXECUTOR_ID.

5363  {
5364  // this function should be called within an executor which is assigned
5365  // to the specific query thread (which indicates we have already enrolled the session)
5366  // check whether this is called from a non-unitary executor
5367  if (executor_id_ == UNITARY_EXECUTOR_ID) {
5368  return false;
5369  };
5370  heavyai::shared_lock<heavyai::shared_mutex> session_read_lock(executor_session_mutex_);
5371  auto flag_it = queries_interrupt_flag_.find(current_query_session_);
5372  return !current_query_session_.empty() && flag_it != queries_interrupt_flag_.end() &&
5373  flag_it->second;
5374 }
QuerySessionId current_query_session_
Definition: Execute.h:1576
std::shared_lock< T > shared_lock
const ExecutorId executor_id_
Definition: Execute.h:1476
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
static constexpr ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:423
void Executor::checkPendingQueryStatus ( const QuerySessionId query_session)

Definition at line 5035 of file Execute.cpp.

References executor_session_mutex_, queries_interrupt_flag_, queries_session_map_, and VLOG.

5035  {
5036  // check whether we are okay to execute the "pending" query
5037  // i.e., before running the query check if this query session is "ALREADY" interrupted
5038  heavyai::shared_lock<heavyai::shared_mutex> session_read_lock(executor_session_mutex_);
5039  if (query_session.empty()) {
5040  return;
5041  }
5042  if (queries_interrupt_flag_.find(query_session) == queries_interrupt_flag_.end()) {
5043  // something goes wrong since we assume this is caller's responsibility
5044  // (call this function only for enrolled query session)
5045  if (!queries_session_map_.count(query_session)) {
5046  VLOG(1) << "Interrupting pending query is not available since the query session is "
5047  "not enrolled";
5048  } else {
5049  // here the query session is enrolled but the interrupt flag is not registered
5050  VLOG(1)
5051  << "Interrupting pending query is not available since its interrupt flag is "
5052  "not registered";
5053  }
5054  return;
5055  }
5056  if (queries_interrupt_flag_[query_session]) {
5057  throw QueryExecutionError(ErrorCode::INTERRUPTED);
5058  }
5059 }
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
std::shared_lock< T > shared_lock
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
#define VLOG(n)
Definition: Logger.h:388
void Executor::clearCaches ( bool  runtime_only = false)
void Executor::clearCardinalityCache ( )
static

Definition at line 5309 of file Execute.cpp.

References cardinality_cache_, g_use_estimator_result_cache, and recycler_mutex_.

Referenced by clearExternalCaches().

5309  {
5310  if (g_use_estimator_result_cache) {
5311  heavyai::unique_lock<heavyai::shared_mutex> lock(recycler_mutex_);
5312  cardinality_cache_.clear();
5313  }
5314 }
std::unique_lock< T > unique_lock
static std::unordered_map< CardinalityCacheKey, size_t > cardinality_cache_
Definition: Execute.h:1607
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1605
bool g_use_estimator_result_cache
Definition: Execute.cpp:139

+ Here is the caller graph for this function:

static void Executor::clearExternalCaches ( bool  for_update,
const TableDescriptor td,
const int  current_db_id 
)
inlinestatic

Definition at line 438 of file Execute.h.

References clearCardinalityCache(), TableDescriptor::getTableChunkKey(), hash_value(), CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches(), CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable(), invalidateCardinalityCacheForTable(), and TableDescriptor::tableId.

Referenced by AlterTableAlterColumnCommand::clearInMemoryData(), clearMemory(), DropForeignTableCommand::execute(), Parser::InsertIntoTableAsSelectStmt::execute(), Parser::DropTableStmt::execute(), Parser::TruncateTableStmt::execute(), Parser::OptimizeTableStmt::execute(), Parser::AddColumnStmt::execute(), Parser::DropColumnStmt::execute(), Parser::AlterTableParamStmt::execute(), Parser::CopyTableStmt::execute(), RelAlgExecutor::executeDelete(), RelAlgExecutor::executeSimpleInsert(), RelAlgExecutor::executeUpdate(), Catalog_Namespace::Catalog::invalidateCachesForTable(), foreign_storage::refresh_foreign_table_unlocked(), DBHandler::set_table_epochs(), Catalog_Namespace::Catalog::setUncappedTableEpoch(), and DBHandler::shutdown().

440  {
441  bool clearEntireCache = true;
442  if (td) {
443  const auto& table_chunk_key_prefix = td->getTableChunkKey(current_db_id);
444  if (!table_chunk_key_prefix.empty()) {
445  auto table_key = boost::hash_value(table_chunk_key_prefix);
447  if (for_update) {
449  } else {
451  }
453  clearEntireCache = false;
454  }
455  }
456  if (clearEntireCache) {
458  if (for_update) {
460  } else {
462  }
464  }
465  }
static void invalidateCachesByTable(size_t table_key)
static void invalidateCaches()
static void clearCardinalityCache()
Definition: Execute.cpp:5309
static void invalidateCardinalityCacheForTable(const shared::TableKey &table_key)
Definition: Execute.cpp:5316
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3548
std::vector< int > getTableChunkKey(const int getCurrentDBId) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMemory ( const Data_Namespace::MemoryLevel  memory_level)
static

Definition at line 535 of file Execute.cpp.

References clearExternalCaches(), Data_Namespace::DataMgr::clearMemory(), Data_Namespace::CPU_LEVEL, execute_mutex_, Catalog_Namespace::SysCatalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::SysCatalog::instance(), and CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCaches().

Referenced by DBHandler::clear_cpu_memory(), DBHandler::clear_gpu_memory(), QueryRunner::QueryRunner::clearCpuMemory(), and QueryRunner::QueryRunner::clearGpuMemory().

535  {
536  switch (memory_level) {
540  execute_mutex_); // Don't flush memory while queries are running
541 
542  if (memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
543  // The hash table cache uses CPU memory not managed by the buffer manager. In the
544  // future, we should manage these allocations with the buffer manager directly.
545  // For now, assume the user wants to purge the hash table cache when they clear
546  // CPU memory (currently used in ExecuteTest to lower memory pressure)
547  // TODO: Move JoinHashTableCacheInvalidator to Executor::clearExternalCaches();
549  }
550  Executor::clearExternalCaches(true, nullptr, 0);
552  break;
553  }
554  default: {
555  throw std::runtime_error(
556  "Clearing memory levels other than the CPU level or GPU level is not "
557  "supported.");
558  }
559  }
560 }
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1585
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:515
static void invalidateCaches()
Data_Namespace::DataMgr & getDataMgr() const
Definition: SysCatalog.h:234
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::unique_lock< T > unique_lock
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
Definition: Execute.h:438

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Executor::clearMetaInfoCache ( )
private

Definition at line 1054 of file Execute.cpp.

References agg_col_range_cache_, TableGenerations::clear(), AggregatedColRange::clear(), InputTableInfoCache::clear(), input_table_info_cache_, and table_generations_.

1054  {
1055  input_table_info_cache_.clear();
1056  agg_col_range_cache_.clear();
1057  table_generations_.clear();
1058 }
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1572
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1571
TableGenerations table_generations_
Definition: Execute.h:1573

+ Here is the call graph for this function:

void Executor::clearQuerySessionStatus ( const QuerySessionId query_session,
const std::string &  submitted_time_str 
)

Definition at line 5061 of file Execute.cpp.

References current_query_session_, executor_session_mutex_, invalidateRunningQuerySession(), removeFromQuerySessionList(), and resetInterrupt().

5062  {
5063  heavyai::unique_lock<heavyai::shared_mutex> session_write_lock(executor_session_mutex_);
5064  // clear the interrupt-related info for a finished query
5065  if (query_session.empty()) {
5066  return;
5067  }
5068  removeFromQuerySessionList(query_session, submitted_time_str, session_write_lock);
5069  if (query_session.compare(current_query_session_) == 0) {
5070  invalidateRunningQuerySession(session_write_lock);
5071  resetInterrupt();
5072  }
5073 }
QuerySessionId current_query_session_
Definition: Execute.h:1576
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5209
std::unique_lock< T > unique_lock
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
void resetInterrupt()
void invalidateRunningQuerySession(heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5013

+ Here is the call graph for this function:

llvm::Value * Executor::codegenAggregateWindowState ( CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  aggregate_state 
)
private

Definition at line 1510 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, COUNT, CodegenUtil::createPtrWithHoistedMemoryAddr(), anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), Analyzer::WindowFunction::getKind(), kDECIMAL, kDOUBLE, kFLOAT, and WindowFunctionContext::NUM_EXECUTION_DEVICES.

1512  {
1514  const auto pi32_type =
1515  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1516  const auto pi64_type =
1517  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1518  const auto window_func_context =
1519  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
1520  const Analyzer::WindowFunction* window_func = window_func_context->getWindowFunction();
1521  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1522  const auto aggregate_state_type =
1523  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1524  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1525  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1526  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1527  auto aggregate_state_count = CodegenUtil::createPtrWithHoistedMemoryAddr(
1528  cgen_state_.get(),
1529  code_generator,
1530  co,
1531  aggregate_state_count_i64,
1532  aggregate_state_type,
1533  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1534  .front();
1535  const auto double_null_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1536  switch (window_func_ti.get_type()) {
1537  case kFLOAT: {
1538  return cgen_state_->emitCall(
1539  "load_avg_float", {aggregate_state, aggregate_state_count, double_null_lv});
1540  }
1541  case kDOUBLE: {
1542  return cgen_state_->emitCall(
1543  "load_avg_double", {aggregate_state, aggregate_state_count, double_null_lv});
1544  }
1545  case kDECIMAL: {
1546  return cgen_state_->emitCall(
1547  "load_avg_decimal",
1548  {aggregate_state,
1549  aggregate_state_count,
1550  double_null_lv,
1551  cgen_state_->llInt<int32_t>(window_func_ti.get_scale())});
1552  }
1553  default: {
1554  return cgen_state_->emitCall(
1555  "load_avg_int", {aggregate_state, aggregate_state_count, double_null_lv});
1556  }
1557  }
1558  }
1559  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1560  return cgen_state_->ir_builder_.CreateLoad(
1561  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1562  }
1563  switch (window_func_ti.get_type()) {
1564  case kFLOAT: {
1565  return cgen_state_->emitCall("load_float", {aggregate_state});
1566  }
1567  case kDOUBLE: {
1568  return cgen_state_->emitCall("load_double", {aggregate_state});
1569  }
1570  default: {
1571  return cgen_state_->ir_builder_.CreateLoad(
1572  aggregate_state->getType()->getPointerElementType(), aggregate_state);
1573  }
1574  }
1575 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2925
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:
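
For AVG the emitted call loads both the running sum and the running count and must produce NULL when the count is zero; other aggregates simply load their single state slot. A plausible scalar analogue of what a load_avg_double-style helper computes, written here as an assumption for illustration rather than quoted from the runtime:

#include <cstdint>

// Assumed scalar analogue of the load_avg_* runtime helpers: return the configured
// null value when no rows contributed, otherwise sum / count.
double load_avg_double_like(const double* agg_sum,
                            const int64_t* agg_count,
                            double null_val) {
  return *agg_count == 0 ? null_val : *agg_sum / static_cast<double>(*agg_count);
}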

llvm::Value * Executor::codegenConditionalAggregateCondValSelector ( llvm::Value *  cond_lv,
SQLAgg const  aggKind,
CompilationOptions const &  co 
) const
private

Definition at line 1577 of file WindowFunctionIR.cpp.

References CHECK, and kSUM_IF.

1580  {
1581  llvm::Value* res_cond_lv{nullptr};
1582  switch (aggKind) {
1583  case kSUM_IF:
1584  if (cond_lv->getType()->isIntegerTy(1)) {
1585  // cond_expr returns i1 type val, just need to cast to i8 type
1586  // i.e., cond_expr IS NULL
1587  res_cond_lv = cgen_state_->castToTypeIn(cond_lv, 8);
1588  } else {
1589  CHECK(cond_lv->getType()->isIntegerTy(8));
1590  // cond_expr may have null value instead of upcasted bool (i1-type) value
1591  // so we have to correctly set true condition
1592  // i.e., i8 @gt_int32_t_nullable_lhs(..., i64 -2147483648, i8 -128)
1593  // has one of the following i8-type values: 1, 0, -128
1594  auto true_cond_lv =
1595  cgen_state_->ir_builder_.CreateICmpEQ(cond_lv, cgen_state_->llInt((int8_t)1));
1596  res_cond_lv = cgen_state_->ir_builder_.CreateSelect(
1597  true_cond_lv, cgen_state_->llInt((int8_t)1), cgen_state_->llInt((int8_t)0));
1598  }
1599  break;
1600  default:
1601  break;
1602  }
1603  return res_cond_lv;
1604 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
#define CHECK(condition)
Definition: Logger.h:291
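
The i8 branch normalizes a nullable boolean, which may be 1 (true), 0 (false) or -128 (null), into a strict 0/1 condition so that SUM_IF accumulates only on an explicit true. The same selection expressed as scalar C++, using the encoding described in the comments above:

#include <cstdint>

// Nullable boolean encoding per the comment above: 1 = true, 0 = false, -128 = null.
// SUM_IF must treat both false and null as "do not accumulate".
int8_t normalize_sum_if_condition(int8_t nullable_bool) {
  return nullable_bool == 1 ? int8_t{1} : int8_t{0};  // CreateICmpEQ + CreateSelect
}
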
llvm::Value * Executor::codegenCurrentPartitionIndex ( const WindowFunctionContext window_func_context,
CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  current_row_pos_lv 
)
private

Definition at line 781 of file WindowFunctionIR.cpp.

References CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowFunctionContext::elementCount(), get_int_type(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFunction::isFrameNavigateWindowFunction(), WindowFunctionContext::NUM_EXECUTION_DEVICES, WindowFunctionContext::partitionCount(), WindowFunctionContext::partitionNumCountBuf(), and WindowFunctionContext::payload().

785  {
786  const auto pi64_type =
787  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
788  const auto pi32_type =
789  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
790  auto row_pos_lv = current_row_pos_lv;
791  if (window_func_context->getWindowFunction()->isFrameNavigateWindowFunction()) {
792  // `current_row_pos_lv` indicates the index of the current row, but to figure out
793  // its index within the window partition it belongs to, we need a special approach,
794  // especially for window frame navigation functions. For instance, when we have
795  // five rows having two columns pc and val such as (2,1), (2,2), (2,3), (1,1),
796  // (1,2), we build a OneToMany Perfect Hash Table as: offset: 0 2 / count: 2 3 /
797  // payload: i1, i2, i3, i4, i5 where i1 ~ i3 and i4 ~ i5 are rows for partition 1
798  // (i.e., pc = 1) and 2 (i.e., pc = 2), respectively. But when processing the first
799  // row (2, 1), the original `current_row_pos_lv` is zero, so computing which
800  // partition it belongs to is hard unless we hash the value at runtime. Even if we
801  // do hash, we cannot know the exact hash slot unless we do binary + linear searches
802  // multiple times (via the payload buffer and the ordered payload buffer), i.e., when
803  // the row (1,2) is assigned to partition[4], we cannot find the hash slot index '4'
804  // by using `current_row_pos_lv` without a costly operation like a linear
805  // search over the entire window partition. Instead, we collect the hash slot that each
806  // row is assigned to and keep this info in the payload buffer
807  // `hash_slot_idx_ptr_lv`, and use it for computing window frame navigation functions
808  auto* const hash_slot_idx_ptr =
809  window_func_context->payload() + window_func_context->elementCount();
810  auto hash_slot_idx_buf_lv =
811  cgen_state_->llInt(reinterpret_cast<int64_t>(hash_slot_idx_ptr));
812  auto hash_slot_idx_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
813  cgen_state_.get(),
814  code_generator,
815  co,
816  hash_slot_idx_buf_lv,
817  pi32_type,
818  WindowFunctionContext::NUM_EXECUTION_DEVICES)
819  .front();
820  auto hash_slot_idx_load_lv = cgen_state_->ir_builder_.CreateGEP(
821  hash_slot_idx_ptr_lv->getType()->getPointerElementType(),
822  hash_slot_idx_ptr_lv,
823  current_row_pos_lv);
824  row_pos_lv = cgen_state_->castToTypeIn(
825  cgen_state_->ir_builder_.CreateLoad(
826  hash_slot_idx_load_lv->getType()->getPointerElementType(),
827  hash_slot_idx_load_lv,
828  "cur_row_hash_slot_idx"),
829  64);
830  }
831  auto partition_count_lv = cgen_state_->llInt(window_func_context->partitionCount());
832  auto partition_num_count_buf_lv = cgen_state_->llInt(
833  reinterpret_cast<int64_t>(window_func_context->partitionNumCountBuf()));
834  auto partition_num_count_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
835  cgen_state_.get(),
836  code_generator,
837  co,
838  partition_num_count_buf_lv,
839  pi64_type,
840  WindowFunctionContext::NUM_EXECUTION_DEVICES)
841  .front();
842  return cgen_state_->emitCall(
843  "compute_int64_t_lower_bound",
844  {partition_count_lv, row_pos_lv, partition_num_count_ptr_lv});
845 }
bool isFrameNavigateWindowFunction() const
Definition: Analyzer.h:2979
size_t elementCount() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
size_t partitionCount() const
static const int NUM_EXECUTION_DEVICES
const int64_t * partitionNumCountBuf() const
const Analyzer::WindowFunction * getWindowFunction() const
const int32_t * payload() const
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)

+ Here is the call graph for this function:
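
The final emitted call maps a (possibly remapped) row position to its partition index by searching the per-partition row counts. A standalone sketch of that mapping using std::upper_bound over a cumulative-count buffer; the cumulative layout is an assumption made for illustration, not a statement about the exact format of partitionNumCountBuf():

#include <algorithm>
#include <cstdint>
#include <vector>

// Given cumulative row counts per partition, find the partition containing row_pos.
int64_t partition_index_for_row(const std::vector<int64_t>& cumulative_counts,
                                int64_t row_pos) {
  const auto it = std::upper_bound(
      cumulative_counts.begin(), cumulative_counts.end(), row_pos);
  return static_cast<int64_t>(it - cumulative_counts.begin());
}

// Example: partitions of sizes {2, 3} give cumulative counts {2, 5};
// rows 0..1 map to partition 0 and rows 2..4 map to partition 1.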

llvm::Value * Executor::codegenFrameBound ( bool  for_start_bound,
bool  for_range_mode,
bool  for_window_frame_naviation,
const Analyzer::WindowFrame frame_bound,
bool  is_timestamp_type_frame,
llvm::Value *  order_key_null_val,
const WindowFrameBoundFuncArgs args 
)
private

Definition at line 649 of file WindowFunctionIR.cpp.

References CHECK, CURRENT_ROW, WindowFrameBoundFuncArgs::current_row_pos_lv, EXPR_FOLLOWING, EXPR_PRECEDING, WindowFrameBoundFuncArgs::frame_end_bound_expr_lv, WindowFrameBoundFuncArgs::frame_start_bound_expr_lv, Analyzer::WindowFrame::getBoundType(), WindowFrameBoundFuncArgs::int64_t_one_val_lv, WindowFrameBoundFuncArgs::int64_t_zero_val_lv, WindowFrameBoundFuncArgs::num_elem_current_partition_lv, WindowFrameBoundFuncArgs::order_type_col_name, UNBOUNDED_FOLLOWING, and UNBOUNDED_PRECEDING.

655  {
656  const auto bound_type = frame_bound->getBoundType();
657  auto adjust_frame_end_bound = [&](llvm::Value* target_bound_lv) {
658  return cgen_state_->ir_builder_.CreateSub(target_bound_lv, args.int64_t_one_val_lv);
659  };
661  CHECK(for_start_bound) << "frame end cannot be UNBOUNDED PRECEDING";
662  return args.int64_t_zero_val_lv;
663  } else if (bound_type == SqlWindowFrameBoundType::UNBOUNDED_FOLLOWING) {
664  CHECK(!for_start_bound) << "frame start cannot be UNBOUNDED FOLLOWING";
665  // adjust frame bound w.r.t the open frame interval if necessary
666  return for_window_frame_naviation
667  ? adjust_frame_end_bound(args.num_elem_current_partition_lv)
668  : args.num_elem_current_partition_lv;
669  }
670  std::vector<llvm::Value*> func_args;
671  std::string op_name =
672  bound_type == SqlWindowFrameBoundType::EXPR_FOLLOWING ? "add" : "sub";
673  if (!for_range_mode) {
674  llvm::Value* current_row_bound_expr_lv{nullptr};
675  if (for_window_frame_naviation) {
676  // we already know a current row's index in (ordered) window frame in this case
677  auto bound_expr =
678  for_start_bound ? args.frame_start_bound_expr_lv : args.frame_end_bound_expr_lv;
679  if (bound_type == SqlWindowFrameBoundType::EXPR_FOLLOWING) {
680  current_row_bound_expr_lv =
681  cgen_state_->ir_builder_.CreateAdd(args.current_row_pos_lv, bound_expr);
682  } else if (bound_type == SqlWindowFrameBoundType::EXPR_PRECEDING) {
683  current_row_bound_expr_lv =
684  cgen_state_->ir_builder_.CreateSub(args.current_row_pos_lv, bound_expr);
685  } else {
686  CHECK(bound_type == SqlWindowFrameBoundType::CURRENT_ROW);
687  current_row_bound_expr_lv = args.current_row_pos_lv;
688  }
689  // adjust frame bound w.r.t the open frame interval
690  if (for_start_bound) {
691  return cgen_state_->ir_builder_.CreateSelect(
692  cgen_state_->ir_builder_.CreateICmpSLT(current_row_bound_expr_lv,
693  args.int64_t_zero_val_lv),
694  args.int64_t_zero_val_lv,
695  current_row_bound_expr_lv);
696  } else {
697  return cgen_state_->ir_builder_.CreateSelect(
698  cgen_state_->ir_builder_.CreateICmpSGE(current_row_bound_expr_lv,
699  args.num_elem_current_partition_lv),
700  adjust_frame_end_bound(args.num_elem_current_partition_lv),
701  current_row_bound_expr_lv);
702  }
703  } else {
704  std::string func_class = for_start_bound ? "start" : "end";
705  auto const func_name = "compute_row_mode_" + func_class + "_index_" + op_name;
706  func_args = prepareRowModeFuncArgs(for_start_bound, bound_type, args);
707  current_row_bound_expr_lv = cgen_state_->emitCall(func_name, func_args);
708  }
709  return current_row_bound_expr_lv;
710  } else {
711  std::string func_class = for_start_bound ? "lower" : "upper";
712  auto const func_name = getFramingFuncName(
713  func_class,
714  args.order_type_col_name,
715  op_name,
716  bound_type != SqlWindowFrameBoundType::CURRENT_ROW && is_timestamp_type_frame);
717  func_args = prepareRangeModeFuncArgs(
718  for_start_bound, frame_bound, is_timestamp_type_frame, order_key_null_val, args);
719  auto frame_bound_lv = cgen_state_->emitCall(func_name, func_args);
720  if (!for_start_bound && for_window_frame_naviation) {
721  // adjust frame end bound w.r.t the open frame interval
722  frame_bound_lv = cgen_state_->ir_builder_.CreateSelect(
723  cgen_state_->ir_builder_.CreateICmpSGE(frame_bound_lv,
724  args.num_elem_current_partition_lv),
725  adjust_frame_end_bound(args.num_elem_current_partition_lv),
726  frame_bound_lv);
727  }
728  return frame_bound_lv;
729  }
730 }
llvm::Value * num_elem_current_partition_lv
llvm::Value * current_row_pos_lv
llvm::Value * frame_end_bound_expr_lv
std::string getFramingFuncName(const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
SqlWindowFrameBoundType getBoundType() const
Definition: Analyzer.h:2826
std::vector< llvm::Value * > prepareRangeModeFuncArgs(bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
#define CHECK(condition)
Definition: Logger.h:291
llvm::Value * int64_t_zero_val_lv
llvm::Value * int64_t_one_val_lv
llvm::Value * frame_start_bound_expr_lv
std::string order_type_col_name
std::vector< llvm::Value * > prepareRowModeFuncArgs(bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const

+ Here is the call graph for this function:
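
In ROWS mode with an expression bound the emitted IR adds or subtracts the bound expression from the current row position and, for window-frame navigation, clamps the result to the partition's valid index range via the CreateSelect calls above. A scalar sketch of that arithmetic (assumed semantics, hypothetical helper name):

#include <algorithm>
#include <cstdint>
#include <iostream>

// Scalar analogue of the ROWS-mode bound computation for frame navigation.
// `offset` is the already-computed bound expression (a positive row count).
int64_t row_mode_bound(int64_t current_row_pos,
                       int64_t offset,
                       bool following,          // EXPR_FOLLOWING vs EXPR_PRECEDING
                       bool for_start_bound,
                       int64_t partition_size) {
  int64_t bound = following ? current_row_pos + offset : current_row_pos - offset;
  if (for_start_bound) {
    return std::max<int64_t>(bound, 0);                 // clamp below at zero
  }
  return std::min<int64_t>(bound, partition_size - 1);  // clamp end bound into the partition
}

int main() {
  // 2 PRECEDING start bound near the partition head clamps to 0.
  std::cout << row_mode_bound(1, 2, /*following=*/false, /*for_start_bound=*/true, 10) << "\n";  // 0
  // 3 FOLLOWING end bound near the tail clamps to partition_size - 1.
  std::cout << row_mode_bound(8, 3, /*following=*/true, /*for_start_bound=*/false, 10) << "\n";  // 9
}
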

llvm::Value * Executor::codegenFrameBoundExpr ( const Analyzer::WindowFunction window_func,
const Analyzer::WindowFrame frame_bound,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 598 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegen(), EXPR_FOLLOWING, EXPR_PRECEDING, g_cluster, SQLTypeInfo::get_size(), Analyzer::Expr::get_type_info(), Analyzer::WindowFrame::getBoundExpr(), Analyzer::WindowFunction::getOrderKeys(), Analyzer::WindowFunction::hasRangeModeFraming(), kBIGINT, kINT, and kSMALLINT.

601  {
602  auto needs_bound_expr_codegen = [](const Analyzer::WindowFrame* window_frame) {
603  return window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_FOLLOWING ||
604  window_frame->getBoundType() == SqlWindowFrameBoundType::EXPR_PRECEDING;
605  };
606  const auto order_col_ti = window_func->getOrderKeys().front()->get_type_info();
607  auto encode_date_col_val = [&order_col_ti, this](llvm::Value* bound_expr_lv) {
608  if (order_col_ti.get_comp_param() == 16) {
609  return cgen_state_->emitCall(
610  "fixed_width_date_encode_noinline",
611  {bound_expr_lv,
612  cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(SQLTypeInfo(kSMALLINT)),
613  32),
614  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
615  } else {
616  return cgen_state_->emitCall("fixed_width_date_encode_noinline",
617  {bound_expr_lv,
618  cgen_state_->inlineIntNull(SQLTypeInfo(kINT)),
619  cgen_state_->inlineIntNull(SQLTypeInfo(kBIGINT))});
620  }
621  };
622  llvm::Value* bound_expr_lv{nullptr};
623  if (needs_bound_expr_codegen(frame_bound)) {
624  auto bound_expr = frame_bound->getBoundExpr();
625  if (auto dateadd_expr = dynamic_cast<const Analyzer::DateaddExpr*>(bound_expr)) {
626  if (dateadd_expr->get_datetime_expr()->get_type_info().is_encoded_timestamp()) {
627  dateadd_expr->set_fixed_encoding_null_val();
628  }
629  }
630  auto bound_expr_lvs = code_generator.codegen(bound_expr, true, co);
631  bound_expr_lv = bound_expr_lvs.front();
632  if (order_col_ti.is_date() && window_func->hasRangeModeFraming()) {
633  if (g_cluster) {
634  throw std::runtime_error(
635  "Range mode with date type ordering column is not supported yet.");
636  }
637  bound_expr_lv = encode_date_col_val(bound_expr_lv);
638  }
639  if (frame_bound->getBoundExpr()->get_type_info().get_size() != 8) {
640  bound_expr_lv = cgen_state_->castToTypeIn(bound_expr_lv, 64);
641  }
642  } else {
643  bound_expr_lv = cgen_state_->llInt((int64_t)-1);
644  }
645  CHECK(bound_expr_lv);
646  return bound_expr_lv;
647 }
bool hasRangeModeFraming() const
Definition: Analyzer.h:2959
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2933
const Analyzer::Expr * getBoundExpr() const
Definition: Analyzer.h:2828
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK(condition)
Definition: Logger.h:291
bool g_cluster
Definition: sqltypes.h:72

+ Here is the call graph for this function:
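
Only EXPR_PRECEDING and EXPR_FOLLOWING bounds carry an expression that must be code-generated; every other bound kind is represented by the -1 placeholder constant, and generated bound values narrower than 8 bytes are widened to 64 bits. A minimal sketch of that decision (the enum below is a local stand-in, not the real SqlWindowFrameBoundType):

#include <cstdint>
#include <iostream>

enum class FrameBoundType {
  kUnboundedPreceding,
  kExprPreceding,
  kCurrentRow,
  kExprFollowing,
  kUnboundedFollowing
};

// Mirrors the needs_bound_expr_codegen lambda in the listing: only expression
// bounds need code generated for the bound value; others use a dummy of -1.
bool needs_bound_expr_codegen(FrameBoundType t) {
  return t == FrameBoundType::kExprPreceding || t == FrameBoundType::kExprFollowing;
}

int main() {
  std::cout << needs_bound_expr_codegen(FrameBoundType::kExprFollowing) << "\n";  // 1
  std::cout << needs_bound_expr_codegen(FrameBoundType::kCurrentRow) << "\n";     // 0
}
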

std::pair< llvm::Value *, llvm::Value * > Executor::codegenFrameBoundRange ( const Analyzer::WindowFunction window_func,
CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 1065 of file WindowFunctionIR.cpp.

References CHECK, Analyzer::WindowFunction::getFrameEndBound(), and Analyzer::WindowFunction::getFrameStartBound().

1068  {
1069  const auto frame_start_bound = window_func->getFrameStartBound();
1070  const auto frame_end_bound = window_func->getFrameEndBound();
1071  auto frame_start_bound_expr_lv =
1072  codegenFrameBoundExpr(window_func, frame_start_bound, code_generator, co);
1073  auto frame_end_bound_expr_lv =
1074  codegenFrameBoundExpr(window_func, frame_end_bound, code_generator, co);
1075  CHECK(frame_start_bound_expr_lv);
1076  CHECK(frame_end_bound_expr_lv);
1077  return std::make_pair(frame_start_bound_expr_lv, frame_end_bound_expr_lv);
1078 }
const Analyzer::WindowFrame * getFrameStartBound() const
Definition: Analyzer.h:2937
const Analyzer::WindowFrame * getFrameEndBound() const
Definition: Analyzer.h:2944
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::pair< llvm::Value *, llvm::Value * > Executor::codegenFrameNullRange ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  partition_index_lv 
) const
private

Definition at line 904 of file WindowFunctionIR.cpp.

References CodegenUtil::createPtrWithHoistedMemoryAddr(), get_int_type(), WindowFunctionContext::getNullValueEndPos(), WindowFunctionContext::getNullValueStartPos(), and WindowFunctionContext::NUM_EXECUTION_DEVICES.

908  {
909  const auto pi64_type =
910  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
911  const auto null_start_pos_buf = cgen_state_->llInt(
912  reinterpret_cast<int64_t>(window_func_context->getNullValueStartPos()));
913  const auto null_start_pos_buf_ptr = CodegenUtil::createPtrWithHoistedMemoryAddr(
914  cgen_state_.get(),
915  code_generator,
916  co,
917  null_start_pos_buf,
918  pi64_type,
919  WindowFunctionContext::NUM_EXECUTION_DEVICES)
920  .front();
921  const auto null_start_pos_ptr =
922  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
923  null_start_pos_buf_ptr,
924  partition_index_lv);
925  auto null_start_pos_lv = cgen_state_->ir_builder_.CreateLoad(
926  null_start_pos_ptr->getType()->getPointerElementType(),
927  null_start_pos_ptr,
928  "null_start_pos");
929  const auto null_end_pos_buf = cgen_state_->llInt(
930  reinterpret_cast<int64_t>(window_func_context->getNullValueEndPos()));
931  const auto null_end_pos_buf_ptr = CodegenUtil::createPtrWithHoistedMemoryAddr(
932  cgen_state_.get(),
933  code_generator,
934  co,
935  null_end_pos_buf,
936  pi64_type,
937  WindowFunctionContext::NUM_EXECUTION_DEVICES)
938  .front();
939  const auto null_end_pos_ptr = cgen_state_->ir_builder_.CreateGEP(
940  get_int_type(64, cgen_state_->context_), null_end_pos_buf_ptr, partition_index_lv);
941  auto null_end_pos_lv = cgen_state_->ir_builder_.CreateLoad(
942  null_end_pos_ptr->getType()->getPointerElementType(),
943  null_end_pos_ptr,
944  "null_end_pos");
945  return std::make_pair(null_start_pos_lv, null_end_pos_lv);
946 }
int64_t * getNullValueEndPos() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
int64_t * getNullValueStartPos() const
static const int NUM_EXECUTION_DEVICES
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)

+ Here is the call graph for this function:
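
The generated GEP/load pairs index two parallel per-partition buffers holding the start and end positions of null order-key values, so frame computation can step over the null block. A host-side analogue under that assumed buffer layout (hypothetical helper):

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Assumed layout: one entry per partition in each buffer.
std::pair<int64_t, int64_t> null_range_for_partition(
    const std::vector<int64_t>& null_start_pos,
    const std::vector<int64_t>& null_end_pos,
    int64_t partition_index) {
  return {null_start_pos[partition_index], null_end_pos[partition_index]};
}

int main() {
  const std::vector<int64_t> starts{0, 3};  // partition 1: nulls begin at offset 3
  const std::vector<int64_t> ends{0, 5};    // partition 1: nulls end at offset 5
  const auto [s, e] = null_range_for_partition(starts, ends, 1);
  std::cout << s << " " << e << "\n";  // 3 5
}
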

void Executor::codegenJoinLoops ( const std::vector< JoinLoop > &  join_loops,
const RelAlgExecutionUnit ra_exe_unit,
GroupByAndAggregate group_by_and_aggregate,
llvm::Function *  query_func,
llvm::BasicBlock *  entry_bb,
QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
const ExecutionOptions eo 
)
private

Definition at line 1204 of file IRCodegen.cpp.

References ExecutionOptions::allow_runtime_query_interrupt, anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, JoinLoop::codegen(), CompilationOptions::device_type, JoinLoopDomain::element_count, get_int_array_type(), get_int_type(), INNER, MultiSet, CodeGenerator::posArg(), GroupByAndAggregate::query_infos_, query_mem_desc, Set, and ExecutionOptions::with_dynamic_watchdog.

1211  {
1213  const auto exit_bb =
1214  llvm::BasicBlock::Create(cgen_state_->context_, "exit", cgen_state_->current_func_);
1215  cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
1216  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1217  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1218  CodeGenerator code_generator(this);
1219 
1220  llvm::BasicBlock* loops_entry_bb{nullptr};
1221  auto has_range_join =
1222  std::any_of(join_loops.begin(), join_loops.end(), [](const auto& join_loop) {
1223  return join_loop.kind() == JoinLoopKind::MultiSet;
1224  });
1225  if (has_range_join) {
1226  CHECK_EQ(join_loops.size(), size_t(1));
1227  const auto element_count =
1228  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 9);
1229 
1230  auto compute_packed_offset = [](const int32_t x, const int32_t y) -> uint64_t {
1231  const uint64_t y_shifted = static_cast<uint64_t>(y) << 32;
1232  return y_shifted | static_cast<uint32_t>(x);
1233  };
1234 
1235  const auto values_arr = std::vector<llvm::Constant*>{
1236  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
1237  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1238  compute_packed_offset(0, 1)),
1239  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1240  compute_packed_offset(0, -1)),
1241  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1242  compute_packed_offset(1, 0)),
1243  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1244  compute_packed_offset(1, 1)),
1245  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1246  compute_packed_offset(1, -1)),
1247  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1248  compute_packed_offset(-1, 0)),
1249  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1250  compute_packed_offset(-1, 1)),
1251  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_),
1252  compute_packed_offset(-1, -1))};
1253 
1254  const auto constant_values_array = llvm::ConstantArray::get(
1255  get_int_array_type(64, 9, cgen_state_->context_), values_arr);
1256  CHECK(cgen_state_->module_);
1257  const auto values =
1258  new llvm::GlobalVariable(*cgen_state_->module_,
1259  get_int_array_type(64, 9, cgen_state_->context_),
1260  true,
1261  llvm::GlobalValue::LinkageTypes::InternalLinkage,
1262  constant_values_array);
1263  JoinLoop join_loop(
1264  JoinLoopKind::Set,
1265  JoinType::INNER,
1266  [element_count, values](const std::vector<llvm::Value*>& v) {
1267  JoinLoopDomain domain{{0}};
1268  domain.element_count = element_count;
1269  domain.values_buffer = values;
1270  return domain;
1271  },
1272  nullptr,
1273  nullptr,
1274  nullptr,
1275  nullptr,
1276  "range_key_loop");
1277 
1278  loops_entry_bb = JoinLoop::codegen(
1279  {join_loop},
1280  [this,
1281  query_func,
1282  &query_mem_desc,
1283  &co,
1284  &eo,
1285  &group_by_and_aggregate,
1286  &join_loops,
1287  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1288  auto& builder = cgen_state_->ir_builder_;
1289 
1290  auto body_exit_bb =
1291  llvm::BasicBlock::Create(cgen_state_->context_,
1292  "range_key_inner_body_exit",
1293  builder.GetInsertBlock()->getParent());
1294 
1295  auto range_key_body_bb =
1296  llvm::BasicBlock::Create(cgen_state_->context_,
1297  "range_key_loop_body",
1298  builder.GetInsertBlock()->getParent());
1299  builder.SetInsertPoint(range_key_body_bb);
1300 
1301  const auto body_loops_entry_bb = JoinLoop::codegen(
1302  join_loops,
1303  [this,
1304  query_func,
1305  &query_mem_desc,
1306  &co,
1307  &eo,
1308  &group_by_and_aggregate,
1309  &join_loops,
1310  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1311  addJoinLoopIterator(prev_iters, join_loops.size());
1312  auto& builder = cgen_state_->ir_builder_;
1313  const auto loop_body_bb =
1314  llvm::BasicBlock::Create(builder.getContext(),
1315  "loop_body",
1316  builder.GetInsertBlock()->getParent());
1317  builder.SetInsertPoint(loop_body_bb);
1318  const bool can_return_error =
1319  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1320  if (can_return_error || cgen_state_->needs_error_check_ ||
1321  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1322  createErrorCheckControlFlow(query_func,
1323  eo.with_dynamic_watchdog,
1324  eo.allow_runtime_query_interrupt,
1325  join_loops,
1326  co.device_type,
1327  group_by_and_aggregate.query_infos_);
1328  }
1329  return loop_body_bb;
1330  },
1331  prev_iters.back(),
1332  body_exit_bb,
1333  cgen_state_.get());
1334 
1335  builder.SetInsertPoint(range_key_body_bb);
1336  cgen_state_->ir_builder_.CreateBr(body_loops_entry_bb);
1337 
1338  builder.SetInsertPoint(body_exit_bb);
1339  return range_key_body_bb;
1340  },
1341  code_generator.posArg(nullptr),
1342  exit_bb,
1343  cgen_state_.get());
1344  } else {
1345  loops_entry_bb = JoinLoop::codegen(
1346  join_loops,
1347  /*body_codegen=*/
1348  [this,
1349  query_func,
1350  &query_mem_desc,
1351  &co,
1352  &eo,
1353  &group_by_and_aggregate,
1354  &join_loops,
1355  &ra_exe_unit](const std::vector<llvm::Value*>& prev_iters) {
1357  addJoinLoopIterator(prev_iters, join_loops.size());
1358  auto& builder = cgen_state_->ir_builder_;
1359  const auto loop_body_bb = llvm::BasicBlock::Create(
1360  builder.getContext(), "loop_body", builder.GetInsertBlock()->getParent());
1361  builder.SetInsertPoint(loop_body_bb);
1362  const bool can_return_error =
1363  compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1364  if (can_return_error || cgen_state_->needs_error_check_ ||
1365  eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1366  createErrorCheckControlFlow(query_func,
1367  eo.with_dynamic_watchdog,
1368  eo.allow_runtime_query_interrupt,
1369  join_loops,
1370  co.device_type,
1371  group_by_and_aggregate.query_infos_);
1372  }
1373  return loop_body_bb;
1374  },
1375  /*outer_iter=*/code_generator.posArg(nullptr),
1376  exit_bb,
1377  cgen_state_.get());
1378  }
1379  CHECK(loops_entry_bb);
1380  cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1381  cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
1382 }
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
llvm::Value * element_count
Definition: JoinLoop.h:46
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, CgenState *cgen_state)
Definition: JoinLoop.cpp:50
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1186
#define CHECK(condition)
Definition: Logger.h:291
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
llvm::ArrayType * get_int_array_type(int const width, int count, llvm::LLVMContext &context)

+ Here is the call graph for this function:
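
In the range-join branch the outer loop walks a constant array of nine packed (x, y) cell offsets covering a 3x3 neighborhood. compute_packed_offset stores y in the upper 32 bits and x, reinterpreted as unsigned, in the lower 32 bits; a standalone sketch of the packing and its inverse:

#include <cstdint>
#include <iostream>

// Same packing as compute_packed_offset in the listing above:
// y goes into the high 32 bits, x into the low 32 bits.
uint64_t pack_offset(int32_t x, int32_t y) {
  return (static_cast<uint64_t>(y) << 32) | static_cast<uint32_t>(x);
}

int32_t unpack_x(uint64_t packed) {
  return static_cast<int32_t>(packed & 0xffffffffULL);
}

int32_t unpack_y(uint64_t packed) {
  return static_cast<int32_t>(packed >> 32);
}

int main() {
  const uint64_t packed = pack_offset(-1, 1);
  std::cout << unpack_x(packed) << " " << unpack_y(packed) << "\n";  // -1 1
}
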

llvm::Value * Executor::codegenLoadCurrentValueFromColBuf ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
WindowFrameBoundFuncArgs args 
) const
private

Definition at line 753 of file WindowFunctionIR.cpp.

References CHECK, CodeGenerator::codegenWindowPosition(), WindowFrameBoundFuncArgs::current_row_pos_lv, get_fp_type(), get_int_type(), Analyzer::WindowFunction::getOrderKeys(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFunction::isFrameNavigateWindowFunction(), and WindowFrameBoundFuncArgs::order_key_buf_ptr_lv.

756  {
757  llvm::Value* current_col_value_ptr_lv{nullptr};
758  const auto order_key_size_in_byte = getOrderKeySize(window_func_context) * 8;
759  auto const order_key_ptr =
760  window_func_context->getWindowFunction()->getOrderKeys().front();
761  CHECK(order_key_ptr);
762  auto const order_col_ti = order_key_ptr->get_type_info();
763  auto const order_col_llvm_type =
764  order_col_ti.is_fp() ? get_fp_type(order_key_size_in_byte, cgen_state_->context_)
765  : get_int_type(order_key_size_in_byte, cgen_state_->context_);
766  if (!window_func_context->getWindowFunction()->isFrameNavigateWindowFunction()) {
767  auto rowid_in_partition_lv = code_generator.codegenWindowPosition(
768  window_func_context, args.current_row_pos_lv);
769  current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
770  order_col_llvm_type, args.order_key_buf_ptr_lv, rowid_in_partition_lv);
771  } else {
772  current_col_value_ptr_lv = cgen_state_->ir_builder_.CreateGEP(
773  order_col_llvm_type, args.order_key_buf_ptr_lv, args.current_row_pos_lv);
774  }
775  return cgen_state_->ir_builder_.CreateLoad(
776  current_col_value_ptr_lv->getType()->getPointerElementType(),
777  current_col_value_ptr_lv,
778  "current_col_value");
779 }
bool isFrameNavigateWindowFunction() const
Definition: Analyzer.h:2979
llvm::Value * current_row_pos_lv
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2933
llvm::Value * codegenWindowPosition(const WindowFunctionContext *window_func_context, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:235
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
llvm::Value * order_key_buf_ptr_lv
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const
size_t getOrderKeySize(WindowFunctionContext *window_func_context) const

+ Here is the call graph for this function:
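
The load fetches the current row's ordering-key value from the order-key column buffer, indexing by the row's position within the sorted partition for regular window functions and directly by the current row position for frame-navigation functions, whose inputs are already frame-ordered. A scalar sketch of that selection (hypothetical helper, plain 64-bit integer keys assumed):

#include <cstdint>
#include <iostream>
#include <vector>

// order_key_buf holds one ordering key per row of the partition.
int64_t load_current_order_key(const std::vector<int64_t>& order_key_buf,
                               int64_t current_row_pos,
                               int64_t row_pos_in_sorted_partition,
                               bool is_frame_navigation_func) {
  const int64_t idx =
      is_frame_navigation_func ? current_row_pos : row_pos_in_sorted_partition;
  return order_key_buf[idx];
}

int main() {
  const std::vector<int64_t> keys{10, 20, 30, 40};
  std::cout << load_current_order_key(keys, 3, 1, /*is_frame_navigation_func=*/false)
            << "\n";  // 20
}
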

std::pair< std::string, llvm::Value * > Executor::codegenLoadOrderKeyBufPtr ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
const CompilationOptions co 
) const
private

Definition at line 948 of file WindowFunctionIR.cpp.

References CodegenUtil::createPtrWithHoistedMemoryAddr(), anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getOrderKeyColumnBufferTypes(), Analyzer::WindowFunction::getOrderKeys(), WindowFunctionContext::getWindowFunction(), and WindowFunctionContext::NUM_EXECUTION_DEVICES.

951  {
952  auto const order_key_ti =
953  window_func_context->getWindowFunction()->getOrderKeys().front()->get_type_info();
954  auto const order_key_size = order_key_ti.get_size();
955  auto const order_col_type_name = get_col_type_name_by_size(
956  order_key_size,
957  window_func_context->getOrderKeyColumnBufferTypes().front().is_fp());
958  size_t order_key_size_in_byte = order_key_size * 8;
959  auto const order_key_type =
960  order_key_ti.is_fp() ? get_fp_type(order_key_size_in_byte, cgen_state_->context_)
961  : get_int_type(order_key_size_in_byte, cgen_state_->context_);
962  auto const order_key_buf_type = llvm::PointerType::get(order_key_type, 0);
963  auto const order_key_buf = cgen_state_->llInt(
964  reinterpret_cast<int64_t>(window_func_context->getOrderKeyColumnBuffers().front()));
965  auto const order_key_buf_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
966  cgen_state_.get(),
967  code_generator,
968  co,
969  order_key_buf,
970  order_key_buf_type,
971  WindowFunctionContext::NUM_EXECUTION_DEVICES)
972  .front();
973  return std::make_pair(order_col_type_name, order_key_buf_ptr_lv);
974 }
std::string get_col_type_name_by_size(const size_t size, const bool is_fp)
const std::vector< SQLTypeInfo > & getOrderKeyColumnBufferTypes() const
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
Definition: Analyzer.h:2933
static const int NUM_EXECUTION_DEVICES
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
const Analyzer::WindowFunction * getWindowFunction() const
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)

+ Here is the call graph for this function:

WindowPartitionBufferPtrs Executor::codegenLoadPartitionBuffers ( WindowFunctionContext window_func_context,
CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  partition_index_lv 
) const
private

Definition at line 976 of file WindowFunctionIR.cpp.

References WindowFunctionContext::counts(), CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowPartitionBufferPtrs::current_partition_start_offset_lv, get_int_type(), WindowPartitionBufferPtrs::num_elem_current_partition_lv, WindowFunctionContext::NUM_EXECUTION_DEVICES, WindowFunctionContext::partitionStartOffset(), WindowFunctionContext::payload(), WindowFunctionContext::sortedPartition(), WindowPartitionBufferPtrs::target_partition_rowid_ptr_lv, and WindowPartitionBufferPtrs::target_partition_sorted_rowid_ptr_lv.

980  {
981  WindowPartitionBufferPtrs bufferPtrs;
982  const auto pi64_type =
983  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
984  const auto pi32_type =
985  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
986 
987  // partial sum of # elems of partitions
988  auto partition_start_offset_buf_lv = cgen_state_->llInt(
989  reinterpret_cast<int64_t>(window_func_context->partitionStartOffset()));
990  auto partition_start_offset_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
991  cgen_state_.get(),
992  code_generator,
993  co,
994  partition_start_offset_buf_lv,
995  pi64_type,
996  WindowFunctionContext::NUM_EXECUTION_DEVICES)
997  .front();
998 
999  // get start offset of the current partition
1000  auto current_partition_start_offset_ptr_lv =
1001  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
1002  partition_start_offset_ptr_lv,
1003  partition_index_lv);
1004  bufferPtrs.current_partition_start_offset_lv = cgen_state_->ir_builder_.CreateLoad(
1005  current_partition_start_offset_ptr_lv->getType()->getPointerElementType(),
1006  current_partition_start_offset_ptr_lv);
1007 
1008  // row_id buf of the current partition
1009  const auto partition_rowid_buf_lv =
1010  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->payload()));
1011  const auto partition_rowid_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
1012  cgen_state_.get(),
1013  code_generator,
1014  co,
1015  partition_rowid_buf_lv,
1016  pi32_type,
1017  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1018  .front();
1019  bufferPtrs.target_partition_rowid_ptr_lv =
1020  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
1021  partition_rowid_ptr_lv,
1022  bufferPtrs.current_partition_start_offset_lv);
1023 
1024  // row_id buf of ordered current partition
1025  const auto sorted_rowid_lv = cgen_state_->llInt(
1026  reinterpret_cast<int64_t>(window_func_context->sortedPartition()));
1027  const auto sorted_rowid_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
1028  cgen_state_.get(),
1029  code_generator,
1030  co,
1031  sorted_rowid_lv,
1032  pi64_type,
1033  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1034  .front();
1035  bufferPtrs.target_partition_sorted_rowid_ptr_lv =
1036  cgen_state_->ir_builder_.CreateGEP(get_int_type(64, cgen_state_->context_),
1037  sorted_rowid_ptr_lv,
1038  bufferPtrs.current_partition_start_offset_lv);
1039 
1040  // # elems per partition
1041  const auto partition_count_buf =
1042  cgen_state_->llInt(reinterpret_cast<int64_t>(window_func_context->counts()));
1043  auto partition_count_buf_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
1044  cgen_state_.get(),
1045  code_generator,
1046  co,
1047  partition_count_buf,
1048  pi32_type,
1049  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1050  .front();
1051 
1052  // # elems of the given partition
1053  const auto num_elem_current_partition_ptr =
1054  cgen_state_->ir_builder_.CreateGEP(get_int_type(32, cgen_state_->context_),
1055  partition_count_buf_ptr_lv,
1056  partition_index_lv);
1057  bufferPtrs.num_elem_current_partition_lv = cgen_state_->castToTypeIn(
1058  cgen_state_->ir_builder_.CreateLoad(
1059  num_elem_current_partition_ptr->getType()->getPointerElementType(),
1060  num_elem_current_partition_ptr),
1061  64);
1062  return bufferPtrs;
1063 }
llvm::Value * current_partition_start_offset_lv
llvm::Value * num_elem_current_partition_lv
const int32_t * counts() const
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Value * target_partition_sorted_rowid_ptr_lv
llvm::Value * target_partition_rowid_ptr_lv
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const int64_t * partitionStartOffset() const
static const int NUM_EXECUTION_DEVICES
const int64_t * sortedPartition() const
const int32_t * payload() const
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)

+ Here is the call graph for this function:
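
The four values gathered into WindowPartitionBufferPtrs come from flat buffers shared by all partitions: the prefix-sum of partition sizes, the unsorted and sorted row-id payloads, and the per-partition counts. A small host-side analogue of the pointer arithmetic the generated GEPs perform, assuming that layout (names are hypothetical):

#include <cstdint>
#include <iostream>
#include <vector>

struct PartitionView {
  int64_t start_offset;          // where this partition begins in the payload buffers
  const int32_t* rowids;         // row ids of the partition, storage order
  const int64_t* sorted_rowids;  // row ids of the partition, ORDER BY order
  int64_t num_rows;              // number of rows in the partition
};

PartitionView view_partition(const std::vector<int64_t>& partition_start_offset,
                             const std::vector<int32_t>& payload,
                             const std::vector<int64_t>& sorted_partition,
                             const std::vector<int32_t>& counts,
                             int64_t partition_index) {
  const int64_t offset = partition_start_offset[partition_index];
  return {offset,
          payload.data() + offset,
          sorted_partition.data() + offset,
          static_cast<int64_t>(counts[partition_index])};
}

int main() {
  // Two partitions of sizes 3 and 2.
  const std::vector<int64_t> starts{0, 3};
  const std::vector<int32_t> payload{4, 0, 2, 1, 3};
  const std::vector<int64_t> sorted{0, 2, 4, 3, 1};
  const std::vector<int32_t> counts{3, 2};
  const auto p1 = view_partition(starts, payload, sorted, counts, 1);
  std::cout << p1.start_offset << " " << p1.num_rows << " " << p1.rowids[0] << "\n";  // 3 2 1
}
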

llvm::BasicBlock * Executor::codegenSkipDeletedOuterTableRow ( const RelAlgExecutionUnit ra_exe_unit,
const CompilationOptions co 
)
private

Definition at line 3311 of file NativeCodegen.cpp.

3313  {
3315  if (!co.filter_on_deleted_column) {
3316  return nullptr;
3317  }
3318  CHECK(!ra_exe_unit.input_descs.empty());
3319  const auto& outer_input_desc = ra_exe_unit.input_descs[0];
3320  if (outer_input_desc.getSourceType() != InputSourceType::TABLE) {
3321  return nullptr;
3322  }
3323  const auto& table_key = outer_input_desc.getTableKey();
3324  const auto deleted_cd = plan_state_->getDeletedColForTable(table_key);
3325  if (!deleted_cd) {
3326  return nullptr;
3327  }
3328  CHECK(deleted_cd->columnType.is_boolean());
3329  const auto deleted_expr =
3330  makeExpr<Analyzer::ColumnVar>(deleted_cd->columnType,
3331  shared::ColumnKey{table_key, deleted_cd->columnId},
3332  outer_input_desc.getNestLevel());
3333  CodeGenerator code_generator(this);
3334  const auto is_deleted =
3335  code_generator.toBool(code_generator.codegen(deleted_expr.get(), true, co).front());
3336  const auto is_deleted_bb = llvm::BasicBlock::Create(
3337  cgen_state_->context_, "is_deleted", cgen_state_->row_func_);
3338  llvm::BasicBlock* bb = llvm::BasicBlock::Create(
3339  cgen_state_->context_, "is_not_deleted", cgen_state_->row_func_);
3340  cgen_state_->ir_builder_.CreateCondBr(is_deleted, is_deleted_bb, bb);
3341  cgen_state_->ir_builder_.SetInsertPoint(is_deleted_bb);
3342  cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
3343  cgen_state_->ir_builder_.SetInsertPoint(bb);
3344  return bb;
3345 }
std::vector< InputDescriptor > input_descs
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291
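
The emitted control flow amounts to an early-exit filter on the outer table's delete column: the is_deleted block returns from the row function immediately, and the is_not_deleted block continues with the row body. A scalar sketch of the equivalent filter (hypothetical helper):

#include <cstdint>
#include <iostream>
#include <vector>

// Returns true when the row is marked deleted; the caller then bails out of
// the row body, analogous to the early CreateRet(0) in the is_deleted block.
bool skip_if_deleted(const std::vector<bool>& deleted_col, int64_t row) {
  return deleted_col[row];
}

int main() {
  const std::vector<bool> deleted{false, true, false};
  for (int64_t row = 0; row < 3; ++row) {
    if (skip_if_deleted(deleted, row)) {
      continue;  // deleted row: skip the row body entirely
    }
    std::cout << "process row " << row << "\n";
  }
}
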
void Executor::codegenWindowAvgEpilogue ( CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  crt_val,
llvm::Value *  window_func_null_val 
)
private

Definition at line 1466 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, CodegenUtil::createPtrWithHoistedMemoryAddr(), anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, kFLOAT, and WindowFunctionContext::NUM_EXECUTION_DEVICES.

1469  {
1471  const auto window_func_context =
1472  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
1473  const auto window_func = window_func_context->getWindowFunction();
1474  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1475  const auto pi32_type =
1476  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
1477  const auto pi64_type =
1478  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1479  const auto aggregate_state_type =
1480  window_func_ti.get_type() == kFLOAT ? pi32_type : pi64_type;
1481  const auto aggregate_state_count_i64 = cgen_state_->llInt(
1482  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
1483  auto aggregate_state_count = CodegenUtil::createPtrWithHoistedMemoryAddr(
1484  cgen_state_.get(),
1485  code_generator,
1486  co,
1487  aggregate_state_count_i64,
1488  aggregate_state_type,
1489  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1490  .front();
1491  std::string agg_count_func_name = "agg_count";
1492  switch (window_func_ti.get_type()) {
1493  case kFLOAT: {
1494  agg_count_func_name += "_float";
1495  break;
1496  }
1497  case kDOUBLE: {
1498  agg_count_func_name += "_double";
1499  break;
1500  }
1501  default: {
1502  break;
1503  }
1504  }
1505  agg_count_func_name += "_skip_val";
1506  cgen_state_->emitCall(agg_count_func_name,
1507  {aggregate_state_count, crt_val, window_func_null_val});
1508 }
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
SQLTypeInfo get_adjusted_window_type_info(const Analyzer::WindowFunction *window_func)

+ Here is the call graph for this function:
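
AVG keeps two aggregate states, a running sum and a running count, and this epilogue bumps the count through an agg_count_*_skip_val call so NULL inputs affect neither state. A scalar sketch of that skip-val counting (hypothetical helper standing in for the runtime function):

#include <cstdint>
#include <iostream>

// Count non-null values only, mirroring the skip-val behaviour assumed for the
// emitted agg_count_*_skip_val call.
void agg_count_skip_val(int64_t* count_state, double val, double skip_val) {
  if (val != skip_val) {
    ++*count_state;
  }
}

int main() {
  int64_t count = 0;
  const double null_sentinel = -1.7976931348623157e+308;  // stand-in for the double NULL value
  agg_count_skip_val(&count, 3.5, null_sentinel);
  agg_count_skip_val(&count, null_sentinel, null_sentinel);  // NULL input, not counted
  std::cout << count << "\n";  // 1
}
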

std::pair< llvm::Value *, llvm::Value * > Executor::codegenWindowFrameBounds ( WindowFunctionContext window_func_context,
const Analyzer::WindowFrame frame_start_bound,
const Analyzer::WindowFrame frame_end_bound,
llvm::Value *  order_key_col_null_val_lv,
WindowFrameBoundFuncArgs args,
CodeGenerator code_generator 
)
private

Definition at line 1080 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, CHECK, WindowFrameBoundFuncArgs::current_col_value_lv, WindowFunctionContext::getOrderKeyColumnBuffers(), WindowFunctionContext::getWindowFunction(), Analyzer::WindowFrame::hasTimestampTypeFrameBound(), and WindowFrameBoundFuncArgs::order_type_col_name.

1086  {
1087  const auto window_func = window_func_context->getWindowFunction();
1088  CHECK(window_func);
1089  const auto is_timestamp_type_frame = frame_start_bound->hasTimestampTypeFrameBound() ||
1090  frame_end_bound->hasTimestampTypeFrameBound();
1091 
1092  if (window_func->hasRangeModeFraming()) {
1093  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1);
1094  CHECK(window_func->getOrderKeys().size() == 1UL);
1095  CHECK(window_func_context->getOrderKeyColumnBuffers().size() == 1UL);
1096  args.order_type_col_name = getOrderKeyTypeName(window_func_context);
1097  args.current_col_value_lv =
1098  codegenLoadCurrentValueFromColBuf(window_func_context, code_generator, args);
1099  }
1100 
1101  auto get_order_key_null_val = [is_timestamp_type_frame,
1102  &order_key_col_null_val_lv,
1103  this](const Analyzer::WindowFrame* frame_bound) {
1104  return is_timestamp_type_frame && !frame_bound->isCurrentRowBound()
1105  ? cgen_state_->castToTypeIn(order_key_col_null_val_lv, 64)
1106  : order_key_col_null_val_lv;
1107  };
1108  auto frame_start_bound_lv =
1109  codegenFrameBound(true,
1110  window_func->hasRangeModeFraming(),
1111  window_func->isFrameNavigateWindowFunction(),
1112  frame_start_bound,
1113  is_timestamp_type_frame,
1114  get_order_key_null_val(frame_start_bound),
1115  args);
1116  auto frame_end_bound_lv =
1117  codegenFrameBound(false,
1118  window_func->hasRangeModeFraming(),
1119  window_func->isFrameNavigateWindowFunction(),
1120  frame_end_bound,
1121  is_timestamp_type_frame,
1122  get_order_key_null_val(frame_end_bound),
1123  args);
1124  CHECK(frame_start_bound_lv);
1125  CHECK(frame_end_bound_lv);
1126  return std::make_pair(frame_start_bound_lv, frame_end_bound_lv);
1127 }
bool hasTimestampTypeFrameBound() const
Definition: Analyzer.h:2833
llvm::Value * current_col_value_lv
llvm::Value * codegenFrameBound(bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Value * codegenLoadCurrentValueFromColBuf(WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
const std::string getOrderKeyTypeName(WindowFunctionContext *window_func_context) const
const std::vector< const int8_t * > & getOrderKeyColumnBuffers() const
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const
std::string order_type_col_name

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunction ( const size_t  target_index,
const CompilationOptions co 
)
private

Definition at line 22 of file WindowFunctionIR.cpp.

References WindowProjectNodeContext::activateWindowFunctionContext(), run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, BACKWARD_FILL, CHECK, CHECK_EQ, CONDITIONAL_CHANGE_EVENT, COUNT, COUNT_IF, CUME_DIST, DENSE_RANK, logger::FATAL, FIRST_VALUE, FIRST_VALUE_IN_FRAME, FORWARD_FILL, WindowProjectNodeContext::get(), WindowFunctionContext::getWindowFunction(), LAG, LAG_IN_FRAME, LAST_VALUE, LAST_VALUE_IN_FRAME, LEAD, LEAD_IN_FRAME, LOG, MAX, MIN, NTH_VALUE, NTH_VALUE_IN_FRAME, NTILE, PERCENT_RANK, RANK, ROW_NUMBER, SUM, and SUM_IF.

23  {
25  CodeGenerator code_generator(this);
26 
27  const auto window_func_context =
28  WindowProjectNodeContext::get(this)->activateWindowFunctionContext(this,
29  target_index);
30  const auto window_func = window_func_context->getWindowFunction();
31  switch (window_func->getKind()) {
36  return code_generator.codegenWindowPosition(window_func_context,
37  code_generator.posArg(nullptr));
40  return cgen_state_->emitCall("percent_window_func",
41  {cgen_state_->llInt(reinterpret_cast<const int64_t>(
42  window_func_context->output())),
43  code_generator.posArg(nullptr)});
49  // they are always evaluated on the current frame
51  const auto& args = window_func->getArgs();
52  CHECK(!args.empty());
53  const auto arg_lvs = code_generator.codegen(args.front().get(), true, co);
54  CHECK_EQ(arg_lvs.size(), size_t(1));
55  return arg_lvs.front();
56  }
65  return codegenWindowFunctionAggregate(&code_generator, co);
74  default:
75  LOG(FATAL) << "Invalid window function kind";
76  }
77  return nullptr;
78 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define LOG(tag)
Definition: Logger.h:285
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
static const WindowProjectNodeContext * get(Executor *executor)
const WindowFunctionContext * activateWindowFunctionContext(Executor *executor, const size_t target_index) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Value * codegenWindowFunctionAggregate(CodeGenerator *code_generator, const CompilationOptions &co)
llvm::Value * codegenWindowNavigationFunctionOnFrame(const CompilationOptions &co)
#define CHECK(condition)
Definition: Logger.h:291
const Analyzer::WindowFunction * getWindowFunction() const

+ Here is the call graph for this function:

llvm::Value * Executor::codegenWindowFunctionAggregate ( CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 265 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, AVG, CHECK, CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowProjectNodeContext::get(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), and WindowFunctionContext::NUM_EXECUTION_DEVICES.

266  {
268  auto [reset_state_false_bb, aggregate_state] =
269  codegenWindowResetStateControlFlow(code_generator, co);
270  llvm::Value* aggregate_state_count = nullptr;
271  const auto window_func_context =
272  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
273  const auto window_func = window_func_context->getWindowFunction();
274  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
275  const auto aggregate_state_count_i64 = cgen_state_->llInt(
276  reinterpret_cast<const int64_t>(window_func_context->aggregateStateCount()));
277  const auto pi64_type =
278  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
279  aggregate_state_count = CodegenUtil::createPtrWithHoistedMemoryAddr(
280  cgen_state_.get(),
281  code_generator,
282  co,
283  aggregate_state_count_i64,
284  pi64_type,
285  WindowFunctionContext::NUM_EXECUTION_DEVICES)
286  .front();
287  }
288  codegenWindowFunctionStateInit(code_generator, co, aggregate_state);
289  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
290  const auto count_zero = cgen_state_->llInt(int64_t(0));
291  cgen_state_->emitCall("agg_id", {aggregate_state_count, count_zero});
292  }
293  cgen_state_->ir_builder_.CreateBr(reset_state_false_bb);
294  cgen_state_->ir_builder_.SetInsertPoint(reset_state_false_bb);
296  return codegenWindowFunctionAggregateCalls(aggregate_state, co);
297 }
void codegenWindowFunctionStateInit(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
static const WindowProjectNodeContext * get(Executor *executor)
std::pair< llvm::BasicBlock *, llvm::Value * > codegenWindowResetStateControlFlow(CodeGenerator *code_generator, const CompilationOptions &co)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
static const int NUM_EXECUTION_DEVICES
#define CHECK(condition)
Definition: Logger.h:291
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)

+ Here is the call graph for this function:
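
At each partition boundary the aggregate state is reset before accumulation resumes; for AVG both the sum state and the separate count state (zeroed via agg_id above) take part. A compact scalar analogue of the two-state reset and read-out:

#include <cstdint>
#include <iostream>
#include <vector>

struct AvgState {
  double sum = 0.0;
  int64_t count = 0;
};

// Reset at a partition boundary, then accumulate; AVG is derived at read time.
double window_avg(const std::vector<double>& partition_values) {
  AvgState state;  // analogous to the state init plus agg_id(count_state, 0)
  for (const double v : partition_values) {
    state.sum += v;
    ++state.count;
  }
  return state.count ? state.sum / state.count : 0.0;
}

int main() {
  std::cout << window_avg({1.0, 2.0, 3.0}) << "\n";  // 2
}
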

llvm::Value * Executor::codegenWindowFunctionAggregateCalls ( llvm::Value *  aggregate_state,
const CompilationOptions co 
)
private

Definition at line 1129 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, AVG, CHECK, CHECK_EQ, CodeGenerator::codegen(), CodeGenerator::codegenCastBetweenIntTypes(), COUNT, COUNT_IF, CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowFrameBoundFuncArgs::current_partition_start_offset_lv, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size(), anonymous_namespace{WindowFunctionIR.cpp}::get_window_agg_name(), WindowProjectNodeContext::getActiveWindowFunctionContext(), inline_fixed_encoding_null_val(), kDATE, kDOUBLE, kENCODING_DATE_IN_DAYS, kENCODING_FIXED, kFLOAT, kSUM_IF, kTIME, kTIMESTAMP, kTINYINT, MAX, MIN, WindowFunctionContext::NUM_EXECUTION_DEVICES, CodeGenerator::posArg(), SUM, SUM_IF, and window_function_conditional_aggregate().

1130  {
1132  const auto window_func_context =
1133  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
1134  const auto window_func = window_func_context->getWindowFunction();
1135  const auto window_func_ti = get_adjusted_window_type_info(window_func);
1136  const auto window_func_null_val =
1137  window_func_ti.is_fp()
1138  ? cgen_state_->inlineFpNull(window_func_ti)
1139  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
1140  if (window_func_context->elementCount() == 0) {
1141  // we do not need to generate a code for an empty input table
1142  return window_func->getKind() == SqlWindowFunctionKind::AVG
1143  ? cgen_state_->inlineFpNull(SQLTypeInfo(SQLTypes::kDOUBLE))
1144  : window_func_null_val;
1145  }
1146  const auto& args = window_func->getArgs();
1147  CodeGenerator code_generator(this);
1148  if (window_func_context->needsToBuildAggregateTree()) {
1149  // compute an aggregated value for each row of the window frame by using segment
1150  // tree when constructing a window context, we build a necessary segment tree (so
1151  // called `aggregate tree`) to query the aggregated value of the specific window
1152  // frame
1153  const auto pi64_type =
1154  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0);
1155  const auto ppi64_type = llvm::PointerType::get(
1156  llvm::PointerType::get(get_int_type(64, cgen_state_->context_), 0), 0);
1157 
1158  auto [frame_start_bound_expr_lv, frame_end_bound_expr_lv] =
1159  codegenFrameBoundRange(window_func, code_generator, co);
1160 
1161  // compute aggregated value over the computed frame range
1162  auto current_row_pos_lv = code_generator.posArg(nullptr);
1163  auto partition_index_lv = codegenCurrentPartitionIndex(
1164  window_func_context, &code_generator, co, current_row_pos_lv);
1165 
1166  // ordering column buffer
1167  const auto target_col_ti = args.front()->get_type_info();
1168  const auto target_col_size = target_col_ti.get_size();
1169  const auto col_type_name =
1170  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
1171 
1172  const auto partition_buf_ptrs = codegenLoadPartitionBuffers(
1173  window_func_context, &code_generator, co, partition_index_lv);
1174 
1175  auto [order_col_type_name, order_key_buf_ptr_lv] =
1176  codegenLoadOrderKeyBufPtr(window_func_context, &code_generator, co);
1177 
1178  // null value of the ordering column
1179  const auto order_key_buf_ti =
1180  window_func_context->getOrderKeyColumnBufferTypes().front();
1181  auto const ordering_spec = window_func->getCollation().front();
1182  llvm::Value* order_key_col_null_val_lv{nullptr};
1183  switch (order_key_buf_ti.get_type()) {
1184  case kDATE:
1185  case kTIMESTAMP:
1186  case kTIME: {
1187  if (order_key_buf_ti.get_compression() == kENCODING_FIXED ||
1188  order_key_buf_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
1189  auto null_val = inline_fixed_encoding_null_val(order_key_buf_ti);
1190  order_key_col_null_val_lv = cgen_state_->llInt((int32_t)null_val);
1191  break;
1192  }
1193  }
1194  default: {
1195  order_key_col_null_val_lv = cgen_state_->inlineNull(order_key_buf_ti);
1196  break;
1197  }
1198  }
1199 
1200  auto [null_start_pos_lv, null_end_pos_lv] = codegenFrameNullRange(
1201  window_func_context, &code_generator, co, partition_index_lv);
1202  auto nulls_first_lv = cgen_state_->llBool(ordering_spec.nulls_first);
1203 
1204  WindowFrameBoundFuncArgs args{
1205  frame_start_bound_expr_lv,
1206  frame_end_bound_expr_lv,
1207  current_row_pos_lv,
1208  nullptr,
1209  partition_buf_ptrs.current_partition_start_offset_lv,
1210  cgen_state_->llInt((int64_t)0),
1211  cgen_state_->llInt((int64_t)1),
1212  partition_buf_ptrs.num_elem_current_partition_lv,
1213  order_key_buf_ptr_lv,
1214  "",
1215  partition_buf_ptrs.target_partition_rowid_ptr_lv,
1216  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
1217  nulls_first_lv,
1218  null_start_pos_lv,
1219  null_end_pos_lv};
1220  auto [frame_start_bound_lv, frame_end_bound_lv] =
1221  codegenWindowFrameBounds(window_func_context,
1222  window_func->getFrameStartBound(),
1223  window_func->getFrameEndBound(),
1224  order_key_col_null_val_lv,
1225  args,
1226  code_generator);
1227 
1228  // codegen to send a query with frame bound to aggregate tree searcher
1229  llvm::ConstantInt* aggregation_trees_lv{nullptr};
1230  llvm::Value* invalid_val_lv{nullptr};
1231  llvm::Value* null_val_lv{nullptr};
1232  std::string aggregation_tree_search_func_name{"search_"};
1233  std::string aggregation_tree_getter_func_name{"get_"};
1234 
1235  // prepare null values and aggregate_tree getter and searcher depending on
1236  // a type of the ordering column
1237  auto agg_expr_ti = args.front()->get_type_info();
1238  if (agg_expr_ti.is_fp()) {
1239  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1240  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::max());
1241  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1242  invalid_val_lv = cgen_state_->llFp(std::numeric_limits<double>::lowest());
1243  } else {
1244  invalid_val_lv = cgen_state_->llFp((double)0);
1245  }
1246  null_val_lv = cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE));
1247  aggregation_tree_search_func_name += "double";
1248  aggregation_tree_getter_func_name += "double";
1249  } else {
1250  if (window_func->getKind() == SqlWindowFunctionKind::MIN) {
1251  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::max());
1252  } else if (window_func->getKind() == SqlWindowFunctionKind::MAX) {
1253  invalid_val_lv = cgen_state_->llInt(std::numeric_limits<int64_t>::lowest());
1254  } else {
1255  invalid_val_lv = cgen_state_->llInt((int64_t)0);
1256  }
1257  null_val_lv = cgen_state_->llInt(inline_int_null_value<int64_t>());
1258  aggregation_tree_search_func_name += "int64_t";
1259  aggregation_tree_getter_func_name += "integer";
1260  }
1261 
1262  // derived aggregation has a different code path
1263  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1264  aggregation_tree_search_func_name += "_derived";
1265  aggregation_tree_getter_func_name += "_derived";
1266  }
1267 
1268  // get a buffer holding aggregate trees for each partition
1269  if (agg_expr_ti.is_fp()) {
1270  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1271  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1272  window_func_context->getDerivedAggregationTreesForDoubleTypeWindowExpr()));
1273  } else {
1274  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1275  window_func_context->getAggregationTreesForDoubleTypeWindowExpr()));
1276  }
1277  } else {
1278  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1279  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1280  window_func_context->getDerivedAggregationTreesForIntegerTypeWindowExpr()));
1281  } else {
1282  aggregation_trees_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
1283  window_func_context->getAggregationTreesForIntegerTypeWindowExpr()));
1284  }
1285  }
1286 
1287  CHECK(aggregation_trees_lv);
1288  CHECK(invalid_val_lv);
1289  aggregation_tree_search_func_name += "_aggregation_tree";
1290  aggregation_tree_getter_func_name += "_aggregation_tree";
1291 
1292  // get the aggregate tree of the current partition from a window context
1293  auto aggregation_trees_ptr = CodegenUtil::createPtrWithHoistedMemoryAddr(
1294  cgen_state_.get(),
1295  &code_generator,
1296  co,
1297  aggregation_trees_lv,
1298  ppi64_type,
1299  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1300  .front();
1301  auto target_aggregation_tree_lv = cgen_state_->emitCall(
1302  aggregation_tree_getter_func_name, {aggregation_trees_ptr, partition_index_lv});
1303 
1304  // a depth of segment tree
1305  const auto tree_depth_buf = cgen_state_->llInt(
1306  reinterpret_cast<int64_t>(window_func_context->getAggregateTreeDepth()));
1307  const auto tree_depth_buf_ptr = CodegenUtil::createPtrWithHoistedMemoryAddr(
1308  cgen_state_.get(),
1309  &code_generator,
1310  co,
1311  tree_depth_buf,
1312  pi64_type,
1313  WindowFunctionContext::NUM_EXECUTION_DEVICES)
1314  .front();
1315  const auto current_partition_tree_depth_buf_ptr = cgen_state_->ir_builder_.CreateGEP(
1316  get_int_type(64, cgen_state_->context_), tree_depth_buf_ptr, partition_index_lv);
1317  const auto current_partition_tree_depth_lv = cgen_state_->ir_builder_.CreateLoad(
1318  current_partition_tree_depth_buf_ptr->getType()->getPointerElementType(),
1319  current_partition_tree_depth_buf_ptr);
1320 
1321  // a fanout of the current partition's segment tree
1322  const auto aggregation_tree_fanout_lv = cgen_state_->llInt(
1323  static_cast<int64_t>(window_func_context->getAggregateTreeFanout()));
1324 
1325  // agg_type
1326  const auto agg_type_lv =
1327  cgen_state_->llInt(static_cast<int32_t>(window_func->getKind()));
1328 
1329  // send a query to the aggregate tree with the frame range:
1330  // `frame_start_bound_lv` ~ `frame_end_bound_lv`
1331  auto res_lv =
1332  cgen_state_->emitCall(aggregation_tree_search_func_name,
1333  {target_aggregation_tree_lv,
1334  frame_start_bound_lv,
1335  frame_end_bound_lv,
1336  current_partition_tree_depth_lv,
1337  aggregation_tree_fanout_lv,
1338  cgen_state_->llBool(agg_expr_ti.is_decimal()),
1339  cgen_state_->llInt((int64_t)agg_expr_ti.get_scale()),
1340  invalid_val_lv,
1341  null_val_lv,
1342  agg_type_lv});
1343 
1344  // handling returned null value if exists
1345  std::string null_handler_func_name{"handle_null_val_"};
1346  std::vector<llvm::Value*> null_handler_args{res_lv, null_val_lv};
1347 
1348  // determine null_handling function's name
1349  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1350  // average aggregate function returns a value as a double
1351  // (and our search* function also returns a double)
1352  if (agg_expr_ti.is_fp()) {
1353  // fp type: double null value
1354  null_handler_func_name += "double_double";
1355  } else {
1356  // non-fp type: int64_t null type
1357  null_handler_func_name += "double_int64_t";
1358  }
1359  } else if (agg_expr_ti.is_fp()) {
1360  // fp type: double null value
1361  null_handler_func_name += "double_double";
1362  } else {
1363  // non-fp type: int64_t null type
1364  null_handler_func_name += "int64_t_int64_t";
1365  }
1366  null_handler_func_name += "_window_framing_agg";
1367 
1368  // prepare null_val
1369  if (window_func->getKind() == SqlWindowFunctionKind::COUNT) {
1370  if (agg_expr_ti.is_fp()) {
1371  null_handler_args.push_back(cgen_state_->llFp((double)0));
1372  } else {
1373  null_handler_args.push_back(cgen_state_->llInt((int64_t)0));
1374  }
1375  } else if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1376  null_handler_args.push_back(cgen_state_->inlineFpNull(SQLTypeInfo(kDOUBLE)));
1377  } else {
1378  null_handler_args.push_back(cgen_state_->castToTypeIn(window_func_null_val, 64));
1379  }
1380  res_lv = cgen_state_->emitCall(null_handler_func_name, null_handler_args);
1381 
1382  // when AGG_TYPE is double, we get a double type return value we expect an integer
1383  // type value for the count aggregation
1384  if (window_func->getKind() == SqlWindowFunctionKind::COUNT && agg_expr_ti.is_fp()) {
1385  return cgen_state_->ir_builder_.CreateFPToSI(
1386  res_lv, get_int_type(64, cgen_state_->context_));
1387  } else if (window_func->getKind() != SqlWindowFunctionKind::COUNT &&
1388  agg_expr_ti.is_date_in_days()) {
1389  // we need to decode the "encoded" date column value
1390  auto date_null_val = get_null_value_by_size(cgen_state_.get(), agg_expr_ti);
1391  if (date_null_val->getType()->getScalarSizeInBits() != 32) {
1392  date_null_val = cgen_state_->castToTypeIn(date_null_val, 32);
1393  }
1394  return cgen_state_->emitCall("fixed_width_date_decode",
1395  {res_lv, date_null_val, null_val_lv});
1396  }
1397  return res_lv;
1398  } else {
1399  auto agg_name = get_window_agg_name(window_func->getKind(), window_func_ti);
1400  Analyzer::Expr* arg_target_expr;
1401  std::vector<llvm::Value*> agg_func_args{aggregate_state};
1402  auto modified_window_func_null_val = window_func_null_val;
1403  if (args.empty() ||
1404  (window_func->getKind() == SqlWindowFunctionKind::COUNT &&
1405  dynamic_cast<Analyzer::Constant*>(args.front().get()) != nullptr)) {
1406  // a count aggregation without an expression: COUNT(1) or COUNT(*)
1407  agg_func_args.push_back(cgen_state_->llInt(int64_t(1)));
1408  } else {
1409  // we use #base_agg_func_name##_skip_val agg function
1410  // i.e.,int64_t agg_sum_skip_val(int64_t* agg, int64_t val, int64_t skip_val)
1411  arg_target_expr = args.front().get();
1412  const auto arg_lvs = code_generator.codegen(arg_target_expr, true, co);
1413  CHECK_EQ(arg_lvs.size(), size_t(1));
1414  // handling current row's value
1415  auto crt_val = arg_lvs.front();
1416  if ((window_func->getKind() == SqlWindowFunctionKind::SUM ||
1417  window_func->getKind() == SqlWindowFunctionKind::SUM_IF) &&
1418  !window_func_ti.is_fp()) {
1419  crt_val = code_generator.codegenCastBetweenIntTypes(
1420  arg_lvs.front(), args.front()->get_type_info(), window_func_ti, false);
1421  }
1422  agg_func_args.push_back(window_func_ti.get_type() == kFLOAT
1423  ? crt_val
1424  : cgen_state_->castToTypeIn(crt_val, 64));
1425  // handle null value and conditional value for conditional aggregates if necessary
1426  llvm::Value* cond_lv{nullptr};
1427  if (window_function_conditional_aggregate(window_func->getKind())) {
1428  switch (window_func->getKind()) {
1429  case SqlWindowFunctionKind::COUNT_IF:
1430  // COUNT_IF has a single condition expr which is always bool type
1431  modified_window_func_null_val = cgen_state_->castToTypeIn(
1432  cgen_state_->inlineNull(SQLTypeInfo(kTINYINT)), 64);
1433  break;
1434  case SqlWindowFunctionKind::SUM_IF: {
1435  // FP type input col uses its own null value depending on the type
1436  // otherwise (integer type input col), we use 8-byte type
1437  if (args.front()->get_type_info().is_integer()) {
1438  agg_func_args[1] = cgen_state_->castToTypeIn(agg_func_args[1], 64);
1439  // keep the null value but casting its type to 8-byte
1440  modified_window_func_null_val =
1441  cgen_state_->castToTypeIn(window_func_null_val, 64);
1442  }
1443  auto cond_expr_lv = code_generator.codegen(args[1].get(), true, co).front();
1444  cond_lv =
1445  codegenConditionalAggregateCondValSelector(cond_expr_lv, kSUM_IF, co);
1446  }
1447  default:
1448  break;
1449  }
1450  }
1451  agg_name += "_skip_val";
1452  agg_func_args.push_back(modified_window_func_null_val);
1453  if (cond_lv) {
1454  agg_func_args.push_back(cond_lv);
1455  }
1456  }
1457  cgen_state_->emitCall(agg_name, agg_func_args);
1458  if (window_func->getKind() == SqlWindowFunctionKind::AVG) {
1459  codegenWindowAvgEpilogue(
1460  &code_generator, co, agg_func_args[1], window_func_null_val);
1461  }
1462  return codegenAggregateWindowState(&code_generator, co, aggregate_state);
1463  }
1464 }
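The fragment above keeps window aggregates in a 64-bit (or double) buffer, so a COUNT over a floating-point column has to be narrowed back to an integer before it is returned. The stand-alone sketch below (not part of the OmniSciDB sources; the function name truncate_count and all identifiers are illustrative) shows the same CreateFPToSI conversion issued through llvm::IRBuilder:

#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Support/raw_ostream.h>

int main() {
  llvm::LLVMContext context;
  llvm::Module module("count_fixup_sketch", context);
  llvm::IRBuilder<> builder(context);

  // i64 @truncate_count(double %agg_result): the double-typed aggregate slot value
  // comes in, an integer count goes out.
  auto* fn_type = llvm::FunctionType::get(
      builder.getInt64Ty(), {builder.getDoubleTy()}, /*isVarArg=*/false);
  auto* fn = llvm::Function::Create(
      fn_type, llvm::Function::ExternalLinkage, "truncate_count", module);
  auto* entry = llvm::BasicBlock::Create(context, "entry", fn);
  builder.SetInsertPoint(entry);

  llvm::Value* agg_result = &*fn->arg_begin();
  // COUNT must return an integer even though the aggregate buffer holds a double.
  llvm::Value* count_as_int = builder.CreateFPToSI(agg_result, builder.getInt64Ty());
  builder.CreateRet(count_as_int);

  llvm::verifyFunction(*fn, &llvm::errs());
  module.print(llvm::outs(), nullptr);
  return 0;
}

Running the sketch prints a function whose body is a single fptosi instruction, which is the shape of IR emitted at line 1385 above.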

void Executor::codegenWindowFunctionStateInit ( CodeGenerator code_generator,
const CompilationOptions co,
llvm::Value *  aggregate_state 
)
private

Definition at line 339 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, COUNT, COUNT_IF, anonymous_namespace{WindowFunctionIR.cpp}::get_adjusted_window_type_info(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kDOUBLE, and kFLOAT.

341  {
342  AUTOMATIC_IR_METADATA(cgen_state_.get());
343  const auto window_func_context =
344  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
345  const auto window_func = window_func_context->getWindowFunction();
346  const auto window_func_ti = get_adjusted_window_type_info(window_func);
347  const auto window_func_null_val =
348  window_func_ti.is_fp()
349  ? cgen_state_->inlineFpNull(window_func_ti)
350  : cgen_state_->castToTypeIn(cgen_state_->inlineIntNull(window_func_ti), 64);
351  llvm::Value* window_func_init_val;
352  const auto window_func_kind = window_func_context->getWindowFunction()->getKind();
353  if (window_func_kind == SqlWindowFunctionKind::COUNT ||
354  window_func_kind == SqlWindowFunctionKind::COUNT_IF) {
355  switch (window_func_ti.get_type()) {
356  case kFLOAT: {
357  window_func_init_val = cgen_state_->llFp(float(0));
358  break;
359  }
360  case kDOUBLE: {
361  window_func_init_val = cgen_state_->llFp(double(0));
362  break;
363  }
364  default: {
365  window_func_init_val = cgen_state_->llInt(int64_t(0));
366  break;
367  }
368  }
369  } else {
370  window_func_init_val = window_func_null_val;
371  }
372  const auto pi32_type =
373  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
374  switch (window_func_ti.get_type()) {
375  case kDOUBLE: {
376  cgen_state_->emitCall("agg_id_double", {aggregate_state, window_func_init_val});
377  break;
378  }
379  case kFLOAT: {
380  aggregate_state =
381  cgen_state_->ir_builder_.CreateBitCast(aggregate_state, pi32_type);
382  cgen_state_->emitCall("agg_id_float", {aggregate_state, window_func_init_val});
383  break;
384  }
385  default: {
386  cgen_state_->emitCall("agg_id", {aggregate_state, window_func_init_val});
387  break;
388  }
389  }
390 }
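codegenWindowFunctionStateInit() only chooses an initial value (zero for COUNT/COUNT_IF, the type's null sentinel for every other aggregate) and then delegates the store to the agg_id family of runtime helpers. The snippet below is a simplified, assumed restatement of what those helpers do to the per-partition aggregate slot; it is not the actual RuntimeFunctions.cpp code, and the _sketch suffixes mark it as illustrative:

#include <cstdint>
#include <cstring>

// Write the initial value into the 64-bit aggregate state slot (generic/integer case).
inline void agg_id_sketch(int64_t* agg, const int64_t val) {
  *agg = val;
}

// Float case: the caller bit-casts the slot to i32* first, so only 4 bytes are written.
inline void agg_id_float_sketch(int32_t* agg, const float val) {
  std::memcpy(agg, &val, sizeof(float));
}

// Double case: the full 8-byte slot is reinterpreted as a double.
inline void agg_id_double_sketch(int64_t* agg, const double val) {
  std::memcpy(agg, &val, sizeof(double));
}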

llvm::Value * Executor::codegenWindowNavigationFunctionOnFrame ( const CompilationOptions co)
private

Definition at line 392 of file WindowFunctionIR.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CodegenUtil::createPtrWithHoistedMemoryAddr(), WindowFrameBoundFuncArgs::current_partition_start_offset_lv, FIRST_VALUE_IN_FRAME, FORWARD_FILL, anonymous_namespace{WindowFunctionIR.cpp}::get_col_type_name_by_size(), get_fp_type(), get_int_type(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size(), anonymous_namespace{WindowFunctionIR.cpp}::get_null_value_by_size_with_encoding(), WindowProjectNodeContext::getActiveWindowFunctionContext(), kENCODING_DATE_IN_DAYS, kSecsPerDay, LAG_IN_FRAME, LAST_VALUE_IN_FRAME, LEAD_IN_FRAME, NTH_VALUE_IN_FRAME, WindowFunctionContext::NUM_EXECUTION_DEVICES, and UNREACHABLE.

393  {
394  AUTOMATIC_IR_METADATA(cgen_state_.get());
395  const auto window_func_context =
396  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
397  const auto window_func = window_func_context->getWindowFunction();
398  const auto window_func_kind = window_func->getKind();
399  const auto& args = window_func->getArgs();
400  CHECK(args.size() >= 1 && args.size() <= 3);
401  CodeGenerator code_generator(this);
402 
403  const auto target_col_ti = args.front()->get_type_info();
404  const auto target_col_size = target_col_ti.get_size();
405  const auto target_col_type_name =
406  get_col_type_name_by_size(target_col_size, target_col_ti.is_fp());
407  const auto target_col_logical_type_name = get_col_type_name_by_size(
408  window_func->get_type_info().get_size(), window_func->get_type_info().is_fp());
409 
410  // when the target column is fixed-encoded, we store the actual column value with the
411  // encoding applied, but our result set analyzer only considers the type without the
412  // encoding scheme, so we handle the two null values separately
413  auto logical_null_val_lv =
414  get_null_value_by_size(cgen_state_.get(), window_func->get_type_info());
415  auto target_col_null_val_lv =
416  get_null_value_by_size_with_encoding(cgen_state_.get(), target_col_ti);
417  if (window_func_context->elementCount() == 0) {
418  // we do not need to generate code for an empty input table
419  return target_col_null_val_lv;
420  }
421 
422  auto current_row_pos_lv = code_generator.posArg(nullptr);
423  auto partition_index_lv = codegenCurrentPartitionIndex(
424  window_func_context, &code_generator, co, current_row_pos_lv);
425 
426  // load window function input expression; target_column
427  size_t target_col_size_in_byte = target_col_size * 8;
428  llvm::Type* col_buf_ptr_type =
429  target_col_ti.is_fp()
430  ? get_fp_type(target_col_size_in_byte, cgen_state_->context_)
431  : get_int_type(target_col_size_in_byte, cgen_state_->context_);
432  auto col_buf_type = llvm::PointerType::get(col_buf_ptr_type, 0);
433  auto target_col_buf_ptr_lv = cgen_state_->llInt(reinterpret_cast<int64_t>(
434  window_func_context->getColumnBufferForWindowFunctionExpressions().front()));
435  auto target_col_buf_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
436  cgen_state_.get(),
437  &code_generator,
438  co,
439  target_col_buf_ptr_lv,
440  col_buf_type,
441  WindowFunctionContext::NUM_EXECUTION_DEVICES)
442  .front();
443 
444  // prepare various buffer ptrs related to the window partition
445  auto partition_buf_ptrs = codegenLoadPartitionBuffers(
446  window_func_context, &code_generator, co, partition_index_lv);
447 
448  // null value of the ordering column
449  const auto order_key_buf_ti =
450  window_func_context->getOrderKeyColumnBufferTypes().front();
451  auto const ordering_spec = window_func->getCollation().front();
452  auto order_key_col_null_val_lv =
453  get_null_value_by_size_with_encoding(cgen_state_.get(), order_key_buf_ti);
454 
455  // load ordering column
456  auto [order_col_type_name, order_key_buf_ptr_lv] =
457  codegenLoadOrderKeyBufPtr(window_func_context, &code_generator, co);
458 
459  // null range
460  auto [null_start_pos_lv, null_end_pos_lv] =
461  codegenFrameNullRange(window_func_context, &code_generator, co, partition_index_lv);
462 
463  // compute the row index of the current row w.r.t. the window frame it belongs to
464  std::string row_idx_on_frame_func = "compute_";
465  row_idx_on_frame_func += order_col_type_name;
466  row_idx_on_frame_func += ordering_spec.is_desc ? "_greater_equal" : "_less_equal";
467  row_idx_on_frame_func += "_current_row_idx_in_frame";
468  auto int64_t_one_val_lv = cgen_state_->llInt((int64_t)1);
469  auto nulls_first_lv = cgen_state_->llBool(ordering_spec.nulls_first);
470  auto cur_row_idx_in_frame_lv =
471  cgen_state_->emitCall(row_idx_on_frame_func,
472  {partition_buf_ptrs.num_elem_current_partition_lv,
473  current_row_pos_lv,
474  order_key_buf_ptr_lv,
475  partition_buf_ptrs.target_partition_rowid_ptr_lv,
476  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
477  order_key_col_null_val_lv,
478  nulls_first_lv,
479  null_start_pos_lv,
480  null_end_pos_lv});
481 
482  if (window_func->isMissingValueFillingFunction()) {
483  // We classify both FORWARD_FILL and BACKWARD_FILL as window frame navigation
484  // functions because they need to determine the current row index within a sorted
485  // partition (as we do for the other frame navigation functions) to compute a
486  // correct and consistent result set; otherwise, the query result may differ per
487  // execution due to the missing table ordering. Now we know the current row's index
488  // in the sorted partition (cur_row_idx_in_frame_lv), so we can return by calling
489  // the runtime function with the index we computed.
490  std::string func_name = "fill_" + target_col_type_name + "_missing_value";
491 
492  llvm::Value* forward_fill_lv =
493  cgen_state_->llBool(window_func_kind == SqlWindowFunctionKind::FORWARD_FILL);
494  return cgen_state_->emitCall(func_name,
495  {cur_row_idx_in_frame_lv,
496  target_col_null_val_lv,
497  target_col_buf_lv,
498  partition_buf_ptrs.num_elem_current_partition_lv,
499  partition_buf_ptrs.target_partition_rowid_ptr_lv,
500  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
501  forward_fill_lv});
502  }
503 
504  // compute frame bound for the current row
505  auto [frame_start_bound_expr_lv, frame_end_bound_expr_lv] =
506  codegenFrameBoundRange(window_func, code_generator, co);
507 
508  // prepare the arguments used to compute the frame bounds of the current row
509  auto const int64_t_zero_val_lv = cgen_state_->llInt((int64_t)0);
510  WindowFrameBoundFuncArgs frame_bound_args{
511  frame_start_bound_expr_lv,
512  frame_end_bound_expr_lv,
513  window_func->hasRangeModeFraming() ? current_row_pos_lv : cur_row_idx_in_frame_lv,
514  nullptr,
515  window_func->hasRangeModeFraming()
516  ? int64_t_zero_val_lv
517  : partition_buf_ptrs.current_partition_start_offset_lv,
518  int64_t_zero_val_lv,
519  int64_t_one_val_lv,
520  partition_buf_ptrs.num_elem_current_partition_lv,
521  order_key_buf_ptr_lv,
522  "",
523  partition_buf_ptrs.target_partition_rowid_ptr_lv,
524  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
525  nulls_first_lv,
526  null_start_pos_lv,
527  null_end_pos_lv};
528  auto [frame_start_bound_lv, frame_end_bound_lv] =
529  codegenWindowFrameBounds(window_func_context,
530  window_func->getFrameStartBound(),
531  window_func->getFrameEndBound(),
532  order_key_col_null_val_lv,
533  frame_bound_args,
534  code_generator);
535 
536  // compute the index of the current row in the frame it belongs to
537  llvm::Value* modified_cur_row_idx_in_frame_lv{nullptr};
538  llvm::Value* offset_lv{nullptr};
539  switch (window_func_kind) {
540  case SqlWindowFunctionKind::LAG_IN_FRAME:
541  offset_lv = cgen_state_->castToTypeIn(
542  code_generator.codegen(args[1].get(), true, co)[0], 64);
543  modified_cur_row_idx_in_frame_lv =
544  cgen_state_->ir_builder_.CreateSub(cur_row_idx_in_frame_lv, offset_lv);
545  break;
546  case SqlWindowFunctionKind::LEAD_IN_FRAME:
547  offset_lv = cgen_state_->castToTypeIn(
548  code_generator.codegen(args[1].get(), true, co)[0], 64);
549  modified_cur_row_idx_in_frame_lv =
550  cgen_state_->ir_builder_.CreateAdd(cur_row_idx_in_frame_lv, offset_lv);
551  break;
552  case SqlWindowFunctionKind::FIRST_VALUE_IN_FRAME:
553  modified_cur_row_idx_in_frame_lv = frame_start_bound_lv;
554  break;
555  case SqlWindowFunctionKind::LAST_VALUE_IN_FRAME:
556  modified_cur_row_idx_in_frame_lv = frame_end_bound_lv;
557  break;
558  case SqlWindowFunctionKind::NTH_VALUE_IN_FRAME: {
559  offset_lv = cgen_state_->castToTypeIn(
560  code_generator.codegen(args[1].get(), true, co)[0], 64);
561  auto candidate_offset_lv =
562  cgen_state_->ir_builder_.CreateAdd(frame_start_bound_lv, offset_lv);
563  auto out_of_frame_bound_lv =
564  cgen_state_->ir_builder_.CreateICmpSGT(candidate_offset_lv, frame_end_bound_lv);
565  modified_cur_row_idx_in_frame_lv = cgen_state_->ir_builder_.CreateSelect(
566  out_of_frame_bound_lv, cgen_state_->llInt((int64_t)-1), candidate_offset_lv);
567  break;
568  }
569  default:
570  UNREACHABLE() << "Unsupported window function to navigate a window frame.";
571  }
572  CHECK(modified_cur_row_idx_in_frame_lv);
573 
574  // get the target column value in the frame w.r.t the offset
575  std::string target_func_name = "get_";
576  target_func_name += target_col_type_name + "_value_";
577  target_func_name += target_col_logical_type_name + "_type_";
578  target_func_name += "in_frame";
579  auto res_lv =
580  cgen_state_->emitCall(target_func_name,
581  {modified_cur_row_idx_in_frame_lv,
582  frame_start_bound_lv,
583  frame_end_bound_lv,
584  target_col_buf_lv,
585  partition_buf_ptrs.target_partition_rowid_ptr_lv,
586  partition_buf_ptrs.target_partition_sorted_rowid_ptr_lv,
587  logical_null_val_lv,
588  target_col_null_val_lv});
589  if (target_col_ti.get_compression() == kENCODING_DATE_IN_DAYS) {
590  res_lv = cgen_state_->emitCall(
591  "encode_date",
592  {res_lv, logical_null_val_lv, cgen_state_->llInt((int64_t)kSecsPerDay)});
593  }
594  CHECK(res_lv);
595  return res_lv;
596 }
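The navigation path above selects its runtime helpers purely by name: the ordering-column type and sort direction pick the compute_*_current_row_idx_in_frame function, and the target column's physical and logical types pick the get_*_value_*_type_in_frame function. The sketch below reproduces that naming scheme; the type-name mapping in col_type_name_by_size_sketch is an assumption made for illustration, and everything here is stand-alone example code rather than the OmniSciDB implementation:

#include <cstddef>
#include <iostream>
#include <string>

// Assumed mapping from a column's byte size / fp flag to the type tag used in the
// runtime function names (mirrors what get_col_type_name_by_size() is expected to do).
std::string col_type_name_by_size_sketch(const std::size_t size, const bool is_fp) {
  if (is_fp) {
    return size == 4 ? "float" : "double";
  }
  switch (size) {
    case 1:
      return "int8_t";
    case 2:
      return "int16_t";
    case 4:
      return "int32_t";
    default:
      return "int64_t";
  }
}

int main() {
  // Example: LAG_IN_FRAME over a DOUBLE target column, ordered by a BIGINT key ASC.
  const std::string target_col_type = col_type_name_by_size_sketch(8, /*is_fp=*/true);
  const std::string logical_col_type = col_type_name_by_size_sketch(8, /*is_fp=*/true);
  const std::string order_col_type = col_type_name_by_size_sketch(8, /*is_fp=*/false);
  const bool is_desc = false;

  const std::string row_idx_func = "compute_" + order_col_type +
                                   (is_desc ? "_greater_equal" : "_less_equal") +
                                   "_current_row_idx_in_frame";
  const std::string value_func =
      "get_" + target_col_type + "_value_" + logical_col_type + "_type_in_frame";

  std::cout << row_idx_func << "\n" << value_func << "\n";
  // -> compute_int64_t_less_equal_current_row_idx_in_frame
  // -> get_double_value_double_type_in_frame
}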

std::pair< llvm::BasicBlock *, llvm::Value * > Executor::codegenWindowResetStateControlFlow ( CodeGenerator code_generator,
const CompilationOptions co 
)
private

Definition at line 299 of file WindowFunctionIR.cpp.

References AUTOMATIC_IR_METADATA, CodegenUtil::createPtrWithHoistedMemoryAddr(), get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), WindowFunctionContext::NUM_EXECUTION_DEVICES, CodeGenerator::posArg(), and CodeGenerator::toBool().

301  {
302  AUTOMATIC_IR_METADATA(cgen_state_.get());
303  const auto window_func_context =
304  WindowProjectNodeContext::getActiveWindowFunctionContext(this);
305  auto aggregate_state = aggregateWindowStatePtr(code_generator, co);
306  const auto bitset = cgen_state_->llInt(
307  reinterpret_cast<const int64_t>(window_func_context->partitionStart()));
308  const auto bitset_lv =
309  CodegenUtil::createPtrWithHoistedMemoryAddr(
310  cgen_state_.get(),
311  code_generator,
312  co,
313  bitset,
314  llvm::PointerType::get(get_int_type(8, cgen_state_->context_), 0),
315  WindowFunctionContext::NUM_EXECUTION_DEVICES)
316  .front();
317  const auto min_val = cgen_state_->llInt(int64_t(0));
318  const auto max_val = cgen_state_->llInt(window_func_context->elementCount() - 1);
319  const auto null_val = cgen_state_->llInt(inline_int_null_value<int64_t>());
320  const auto null_bool_val = cgen_state_->llInt<int8_t>(inline_int_null_value<int8_t>());
321  const auto reset_state =
322  code_generator->toBool(cgen_state_->emitCall("bit_is_set",
323  {bitset_lv,
324  code_generator->posArg(nullptr),
325  min_val,
326  max_val,
327  null_val,
328  null_bool_val}));
329  const auto reset_state_true_bb = llvm::BasicBlock::Create(
330  cgen_state_->context_, "reset_state.true", cgen_state_->current_func_);
331  const auto reset_state_false_bb = llvm::BasicBlock::Create(
332  cgen_state_->context_, "reset_state.false", cgen_state_->current_func_);
333  cgen_state_->ir_builder_.CreateCondBr(
334  reset_state, reset_state_true_bb, reset_state_false_bb);
335  cgen_state_->ir_builder_.SetInsertPoint(reset_state_true_bb);
336  return std::make_pair(reset_state_false_bb, aggregate_state);
337 }
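The basic blocks created above implement a simple per-row decision: consult the partition-start bitset, and if the current row opens a new partition, re-initialize the aggregate state before aggregating it. The plain-C++ analogue below (illustrative only; the real bit_is_set runtime function also takes min/max/null sentinels) makes that control flow explicit:

#include <cstdint>
#include <vector>

// Simplified stand-in for the bit_is_set runtime helper.
inline bool bit_is_set_sketch(const std::vector<uint8_t>& bitset, const int64_t pos) {
  return (bitset[pos >> 3] >> (pos & 7)) & 1;
}

// Running-sum analogue of "reset the window aggregate state at every partition start".
void aggregate_with_partition_resets_sketch(
    const std::vector<uint8_t>& partition_start_bitset,
    const std::vector<double>& column,
    std::vector<double>& running_state_per_row) {
  double state = 0;  // stands in for the aggregate state slot
  for (int64_t pos = 0; pos < static_cast<int64_t>(column.size()); ++pos) {
    if (bit_is_set_sketch(partition_start_bitset, pos)) {
      state = 0;  // "reset_state.true" block: re-initialize the state
    }
    // "reset_state.false" block (the block returned to the caller): aggregate as usual.
    state += column[pos];
    running_state_per_row[pos] = state;
  }
}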

ResultSetPtr Executor::collectAllDeviceResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner 
)
private

Definition at line 2715 of file Execute.cpp.

References anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), collectAllDeviceShardedTopResults(), DEBUG_TIMER, SharedKernelContext::getFragmentResults(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, heavyai::NonGroupedAggregate, reduceMultiDeviceResults(), reduceSpeculativeTopN(), GroupByAndAggregate::shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and use_speculative_top_n().

Referenced by executeWorkUnitImpl().

2720  {
2721  auto timer = DEBUG_TIMER(__func__);
2722  auto& result_per_device = shared_context.getFragmentResults();
2723  if (result_per_device.empty() && query_mem_desc.getQueryDescriptionType() ==
2724  QueryDescriptionType::NonGroupedAggregate) {
2725  return build_row_for_empty_input(
2726  ra_exe_unit.target_exprs, query_mem_desc, device_type);
2727  }
2728  if (use_speculative_top_n(ra_exe_unit, query_mem_desc)) {
2729  try {
2730  return reduceSpeculativeTopN(
2731  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2732  } catch (const std::bad_alloc&) {
2733  throw SpeculativeTopNFailed("Failed during multi-device reduction.");
2734  }
2735  }
2736  const auto shard_count =
2737  device_type == ExecutorDeviceType::GPU
2738  ? GroupByAndAggregate::shard_count_for_top_groups(ra_exe_unit)
2739  : 0;
2740 
2741  if (shard_count && !result_per_device.empty()) {
2742  return collectAllDeviceShardedTopResults(shared_context, ra_exe_unit, device_type);
2743  }
2744  return reduceMultiDeviceResults(
2745  ra_exe_unit, result_per_device, row_set_mem_owner, query_mem_desc);
2746 }
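The member above chooses one of four reduction paths for the per-device partial results. The condensed restatement below (placeholder names, not OmniSciDB API) captures that dispatch order:

#include <cstddef>

enum class ReductionPath {
  EmptyInputRow,     // synthesize a single row for an empty non-grouped aggregate
  SpeculativeTopN,   // reduceSpeculativeTopN(); bad_alloc becomes SpeculativeTopNFailed
  ShardedTopGroups,  // collectAllDeviceShardedTopResults() for sharded top groups on GPU
  MultiDeviceReduce  // generic reduceMultiDeviceResults()
};

ReductionPath pick_reduction_path_sketch(const bool no_results,
                                         const bool non_grouped_aggregate,
                                         const bool speculative_top_n,
                                         const bool on_gpu,
                                         const std::size_t shard_count) {
  if (no_results && non_grouped_aggregate) {
    return ReductionPath::EmptyInputRow;
  }
  if (speculative_top_n) {
    return ReductionPath::SpeculativeTopN;
  }
  if (on_gpu && shard_count > 0 && !no_results) {
    return ReductionPath::ShardedTopGroups;
  }
  return ReductionPath::MultiDeviceReduce;
}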

ResultSetPtr Executor::collectAllDeviceShardedTopResults ( SharedKernelContext shared_context,
const RelAlgExecutionUnit ra_exe_unit,
const ExecutorDeviceType  device_type 
) const
private