17 #ifndef QUERYENGINE_EXECUTE_H
18 #define QUERYENGINE_EXECUTE_H
43 #include "../Logger/Logger.h"
44 #include "../Shared/SystemParameters.h"
45 #include "../Shared/mapd_shared_mutex.h"
46 #include "../Shared/measure.h"
47 #include "../Shared/thread_count.h"
48 #include "../Shared/toString.h"
49 #include "../StringDictionary/LruCache.hpp"
50 #include "../StringDictionary/StringDictionary.h"
51 #include "../StringDictionary/StringDictionaryProxy.h"
55 #include <llvm/IR/Function.h>
56 #include <llvm/IR/Value.h>
57 #include <llvm/Linker/Linker.h>
58 #include <llvm/Transforms/Utils/ValueMapper.h>
59 #include <rapidjson/document.h>
63 #include <condition_variable>
72 #include <unordered_map>
73 #include <unordered_set>
88 const std::string& query_str,
89 const std::chrono::time_point<std::chrono::system_clock> submitted_time)
97 const size_t executor_id,
98 const std::string& query_str,
99 const std::chrono::time_point<std::chrono::system_clock> submitted_time)
107 const size_t executor_id,
108 const std::string& query_str,
109 const std::chrono::time_point<std::chrono::system_clock> submitted_time,
145 std::map<const QuerySessionId, std::map<std::string, QuerySessionStatus>>;
163 for (
auto& arg : func->args()) {
164 if (arg.getName() ==
name) {
199 if (!cast_expr || cast_expr->get_optype() !=
kCAST) {
202 return cast_expr->get_operand();
225 const int table_id) {
227 const auto it = temporary_tables->find(table_id);
228 CHECK(it != temporary_tables->end());
236 CHECK(cd || temporary_tables);
243 return temp->getColType(col_id);
246 template <
typename PtrTy>
248 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
251 std::vector<SQLTypeInfo> col_types;
252 for (
size_t i = 0;
i < result->colCount(); ++
i) {
255 return new ColumnarResults(row_set_mem_owner, *result, number, col_types);
260 const std::vector<std::shared_ptr<Analyzer::Expr>>& exprs) {
261 std::vector<Analyzer::Expr*> exprs_not_owned;
262 for (
const auto& expr : exprs) {
263 exprs_not_owned.push_back(expr.get());
265 return exprs_not_owned;
269 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
280 : std::runtime_error(
"Retry query compilation with no GPU lazy fetch.") {}
286 : std::runtime_error(
"Retry query compilation with new scan limit.")
300 : std::runtime_error(
"Retry query compilation with no compaction.") {}
310 ParseIRError(
const std::string message) : std::runtime_error(message) {}
316 : std::runtime_error(
317 "NONE ENCODED String types are not supported as input result set.") {}
331 const std::shared_ptr<ResultSet>& rs);
333 std::vector<TargetValue>
getEntryAt(
const size_t index)
const override;
338 return rs_->getRowSetMemOwner()->getLiteralStringDictProxy();
353 std::function<void(const UpdateLogForFragment&, ColumnToFragmentsMap&)>;
360 std::shared_ptr<ResultSet>
rs_;
370 static_assert(
sizeof(
float) == 4 &&
sizeof(
double) == 8,
371 "Host hardware not supported, unexpected size of float / double.");
372 static_assert(
sizeof(time_t) == 8,
373 "Host hardware not supported, 64-bit time support is required.");
380 const size_t block_size_x,
381 const size_t grid_size_x,
382 const size_t max_gpu_slab_size,
383 const std::string& debug_dir,
384 const std::string& debug_file);
388 const std::string& debug_dir =
"",
389 const std::string& debug_file =
"",
393 mapd_unique_lock<mapd_shared_mutex> flush_lock(
412 const bool with_generation)
const {
419 const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
420 const bool with_generation)
const;
427 return cgen_state_->contains_left_deep_outer_join_;
451 const std::vector<Analyzer::Expr*>& target_exprs)
const;
455 void interrupt(
const std::string& query_session =
"",
456 const std::string& interrupt_session =
"");
461 const unsigned pending_query_check_freq)
const;
473 const std::vector<InputTableInfo>&,
479 const bool has_cardinality_estimation,
483 const std::vector<InputTableInfo>& table_infos,
487 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
519 llvm::Value* window_func_null_val,
520 llvm::Value* multiplicity_lv);
531 <<
"No CudaMgr instantiated, unable to check device architecture";
532 return cuda_mgr->isArchPascalOrLater();
542 std::function<void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo&)>;
555 const std::set<int>& fragment_ids);
565 const std::vector<InputTableInfo>& table_infos,
579 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
595 const std::vector<InputTableInfo>& table_infos,
598 const bool allow_single_frag_table_opt,
599 const size_t context_count,
603 std::unordered_set<int>& available_gpus,
604 int& available_cpus);
610 template <
typename THREAD_POOL>
612 std::vector<std::unique_ptr<ExecutionKernel>>&& kernels);
617 const size_t table_idx,
618 const size_t outer_frag_idx,
619 std::map<int, const TableFragments*>& selected_tables_fragments,
620 const std::unordered_map<int, const Analyzer::BinOper*>&
621 inner_table_id_to_join_condition);
625 const int inner_table_id,
626 const std::unordered_map<int, const Analyzer::BinOper*>&
627 inner_table_id_to_join_condition,
635 const std::map<int, const TableFragments*>&,
638 std::list<ChunkIter>&,
639 std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
646 const std::map<int, const TableFragments*>&,
649 std::list<ChunkIter>&,
650 std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
653 std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
656 const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
657 const std::vector<InputDescriptor>& input_descs,
658 const std::map<int, const TableFragments*>& all_tables_fragments);
661 std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
662 std::vector<size_t>& local_col_to_frag_pos,
663 const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
668 std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
669 std::vector<size_t>& local_col_to_frag_pos,
670 const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
675 const size_t scan_idx,
680 const bool hoist_literals,
683 std::vector<std::vector<const int8_t*>>& col_buffers,
684 const std::vector<size_t> outer_tab_frag_ids,
686 const std::vector<std::vector<int64_t>>& num_rows,
687 const std::vector<std::vector<uint64_t>>& frag_offsets,
690 const int outer_table_id,
692 const uint32_t start_rowid,
693 const uint32_t num_tables,
698 const bool hoist_literals,
700 const std::vector<Analyzer::Expr*>& target_exprs,
702 std::vector<std::vector<const int8_t*>>& col_buffers,
704 const std::vector<std::vector<int64_t>>& num_rows,
705 const std::vector<std::vector<uint64_t>>& frag_offsets,
708 const uint32_t start_rowid,
709 const uint32_t num_tables,
715 const int64_t agg_init_val,
716 const int8_t out_byte_width,
717 const int64_t* out_vec,
718 const size_t out_vec_sz,
719 const bool is_group_by,
720 const bool float_argument_input);
723 std::shared_ptr<CompilationContext>,
731 const int device_id);
734 std::vector<std::pair<
ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
735 std::shared_ptr<RowSetMemoryOwner>,
738 std::vector<std::pair<
ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
739 std::shared_ptr<RowSetMemoryOwner>,
743 std::vector<std::pair<
ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
744 std::shared_ptr<RowSetMemoryOwner>,
749 const bool allow_single_frag_table_opt,
750 const std::vector<InputTableInfo>&,
755 std::shared_ptr<RowSetMemoryOwner>,
757 const bool has_cardinality_estimation,
762 std::tuple<CompilationResult, std::unique_ptr<QueryMemoryDescriptor>>
compileWorkUnit(
763 const std::vector<InputTableInfo>& query_infos,
769 const bool allow_lazy_fetch,
770 std::shared_ptr<RowSetMemoryOwner>,
771 const size_t max_groups_buffer_entry_count,
772 const int8_t crt_min_byte_width,
773 const bool has_cardinality_estimation,
783 const std::vector<InputTableInfo>& query_infos,
787 std::function<llvm::Value*(const std::vector<llvm::Value*>&, llvm::Value*)>
789 const size_t level_idx,
797 const std::vector<InputTableInfo>& query_infos,
799 std::vector<std::string>& fail_reasons);
802 const size_t level_idx);
806 llvm::Function* query_func,
807 llvm::BasicBlock* entry_bb,
818 bool run_with_dynamic_watchdog,
819 bool run_with_allowing_runtime_interrupt,
821 const std::vector<InputTableInfo>& input_table_infos);
826 const std::vector<InputTableInfo>& query_infos);
834 const std::shared_ptr<Analyzer::BinOper>& qual_bin_oper,
835 const std::vector<InputTableInfo>& query_infos,
841 const std::vector<InputTableInfo>& query_infos,
848 const std::unordered_set<llvm::Function*>&,
853 std::unordered_set<llvm::Function*>&,
854 const bool no_inline,
868 const size_t col_width,
870 const bool translate_null_val,
871 const int64_t translated_null_val,
873 std::stack<llvm::BasicBlock*>&,
874 const bool thread_mem_shared);
881 std::tuple<RelAlgExecutionUnit, PlanState::DeletedColumnsMap>
addDeletedColumn(
891 const std::list<std::shared_ptr<Analyzer::Expr>>& simple_quals,
892 const std::vector<uint64_t>& frag_offsets,
893 const size_t frag_idx);
899 const std::vector<uint64_t>& frag_offsets,
900 const size_t frag_idx);
903 const std::unordered_set<PhysicalInput>& phys_inputs);
905 const std::unordered_set<PhysicalInput>& phys_inputs);
909 void setupCaching(
const std::unordered_set<PhysicalInput>& phys_inputs,
910 const std::unordered_set<int>& phys_table_ids);
918 mapd_shared_lock<mapd_shared_mutex>&
read_lock);
922 const std::string& query_str,
923 const std::chrono::time_point<std::chrono::system_clock> submitted,
924 const size_t executor_id,
926 mapd_unique_lock<mapd_shared_mutex>&
write_lock);
929 const std::chrono::time_point<std::chrono::system_clock> submitted,
930 mapd_unique_lock<mapd_shared_mutex>&
write_lock);
932 mapd_unique_lock<mapd_shared_mutex>&
write_lock);
934 mapd_shared_lock<mapd_shared_mutex>&
read_lock);
936 mapd_shared_lock<mapd_shared_mutex>&
read_lock);
939 const std::chrono::time_point<std::chrono::system_clock> submitted,
941 mapd_unique_lock<mapd_shared_mutex>&
write_lock);
944 const std::chrono::time_point<std::chrono::system_clock> submitted,
945 const size_t executor_id,
946 mapd_unique_lock<mapd_shared_mutex>&
write_lock);
949 mapd_shared_lock<mapd_shared_mutex>&
read_lock);
953 std::shared_ptr<const query_state::QueryState>& query_state);
957 const std::chrono::time_point<std::chrono::system_clock> submitted,
958 bool acquire_spin_lock);
960 std::shared_ptr<const query_state::QueryState>& query_state,
964 const std::chrono::time_point<std::chrono::system_clock> submitted,
968 const std::string& query_str,
969 const std::chrono::time_point<std::chrono::system_clock> submitted,
970 const size_t executor_id,
983 const std::unordered_map<int, CgenState::LiteralValues>& literals,
984 const int device_id);
986 static size_t align(
const size_t off_in,
const size_t alignment) {
988 if (off % alignment != 0) {
989 off += (alignment - off % alignment);
1144 std::string null_check_suffix{
"_nullable"};
1147 null_check_suffix +=
"_rhs";
1150 null_check_suffix +=
"_lhs";
1152 return null_check_suffix;
1166 const size_t cpu_count,
1167 const size_t gpu_count);
1173 #endif // QUERYENGINE_EXECUTE_H
void read_rt_udf_gpu_module(const std::string &udf_ir)
SQLTypeInfo getColumnType(const size_t col_idx) const
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
const std::string debug_dir_
llvm::Value * translated_value
bool is_agg(const Analyzer::Expr *expr)
static mapd_shared_mutex executor_session_mutex_
AggregatedColRange computeColRangesCache(const std::unordered_set< PhysicalInput > &phys_inputs)
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
void enableRuntimeQueryInterrupt(const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
std::vector< std::unique_ptr< ExecutionKernel > > createKernels(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, const size_t executor_id, mapd_unique_lock< mapd_shared_mutex > &write_lock)
static QuerySessionMap queries_session_map_
static mapd_shared_mutex execute_mutex_
FetchResult fetchUnionChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator)
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
CurrentQueryStatus attachExecutorToQuerySession(std::shared_ptr< const query_state::QueryState > &query_state)
QuerySessionStatus(const QuerySessionId &query_session, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted_time)
int64_t kernel_queue_time_ms_
size_t maxGpuSlabSize() const
ExecutorMutexHolder acquireExecuteMutex()
int64_t compilation_queue_time_ms_
CompilationRetryNoLazyFetch()
std::map< const ColumnDescriptor *, std::set< int >> ColumnToFragmentsMap
void invalidateRunningQuerySession(mapd_unique_lock< mapd_shared_mutex > &write_lock)
void checkPendingQueryStatus(const QuerySessionId &query_session)
static const int32_t ERR_INTERRUPTED
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
class for a per-database catalog. also includes metadata for the current database and the current use...
bool addToQuerySessionList(const QuerySessionId &query_session, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock)
bool is_trivial_loop_join(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
FetchCacheAnchor(CgenState *cgen_state)
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
const std::chrono::time_point< std::chrono::system_clock > getQuerySubmittedTime()
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const QueryHint &query_hint)
StringDictionaryProxy * getLiteralDictionary() const override
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted_time, const QuerySessionStatus::QueryStatus &query_status)
std::vector< size_t > getTableFragmentIndices(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
void clearQuerySessionStatus(const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, bool acquire_spin_lock)
ResultSetPtr executeTableFunction(const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
Compiles and dispatches a table function; that is, a function that takes as input one or more columns...
void read_rt_udf_cpu_module(const std::string &udf_ir)
Fragmenter_Namespace::RowDataProvider RowDataProvider
Data_Namespace::DataMgr & getDataMgr() const
std::unordered_set< int > get_available_gpus(const Catalog_Namespace::Catalog &cat)
static const int max_gpu_count
std::map< const QuerySessionId, std::map< std::string, QuerySessionStatus >> QuerySessionMap
size_t const getFragmentIndex() const
static const size_t code_cache_size
void setQueryStatusAsRunning()
bool isArchPascalOrLater(const ExecutorDeviceType dt) const
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
static std::atomic_flag execute_spin_lock_
std::pair< QuerySessionId, std::string > CurrentQueryStatus
static mapd_shared_mutex executors_cache_mutex_
bool is_udf_module_present(bool cpu_only=false)
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
static const size_t baseline_threshold
void registerActiveModule(void *module, const int device_id) const
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
TableGenerations computeTableGenerations(std::unordered_set< int > phys_table_ids)
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
llvm::Value * aggregateWindowStatePtr()
void read_udf_cpu_module(const std::string &udf_ir_filename)
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
static std::pair< int64_t, int32_t > reduceResults(const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
void read_udf_gpu_module(const std::string &udf_ir_filename)
std::vector< std::string > CodeCacheKey
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
bool checkCurrentQuerySession(const std::string &candidate_query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
const ColumnarResults * rows_to_columnar_results(std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const PtrTy &result, const int number)
const QuerySessionStatus::QueryStatus getQueryStatus()
static const int32_t ERR_GEOS
const std::string query_str_
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters system_parameters=SystemParameters())
AggregatedColRange agg_col_range_cache_
std::shared_ptr< ResultSet > ResultSetPtr
static void * gpu_active_modules_[max_gpu_count]
std::unique_ptr< CgenState > cgen_state_
FetchResult fetchChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator)
static const int32_t ERR_TOO_MANY_LITERALS
llvm::Value * original_value
ParseIRError(const std::string message)
std::vector< Analyzer::Expr * > get_exprs_not_owned(const std::vector< std::shared_ptr< Analyzer::Expr >> &exprs)
static uint32_t gpu_active_modules_device_mask_
HOST DEVICE SQLTypes get_type() const
void buildSelectedFragsMappingForUnion(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
int deviceCount(const ExecutorDeviceType) const
llvm::Value * castToIntPtrTyIn(llvm::Value *val, const size_t bit_width)
size_t getNumBytesForFetchedRow(const std::set< int > &table_ids_to_fetch) const
CompilationRetryNoCompaction()
static std::mutex kernel_mutex_
unsigned numBlocksPerMP() const
Container for compilation results and assorted options for a single execution unit.
std::unique_ptr< WindowProjectNodeContext > window_project_node_context_owned_
std::vector< QuerySessionStatus > getQuerySessionInfo(const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
std::vector< FragmentsPerTable > FragmentsList
bool needFetchAllFragments(const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
const QuerySessionId query_session_
std::shared_ptr< HashJoin > hash_table
mapd_shared_mutex & getSessionLock()
#define LOG_IF(severity, condition)
static void clearMemory(const Data_Namespace::MemoryLevel memory_level)
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const
std::shared_ptr< CompilationContext > getCodeFromCache(const CodeCacheKey &, const CodeCache &)
static const size_t high_scan_limit
CodeCache gpu_code_cache_
static const int32_t ERR_STRING_CONST_IN_RESULTSET
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int outer_table_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
QuerySessionId & getCurrentQuerySession(mapd_shared_lock< mapd_shared_mutex > &read_lock)
bool isFragmentFullyDeleted(const int table_id, const Fragmenter_Namespace::FragmentInfo &fragment)
void insertErrorCodeChecker(llvm::Function *query_func, bool hoist_literals)
bool checkIsQuerySessionEnrolled(const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
void setQueryStatus(const QuerySessionStatus::QueryStatus &status)
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY
const ExecutorId executor_id_
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED
const ResultSetPtr & get_temporary_table(const TemporaryTables *temporary_tables, const int table_id)
std::map< QuerySessionId, bool > InterruptFlagMap
const size_t max_gpu_slab_size_
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
ResultSetPtr collectAllDeviceResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
const ColumnDescriptor * getPhysicalColumnDescriptor(const Analyzer::ColumnVar *, int) const
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static const int32_t ERR_DIV_BY_ZERO
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit(const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
std::shared_ptr< CompilationContext > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
std::vector< llvm::Value * > LLVMValueVector
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
std::vector< TargetValue > getEntryAt(const size_t index) const override
int get_logical_size() const
std::unordered_map< int, std::vector< llvm::Value * > > fetch_cache_
decltype(FragmentInfoType::fragmentId) const getFragmentId() const
int64_t deviceCycles(int milliseconds) const
std::string generatePTX(const std::string &) const
std::mutex str_dict_mutex_
const Catalog_Namespace::Catalog * catalog_
#define INJECT_TIMER(DESC)
friend class PendingExecutionClosure
void setQuerySessionAsInterrupted(const QuerySessionId &query_session, mapd_unique_lock< mapd_shared_mutex > &write_lock)
static const int32_t ERR_OUT_OF_RENDER_MEM
std::unordered_map< TableId, const ColumnDescriptor * > DeletedColumnsMap
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
std::shared_timed_mutex mapd_shared_mutex
const std::string debug_file_
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
CachedCardinality getCachedCardinality(const std::string &cache_key)
decltype(FragmentInfoType::physicalTableId) const getPhysicalTableId() const
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
void setColRangeCache(const AggregatedColRange &aggregated_col_range)
bool containsLeftDeepOuterJoin() const
void setCatalog(const Catalog_Namespace::Catalog *catalog)
StringDictionaryProxy * getStringDictionaryProxy(const int dict_id, const bool with_generation) const
std::shared_ptr< CompilationContext > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
bool is_timeinterval() const
static std::unordered_map< std::string, size_t > cardinality_cache_
static InterruptFlagMap queries_interrupt_flag_
FragmentInfoType const & getFragmentInfo() const
std::unique_ptr< PlanState > plan_state_
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, const QuerySessionStatus::QueryStatus updated_query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock)
static const int32_t ERR_OUT_OF_TIME
void initializeNVPTXBackend() const
mapd_unique_lock< mapd_shared_mutex > unique_lock
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, mapd_unique_lock< mapd_shared_mutex > &write_lock)
const TableGeneration & getTableGeneration(const int table_id) const
llvm::Value * castToFP(llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
std::pair< bool, size_t > CachedCardinality
void interrupt(const std::string &query_session="", const std::string &interrupt_session="")
const ColumnDescriptor * getColumnDescriptor(const Analyzer::ColumnVar *) const
static const int32_t ERR_UNSUPPORTED_SELF_JOIN
const unsigned block_size_x_
const unsigned grid_size_x_
specifies the content in-memory of a row in the column metadata table
std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const override
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
static std::map< int, std::shared_ptr< Executor > > executors_
static const int32_t ERR_OUT_OF_GPU_MEM
const TemporaryTables * getTemporaryTables()
std::string get_null_check_suffix(const SQLTypeInfo &lhs_ti, const SQLTypeInfo &rhs_ti)
void executeWorkUnitPerFragment(const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< int > &fragment_ids)
Compiles and dispatches a work unit per fragment processing results with the per fragment callback...
static void addCodeToCache(const CodeCacheKey &, std::shared_ptr< CompilationContext >, llvm::Module *, CodeCache &)
const Catalog_Namespace::Catalog * getCatalog() const
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted_time)
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
void enrollQuerySession(const QuerySessionId &query_session, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
ResultSetPtr resultsUnion(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
static QuerySessionId current_query_session_
std::shared_ptr< ResultSet > rs_
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
std::unique_ptr< QueryCompilationDescriptor > QueryCompilationDescriptorOwned
HOST DEVICE EncodingType get_compression() const
const ColumnarResults * columnarize_result(std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSetPtr &result, const int frag_id)
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
InputTableInfoCache input_table_info_cache_
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
void launchKernels(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels)
CodeCache cpu_code_cache_
bool checkIsQuerySessionInterrupted(const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
std::pair< bool, int64_t > skipFragment(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
unsigned gridSize() const
llvm::Value * spillDoubleElement(llvm::Value *elem_val, llvm::Type *elem_ty)
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags(const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
StringDictionaryGenerations computeStringDictionaryGenerations(const std::unordered_set< PhysicalInput > &phys_inputs)
void updateQuerySessionStatus(std::shared_ptr< const query_state::QueryState > &query_state, const QuerySessionStatus::QueryStatus new_query_status)
void setExecutorId(const size_t executor_id)
FragmentInfoType const & fragment_info_
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
const std::string getQueryStr()
std::pair< bool, int64_t > skipFragmentInnerJoins(const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
void buildSelectedFragsMapping(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
llvm::Value * codegenAggregateWindowState()
TableGenerations table_generations_
void unregisterActiveModule(void *module, const int device_id) const
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
mapd_shared_lock< mapd_shared_mutex > read_lock
const size_t getExecutorId()
size_t getRunningExecutorId(mapd_shared_lock< mapd_shared_mutex > &read_lock)
std::string QuerySessionId
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
ResultSetPtr reduceMultiDeviceResultSets(std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
void addToCardinalityCache(const std::string &cache_key, const size_t cache_value)
static std::atomic< bool > interrupted_
static const int32_t ERR_OUT_OF_SLOTS
static std::mutex compilation_mutex_
std::vector< llvm::Value * > inlineHoistedLiterals()
mapd_shared_lock< mapd_shared_mutex > shared_lock
static size_t running_query_executor_id_
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
std::unordered_map< int, const Analyzer::BinOper * > getInnerTabIdToJoinCond() const
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Executor(const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
GroupColLLVMValue groupByColumnCodegen(Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, GroupByAndAggregate::DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
uint32_t log2_bytes(const uint32_t bytes)
ExpressionRange getColRange(const PhysicalInput &) const
std::string numeric_type_name(const SQLTypeInfo &ti)
mapd_unique_lock< mapd_shared_mutex > write_lock
void redeclareFilterFunction()
UpdateLogForFragment(FragmentInfoType const &fragment_info, size_t const, const std::shared_ptr< ResultSet > &rs)
bool is_unnest(const Analyzer::Expr *expr)
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
auto getResultSet() const
unsigned blockSize() const
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...
HOST DEVICE bool get_notnull() const
std::unordered_map< int, std::vector< llvm::Value * > > saved_fetch_cache
static size_t align(const size_t off_in, const size_t alignment)
std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptorOwned
size_t const getRowCount() const override
const std::chrono::time_point< std::chrono::system_clock > submitted_time_
const QuerySessionId getQuerySession()
static const int32_t ERR_OUT_OF_CPU_MEM
QuerySessionStatus::QueryStatus query_status_
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Descriptor for the fragments required for an execution kernel.
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
Fragmenter_Namespace::FragmentInfo FragmentInfoType
bool is_rt_udf_module_present(bool cpu_only=false)
static size_t getArenaBlockSize()
ResultSetPtr executeWorkUnit(size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
std::mutex gpu_exec_mutex_[max_gpu_count]
DEVICE void swap(ARGS &&...args)
static mapd_shared_mutex recycler_mutex_
void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
llvm::Value * codegenWindowFunction(const size_t target_index, const CompilationOptions &co)
SQLOps get_optype() const
WindowFunctionContext * active_window_function_
static std::mutex gpu_active_modules_mutex_
void setupCaching(const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
static void nukeCacheOfExecutors()
void clearMetaInfoCache()
std::function< void(const UpdateLogForFragment &, ColumnToFragmentsMap &)> Callback
const ColumnDescriptor * get_column_descriptor(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
size_t const getEntryCount() const override
llvm::BasicBlock * codegenWindowResetStateControlFlow()
const TemporaryTables * temporary_tables_
void executeUpdate(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
CompilationRetryNewScanLimit(const size_t new_scan_limit)
WatchdogException(const std::string &cause)
static const ExecutorId UNITARY_EXECUTOR_ID
bool isArchMaxwell(const ExecutorDeviceType dt) const
bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)