OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Execute.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_EXECUTE_H
18 #define QUERYENGINE_EXECUTE_H
19 
20 #include <algorithm>
21 #include <atomic>
22 #include <condition_variable>
23 #include <cstddef>
24 #include <cstdlib>
25 #include <deque>
26 #include <functional>
27 #include <limits>
28 #include <map>
29 #include <mutex>
30 #include <stack>
31 #include <unordered_map>
32 #include <unordered_set>
33 
34 #include <llvm/IR/Function.h>
35 #include <llvm/IR/Value.h>
36 #include <llvm/Linker/Linker.h>
37 #include <llvm/Transforms/Utils/ValueMapper.h>
38 #include <rapidjson/document.h>
39 
43 #include "QueryEngine/CgenState.h"
44 #include "QueryEngine/CodeCache.h"
55 #include "QueryEngine/PlanState.h"
63 
64 #include "DataMgr/Chunk/Chunk.h"
65 #include "Logger/Logger.h"
67 #include "Shared/funcannotations.h"
69 #include "Shared/measure.h"
70 #include "Shared/thread_count.h"
71 #include "Shared/toString.h"
76 
77 using QueryCompilationDescriptorOwned = std::unique_ptr<QueryCompilationDescriptor>;
79 using QueryMemoryDescriptorOwned = std::unique_ptr<QueryMemoryDescriptor>;
80 using QuerySessionId = std::string;
81 using CurrentQueryStatus = std::pair<QuerySessionId, std::string>;
82 using InterruptFlagMap = std::map<QuerySessionId, bool>;
84  // A class that is used to describe the query session's info
85  public:
86  /* todo(yoonmin): support more query status
87  * i.e., RUNNING_SORT, RUNNING_CARD_EST, CLEANUP, ... */
88  enum QueryStatus {
89  UNDEFINED = 0,
95  };
96 
97  QuerySessionStatus(const QuerySessionId& query_session,
98  const std::string& query_str,
99  const std::string& submitted_time)
100  : query_session_(query_session)
101  , executor_id_(0)
102  , query_str_(query_str)
103  , submitted_time_(submitted_time)
105  QuerySessionStatus(const QuerySessionId& query_session,
106  const size_t executor_id,
107  const std::string& query_str,
108  const std::string& submitted_time)
109  : query_session_(query_session)
110  , executor_id_(executor_id)
111  , query_str_(query_str)
112  , submitted_time_(submitted_time)
114  QuerySessionStatus(const QuerySessionId& query_session,
115  const size_t executor_id,
116  const std::string& query_str,
117  const std::string& submitted_time,
118  const QuerySessionStatus::QueryStatus& query_status)
119  : query_session_(query_session)
120  , executor_id_(executor_id)
121  , query_str_(query_str)
122  , submitted_time_(submitted_time)
123  , query_status_(query_status) {}
124 
126  const std::string getQueryStr() { return query_str_; }
127  const size_t getExecutorId() { return executor_id_; }
128  const std::string& getQuerySubmittedTime() { return submitted_time_; }
131  query_status_ = status;
132  }
133  void setExecutorId(const size_t executor_id) { executor_id_ = executor_id; }
134 
135  private:
137  size_t executor_id_;
138  const std::string query_str_;
139  const std::string submitted_time_;
140  // Currently we use four query statuses:
141  // 1) PENDING_IN_QUEUE: a task is submitted to the dispatch_queue but hangs due to no
142  // existing worker (= executor) 2) PENDING_IN_EXECUTOR: a task is assigned to the
143  // specific executor but waits to get the resource to run 3) RUNNING: a task is assigned
144  // to the specific executor and its execution has been successfully started
145  // 4) RUNNING_REDUCTION: a task is in the reduction phase
147 };
148 using QuerySessionMap =
149  std::map<const QuerySessionId, std::map<std::string, QuerySessionStatus>>;
150 
151 extern void read_rt_udf_gpu_module(const std::string& udf_ir);
152 extern void read_rt_udf_cpu_module(const std::string& udf_ir);
153 extern bool is_rt_udf_module_present(bool cpu_only = false);
154 
155 class ColumnFetcher;
156 
// Exception type derived from std::runtime_error. The text handed to the
// constructor is what what() reports.
class WatchdogException : public std::runtime_error {
 public:
  // `cause`: message forwarded unchanged to the std::runtime_error base.
  WatchdogException(const std::string& cause) : std::runtime_error{cause} {}
};
161 
162 class Executor;
163 
164 inline llvm::Value* get_arg_by_name(llvm::Function* func, const std::string& name) {
165  for (auto& arg : func->args()) {
166  if (arg.getName() == name) {
167  return &arg;
168  }
169  }
170  CHECK(false);
171  return nullptr;
172 }
173 
// Maps a byte width of 1, 2, 4 or 8 to its base-2 logarithm (0, 1, 2 or 3).
// Any other width terminates the process via abort().
inline uint32_t log2_bytes(const uint32_t bytes) {
  // Only the four widths 1 << 0 .. 1 << 3 are accepted.
  for (uint32_t exponent = 0; exponent < 4; ++exponent) {
    if ((uint32_t{1} << exponent) == bytes) {
      return exponent;
    }
  }
  abort();
}
188 
190  const int col_id,
191  const int table_id,
193  CHECK_GT(table_id, 0);
194  const auto col_desc = cat.getMetadataForColumn(table_id, col_id);
195  CHECK(col_desc);
196  return col_desc;
197 }
198 
199 inline const Analyzer::Expr* extract_cast_arg(const Analyzer::Expr* expr) {
200  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr);
201  if (!cast_expr || cast_expr->get_optype() != kCAST) {
202  return expr;
203  }
204  return cast_expr->get_operand();
205 }
206 
207 inline std::string numeric_type_name(const SQLTypeInfo& ti) {
208  CHECK(ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
209  ti.is_fp() || (ti.is_string() && ti.get_compression() == kENCODING_DICT) ||
210  ti.is_timeinterval());
211  if (ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
212  ti.is_string() || ti.is_timeinterval()) {
213  return "int" + std::to_string(ti.get_logical_size() * 8) + "_t";
214  }
215  return ti.get_type() == kDOUBLE ? "double" : "float";
216 }
217 
219  const int col_id,
220  const int table_id,
222  CHECK(table_id);
223  return table_id > 0 ? get_column_descriptor(col_id, table_id, cat) : nullptr;
224 }
225 
226 inline const ResultSetPtr& get_temporary_table(const TemporaryTables* temporary_tables,
227  const int table_id) {
228  CHECK_LT(table_id, 0);
229  const auto it = temporary_tables->find(table_id);
230  CHECK(it != temporary_tables->end());
231  return it->second;
232 }
233 
234 inline const SQLTypeInfo get_column_type(const int col_id,
235  const int table_id,
236  const ColumnDescriptor* cd,
237  const TemporaryTables* temporary_tables) {
238  CHECK(cd || temporary_tables);
239  if (cd) {
240  CHECK_EQ(col_id, cd->columnId);
241  CHECK_EQ(table_id, cd->tableId);
242  return cd->columnType;
243  }
244  const auto& temp = get_temporary_table(temporary_tables, table_id);
245  return temp->getColType(col_id);
246 }
247 
248 // TODO(alex): Adjust interfaces downstream and make this not needed.
249 inline std::vector<Analyzer::Expr*> get_exprs_not_owned(
250  const std::vector<std::shared_ptr<Analyzer::Expr>>& exprs) {
251  std::vector<Analyzer::Expr*> exprs_not_owned;
252  for (const auto& expr : exprs) {
253  exprs_not_owned.push_back(expr.get());
254  }
255  return exprs_not_owned;
256 }
257 
258 class CompilationRetryNoLazyFetch : public std::runtime_error {
259  public:
261  : std::runtime_error("Retry query compilation with no GPU lazy fetch.") {}
262 };
263 
264 class CompilationRetryNewScanLimit : public std::runtime_error {
265  public:
266  CompilationRetryNewScanLimit(const size_t new_scan_limit)
267  : std::runtime_error("Retry query compilation with new scan limit.")
268  , new_scan_limit_(new_scan_limit) {}
269 
271 };
272 
// Exception type derived from std::runtime_error with a fixed message stating
// that the query contains too many literals.
class TooManyLiterals : public std::runtime_error {
 public:
  TooManyLiterals() : std::runtime_error{"Too many literals in the query"} {}
};
277 
278 class CompilationRetryNoCompaction : public std::runtime_error {
279  public:
281  : std::runtime_error("Retry query compilation with no compaction.") {}
282 };
283 
// Throwing QueryMustRunOnCpu lets a query step be retried on CPU when
// g_allow_query_step_cpu_retry is true (on by default): the exception is caught at
// the query step execution level in RelAlgExecutor. If g_allow_query_step_cpu_retry
// is false but g_allow_cpu_retry is true, the entire query is retried on CPU instead
// (if both flags are false, we return an error). This exception is thrown for the
// following broad categories of conditions:
// 1) we have not implemented an operator on GPU and so cannot codegen for GPU
// 2) we catch an unexpected GPU compilation/linking error (perhaps due to an
//    outdated driver/CUDA installation not allowing a modern operator)
// 3) we detect up front that we will not have enough GPU memory to execute a
//    query.
// There is a fourth scenario where our pre-flight GPU memory check passed but, for
// whatever reason, we still run out of memory. In those cases we go down the
// handleOutOfMemoryRetry path, which will first try per-fragment execution on GPU
// and, if that fails, CPU execution.
// Note that for distributed execution failures on leaves, we do not retry queries.
// TODO(todd): See if CPU retry of individual steps can be turned on safely for
// distributed

class QueryMustRunOnCpu : public std::runtime_error {
 public:
  // Uses the fixed default message.
  QueryMustRunOnCpu() : std::runtime_error{"Query must run in cpu mode."} {}

  // Uses the caller-supplied message `err`.
  QueryMustRunOnCpu(const std::string& err) : std::runtime_error{err} {}
};
309 
// Exception type derived from std::runtime_error; what() returns the `message`
// passed at construction.
class ParseIRError : public std::runtime_error {
 public:
  // Takes the message by const reference (as the sibling exception classes do)
  // rather than by value, which avoids an extra std::string copy per throw.
  ParseIRError(const std::string& message) : std::runtime_error(message) {}
};
314 
315 class StringConstInResultSet : public std::runtime_error {
316  public:
318  : std::runtime_error(
319  "NONE ENCODED String types are not supported as input result set.") {}
320 };
321 
322 class ExtensionFunction;
323 
325 using ColumnToFragmentsMap = std::map<const ColumnDescriptor*, std::set<int32_t>>;
326 using TableToFragmentIds = std::map<int32_t, std::set<int32_t>>;
327 
331 };
332 
334  public:
336 
337  UpdateLogForFragment(FragmentInfoType const& fragment_info,
338  size_t const,
339  const std::shared_ptr<ResultSet>& rs);
340 
341  std::vector<TargetValue> getEntryAt(const size_t index) const override;
342  std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const override;
343 
344  size_t const getRowCount() const override;
346  return rs_->getRowSetMemOwner()->getLiteralStringDictProxy();
347  }
348  size_t const getEntryCount() const override;
349  size_t const getFragmentIndex() const;
350  FragmentInfoType const& getFragmentInfo() const;
353  }
355  return fragment_info_.fragmentId;
356  }
357 
358  SQLTypeInfo getColumnType(const size_t col_idx) const;
359 
360  using Callback = std::function<void(const UpdateLogForFragment&, TableUpdateMetadata&)>;
361 
362  auto getResultSet() const { return rs_; }
363 
364  private:
367  std::shared_ptr<ResultSet> rs_;
368 };
369 
370 using LLVMValueVector = std::vector<llvm::Value*>;
371 
373 
374 std::ostream& operator<<(std::ostream&, FetchResult const&);
375 
376 class Executor {
377  static_assert(sizeof(float) == 4 && sizeof(double) == 8,
378  "Host hardware not supported, unexpected size of float / double.");
379  static_assert(sizeof(time_t) == 8,
380  "Host hardware not supported, 64-bit time support is required.");
381 
382  public:
383  using ExecutorId = size_t;
384  static const ExecutorId UNITARY_EXECUTOR_ID = 0;
385 
386  Executor(const ExecutorId id,
387  Data_Namespace::DataMgr* data_mgr,
388  const size_t block_size_x,
389  const size_t grid_size_x,
390  const size_t max_gpu_slab_size,
391  const std::string& debug_dir,
392  const std::string& debug_file);
393 
394  static std::shared_ptr<Executor> getExecutor(
395  const ExecutorId id,
396  const std::string& debug_dir = "",
397  const std::string& debug_file = "",
398  const SystemParameters& system_parameters = SystemParameters());
399 
400  static void nukeCacheOfExecutors() {
401  mapd_unique_lock<mapd_shared_mutex> flush_lock(
402  execute_mutex_); // don't want native code to vanish while executing
403  mapd_unique_lock<mapd_shared_mutex> lock(executors_cache_mutex_);
404  (decltype(executors_){}).swap(executors_);
405  }
406 
407  static void clearMemory(const Data_Namespace::MemoryLevel memory_level);
408 
409  static size_t getArenaBlockSize();
410 
411  static void addUdfIrToModule(const std::string& udf_ir_filename, const bool is_cuda_ir);
412 
417 
422  const bool with_generation) const {
424  return getStringDictionaryProxy(dict_id, row_set_mem_owner_, with_generation);
425  }
426 
428  const int dictId,
429  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
430  const bool with_generation) const;
431 
432  bool isCPUOnly() const;
433 
434  bool isArchMaxwell(const ExecutorDeviceType dt) const;
435 
437  return cgen_state_->contains_left_deep_outer_join_;
438  }
439 
441 
443  int) const;
444 
445  const Catalog_Namespace::Catalog* getCatalog() const;
446  void setCatalog(const Catalog_Namespace::Catalog* catalog);
447 
449  CHECK(data_mgr_);
450  return data_mgr_;
451  }
452 
453  const std::shared_ptr<RowSetMemoryOwner> getRowSetMemoryOwner() const;
454 
455  const TemporaryTables* getTemporaryTables() const;
456 
457  Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const;
458 
459  const TableGeneration& getTableGeneration(const int table_id) const;
460 
462 
463  size_t getNumBytesForFetchedRow(const std::set<int>& table_ids_to_fetch) const;
464 
465  bool hasLazyFetchColumns(const std::vector<Analyzer::Expr*>& target_exprs) const;
466  std::vector<ColumnLazyFetchInfo> getColLazyFetchInfo(
467  const std::vector<Analyzer::Expr*>& target_exprs) const;
468 
469  void registerActiveModule(void* module, const int device_id) const;
470  void unregisterActiveModule(void* module, const int device_id) const;
471  void interrupt(const QuerySessionId& query_session = "",
472  const QuerySessionId& interrupt_session = "");
473  void resetInterrupt();
474 
475  // only for testing usage
476  void enableRuntimeQueryInterrupt(const double runtime_query_check_freq,
477  const unsigned pending_query_check_freq) const;
478 
479  static const size_t high_scan_limit{32000000};
480 
481  int8_t warpSize() const;
482  unsigned gridSize() const;
483  unsigned numBlocksPerMP() const;
484  unsigned blockSize() const;
485  size_t maxGpuSlabSize() const;
486 
487  ResultSetPtr executeWorkUnit(size_t& max_groups_buffer_entry_guess,
488  const bool is_agg,
489  const std::vector<InputTableInfo>&,
490  const RelAlgExecutionUnit&,
491  const CompilationOptions&,
492  const ExecutionOptions& options,
494  RenderInfo* render_info,
495  const bool has_cardinality_estimation,
496  ColumnCacheMap& column_cache);
497 
499  const std::vector<InputTableInfo>& table_infos,
500  const TableDescriptor* updated_table_desc,
501  const CompilationOptions& co,
502  const ExecutionOptions& eo,
504  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
506  const bool is_agg);
507 
509  const RelAlgExecutionUnit& ra_exe_unit,
510  const std::shared_ptr<RowSetMemoryOwner>& row_set_mem_owner);
511 
512  private:
513  void clearMetaInfoCache();
514 
515  int deviceCount(const ExecutorDeviceType) const;
516  int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const;
517 
518  // Generate code for a window function target.
519  llvm::Value* codegenWindowFunction(const size_t target_index,
520  const CompilationOptions& co);
521 
522  // Generate code for an aggregate window function target.
523  llvm::Value* codegenWindowFunctionAggregate(const CompilationOptions& co);
524 
525  // The aggregate state requires a state reset when starting a new partition. Generate
526  // the new partition check and return the continuation basic block.
527  llvm::BasicBlock* codegenWindowResetStateControlFlow();
528 
529  // Generate code for initializing the state of a window aggregate.
530  void codegenWindowFunctionStateInit(llvm::Value* aggregate_state);
531 
532  // Generates the required calls for an aggregate window function and returns the final
533  // result.
534  llvm::Value* codegenWindowFunctionAggregateCalls(llvm::Value* aggregate_state,
535  const CompilationOptions& co);
536 
537  // The AVG window function requires some post-processing: the sum is divided by count
538  // and the result is stored back for the current row.
539  void codegenWindowAvgEpilogue(llvm::Value* crt_val,
540  llvm::Value* window_func_null_val,
541  llvm::Value* multiplicity_lv);
542 
543  // Generates code which loads the current aggregate value for the window context.
544  llvm::Value* codegenAggregateWindowState();
545 
546  llvm::Value* aggregateWindowStatePtr();
547 
549  CHECK(data_mgr_);
550  auto cuda_mgr = data_mgr_->getCudaMgr();
551  CHECK(cuda_mgr);
552  return cuda_mgr;
553  }
554 
556  if (dt == ExecutorDeviceType::GPU) {
557  return cudaMgr()->isArchPascalOrLater();
558  }
559  return false;
560  }
561 
562  bool needFetchAllFragments(const InputColDescriptor& col_desc,
563  const RelAlgExecutionUnit& ra_exe_unit,
564  const FragmentsList& selected_fragments) const;
565 
567  const InputColDescriptor& inner_col_desc,
568  const RelAlgExecutionUnit& ra_exe_unit,
569  const FragmentsList& selected_fragments,
570  const Data_Namespace::MemoryLevel memory_level) const;
571 
572  using PerFragmentCallBack =
573  std::function<void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo&)>;
574 
580  void executeWorkUnitPerFragment(const RelAlgExecutionUnit& ra_exe_unit,
581  const InputTableInfo& table_info,
582  const CompilationOptions& co,
583  const ExecutionOptions& eo,
586  const std::set<size_t>& fragment_indexes_param);
587 
589 
596  const std::vector<InputTableInfo>& table_infos,
597  const CompilationOptions& co,
598  const ExecutionOptions& eo,
600 
602  const RelAlgExecutionUnit& ra_exe_unit,
603  const ExecutorDeviceType requested_device_type);
604 
606  SharedKernelContext& shared_context,
607  const RelAlgExecutionUnit& ra_exe_unit,
609  const ExecutorDeviceType device_type,
610  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
611 
613  SharedKernelContext& shared_context,
614  const RelAlgExecutionUnit& ra_exe_unit) const;
615 
616  std::unordered_map<int, const Analyzer::BinOper*> getInnerTabIdToJoinCond() const;
617 
622  std::vector<std::unique_ptr<ExecutionKernel>> createKernels(
623  SharedKernelContext& shared_context,
624  const RelAlgExecutionUnit& ra_exe_unit,
625  ColumnFetcher& column_fetcher,
626  const std::vector<InputTableInfo>& table_infos,
627  const ExecutionOptions& eo,
628  const bool is_agg,
629  const bool allow_single_frag_table_opt,
630  const size_t context_count,
631  const QueryCompilationDescriptor& query_comp_desc,
633  RenderInfo* render_info,
634  std::unordered_set<int>& available_gpus,
635  int& available_cpus);
636 
641  void launchKernels(SharedKernelContext& shared_context,
642  std::vector<std::unique_ptr<ExecutionKernel>>&& kernels,
643  const ExecutorDeviceType device_type);
644 
645  std::vector<size_t> getTableFragmentIndices(
646  const RelAlgExecutionUnit& ra_exe_unit,
647  const ExecutorDeviceType device_type,
648  const size_t table_idx,
649  const size_t outer_frag_idx,
650  std::map<int, const TableFragments*>& selected_tables_fragments,
651  const std::unordered_map<int, const Analyzer::BinOper*>&
652  inner_table_id_to_join_condition);
653 
654  bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo& outer_fragment_info,
655  const Fragmenter_Namespace::FragmentInfo& inner_fragment_info,
656  const int inner_table_id,
657  const std::unordered_map<int, const Analyzer::BinOper*>&
658  inner_table_id_to_join_condition,
659  const RelAlgExecutionUnit& ra_exe_unit,
660  const ExecutorDeviceType device_type);
661 
663  const RelAlgExecutionUnit& ra_exe_unit,
664  const int device_id,
666  const std::map<int, const TableFragments*>&,
667  const FragmentsList& selected_fragments,
669  std::list<ChunkIter>&,
670  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
671  DeviceAllocator* device_allocator,
672  const size_t thread_idx,
673  const bool allow_runtime_interrupt);
674 
676  const RelAlgExecutionUnit& ra_exe_unit,
677  const int device_id,
679  const std::map<int, const TableFragments*>&,
680  const FragmentsList& selected_fragments,
682  std::list<ChunkIter>&,
683  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
684  DeviceAllocator* device_allocator,
685  const size_t thread_idx,
686  const bool allow_runtime_interrupt);
687 
688  std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
690  const RelAlgExecutionUnit& ra_exe_unit,
691  const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
692  const std::vector<InputDescriptor>& input_descs,
693  const std::map<int, const TableFragments*>& all_tables_fragments);
694 
696  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
697  std::vector<size_t>& local_col_to_frag_pos,
698  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
699  const FragmentsList& selected_fragments,
700  const RelAlgExecutionUnit& ra_exe_unit);
701 
703  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
704  std::vector<size_t>& local_col_to_frag_pos,
705  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
706  const FragmentsList& selected_fragments,
707  const RelAlgExecutionUnit& ra_exe_unit);
708 
709  std::vector<size_t> getFragmentCount(const FragmentsList& selected_fragments,
710  const size_t scan_idx,
711  const RelAlgExecutionUnit& ra_exe_unit);
712 
713  // pass nullptr to results if it shouldn't be extracted from the execution context
714  int32_t executePlanWithGroupBy(const RelAlgExecutionUnit& ra_exe_unit,
715  const CompilationResult&,
716  const bool hoist_literals,
717  ResultSetPtr* results,
718  const ExecutorDeviceType device_type,
719  std::vector<std::vector<const int8_t*>>& col_buffers,
720  const std::vector<size_t> outer_tab_frag_ids,
722  const std::vector<std::vector<int64_t>>& num_rows,
723  const std::vector<std::vector<uint64_t>>& frag_offsets,
725  const int device_id,
726  const int outer_table_id,
727  const int64_t limit,
728  const uint32_t start_rowid,
729  const uint32_t num_tables,
730  const bool allow_runtime_interrupt,
731  RenderInfo* render_info,
732  const int64_t rows_to_process = -1);
733  // pass nullptr to results if it shouldn't be extracted from the execution context
735  const RelAlgExecutionUnit& ra_exe_unit,
736  const CompilationResult&,
737  const bool hoist_literals,
738  ResultSetPtr* results,
739  const std::vector<Analyzer::Expr*>& target_exprs,
740  const ExecutorDeviceType device_type,
741  std::vector<std::vector<const int8_t*>>& col_buffers,
742  QueryExecutionContext* query_exe_context,
743  const std::vector<std::vector<int64_t>>& num_rows,
744  const std::vector<std::vector<uint64_t>>& frag_offsets,
745  Data_Namespace::DataMgr* data_mgr,
746  const int device_id,
747  const uint32_t start_rowid,
748  const uint32_t num_tables,
749  const bool allow_runtime_interrupt,
750  RenderInfo* render_info,
751  const int64_t rows_to_process = -1);
752 
753  public: // Temporary, ask saman about this
754  static std::pair<int64_t, int32_t> reduceResults(const SQLAgg agg,
755  const SQLTypeInfo& ti,
756  const int64_t agg_init_val,
757  const int8_t out_byte_width,
758  const int64_t* out_vec,
759  const size_t out_vec_sz,
760  const bool is_group_by,
761  const bool float_argument_input);
762 
763  static void addCodeToCache(const CodeCacheKey&,
764  std::shared_ptr<CompilationContext>,
765  llvm::Module*,
766  CodeCache&);
767 
768  private:
770  const RelAlgExecutionUnit& ra_exe_unit);
771  std::vector<int8_t*> getJoinHashTablePtrs(const ExecutorDeviceType device_type,
772  const int device_id);
774  const RelAlgExecutionUnit&,
775  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
776  std::shared_ptr<RowSetMemoryOwner>,
777  const QueryMemoryDescriptor&) const;
779  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
780  std::shared_ptr<RowSetMemoryOwner>,
781  const QueryMemoryDescriptor&) const;
783  const RelAlgExecutionUnit&,
784  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
785  std::shared_ptr<RowSetMemoryOwner>,
786  const QueryMemoryDescriptor&) const;
787 
788  ResultSetPtr executeWorkUnitImpl(size_t& max_groups_buffer_entry_guess,
789  const bool is_agg,
790  const bool allow_single_frag_table_opt,
791  const std::vector<InputTableInfo>&,
792  const RelAlgExecutionUnit&,
793  const CompilationOptions&,
794  const ExecutionOptions& options,
796  std::shared_ptr<RowSetMemoryOwner>,
797  RenderInfo* render_info,
798  const bool has_cardinality_estimation,
799  ColumnCacheMap& column_cache);
800 
801  std::vector<llvm::Value*> inlineHoistedLiterals();
802 
803  std::tuple<CompilationResult, std::unique_ptr<QueryMemoryDescriptor>> compileWorkUnit(
804  const std::vector<InputTableInfo>& query_infos,
805  const PlanState::DeletedColumnsMap& deleted_cols_map,
806  const RelAlgExecutionUnit& ra_exe_unit,
807  const CompilationOptions& co,
808  const ExecutionOptions& eo,
809  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
810  const bool allow_lazy_fetch,
811  std::shared_ptr<RowSetMemoryOwner>,
812  const size_t max_groups_buffer_entry_count,
813  const int8_t crt_min_byte_width,
814  const bool has_cardinality_estimation,
815  ColumnCacheMap& column_cache,
816  RenderInfo* render_info = nullptr);
817  // Generate code to skip the deleted rows in the outermost table.
818  llvm::BasicBlock* codegenSkipDeletedOuterTableRow(
819  const RelAlgExecutionUnit& ra_exe_unit,
820  const CompilationOptions& co);
821  std::vector<JoinLoop> buildJoinLoops(RelAlgExecutionUnit& ra_exe_unit,
822  const CompilationOptions& co,
823  const ExecutionOptions& eo,
824  const std::vector<InputTableInfo>& query_infos,
825  ColumnCacheMap& column_cache);
826  // Create a callback which hoists left hand side filters above the join for left joins,
827  // eliminating extra computation of the probe and matches if the row does not pass the
828  // filters
830  const RelAlgExecutionUnit& ra_exe_unit,
831  const size_t level_idx,
832  const int inner_table_id,
833  const CompilationOptions& co);
834  // Create a callback which generates code which returns true iff the row on the given
835  // level is deleted.
836  std::function<llvm::Value*(const std::vector<llvm::Value*>&, llvm::Value*)>
837  buildIsDeletedCb(const RelAlgExecutionUnit& ra_exe_unit,
838  const size_t level_idx,
839  const CompilationOptions& co);
840  // Builds a join hash table for the provided conditions on the current level.
841  // Returns null iff the build fails, and provides the reasons in `fail_reasons`.
842  std::shared_ptr<HashJoin> buildCurrentLevelHashTable(
843  const JoinCondition& current_level_join_conditions,
844  size_t level_idx,
845  RelAlgExecutionUnit& ra_exe_unit,
846  const CompilationOptions& co,
847  const std::vector<InputTableInfo>& query_infos,
848  ColumnCacheMap& column_cache,
849  std::vector<std::string>& fail_reasons);
851  llvm::Value* addJoinLoopIterator(const std::vector<llvm::Value*>& prev_iters,
852  const size_t level_idx);
853  void codegenJoinLoops(const std::vector<JoinLoop>& join_loops,
854  const RelAlgExecutionUnit& ra_exe_unit,
855  GroupByAndAggregate& group_by_and_aggregate,
856  llvm::Function* query_func,
857  llvm::BasicBlock* entry_bb,
859  const CompilationOptions& co,
860  const ExecutionOptions& eo);
861  bool compileBody(const RelAlgExecutionUnit& ra_exe_unit,
862  GroupByAndAggregate& group_by_and_aggregate,
864  const CompilationOptions& co,
865  const GpuSharedMemoryContext& gpu_smem_context = {});
866 
867  void createErrorCheckControlFlow(llvm::Function* query_func,
868  bool run_with_dynamic_watchdog,
869  bool run_with_allowing_runtime_interrupt,
870  ExecutorDeviceType device_type,
871  const std::vector<InputTableInfo>& input_table_infos);
872 
873  void insertErrorCodeChecker(llvm::Function* query_func,
874  bool hoist_literals,
875  bool allow_runtime_query_interrupt);
876 
877  void preloadFragOffsets(const std::vector<InputDescriptor>& input_descs,
878  const std::vector<InputTableInfo>& query_infos);
879 
881  std::shared_ptr<HashJoin> hash_table;
882  std::string fail_reason;
883  };
884 
886  const std::shared_ptr<Analyzer::BinOper>& qual_bin_oper,
887  const std::vector<InputTableInfo>& query_infos,
888  const MemoryLevel memory_level,
889  const JoinType join_type,
890  const HashType preferred_hash_type,
891  ColumnCacheMap& column_cache,
892  const HashTableBuildDagMap& hashtable_build_dag_map,
893  const RegisteredQueryHint& query_hint,
894  const TableIdToNodeMap& table_id_to_node_map);
895  void nukeOldState(const bool allow_lazy_fetch,
896  const std::vector<InputTableInfo>& query_infos,
897  const PlanState::DeletedColumnsMap& deleted_cols_map,
898  const RelAlgExecutionUnit* ra_exe_unit);
899 
900  std::shared_ptr<CompilationContext> optimizeAndCodegenCPU(
901  llvm::Function*,
902  llvm::Function*,
903  const std::unordered_set<llvm::Function*>&,
904  const CompilationOptions&);
905  std::shared_ptr<CompilationContext> optimizeAndCodegenGPU(
906  llvm::Function*,
907  llvm::Function*,
908  std::unordered_set<llvm::Function*>&,
909  const bool no_inline,
910  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
911  const CompilationOptions&);
912  std::string generatePTX(const std::string&) const;
913  void initializeNVPTXBackend() const;
914 
915  int64_t deviceCycles(int milliseconds) const;
916 
918  llvm::Value* translated_value;
919  llvm::Value* original_value;
920  };
921 
923  const size_t col_width,
924  const CompilationOptions&,
925  const bool translate_null_val,
926  const int64_t translated_null_val,
928  std::stack<llvm::BasicBlock*>&,
929  const bool thread_mem_shared);
930 
931  llvm::Value* castToFP(llvm::Value*,
932  SQLTypeInfo const& from_ti,
933  SQLTypeInfo const& to_ti);
934  llvm::Value* castToIntPtrTyIn(llvm::Value* val, const size_t bit_width);
935 
936  std::tuple<RelAlgExecutionUnit, PlanState::DeletedColumnsMap> addDeletedColumn(
937  const RelAlgExecutionUnit& ra_exe_unit,
938  const CompilationOptions& co);
939 
940  bool isFragmentFullyDeleted(const int table_id,
941  const Fragmenter_Namespace::FragmentInfo& fragment);
942 
943  std::pair<bool, int64_t> skipFragment(
944  const InputDescriptor& table_desc,
945  const Fragmenter_Namespace::FragmentInfo& frag_info,
946  const std::list<std::shared_ptr<Analyzer::Expr>>& simple_quals,
947  const std::vector<uint64_t>& frag_offsets,
948  const size_t frag_idx);
949 
950  std::pair<bool, int64_t> skipFragmentInnerJoins(
951  const InputDescriptor& table_desc,
952  const RelAlgExecutionUnit& ra_exe_unit,
953  const Fragmenter_Namespace::FragmentInfo& fragment,
954  const std::vector<uint64_t>& frag_offsets,
955  const size_t frag_idx);
956 
958  const std::unordered_set<PhysicalInput>& phys_inputs);
960  const std::unordered_set<PhysicalInput>& phys_inputs);
961  TableGenerations computeTableGenerations(std::unordered_set<int> phys_table_ids);
962 
963  public:
964  void setupCaching(const std::unordered_set<PhysicalInput>& phys_inputs,
965  const std::unordered_set<int>& phys_table_ids);
966  void setColRangeCache(const AggregatedColRange& aggregated_col_range) {
967  agg_col_range_cache_ = aggregated_col_range;
968  }
969  ExecutorId getExecutorId() const { return executor_id_; };
970  QuerySessionId& getCurrentQuerySession(mapd_shared_lock<mapd_shared_mutex>& read_lock);
972  const QuerySessionId& candidate_query_session,
973  mapd_shared_lock<mapd_shared_mutex>& read_lock);
974  bool checkCurrentQuerySession(const std::string& candidate_query_session,
975  mapd_shared_lock<mapd_shared_mutex>& read_lock);
976  void invalidateRunningQuerySession(mapd_unique_lock<mapd_shared_mutex>& write_lock);
977  bool addToQuerySessionList(const QuerySessionId& query_session,
978  const std::string& query_str,
979  const std::string& submitted,
980  const size_t executor_id,
981  const QuerySessionStatus::QueryStatus query_status,
982  mapd_unique_lock<mapd_shared_mutex>& write_lock);
983  bool removeFromQuerySessionList(const QuerySessionId& query_session,
984  const std::string& submitted_time_str,
985  mapd_unique_lock<mapd_shared_mutex>& write_lock);
986  void setQuerySessionAsInterrupted(const QuerySessionId& query_session,
987  mapd_unique_lock<mapd_shared_mutex>& write_lock);
988  bool checkIsQuerySessionInterrupted(const std::string& query_session,
989  mapd_shared_lock<mapd_shared_mutex>& read_lock);
990  bool checkIsQuerySessionEnrolled(const QuerySessionId& query_session,
991  mapd_shared_lock<mapd_shared_mutex>& read_lock);
993  const QuerySessionId& query_session,
994  const std::string& submitted_time_str,
995  const QuerySessionStatus::QueryStatus updated_query_status,
996  mapd_unique_lock<mapd_shared_mutex>& write_lock);
998  const QuerySessionId& query_session,
999  const std::string& submitted_time_str,
1000  const size_t executor_id,
1001  mapd_unique_lock<mapd_shared_mutex>& write_lock);
1002  std::vector<QuerySessionStatus> getQuerySessionInfo(
1003  const QuerySessionId& query_session,
1004  mapd_shared_lock<mapd_shared_mutex>& read_lock);
1005 
1008  const QuerySessionId& query_session_id,
1009  const std::string& query_str,
1010  const std::string& query_submitted_time);
1011  void checkPendingQueryStatus(const QuerySessionId& query_session);
1012  void clearQuerySessionStatus(const QuerySessionId& query_session,
1013  const std::string& submitted_time_str);
1014  void updateQuerySessionStatus(const QuerySessionId& query_session,
1015  const std::string& submitted_time_str,
1016  const QuerySessionStatus::QueryStatus new_query_status);
1017  void enrollQuerySession(const QuerySessionId& query_session,
1018  const std::string& query_str,
1019  const std::string& submitted_time_str,
1020  const size_t executor_id,
1021  const QuerySessionStatus::QueryStatus query_session_status);
1022  // get a set of executor ids that a given session has fired regardless of
1023  // each executor's status: pending or running
1024  const std::vector<size_t> getExecutorIdsRunningQuery(
1025  const QuerySessionId& interrupt_session) const;
1026  // check whether the current session that this executor manages is interrupted
1027  // while performing non-kernel time task
1028  bool checkNonKernelTimeInterrupted() const;
1029 
1030  // true when we have matched cardinality, and false otherwise
1031  using CachedCardinality = std::pair<bool, size_t>;
1032  void addToCardinalityCache(const std::string& cache_key, const size_t cache_value);
1033  CachedCardinality getCachedCardinality(const std::string& cache_key);
1034 
1038  JoinColumnSide target_side,
1039  bool extract_only_col_id);
1040 
1041  private:
1042  std::shared_ptr<CompilationContext> getCodeFromCache(const CodeCacheKey&,
1043  const CodeCache&);
1044 
1045  std::vector<int8_t> serializeLiterals(
1046  const std::unordered_map<int, CgenState::LiteralValues>& literals,
1047  const int device_id);
1048 
/**
 * Rounds off_in up to the nearest multiple of alignment.
 *
 * @param off_in     Offset to align.
 * @param alignment  Required alignment; it does not have to be a power of two.
 * @return off_in when it is already a multiple of alignment (or when alignment
 *         is 0), otherwise the next larger multiple of alignment.
 */
static size_t align(const size_t off_in, const size_t alignment) {
  // A zero alignment would make the modulo below undefined behavior; treat it
  // as "no alignment requirement" and hand the offset back untouched.
  if (alignment == 0) {
    return off_in;
  }
  const size_t remainder = off_in % alignment;
  return remainder == 0 ? off_in : off_in + (alignment - remainder);
}
1056 
1057  std::unique_ptr<CgenState> cgen_state_;
1058 
1060  public:
1062  : cgen_state_(cgen_state), saved_fetch_cache(cgen_state_->fetch_cache_) {}
1064 
1065  private:
1067  std::unordered_map<int, std::vector<llvm::Value*>> saved_fetch_cache;
1068  };
1069 
1070  llvm::Value* spillDoubleElement(llvm::Value* elem_val, llvm::Type* elem_ty);
1071 
1072  std::unique_ptr<PlanState> plan_state_;
1073  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
1074 
1075  static const int max_gpu_count{16};
1077 
1078  static std::mutex gpu_active_modules_mutex_;
1081  // indicates whether this executor has been interrupted
1082  std::atomic<bool> interrupted_;
1083 
1084  mutable std::mutex str_dict_mutex_;
1085 
1086  mutable std::unique_ptr<llvm::TargetMachine> nvptx_target_machine_;
1087 
1090 
1091  static const size_t baseline_threshold{
1092  1000000}; // if a perfect hash needs more entries, use baseline
1093  static const size_t code_cache_size{1000};
1094 
1095  const unsigned block_size_x_;
1096  const unsigned grid_size_x_;
1097  const size_t max_gpu_slab_size_;
1098  const std::string debug_dir_;
1099  const std::string debug_file_;
1100 
1106 
1109 
1110  // Singleton instance used for an execution unit which is a project with window
1111  // functions.
1112  std::unique_ptr<WindowProjectNodeContext> window_project_node_context_owned_;
1113  // The active window function.
1115 
1120  // a query session that this executor manages
1122  // a pair of <QuerySessionId, interrupted_flag>
1124  // a pair of <QuerySessionId, query_session_status>
1126  static std::map<int, std::shared_ptr<Executor>> executors_;
1127 
1128  // SQL queries take a shared lock, exclusive operations (cache clear, memory clear)
1129  // take a write lock
1131 
1133  mapd_shared_lock<mapd_shared_mutex> shared_lock;
1134  mapd_unique_lock<mapd_shared_mutex> unique_lock;
1135  };
1137  ExecutorMutexHolder ret;
1139  // Only one unitary executor can run at a time
1140  ret.unique_lock = mapd_unique_lock<mapd_shared_mutex>(execute_mutex_);
1141  } else {
1142  ret.shared_lock = mapd_shared_lock<mapd_shared_mutex>(execute_mutex_);
1143  }
1144  return ret;
1145  }
1146 
1148 
1150  const QueryPlanHash INVALID_QUERY_PLAN_HASH{std::hash<std::string>{}(EMPTY_QUERY_PLAN)};
1152  static std::unordered_map<std::string, size_t> cardinality_cache_;
1153 
1154  public:
1155  static const int32_t ERR_DIV_BY_ZERO{1};
1156  static const int32_t ERR_OUT_OF_GPU_MEM{2};
1157  static const int32_t ERR_OUT_OF_SLOTS{3};
1158  static const int32_t ERR_UNSUPPORTED_SELF_JOIN{4};
1159  static const int32_t ERR_OUT_OF_RENDER_MEM{5};
1160  static const int32_t ERR_OUT_OF_CPU_MEM{6};
1161  static const int32_t ERR_OVERFLOW_OR_UNDERFLOW{7};
1162  static const int32_t ERR_OUT_OF_TIME{9};
1163  static const int32_t ERR_INTERRUPTED{10};
1164  static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED{11};
1165  static const int32_t ERR_TOO_MANY_LITERALS{12};
1166  static const int32_t ERR_STRING_CONST_IN_RESULTSET{13};
1168  static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES{15};
1169  static const int32_t ERR_GEOS{16};
1170  static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT{17};
1171 
1172  static std::mutex compilation_mutex_;
1173  static std::mutex kernel_mutex_;
1174 
1176  friend class CodeGenerator;
1177  friend class ColumnFetcher;
1178  friend struct DiamondCodegen; // cgen_state_
1179  friend class ExecutionKernel;
1180  friend class KernelSubtask;
1181  friend class HashJoin; // cgen_state_
1183  friend class RangeJoinHashTable;
1184  friend class GroupByAndAggregate;
1190  friend class ResultSet;
1191  friend class InValuesBitmap;
1192  friend class LeafAggregator;
1193  friend class PerfectJoinHashTable;
1194  friend class QueryRewriter;
1196  friend class RelAlgExecutor;
1197  friend class TableOptimizer;
1201  friend struct TargetExprCodegen;
1203 };
1204 
1205 inline std::string get_null_check_suffix(const SQLTypeInfo& lhs_ti,
1206  const SQLTypeInfo& rhs_ti) {
1207  if (lhs_ti.get_notnull() && rhs_ti.get_notnull()) {
1208  return "";
1209  }
1210  std::string null_check_suffix{"_nullable"};
1211  if (lhs_ti.get_notnull()) {
1212  CHECK(!rhs_ti.get_notnull());
1213  null_check_suffix += "_rhs";
1214  } else if (rhs_ti.get_notnull()) {
1215  CHECK(!lhs_ti.get_notnull());
1216  null_check_suffix += "_lhs";
1217  }
1218  return null_check_suffix;
1219 }
1220 
1221 inline bool is_unnest(const Analyzer::Expr* expr) {
1222  return dynamic_cast<const Analyzer::UOper*>(expr) &&
1223  static_cast<const Analyzer::UOper*>(expr)->get_optype() == kUNNEST;
1224 }
1225 
1226 inline bool is_constructed_point(const Analyzer::Expr* expr) {
1227  auto uoper = dynamic_cast<const Analyzer::UOper*>(expr);
1228  auto oper = (uoper && uoper->get_optype() == kCAST) ? uoper->get_operand() : expr;
1229  auto arr = dynamic_cast<const Analyzer::ArrayExpr*>(oper);
1230  return (arr && arr->isLocalAlloc() && arr->get_type_info().is_fixlen_array());
1231 }
1232 
1233 bool is_trivial_loop_join(const std::vector<InputTableInfo>& query_infos,
1234  const RelAlgExecutionUnit& ra_exe_unit);
1235 
1236 std::unordered_set<int> get_available_gpus(const Catalog_Namespace::Catalog& cat);
1237 
1238 size_t get_context_count(const ExecutorDeviceType device_type,
1239  const size_t cpu_count,
1240  const size_t gpu_count);
1241 
1242 extern "C" RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec,
1243  int8_t* buffer);
1244 
1246 
1247 #endif // QUERYENGINE_EXECUTE_H
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:4111
void read_rt_udf_gpu_module(const std::string &udf_ir)
SQLTypeInfo getColumnType(const size_t col_idx) const
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:218
const std::string debug_dir_
Definition: Execute.h:1098
QuerySessionId & getCurrentQuerySession(mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:3946
llvm::Value * translated_value
Definition: Execute.h:918
void executeWorkUnitPerFragment(const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
Compiles and dispatches a work unit per fragment processing results with the per fragment callback...
Definition: Execute.cpp:1608
bool is_agg(const Analyzer::Expr *expr)
static mapd_shared_mutex executor_session_mutex_
Definition: Execute.h:1119
AggregatedColRange computeColRangesCache(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:3857
void enableRuntimeQueryInterrupt(const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
Definition: Execute.cpp:4227
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:217
std::vector< std::unique_ptr< ExecutionKernel > > createKernels(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
Definition: Execute.cpp:2111
std::string JoinColumnsInfo
static QuerySessionMap queries_session_map_
Definition: Execute.h:1125
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:548
static mapd_shared_mutex execute_mutex_
Definition: Execute.h:1130
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int outer_table_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const int64_t rows_to_process=-1)
Definition: Execute.cpp:3155
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
Definition: Execute.h:1086
int64_t kernel_queue_time_ms_
Definition: Execute.h:1107
JoinType
Definition: sqldefs.h:108
size_t maxGpuSlabSize() const
Definition: Execute.cpp:3464
ExecutorMutexHolder acquireExecuteMutex()
Definition: Execute.h:1136
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1103
int64_t compilation_queue_time_ms_
Definition: Execute.h:1108
const std::string & getQuerySubmittedTime()
Definition: Execute.h:128
friend class ResultSet
Definition: Execute.h:1190
std::map< const ColumnDescriptor *, std::set< int32_t >> ColumnToFragmentsMap
Definition: Execute.h:325
std::string cat(Ts &&...args)
void invalidateRunningQuerySession(mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:3972
void checkPendingQueryStatus(const QuerySessionId &query_session)
Definition: Execute.cpp:3994
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1163
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
Definition: ColumnIR.cpp:537
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:111
std::vector< int8_t * > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:3343
bool is_trivial_loop_join(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1182
FetchCacheAnchor(CgenState *cgen_state)
Definition: Execute.h:1061
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
Definition: IRCodegen.cpp:1126
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:303
StringDictionaryProxy * getLiteralDictionary() const override
Definition: Execute.h:345
std::atomic< bool > interrupted_
Definition: Execute.h:1082
std::vector< size_t > getTableFragmentIndices(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
Definition: Execute.cpp:2297
ExecutorDeviceType
ResultSetPtr executeTableFunction(const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
Compiles and dispatches a table function; that is, a function that takes as input one or more columns...
Definition: Execute.cpp:1691
void read_rt_udf_cpu_module(const std::string &udf_ir)
Fragmenter_Namespace::RowDataProvider RowDataProvider
Definition: Execute.h:324
static const int max_gpu_count
Definition: Execute.h:1075
GroupColLLVMValue groupByColumnCodegen(Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
Definition: IRCodegen.cpp:1304
std::map< const QuerySessionId, std::map< std::string, QuerySessionStatus >> QuerySessionMap
Definition: Execute.h:149
#define const
size_t const getFragmentIndex() const
static const size_t code_cache_size
Definition: Execute.h:1093
bool isArchPascalOrLater(const ExecutorDeviceType dt) const
Definition: Execute.h:555
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
bool is_fp() const
Definition: sqltypes.h:513
std::pair< QuerySessionId, std::string > CurrentQueryStatus
Definition: Execute.h:81
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1226
static mapd_shared_mutex executors_cache_mutex_
Definition: Execute.h:1147
std::function< llvm::BasicBlock *(llvm::BasicBlock *, llvm::BasicBlock *, const std::string &, llvm::Function *, CgenState *)> HoistedFiltersCallback
Definition: JoinLoop.h:61
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:364
static const size_t baseline_threshold
Definition: Execute.h:1091
void updateQuerySessionStatus(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
Definition: Execute.cpp:4034
void registerActiveModule(void *module, const int device_id) const
JoinColumnSide
string name
Definition: setup.in.py:72
std::unordered_set< int > get_available_gpus(const Data_Namespace::DataMgr *data_mgr)
Definition: Execute.cpp:1060
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:3546
TableGenerations computeTableGenerations(std::unordered_set< int > phys_table_ids)
Definition: Execute.cpp:3904
bool isArchPascalOrLater() const
Definition: CudaMgr.h:142
TableToFragmentIds fragments_with_deleted_rows
Definition: Execute.h:330
bool hasLazyFetchColumns(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:353
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:234
llvm::Value * aggregateWindowStatePtr()
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
static std::pair< int64_t, int32_t > reduceResults(const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
Definition: Execute.cpp:688
Definition: sqldefs.h:49
std::vector< std::string > CodeCacheKey
Definition: CodeCache.h:25
QuerySessionId current_query_session_
Definition: Execute.h:1121
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
Definition: Execute.h:199
bool checkCurrentQuerySession(const std::string &candidate_query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:3951
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const int64_t rows_to_process=-1)
Definition: Execute.cpp:2935
const QuerySessionStatus::QueryStatus getQueryStatus()
Definition: Execute.h:129
static const int32_t ERR_GEOS
Definition: Execute.h:1169
const std::string query_str_
Definition: Execute.h:138
QuerySessionStatus(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time)
Definition: Execute.h:97
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1117
std::shared_ptr< ResultSet > ResultSetPtr
static void * gpu_active_modules_[max_gpu_count]
Definition: Execute.h:1080
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1057
static const int32_t ERR_TOO_MANY_LITERALS
Definition: Execute.h:1165
llvm::Value * original_value
Definition: Execute.h:919
void enrollQuerySession(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
Definition: Execute.cpp:4050
ParseIRError(const std::string message)
Definition: Execute.h:312
std::vector< Analyzer::Expr * > get_exprs_not_owned(const std::vector< std::shared_ptr< Analyzer::Expr >> &exprs)
Definition: Execute.h:249
static uint32_t gpu_active_modules_device_mask_
Definition: Execute.h:1079
void launchKernels(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2245
TableUpdateMetadata executeUpdate(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
void buildSelectedFragsMappingForUnion(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2882
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:673
llvm::Value * castToIntPtrTyIn(llvm::Value *val, const size_t bit_width)
Definition: Execute.cpp:3500
size_t getNumBytesForFetchedRow(const std::set< int > &table_ids_to_fetch) const
Definition: Execute.cpp:323
static std::mutex kernel_mutex_
Definition: Execute.h:1173
unsigned numBlocksPerMP() const
Definition: Execute.cpp:3449
#define CHECK_GT(x, y)
Definition: Logger.h:221
Container for compilation results and assorted options for a single execution unit.
bool isCPUOnly() const
Definition: Execute.cpp:273
std::unique_ptr< WindowProjectNodeContext > window_project_node_context_owned_
Definition: Execute.h:1112
std::vector< QuerySessionStatus > getQuerySessionInfo(const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:4261
void addTransientStringLiterals(const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
Definition: Execute.cpp:1739
std::vector< FragmentsPerTable > FragmentsList
bool is_time() const
Definition: sqltypes.h:515
bool needFetchAllFragments(const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
Definition: Execute.cpp:2478
const QuerySessionId query_session_
Definition: Execute.h:136
std::shared_ptr< HashJoin > hash_table
Definition: Execute.h:881
std::string to_string(char const *&&v)
bool checkIsQuerySessionInterrupted(const std::string &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:4207
mapd_shared_mutex & getSessionLock()
Definition: Execute.cpp:3942
bool checkNonKernelTimeInterrupted() const
Definition: Execute.cpp:4295
static void clearMemory(const Data_Namespace::MemoryLevel memory_level)
Definition: Execute.cpp:201
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::string &submitted_time)
Definition: Execute.h:105
std::function< void(const UpdateLogForFragment &, TableUpdateMetadata &)> Callback
Definition: Execute.h:360
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const
Definition: Execute.cpp:311
RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
std::shared_ptr< CompilationContext > getCodeFromCache(const CodeCacheKey &, const CodeCache &)
static const size_t high_scan_limit
Definition: Execute.h:479
QueryMustRunOnCpu(const std::string &err)
Definition: Execute.h:307
CodeCache gpu_code_cache_
Definition: Execute.h:1089
static const int32_t ERR_STRING_CONST_IN_RESULTSET
Definition: Execute.h:1166
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
Definition: Execute.cpp:176
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:3377
bool isFragmentFullyDeleted(const int table_id, const Fragmenter_Namespace::FragmentInfo &fragment)
Definition: Execute.cpp:3625
bool checkIsQuerySessionEnrolled(const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:4218
void setQueryStatus(const QuerySessionStatus::QueryStatus &status)
Definition: Execute.h:130
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
FetchResult fetchUnionChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
Definition: Execute.cpp:2659
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY
Definition: Execute.h:1167
const ExecutorId executor_id_
Definition: Execute.h:1101
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED
Definition: Execute.h:1164
const ResultSetPtr & get_temporary_table(const TemporaryTables *temporary_tables, const int table_id)
Definition: Execute.h:226
int8_t warpSize() const
Definition: Execute.cpp:3432
std::map< QuerySessionId, bool > InterruptFlagMap
Definition: Execute.h:82
const size_t max_gpu_slab_size_
Definition: Execute.h:1097
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1033
ResultSetPtr collectAllDeviceResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
Definition: Execute.cpp:1920
const ColumnDescriptor * getPhysicalColumnDescriptor(const Analyzer::ColumnVar *, int) const
Definition: Execute.cpp:284
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:1155
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit(const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
TableIdToNodeMap table_id_to_node_map_
Definition: Execute.h:1105
bool addToQuerySessionList(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:4074
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:164
std::shared_ptr< CompilationContext > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
std::vector< llvm::Value * > LLVMValueVector
Definition: Execute.h:370
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:1074
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:218
std::vector< TargetValue > getEntryAt(const size_t index) const override
int get_logical_size() const
Definition: sqltypes.h:340
std::unordered_map< int, std::vector< llvm::Value * > > fetch_cache_
Definition: CgenState.h:341
decltype(FragmentInfoType::fragmentId) const getFragmentId() const
Definition: Execute.h:354
int64_t deviceCycles(int milliseconds) const
Definition: Execute.cpp:3468
std::string generatePTX(const std::string &) const
std::mutex str_dict_mutex_
Definition: Execute.h:1084
bool is_integer() const
Definition: sqltypes.h:511
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1102
friend class PendingExecutionClosure
Definition: Execute.h:1195
void setQuerySessionAsInterrupted(const QuerySessionId &query_session, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:4196
static const int32_t ERR_OUT_OF_RENDER_MEM
Definition: Execute.h:1159
std::unordered_map< TableId, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:926
std::shared_timed_mutex mapd_shared_mutex
const std::string debug_file_
Definition: Execute.h:1099
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
Definition: Execute.cpp:2035
CachedCardinality getCachedCardinality(const std::string &cache_key)
Definition: Execute.cpp:4251
decltype(FragmentInfoType::physicalTableId) const getPhysicalTableId() const
Definition: Execute.h:351
std::unordered_map< int, const RelAlgNode * > TableIdToNodeMap
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1073
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:77
void setColRangeCache(const AggregatedColRange &aggregated_col_range)
Definition: Execute.h:966
bool containsLeftDeepOuterJoin() const
Definition: Execute.h:436
void setCatalog(const Catalog_Namespace::Catalog *catalog)
Definition: Execute.cpp:299
StringDictionaryProxy * getStringDictionaryProxy(const int dict_id, const bool with_generation) const
Definition: Execute.h:421
std::shared_ptr< CompilationContext > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:1161
bool is_timeinterval() const
Definition: sqltypes.h:520
static std::unordered_map< std::string, size_t > cardinality_cache_
Definition: Execute.h:1152
const QueryPlanHash INVALID_QUERY_PLAN_HASH
Definition: Execute.h:1150
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1123
FragmentInfoType const & getFragmentInfo() const
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1072
void insertErrorCodeChecker(llvm::Function *query_func, bool hoist_literals, bool allow_runtime_query_interrupt)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1162
void initializeNVPTXBackend() const
mapd_unique_lock< mapd_shared_mutex > unique_lock
Definition: Execute.h:1134
std::map< int32_t, std::set< int32_t >> TableToFragmentIds
Definition: Execute.h:326
const std::string submitted_time_
Definition: Execute.h:139
const TableGeneration & getTableGeneration(const int table_id) const
Definition: Execute.cpp:315
llvm::Value * castToFP(llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
Definition: Execute.cpp:3473
size_t fragment_index_
Definition: Execute.h:366
std::pair< bool, size_t > CachedCardinality
Definition: Execute.h:1031
const ColumnDescriptor * getColumnDescriptor(const Analyzer::ColumnVar *) const
Definition: Execute.cpp:278
static const int32_t ERR_UNSUPPORTED_SELF_JOIN
Definition: Execute.h:1158
const unsigned block_size_x_
Definition: Execute.h:1095
const unsigned grid_size_x_
Definition: Execute.h:1096
specifies the content in-memory of a row in the column metadata table
bool is_boolean() const
Definition: sqltypes.h:516
std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const override
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
Definition: Execute.h:1168
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:1126
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:1156
const TemporaryTables * getTemporaryTables()
Definition: Execute.h:416
std::string get_null_check_suffix(const SQLTypeInfo &lhs_ti, const SQLTypeInfo &rhs_ti)
Definition: Execute.h:1205
static void addCodeToCache(const CodeCacheKey &, std::shared_ptr< CompilationContext >, llvm::Module *, CodeCache &)
const Catalog_Namespace::Catalog * getCatalog() const
Definition: Execute.cpp:295
#define RUNTIME_EXPORT
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:4137
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:155
ColumnToFragmentsMap columns_for_metadata_update
Definition: Execute.h:329
const std::vector< size_t > getExecutorIdsRunningQuery(const QuerySessionId &interrupt_session) const
Definition: Execute.cpp:4279
#define CHECK_LT(x, y)
Definition: Logger.h:219
ResultSetPtr resultsUnion(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:898
std::shared_ptr< ResultSet > rs_
Definition: Execute.h:367
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:3398
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2837
static void addUdfIrToModule(const std::string &udf_ir_filename, const bool is_cuda_ir)
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
std::unique_ptr< QueryCompilationDescriptor > QueryCompilationDescriptorOwned
Definition: Execute.h:77
size_t ExecutorId
Definition: Execute.h:383
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:949
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:411
mapd_shared_mutex & getDataRecyclerLock()
Definition: Execute.cpp:3927
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1116
const Expr * get_operand() const
Definition: Analyzer.h:370
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
CodeCache cpu_code_cache_
Definition: Execute.h:1088
std::pair< bool, int64_t > skipFragment(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:3658
unsigned gridSize() const
Definition: Execute.cpp:3440
llvm::Value * spillDoubleElement(llvm::Value *elem_val, llvm::Type *elem_ty)
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags(const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
Definition: Execute.cpp:2429
friend class KernelSubtask
Definition: Execute.h:1180
size_t QueryPlanHash
StringDictionaryGenerations computeStringDictionaryGenerations(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:3883
static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT
Definition: Execute.h:1170
Data_Namespace::DataMgr * getDataMgr() const
Definition: Execute.h:448
bool needLinearizeAllFragments(const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:2497
void setExecutorId(const size_t executor_id)
Definition: Execute.h:133
FragmentInfoType const & fragment_info_
Definition: Execute.h:365
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:3358
const std::string getQueryStr()
Definition: Execute.h:126
QuerySessionStatus::QueryStatus getQuerySessionStatus(const QuerySessionId &candidate_query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:3960
std::pair< bool, int64_t > skipFragmentInnerJoins(const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:3824
void buildSelectedFragsMapping(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2851
llvm::Value * codegenAggregateWindowState()
TableGenerations table_generations_
Definition: Execute.h:1118
void unregisterActiveModule(void *module, const int device_id) const
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:573
void resetInterrupt()
mapd_shared_lock< mapd_shared_mutex > read_lock
const size_t getExecutorId()
Definition: Execute.h:127
size_t executor_id_
Definition: Execute.h:137
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:4162
std::string QuerySessionId
Definition: Execute.h:80
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1108
ResultSetPtr reduceMultiDeviceResultSets(std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:973
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
void addToCardinalityCache(const std::string &cache_key, const size_t cache_value)
Definition: Execute.cpp:4242
constexpr char const * EMPTY_QUERY_PLAN
#define CHECK(condition)
Definition: Logger.h:209
QueryPlanDagCache & getQueryPlanDagCache()
Definition: Execute.cpp:3931
static const int32_t ERR_OUT_OF_SLOTS
Definition: Execute.h:1157
static std::mutex compilation_mutex_
Definition: Execute.h:1172
void interrupt(const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
std::vector< llvm::Value * > inlineHoistedLiterals()
FetchResult fetchChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
Definition: Execute.cpp:2520
mapd_shared_lock< mapd_shared_mutex > shared_lock
Definition: Execute.h:1133
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1437
JoinColumnsInfo getJoinColumnsInfo(const Analyzer::Expr *join_expr, JoinColumnSide target_side, bool extract_only_col_id)
Definition: Execute.cpp:3935
std::unordered_map< int, const Analyzer::BinOper * > getInnerTabIdToJoinCond() const
Definition: Execute.cpp:2085
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Definition: Execute.cpp:1787
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const int inner_table_id, const CompilationOptions &co)
Definition: IRCodegen.cpp:782
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:174
ExpressionRange getColRange(const PhysicalInput &) const
Definition: Execute.cpp:319
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:207
CurrentQueryStatus attachExecutorToQuerySession(const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
Definition: Execute.cpp:3977
mapd_unique_lock< mapd_shared_mutex > write_lock
void redeclareFilterFunction()
Definition: IRCodegen.cpp:1009
SQLTypeInfo columnType
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::string &submitted_time, const QuerySessionStatus::QueryStatus &query_status)
Definition: Execute.h:114
UpdateLogForFragment(FragmentInfoType const &fragment_info, size_t const, const std::shared_ptr< ResultSet > &rs)
bool is_unnest(const Analyzer::Expr *expr)
Definition: Execute.h:1221
bool is_string() const
Definition: sqltypes.h:509
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:545
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:890
auto getResultSet() const
Definition: Execute.h:362
unsigned blockSize() const
Definition: Execute.cpp:3454
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
std::unordered_map< int, std::vector< llvm::Value * > > saved_fetch_cache
Definition: Execute.h:1067
ExecutorId getExecutorId() const
Definition: Execute.h:969
static size_t align(const size_t off_in, const size_t alignment)
Definition: Execute.h:1049
std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptorOwned
Definition: Execute.h:79
size_t const getRowCount() const override
const QuerySessionId getQuerySession()
Definition: Execute.h:125
void clearQuerySessionStatus(const QuerySessionId &query_session, const std::string &submitted_time_str)
Definition: Execute.cpp:4020
static const int32_t ERR_OUT_OF_CPU_MEM
Definition: Execute.h:1160
QuerySessionStatus::QueryStatus query_status_
Definition: Execute.h:146
bool is_decimal() const
Definition: sqltypes.h:512
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:681
Descriptor for the fragments required for an execution kernel.
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
Fragmenter_Namespace::FragmentInfo FragmentInfoType
Definition: Execute.h:335
bool is_rt_udf_module_present(bool cpu_only=false)
static size_t getArenaBlockSize()
Definition: Execute.cpp:226
ResultSetPtr executeWorkUnit(size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1370
std::mutex gpu_exec_mutex_[max_gpu_count]
Definition: Execute.h:1076
HashType
Definition: HashTable.h:19
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
static mapd_shared_mutex recycler_mutex_
Definition: Execute.h:1151
llvm::Value * codegenWindowFunction(const size_t target_index, const CompilationOptions &co)
SQLOps get_optype() const
Definition: Analyzer.h:369
static QueryPlanDagCache query_plan_dag_cache_
Definition: Execute.h:1149
WindowFunctionContext * active_window_function_
Definition: Execute.h:1114
static std::mutex gpu_active_modules_mutex_
Definition: Execute.h:1078
void setupCaching(const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
Definition: Execute.cpp:3916
static void nukeCacheOfExecutors()
Definition: Execute.h:400
void clearMetaInfoCache()
Definition: Execute.cpp:405
const ColumnDescriptor * get_column_descriptor(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:189
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
size_t const getEntryCount() const override
llvm::BasicBlock * codegenWindowResetStateControlFlow()
const TemporaryTables * temporary_tables_
Definition: Execute.h:1104
CompilationRetryNewScanLimit(const size_t new_scan_limit)
Definition: Execute.h:266
WatchdogException(const std::string &cause)
Definition: Execute.h:159
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:384
std::unordered_map< JoinColumnsInfo, HashTableBuildDag > HashTableBuildDagMap
bool isArchMaxwell(const ExecutorDeviceType dt) const
bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2339
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
Definition: Execute.cpp:1735