OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Execute.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_EXECUTE_H
18 #define QUERYENGINE_EXECUTE_H
19 
20 #include <algorithm>
21 #include <atomic>
22 #include <condition_variable>
23 #include <cstddef>
24 #include <cstdlib>
25 #include <deque>
26 #include <functional>
27 #include <limits>
28 #include <map>
29 #include <mutex>
30 #include <queue>
31 #include <stack>
32 #include <unordered_map>
33 #include <unordered_set>
34 
35 #include <llvm/IR/Function.h>
36 #include <llvm/IR/Value.h>
37 #include <llvm/Linker/Linker.h>
38 #include <llvm/Transforms/Utils/ValueMapper.h>
39 #include <rapidjson/document.h>
40 
44 #include "QueryEngine/CgenState.h"
45 #include "QueryEngine/CodeCache.h"
58 #include "QueryEngine/PlanState.h"
67 
68 #include "DataMgr/Chunk/Chunk.h"
69 #include "Logger/Logger.h"
70 #include "Shared/DbObjectKeys.h"
72 #include "Shared/funcannotations.h"
74 #include "Shared/measure.h"
75 #include "Shared/thread_count.h"
76 #include "Shared/toString.h"
81 
82 using QueryCompilationDescriptorOwned = std::unique_ptr<QueryCompilationDescriptor>;
84 using QueryMemoryDescriptorOwned = std::unique_ptr<QueryMemoryDescriptor>;
85 using QuerySessionId = std::string;
86 using CurrentQueryStatus = std::pair<QuerySessionId, std::string>;
87 using InterruptFlagMap = std::map<QuerySessionId, bool>;
89  // A class that is used to describe the query session's info
90  public:
91  /* todo(yoonmin): support more query status
92  * i.e., RUNNING_SORT, RUNNING_CARD_EST, CLEANUP, ... */
93  enum QueryStatus {
94  UNDEFINED = 0,
100  };
101 
102  QuerySessionStatus(const QuerySessionId& query_session,
103  const std::string& query_str,
104  const std::string& submitted_time)
105  : query_session_(query_session)
106  , executor_id_(0)
107  , query_str_(query_str)
108  , submitted_time_(submitted_time)
110  QuerySessionStatus(const QuerySessionId& query_session,
111  const size_t executor_id,
112  const std::string& query_str,
113  const std::string& submitted_time)
114  : query_session_(query_session)
115  , executor_id_(executor_id)
116  , query_str_(query_str)
117  , submitted_time_(submitted_time)
119  QuerySessionStatus(const QuerySessionId& query_session,
120  const size_t executor_id,
121  const std::string& query_str,
122  const std::string& submitted_time,
123  const QuerySessionStatus::QueryStatus& query_status)
124  : query_session_(query_session)
125  , executor_id_(executor_id)
126  , query_str_(query_str)
127  , submitted_time_(submitted_time)
128  , query_status_(query_status) {}
129 
131  const std::string getQueryStr() { return query_str_; }
132  const size_t getExecutorId() { return executor_id_; }
133  const std::string& getQuerySubmittedTime() { return submitted_time_; }
136  query_status_ = status;
137  }
138  void setExecutorId(const size_t executor_id) { executor_id_ = executor_id; }
139 
140  private:
142  size_t executor_id_;
143  const std::string query_str_;
144  const std::string submitted_time_;
145  // Currently we use four query statuses:
146  // 1) PENDING_IN_QUEUE: a task is submitted to the dispatch_queue but hangs due to no
147  // existing worker (= executor) 2) PENDING_IN_EXECUTOR: a task is assigned to the
148  // specific executor but waits to get the resource to run 3) RUNNING: a task is assigned
149  // to the specific executor and its execution has been successfully started
150  // 4) RUNNING_REDUCTION: a task is in the reduction phase
152 };
153 using QuerySessionMap =
154  std::map<const QuerySessionId, std::map<std::string, QuerySessionStatus>>;
155 
156 class ColumnFetcher;
157 
// Exception type for watchdog-related failures. The text passed as `cause` is
// forwarded to std::runtime_error and is what what() returns.
class WatchdogException : public std::runtime_error {
 public:
  WatchdogException(const std::string& cause) : std::runtime_error{cause} {}
};
162 
164 
165 class Executor;
166 
167 inline llvm::Value* get_arg_by_name(llvm::Function* func, const std::string& name) {
168  for (auto& arg : func->args()) {
169  if (arg.getName() == name) {
170  return &arg;
171  }
172  }
173  CHECK(false);
174  return nullptr;
175 }
176 
// Returns log2 of a byte width. Only the widths 1, 2, 4 and 8 are accepted;
// any other value terminates the process via abort().
inline uint32_t log2_bytes(const uint32_t bytes) {
  if (bytes == 1) {
    return 0;
  }
  if (bytes == 2) {
    return 1;
  }
  if (bytes == 4) {
    return 2;
  }
  if (bytes == 8) {
    return 3;
  }
  abort();
}
191 
193  const shared::ColumnKey& column_key) {
194  CHECK_GT(column_key.db_id, 0);
195  CHECK_GT(column_key.table_id, 0);
196  const auto col_desc = Catalog_Namespace::get_metadata_for_column(column_key);
197  CHECK(col_desc);
198  return col_desc;
199 }
200 
201 inline const Analyzer::Expr* extract_cast_arg(const Analyzer::Expr* expr) {
202  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr);
203  if (!cast_expr || cast_expr->get_optype() != kCAST) {
204  return expr;
205  }
206  return cast_expr->get_operand();
207 }
208 
209 inline std::string numeric_type_name(const SQLTypeInfo& ti) {
210  CHECK(ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
211  ti.is_fp() || (ti.is_string() && ti.get_compression() == kENCODING_DICT) ||
212  ti.is_timeinterval());
213  if (ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
214  ti.is_string() || ti.is_timeinterval()) {
215  return "int" + std::to_string(ti.get_logical_size() * 8) + "_t";
216  }
217  return ti.get_type() == kDOUBLE ? "double" : "float";
218 }
219 
221  const shared::ColumnKey& column_key) {
222  return column_key.table_id > 0 ? get_column_descriptor(column_key) : nullptr;
223 }
224 
225 inline const ResultSetPtr& get_temporary_table(const TemporaryTables* temporary_tables,
226  const int table_id) {
227  CHECK_LT(table_id, 0);
228  const auto it = temporary_tables->find(table_id);
229  CHECK(it != temporary_tables->end());
230  return it->second;
231 }
232 
233 inline const SQLTypeInfo get_column_type(const int col_id,
234  const int table_id,
235  const ColumnDescriptor* cd,
236  const TemporaryTables* temporary_tables) {
237  CHECK(cd || temporary_tables);
238  if (cd) {
239  CHECK_EQ(col_id, cd->columnId);
240  CHECK_EQ(table_id, cd->tableId);
241  return cd->columnType;
242  }
243  const auto& temp = get_temporary_table(temporary_tables, table_id);
244  return temp->getColType(col_id);
245 }
246 
247 class CompilationRetryNoLazyFetch : public std::runtime_error {
248  public:
250  : std::runtime_error("Retry query compilation with no GPU lazy fetch.") {}
251 };
252 
253 class CompilationRetryNewScanLimit : public std::runtime_error {
254  public:
255  CompilationRetryNewScanLimit(const size_t new_scan_limit)
256  : std::runtime_error("Retry query compilation with new scan limit.")
257  , new_scan_limit_(new_scan_limit) {}
258 
260 };
261 
// Exception with a fixed message reporting that the query contains too many
// literals.
class TooManyLiterals : public std::runtime_error {
 public:
  TooManyLiterals() : std::runtime_error{"Too many literals in the query"} {}
};
266 
267 class CompilationRetryNoCompaction : public std::runtime_error {
268  public:
270  : std::runtime_error("Retry query compilation with no compaction.") {}
271 };
272 
273 // Throwing QueryMustRunOnCpu allows us to retry a query step on CPU if
274 // g_allow_query_step_cpu_retry is true (on by default) by catching
275 // the exception at the query step execution level in RelAlgExecutor,
276 // or if g_allow_query_step_cpu_retry is false but g_allow_cpu_retry is true,
277 // by retrying the entire query on CPU (if both flags are false, we return an
278 // error). This exception is thrown for the following broad categories of conditions:
279 // 1) we have not implemented an operator on GPU and so cannot codegen for GPU
280 // 2) we catch an unexpected GPU compilation/linking error (perhaps due
281 // to an outdated driver/CUDA installation not allowing a modern operator)
282 // 3) when we detect up front that we will not have enough GPU memory to execute
283 // a query.
284 // There is a fourth scenario where our pre-flight GPU memory check passed but for
285 // whatever reason we still run out of memory. In those cases we go down the
286 // handleOutOfMemoryRetry path, which will first try per-fragment execution on GPU,
287 // and if that fails, CPU execution.
288 // Note that for distributed execution failures on leaves, we do not retry queries
289 // TODO(todd): See if CPU retry of individual steps can be turned on safely for
290 // distributed
291 
// Exception signalling that a query has to be executed in CPU mode.
// Construct it with a specific reason, or without arguments to get the
// generic "Query must run in cpu mode." message.
class QueryMustRunOnCpu : public std::runtime_error {
 public:
  QueryMustRunOnCpu(const std::string& err) : std::runtime_error{err} {}

  // The default message is routed through the string constructor above.
  QueryMustRunOnCpu() : QueryMustRunOnCpu(std::string("Query must run in cpu mode.")) {}
};
298 
// Exception carrying an IR parsing error message; the text passed as
// `message` is what what() returns.
class ParseIRError : public std::runtime_error {
 public:
  // Takes the message by const reference, like the other exception types in
  // this header, so constructing the exception does not copy the string into
  // a by-value parameter first.
  ParseIRError(const std::string& message) : std::runtime_error(message) {}
};
303 
304 class StringConstInResultSet : public std::runtime_error {
305  public:
307  : std::runtime_error(
308  "NONE ENCODED String types are not supported as input result set.") {}
309 };
310 
311 class ExtensionFunction;
312 
314 using ColumnToFragmentsMap = std::map<const ColumnDescriptor*, std::set<int32_t>>;
315 using TableToFragmentIds = std::map<int32_t, std::set<int32_t>>;
316 
320 };
321 
323  public:
325 
326  UpdateLogForFragment(FragmentInfoType const& fragment_info,
327  size_t const,
328  const std::shared_ptr<ResultSet>& rs);
329 
330  std::vector<TargetValue> getEntryAt(const size_t index) const override;
331  std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const override;
332 
333  size_t const getRowCount() const override;
335  return rs_->getRowSetMemOwner()->getLiteralStringDictProxy();
336  }
337  size_t const getEntryCount() const override;
338  size_t const getFragmentIndex() const;
339  FragmentInfoType const& getFragmentInfo() const;
342  }
343  decltype(FragmentInfoType::fragmentId) const getFragmentId() const {
344  return fragment_info_.fragmentId;
345  }
346 
347  SQLTypeInfo getColumnType(const size_t col_idx) const;
348 
349  using Callback = std::function<void(const UpdateLogForFragment&, TableUpdateMetadata&)>;
350 
351  auto getResultSet() const { return rs_; }
352 
353  private:
356  std::shared_ptr<ResultSet> rs_;
357 };
358 
359 using LLVMValueVector = std::vector<llvm::Value*>;
360 
362 
363 std::ostream& operator<<(std::ostream&, FetchResult const&);
364 
365 class Executor {
366  static_assert(sizeof(float) == 4 && sizeof(double) == 8,
367  "Host hardware not supported, unexpected size of float / double.");
368  static_assert(sizeof(time_t) == 8,
369  "Host hardware not supported, 64-bit time support is required.");
370 
371  public:
372  using ExecutorId = size_t;
373  static const ExecutorId UNITARY_EXECUTOR_ID = 0;
374  static const ExecutorId INVALID_EXECUTOR_ID = SIZE_MAX;
375 
376  Executor(const ExecutorId id,
377  Data_Namespace::DataMgr* data_mgr,
378  const size_t block_size_x,
379  const size_t grid_size_x,
380  const size_t max_gpu_slab_size,
381  const std::string& debug_dir,
382  const std::string& debug_file);
383 
384  void clearCaches(bool runtime_only = false);
385 
386  std::string dumpCache() const;
387 
388  static void clearExternalCaches(bool for_update,
389  const TableDescriptor* td,
390  const int current_db_id) {
391  bool clearEntireCache = true;
392  if (td) {
393  const auto& table_chunk_key_prefix = td->getTableChunkKey(current_db_id);
394  if (!table_chunk_key_prefix.empty()) {
395  auto table_key = boost::hash_value(table_chunk_key_prefix);
397  if (for_update) {
399  } else {
401  }
402  clearEntireCache = false;
403  }
404  }
405  if (clearEntireCache) {
407  if (for_update) {
409  } else {
411  }
412  }
413  }
414 
415  void reset(bool discard_runtime_modules_only = false);
416 
417  template <typename F>
418  static void registerExtensionFunctions(F register_extension_functions) {
419  // Don't want native code to vanish while executing:
421  // Blocks Executor::getExecutor:
423  // Lock registration to avoid
424  // java.util.ConcurrentModificationException from calcite server
425  // when client registrations arrive too fast. Also blocks
426  // Executor::get_rt_udf_module for retrieving runtime UDF/UDTF
427  // module until this registration has rebuilt it via
428  // Executor::update_after_registration:
429  std::lock_guard<std::mutex> register_lock(
431 
432  // Reset all executors:
433  for (auto& executor_item : Executor::executors_) {
434  executor_item.second->reset(/*discard_runtime_modules_only=*/true);
435  }
436  // Call registration worker, see
437  // DBHandler::register_runtime_extension_functions for details. In
438  // short, updates Executor::extension_module_sources,
439  // table_functions::TableFunctionsFactory, and registers runtime
440  // extension functions with Calcite:
441  register_extension_functions();
442 
443  // Update executors with registered LLVM modules:
444  update_after_registration(/*update_runtime_modules_only=*/true);
445  }
446 
447  static std::shared_ptr<Executor> getExecutor(
448  const ExecutorId id,
449  const std::string& debug_dir = "",
450  const std::string& debug_file = "",
451  const SystemParameters& system_parameters = SystemParameters());
452 
453  static void nukeCacheOfExecutors() {
455  execute_mutex_); // don't want native code to vanish while executing
457  executors_.clear();
458  }
459 
460  static void clearMemory(const Data_Namespace::MemoryLevel memory_level);
461 
462  static size_t getArenaBlockSize();
463 
464  static void addUdfIrToModule(const std::string& udf_ir_filename, const bool is_cuda_ir);
465 
466  enum class ExtModuleKinds {
467  template_module, // RuntimeFunctions.bc
468  udf_cpu_module, // Load-time UDFs for CPU execution
469  udf_gpu_module, // Load-time UDFs for GPU execution
470  rt_udf_cpu_module, // Run-time UDF/UDTFs for CPU execution
471  rt_udf_gpu_module, // Run-time UDF/UDTFs for GPU execution
472  rt_geos_module, // geos functions
473  rt_libdevice_module // math library functions for GPU execution
474  };
475  // Globally available mapping of extension module sources. Not thread-safe.
476  static std::map<ExtModuleKinds, std::string> extension_module_sources;
478 
479  // Convenience functions for retrieving executor-local extension modules, thread-safe:
480  const std::unique_ptr<llvm::Module>& get_rt_module() const {
482  }
483  const std::unique_ptr<llvm::Module>& get_udf_module(bool is_gpu = false) const {
484  return get_extension_module(
486  }
487  const std::unique_ptr<llvm::Module>& get_rt_udf_module(bool is_gpu = false) const {
488  std::lock_guard<std::mutex> lock(
490  return get_extension_module(
492  }
493  const std::unique_ptr<llvm::Module>& get_geos_module() const {
495  }
496  const std::unique_ptr<llvm::Module>& get_libdevice_module() const {
498  }
499 
500  bool has_rt_module() const {
502  }
503  bool has_udf_module(bool is_gpu = false) const {
504  return has_extension_module(
506  }
507  bool has_rt_udf_module(bool is_gpu = false) const {
508  return has_extension_module(
510  }
511  bool has_geos_module() const {
513  }
514  bool has_libdevice_module() const {
516  }
517 
522 
527  const bool with_generation) const {
529  return getStringDictionaryProxy(dict_key, row_set_mem_owner_, with_generation);
530  }
531 
533  const shared::StringDictKey& dict_key,
534  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
535  const bool with_generation) const;
536 
538  const shared::StringDictKey& source_dict_key,
539  const shared::StringDictKey& dest_dict_key,
540  const RowSetMemoryOwner::StringTranslationType translation_type,
541  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
542  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
543  const bool with_generation) const;
544 
546  const StringDictionaryProxy* source_proxy,
547  StringDictionaryProxy* dest_proxy,
548  const std::vector<StringOps_Namespace::StringOpInfo>& source_string_op_infos,
549  const std::vector<StringOps_Namespace::StringOpInfo>& dest_source_string_op_infos,
550  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) const;
551 
553  const shared::StringDictKey& source_dict_key,
554  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
555  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
556  const bool with_generation) const;
557 
558  bool isCPUOnly() const;
559 
560  bool isArchMaxwell(const ExecutorDeviceType dt) const;
561 
563  return cgen_state_->contains_left_deep_outer_join_;
564  }
565 
567 
569  int) const;
570 
572  CHECK(data_mgr_);
573  return data_mgr_;
574  }
575 
576  const std::shared_ptr<RowSetMemoryOwner> getRowSetMemoryOwner() const;
577 
578  const TemporaryTables* getTemporaryTables() const;
579 
581 
582  const TableGeneration& getTableGeneration(const shared::TableKey& table_key) const;
583 
585 
587  const std::set<shared::TableKey>& table_keys_to_fetch) const;
588 
589  bool hasLazyFetchColumns(const std::vector<Analyzer::Expr*>& target_exprs) const;
590  std::vector<ColumnLazyFetchInfo> getColLazyFetchInfo(
591  const std::vector<Analyzer::Expr*>& target_exprs) const;
592 
593  static void registerActiveModule(void* module, const int device_id);
594  static void unregisterActiveModule(const int device_id);
595  void interrupt(const QuerySessionId& query_session = "",
596  const QuerySessionId& interrupt_session = "");
597  void resetInterrupt();
598 
599  // only for testing usage
600  void enableRuntimeQueryInterrupt(const double runtime_query_check_freq,
601  const unsigned pending_query_check_freq) const;
602 
603  static const size_t high_scan_limit{128000000};
604 
605  int8_t warpSize() const;
606  unsigned gridSize() const;
607  void setGridSize(unsigned grid_size);
608  void resetGridSize();
609  unsigned numBlocksPerMP() const;
610  unsigned blockSize() const;
611  void setBlockSize(unsigned block_size);
612  void resetBlockSize();
613  size_t maxGpuSlabSize() const;
614 
615  ResultSetPtr executeWorkUnit(size_t& max_groups_buffer_entry_guess,
616  const bool is_agg,
617  const std::vector<InputTableInfo>&,
618  const RelAlgExecutionUnit&,
619  const CompilationOptions&,
620  const ExecutionOptions& options,
621  RenderInfo* render_info,
622  const bool has_cardinality_estimation,
623  ColumnCacheMap& column_cache);
624 
626  const std::vector<InputTableInfo>& table_infos,
627  const TableDescriptor* updated_table_desc,
628  const CompilationOptions& co,
629  const ExecutionOptions& eo,
631  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
633  const bool is_agg);
634 
636  const RelAlgExecutionUnit& ra_exe_unit,
637  const std::shared_ptr<RowSetMemoryOwner>& row_set_mem_owner);
638 
639  int deviceCount(const ExecutorDeviceType) const;
640 
641  private:
642  void clearMetaInfoCache();
643 
644  int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const;
645 
646  // Generate code for a window function target.
647  llvm::Value* codegenWindowFunction(const size_t target_index,
648  const CompilationOptions& co);
649 
651  llvm::Value* cond_lv,
652  SQLAgg const aggKind,
653  CompilationOptions const& co) const;
654 
655  // Generate code for an aggregate window function target.
656  llvm::Value* codegenWindowFunctionAggregate(const CompilationOptions& co);
657 
658  // The aggregate state requires a state reset when starting a new partition. Generate
659  // the new partition check and return the continuation basic block.
660  llvm::BasicBlock* codegenWindowResetStateControlFlow();
661 
662  // Generate code for initializing the state of a window aggregate.
663  void codegenWindowFunctionStateInit(llvm::Value* aggregate_state);
664 
665  // Generates the required calls for an aggregate window function and returns the final
666  // result.
667  llvm::Value* codegenWindowFunctionAggregateCalls(llvm::Value* aggregate_state,
668  const CompilationOptions& co);
669 
670  // Generate code for computing window navigation function on frame
672 
673  // Generate code for computing current partition index from a given row_pos
674  llvm::Value* codegenCurrentPartitionIndex(
675  const WindowFunctionContext* window_func_context,
676  llvm::Value* current_row_pos_lv);
677 
678  // Generate code to analyze user-given window frame bound expr
679  llvm::Value* codegenFrameBoundExpr(const Analyzer::WindowFunction* window_func,
680  const Analyzer::WindowFrame* frame_bound,
681  CodeGenerator& code_generator,
682  const CompilationOptions& co);
683 
684  // Generate code for a given frame bound
685  llvm::Value* codegenFrameBound(bool for_start_bound,
686  bool for_range_mode,
687  bool for_window_frame_naviation,
688  const Analyzer::WindowFrame* frame_bound,
689  bool is_timestamp_type_frame,
690  llvm::Value* order_key_null_val,
692 
693  std::pair<std::string, llvm::Value*> codegenLoadOrderKeyBufPtr(
694  WindowFunctionContext* window_func_context) const;
695 
696  // Generate code to load null range of the window partition
697  std::pair<llvm::Value*, llvm::Value*> codegenFrameNullRange(
698  WindowFunctionContext* window_func_context,
699  llvm::Value* partition_index_lv) const;
700 
701  // Generate codes for loading various buffers of window partitions
703  WindowFunctionContext* window_func_context,
704  llvm::Value* partition_index_lv) const;
705 
706  // Generate code for computing a window frame bound
707  std::pair<llvm::Value*, llvm::Value*> codegenWindowFrameBounds(
708  WindowFunctionContext* window_func_context,
709  const Analyzer::WindowFrame* frame_start_bound,
710  const Analyzer::WindowFrame* frame_end_bound,
711  llvm::Value* order_key_col_null_val_lv,
713  CodeGenerator& code_generator);
714 
715  // Generate codes for computing a pair of window frame bounds
716  std::pair<llvm::Value*, llvm::Value*> codegenFrameBoundRange(
717  const Analyzer::WindowFunction* window_func,
718  CodeGenerator& code_generator,
719  const CompilationOptions& co);
720 
721  // frequently used utility functions to generate code for window framing
722  std::vector<llvm::Value*> prepareRowModeFuncArgs(
723  bool for_start_bound,
724  SqlWindowFrameBoundType bound_type,
725  const WindowFrameBoundFuncArgs& args) const;
726  std::vector<llvm::Value*> prepareRangeModeFuncArgs(
727  bool for_start_bound,
728  const Analyzer::WindowFrame* frame_bound,
729  bool is_timestamp_type_frame,
730  llvm::Value* order_key_null_val,
731  const WindowFrameBoundFuncArgs& frame_args) const;
732  const std::string getOrderKeyTypeName(WindowFunctionContext* window_func_context) const;
734  WindowFunctionContext* window_func_context,
735  CodeGenerator& code_generator,
737  size_t getOrderKeySize(WindowFunctionContext* window_func_context) const;
739  WindowFunctionContext* window_func_context) const;
740  std::string getFramingFuncName(const std::string& bound_type,
741  const std::string& order_col_type,
742  const std::string& op_type,
743  bool for_timestamp_type) const;
744 
745  // The AVG window function requires some post-processing: the sum is divided by count
746  // and the result is stored back for the current row.
747  void codegenWindowAvgEpilogue(llvm::Value* crt_val, llvm::Value* window_func_null_val);
748 
749  // Generates code which loads the current aggregate value for the window context.
750  llvm::Value* codegenAggregateWindowState();
751 
752  llvm::Value* aggregateWindowStatePtr();
753 
755  CHECK(data_mgr_);
756  auto cuda_mgr = data_mgr_->getCudaMgr();
757  CHECK(cuda_mgr);
758  return cuda_mgr;
759  }
760 
762  if (dt == ExecutorDeviceType::GPU) {
763  return cudaMgr()->isArchPascalOrLater();
764  }
765  return false;
766  }
767 
768  bool needFetchAllFragments(const InputColDescriptor& col_desc,
769  const RelAlgExecutionUnit& ra_exe_unit,
770  const FragmentsList& selected_fragments) const;
771 
773  const InputColDescriptor& inner_col_desc,
774  const RelAlgExecutionUnit& ra_exe_unit,
775  const FragmentsList& selected_fragments,
776  const Data_Namespace::MemoryLevel memory_level) const;
777 
778  using PerFragmentCallBack =
779  std::function<void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo&)>;
780 
786  void executeWorkUnitPerFragment(const RelAlgExecutionUnit& ra_exe_unit,
787  const InputTableInfo& table_info,
788  const CompilationOptions& co,
789  const ExecutionOptions& eo,
792  const std::set<size_t>& fragment_indexes_param);
793 
795 
802  const std::vector<InputTableInfo>& table_infos,
803  const CompilationOptions& co,
804  const ExecutionOptions& eo);
805 
807  const RelAlgExecutionUnit& ra_exe_unit,
808  const ExecutorDeviceType requested_device_type);
809 
811  SharedKernelContext& shared_context,
812  const RelAlgExecutionUnit& ra_exe_unit,
814  const ExecutorDeviceType device_type,
815  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
816 
818  SharedKernelContext& shared_context,
819  const RelAlgExecutionUnit& ra_exe_unit) const;
820 
821  std::unordered_map<shared::TableKey, const Analyzer::BinOper*> getInnerTabIdToJoinCond()
822  const;
823 
828  std::vector<std::unique_ptr<ExecutionKernel>> createKernels(
829  SharedKernelContext& shared_context,
830  const RelAlgExecutionUnit& ra_exe_unit,
831  ColumnFetcher& column_fetcher,
832  const std::vector<InputTableInfo>& table_infos,
833  const ExecutionOptions& eo,
834  const bool is_agg,
835  const bool allow_single_frag_table_opt,
836  const size_t context_count,
837  const QueryCompilationDescriptor& query_comp_desc,
839  RenderInfo* render_info,
840  std::unordered_set<int>& available_gpus,
841  int& available_cpus);
842 
847  void launchKernels(SharedKernelContext& shared_context,
848  std::vector<std::unique_ptr<ExecutionKernel>>&& kernels,
849  const ExecutorDeviceType device_type);
850 
851  std::vector<size_t> getTableFragmentIndices(
852  const RelAlgExecutionUnit& ra_exe_unit,
853  const ExecutorDeviceType device_type,
854  const size_t table_idx,
855  const size_t outer_frag_idx,
856  std::map<shared::TableKey, const TableFragments*>& selected_tables_fragments,
857  const std::unordered_map<shared::TableKey, const Analyzer::BinOper*>&
858  inner_table_id_to_join_condition);
859 
860  bool skipFragmentPair(
861  const Fragmenter_Namespace::FragmentInfo& outer_fragment_info,
862  const Fragmenter_Namespace::FragmentInfo& inner_fragment_info,
863  const int inner_table_id,
864  const std::unordered_map<shared::TableKey, const Analyzer::BinOper*>&
865  inner_table_id_to_join_condition,
866  const RelAlgExecutionUnit& ra_exe_unit,
867  const ExecutorDeviceType device_type);
868 
870  const RelAlgExecutionUnit& ra_exe_unit,
871  const int device_id,
873  const std::map<shared::TableKey, const TableFragments*>&,
874  const FragmentsList& selected_fragments,
875  std::list<ChunkIter>&,
876  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
877  DeviceAllocator* device_allocator,
878  const size_t thread_idx,
879  const bool allow_runtime_interrupt);
880 
882  const RelAlgExecutionUnit& ra_exe_unit,
883  const int device_id,
885  const std::map<shared::TableKey, const TableFragments*>&,
886  const FragmentsList& selected_fragments,
887  std::list<ChunkIter>&,
888  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
889  DeviceAllocator* device_allocator,
890  const size_t thread_idx,
891  const bool allow_runtime_interrupt);
892 
893  std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
895  const RelAlgExecutionUnit& ra_exe_unit,
896  const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
897  const std::vector<InputDescriptor>& input_descs,
898  const std::map<shared::TableKey, const TableFragments*>& all_tables_fragments);
899 
901  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
902  std::vector<size_t>& local_col_to_frag_pos,
903  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
904  const FragmentsList& selected_fragments,
905  const RelAlgExecutionUnit& ra_exe_unit);
906 
908  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
909  const FragmentsList& selected_fragments,
910  const RelAlgExecutionUnit& ra_exe_unit);
911 
912  std::vector<size_t> getFragmentCount(const FragmentsList& selected_fragments,
913  const size_t scan_idx,
914  const RelAlgExecutionUnit& ra_exe_unit);
915 
916  // pass nullptr to results if it shouldn't be extracted from the execution context
917  int32_t executePlanWithGroupBy(const RelAlgExecutionUnit& ra_exe_unit,
918  const CompilationResult&,
919  const bool hoist_literals,
920  ResultSetPtr* results,
921  const ExecutorDeviceType device_type,
922  std::vector<std::vector<const int8_t*>>& col_buffers,
923  const std::vector<size_t> outer_tab_frag_ids,
925  const std::vector<std::vector<int64_t>>& num_rows,
926  const std::vector<std::vector<uint64_t>>& frag_offsets,
928  const int device_id,
929  const shared::TableKey& outer_table_key,
930  const int64_t limit,
931  const uint32_t start_rowid,
932  const uint32_t num_tables,
933  const bool allow_runtime_interrupt,
934  RenderInfo* render_info,
935  const bool optimize_cuda_block_and_grid_sizes,
936  const int64_t rows_to_process = -1);
937  // pass nullptr to results if it shouldn't be extracted from the execution context
939  const RelAlgExecutionUnit& ra_exe_unit,
940  const CompilationResult&,
941  const bool hoist_literals,
942  ResultSetPtr* results,
943  const std::vector<Analyzer::Expr*>& target_exprs,
944  const ExecutorDeviceType device_type,
945  std::vector<std::vector<const int8_t*>>& col_buffers,
946  QueryExecutionContext* query_exe_context,
947  const std::vector<std::vector<int64_t>>& num_rows,
948  const std::vector<std::vector<uint64_t>>& frag_offsets,
949  Data_Namespace::DataMgr* data_mgr,
950  const int device_id,
951  const uint32_t start_rowid,
952  const uint32_t num_tables,
953  const bool allow_runtime_interrupt,
954  RenderInfo* render_info,
955  const bool optimize_cuda_block_and_grid_sizes,
956  const int64_t rows_to_process = -1);
957 
958  public: // Temporary, ask saman about this
959  static std::pair<int64_t, int32_t> reduceResults(const SQLAgg agg,
960  const SQLTypeInfo& ti,
961  const int64_t agg_init_val,
962  const int8_t out_byte_width,
963  const int64_t* out_vec,
964  const size_t out_vec_sz,
965  const bool is_group_by,
966  const bool float_argument_input);
967 
968  private:
970  const RelAlgExecutionUnit& ra_exe_unit);
971  std::vector<int8_t*> getJoinHashTablePtrs(const ExecutorDeviceType device_type,
972  const int device_id);
974  const RelAlgExecutionUnit&,
975  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
976  std::shared_ptr<RowSetMemoryOwner>,
977  const QueryMemoryDescriptor&) const;
979  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
980  std::shared_ptr<RowSetMemoryOwner>,
981  const QueryMemoryDescriptor&) const;
983  const RelAlgExecutionUnit&,
984  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
985  std::shared_ptr<RowSetMemoryOwner>,
986  const QueryMemoryDescriptor&) const;
987 
988  ResultSetPtr executeWorkUnitImpl(size_t& max_groups_buffer_entry_guess,
989  const bool is_agg,
990  const bool allow_single_frag_table_opt,
991  const std::vector<InputTableInfo>&,
992  const RelAlgExecutionUnit&,
993  const CompilationOptions&,
994  const ExecutionOptions& options,
995  std::shared_ptr<RowSetMemoryOwner>,
996  RenderInfo* render_info,
997  const bool has_cardinality_estimation,
998  ColumnCacheMap& column_cache);
999 
1000  std::vector<llvm::Value*> inlineHoistedLiterals();
1001 
1003 
1004  std::tuple<CompilationResult, std::unique_ptr<QueryMemoryDescriptor>> compileWorkUnit(
1005  const std::vector<InputTableInfo>& query_infos,
1006  const PlanState::DeletedColumnsMap& deleted_cols_map,
1007  const RelAlgExecutionUnit& ra_exe_unit,
1008  const CompilationOptions& co,
1009  const ExecutionOptions& eo,
1010  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
1011  const bool allow_lazy_fetch,
1012  std::shared_ptr<RowSetMemoryOwner>,
1013  const size_t max_groups_buffer_entry_count,
1014  const int8_t crt_min_byte_width,
1015  const bool has_cardinality_estimation,
1016  ColumnCacheMap& column_cache,
1017  RenderInfo* render_info = nullptr);
1018  // Generate code to skip the deleted rows in the outermost table.
1019  llvm::BasicBlock* codegenSkipDeletedOuterTableRow(
1020  const RelAlgExecutionUnit& ra_exe_unit,
1021  const CompilationOptions& co);
1022  std::vector<JoinLoop> buildJoinLoops(RelAlgExecutionUnit& ra_exe_unit,
1023  const CompilationOptions& co,
1024  const ExecutionOptions& eo,
1025  const std::vector<InputTableInfo>& query_infos,
1026  ColumnCacheMap& column_cache);
1027  // Create a callback which hoists left hand side filters above the join for left joins,
1028  // eliminating extra computation of the probe and matches if the row does not pass the
1029  // filters
1031  const RelAlgExecutionUnit& ra_exe_unit,
1032  const size_t level_idx,
1033  const shared::TableKey& inner_table_key,
1034  const CompilationOptions& co);
1035  // Create a callback which generates code which returns true iff the row on the given
1036  // level is deleted.
1037  std::function<llvm::Value*(const std::vector<llvm::Value*>&, llvm::Value*)>
1038  buildIsDeletedCb(const RelAlgExecutionUnit& ra_exe_unit,
1039  const size_t level_idx,
1040  const CompilationOptions& co);
1041  // Builds a join hash table for the provided conditions on the current level.
1042  // Returns null on failure and reports the reasons in `fail_reasons`.
1043  std::shared_ptr<HashJoin> buildCurrentLevelHashTable(
1044  const JoinCondition& current_level_join_conditions,
1045  size_t level_idx,
1046  RelAlgExecutionUnit& ra_exe_unit,
1047  const CompilationOptions& co,
1048  const std::vector<InputTableInfo>& query_infos,
1049  ColumnCacheMap& column_cache,
1050  std::vector<std::string>& fail_reasons);
1051  void redeclareFilterFunction();
1052  llvm::Value* addJoinLoopIterator(const std::vector<llvm::Value*>& prev_iters,
1053  const size_t level_idx);
1054  void codegenJoinLoops(const std::vector<JoinLoop>& join_loops,
1055  const RelAlgExecutionUnit& ra_exe_unit,
1056  GroupByAndAggregate& group_by_and_aggregate,
1057  llvm::Function* query_func,
1058  llvm::BasicBlock* entry_bb,
1060  const CompilationOptions& co,
1061  const ExecutionOptions& eo);
1062  bool compileBody(const RelAlgExecutionUnit& ra_exe_unit,
1063  GroupByAndAggregate& group_by_and_aggregate,
1065  const CompilationOptions& co,
1066  const GpuSharedMemoryContext& gpu_smem_context = {});
1067 
1068  void createErrorCheckControlFlow(llvm::Function* query_func,
1069  bool run_with_dynamic_watchdog,
1070  bool run_with_allowing_runtime_interrupt,
1071  const std::vector<JoinLoop>& join_loops,
1072  ExecutorDeviceType device_type,
1073  const std::vector<InputTableInfo>& input_table_infos);
1074 
1075  void insertErrorCodeChecker(llvm::Function* query_func,
1076  bool hoist_literals,
1077  bool allow_runtime_query_interrupt);
1078 
1079  void preloadFragOffsets(const std::vector<InputDescriptor>& input_descs,
1080  const std::vector<InputTableInfo>& query_infos);
1081 
1083  std::shared_ptr<HashJoin> hash_table;
1084  std::string fail_reason;
1085  };
1086 
1088  const std::shared_ptr<Analyzer::BinOper>& qual_bin_oper,
1089  const std::vector<InputTableInfo>& query_infos,
1090  const MemoryLevel memory_level,
1091  const JoinType join_type,
1092  const HashType preferred_hash_type,
1093  ColumnCacheMap& column_cache,
1094  const HashTableBuildDagMap& hashtable_build_dag_map,
1095  const RegisteredQueryHint& query_hint,
1096  const TableIdToNodeMap& table_id_to_node_map);
1097  void nukeOldState(const bool allow_lazy_fetch,
1098  const std::vector<InputTableInfo>& query_infos,
1099  const PlanState::DeletedColumnsMap& deleted_cols_map,
1100  const RelAlgExecutionUnit* ra_exe_unit);
1101 
1102  std::shared_ptr<CompilationContext> optimizeAndCodegenCPU(
1103  llvm::Function*,
1104  llvm::Function*,
1105  const std::unordered_set<llvm::Function*>&,
1106  const CompilationOptions&);
1107  std::shared_ptr<CompilationContext> optimizeAndCodegenGPU(
1108  llvm::Function*,
1109  llvm::Function*,
1110  std::unordered_set<llvm::Function*>&,
1111  const bool no_inline,
1112  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
1113  const bool is_gpu_smem_used,
1114  const CompilationOptions&);
1115  std::string generatePTX(const std::string&) const;
1116  void initializeNVPTXBackend() const;
1117 
1118  int64_t deviceCycles(int milliseconds) const;
1119 
1121  llvm::Value* translated_value;
1122  llvm::Value* original_value;
1123  };
1124 
1126  const size_t col_width,
1127  const CompilationOptions&,
1128  const bool translate_null_val,
1129  const int64_t translated_null_val,
1130  DiamondCodegen&,
1131  std::stack<llvm::BasicBlock*>&,
1132  const bool thread_mem_shared);
1133 
1134  llvm::Value* castToFP(llvm::Value*,
1135  SQLTypeInfo const& from_ti,
1136  SQLTypeInfo const& to_ti);
1137  llvm::Value* castToIntPtrTyIn(llvm::Value* val, const size_t bit_width);
1138 
1139  std::tuple<RelAlgExecutionUnit, PlanState::DeletedColumnsMap> addDeletedColumn(
1140  const RelAlgExecutionUnit& ra_exe_unit,
1141  const CompilationOptions& co);
1142 
1143  bool isFragmentFullyDeleted(const InputDescriptor& table_desc,
1144  const Fragmenter_Namespace::FragmentInfo& fragment);
1145 
1147  const Analyzer::BinOper* comp_expr,
1148  const Analyzer::ColumnVar* lhs_col,
1149  const Fragmenter_Namespace::FragmentInfo& fragment,
1150  const Analyzer::Constant* rhs_const) const;
1151 
1152  std::pair<bool, int64_t> skipFragment(
1153  const InputDescriptor& table_desc,
1154  const Fragmenter_Namespace::FragmentInfo& frag_info,
1155  const std::list<std::shared_ptr<Analyzer::Expr>>& simple_quals,
1156  const std::vector<uint64_t>& frag_offsets,
1157  const size_t frag_idx);
1158 
1159  std::pair<bool, int64_t> skipFragmentInnerJoins(
1160  const InputDescriptor& table_desc,
1161  const RelAlgExecutionUnit& ra_exe_unit,
1162  const Fragmenter_Namespace::FragmentInfo& fragment,
1163  const std::vector<uint64_t>& frag_offsets,
1164  const size_t frag_idx);
1165 
1167  const std::unordered_set<PhysicalInput>& phys_inputs);
1169  const std::unordered_set<PhysicalInput>& phys_inputs);
1171  const std::unordered_set<shared::TableKey>& phys_table_keys);
1172 
1173  public:
1174  void setupCaching(const std::unordered_set<PhysicalInput>& phys_inputs,
1175  const std::unordered_set<shared::TableKey>& phys_table_keys);
1176  void setColRangeCache(const AggregatedColRange& aggregated_col_range) {
1177  agg_col_range_cache_ = aggregated_col_range;
1178  }
1183  const QuerySessionId& candidate_query_session,
1185  bool checkCurrentQuerySession(const std::string& candidate_query_session,
1189  bool addToQuerySessionList(const QuerySessionId& query_session,
1190  const std::string& query_str,
1191  const std::string& submitted,
1192  const size_t executor_id,
1193  const QuerySessionStatus::QueryStatus query_status,
1196  const QuerySessionId& query_session,
1197  const std::string& submitted_time_str,
1200  const QuerySessionId& query_session,
1203  const std::string& query_session,
1206  const QuerySessionId& query_session,
1209  const QuerySessionId& query_session,
1210  const std::string& submitted_time_str,
1211  const QuerySessionStatus::QueryStatus updated_query_status,
1214  const QuerySessionId& query_session,
1215  const std::string& submitted_time_str,
1216  const size_t executor_id,
1218  std::vector<QuerySessionStatus> getQuerySessionInfo(
1219  const QuerySessionId& query_session,
1221 
1224  const QuerySessionId& query_session_id,
1225  const std::string& query_str,
1226  const std::string& query_submitted_time);
1227  void checkPendingQueryStatus(const QuerySessionId& query_session);
1228  void clearQuerySessionStatus(const QuerySessionId& query_session,
1229  const std::string& submitted_time_str);
1230  void updateQuerySessionStatus(const QuerySessionId& query_session,
1231  const std::string& submitted_time_str,
1232  const QuerySessionStatus::QueryStatus new_query_status);
1233  void enrollQuerySession(const QuerySessionId& query_session,
1234  const std::string& query_str,
1235  const std::string& submitted_time_str,
1236  const size_t executor_id,
1237  const QuerySessionStatus::QueryStatus query_session_status);
1238  size_t getNumCurentSessionsEnrolled() const;
1239  // get a set of executor ids that a given session has fired regardless of
1240  // each executor's status: pending or running
1241  const std::vector<size_t> getExecutorIdsRunningQuery(
1242  const QuerySessionId& interrupt_session) const;
1243  // check whether the current session that this executor manages is interrupted
1244  // while performing non-kernel time task
1245  bool checkNonKernelTimeInterrupted() const;
1246  void registerExtractedQueryPlanDag(const QueryPlanDAG& query_plan_dag);
1248 
1249  // true when we have matched cardinality, and false otherwise
1250  using CachedCardinality = std::pair<bool, size_t>;
1251  void addToCardinalityCache(const std::string& cache_key, const size_t cache_value);
1252  CachedCardinality getCachedCardinality(const std::string& cache_key);
1253 
1257 
1258  CgenState* getCgenStatePtr() const { return cgen_state_.get(); }
1259  PlanState* getPlanStatePtr() const { return plan_state_.get(); }
1260 
1261  llvm::LLVMContext& getContext() { return *context_.get(); }
1262  void update_extension_modules(bool update_runtime_modules_only = false);
1263 
1264  static void update_after_registration(bool update_runtime_modules_only = false) {
1265  for (auto executor_item : Executor::executors_) {
1266  executor_item.second->update_extension_modules(update_runtime_modules_only);
1267  }
1268  }
1269 
1270  static size_t getBaselineThreshold(bool for_count_distinct,
1271  ExecutorDeviceType device_type) {
1272  return for_count_distinct ? (device_type == ExecutorDeviceType::GPU
1276  }
1277 
1278  private:
1279  std::vector<int8_t> serializeLiterals(
1280  const std::unordered_map<int, CgenState::LiteralValues>& literals,
1281  const int device_id);
1282 
// Rounds `off_in` up to the next multiple of `alignment`; a value that is
// already a multiple is returned unchanged.
// `alignment` must be non-zero (it is used as a modulo divisor).
static size_t align(const size_t off_in, const size_t alignment) {
  const size_t remainder = off_in % alignment;
  if (remainder == 0) {
    return off_in;
  }
  // Add the padding needed to reach the next multiple of `alignment`.
  return off_in + (alignment - remainder);
}
1290 
1292  std::unique_ptr<llvm::LLVMContext> context_;
1293 
1294  public:
1295  // CgenStateManager uses RAII pattern to ensure that recursive code
1296  // generation (e.g. as in multi-step multi-subqueries) uses a new
1297  // CgenState instance for each recursion depth while restoring the
1298  // old CgenState instances when returning from recursion.
1300  public:
1301  CgenStateManager(Executor& executor);
1302  CgenStateManager(Executor& executor,
1303  const bool allow_lazy_fetch,
1304  const std::vector<InputTableInfo>& query_infos,
1305  const PlanState::DeletedColumnsMap& deleted_cols_map,
1306  const RelAlgExecutionUnit* ra_exe_unit);
1308 
1309  private:
1311  std::chrono::steady_clock::time_point lock_queue_clock_;
1312  std::lock_guard<std::mutex> lock_;
1313  std::unique_ptr<CgenState> cgen_state_;
1314  };
1315 
1316  private:
1317  std::unique_ptr<CgenState> cgen_state_;
1318 
1319  const std::unique_ptr<llvm::Module>& get_extension_module(ExtModuleKinds kind) const {
1320  auto it = extension_modules_.find(kind);
1321  if (it != extension_modules_.end()) {
1322  return it->second;
1323  }
1324  static const std::unique_ptr<llvm::Module> empty;
1325  return empty;
1326  }
1327 
1329  return extension_modules_.find(kind) != extension_modules_.end();
1330  }
1331 
1332  std::map<ExtModuleKinds, std::unique_ptr<llvm::Module>> extension_modules_;
1333 
1335  public:
1337  : cgen_state_(cgen_state), saved_fetch_cache(cgen_state_->fetch_cache_) {}
1339 
1340  private:
1342  std::unordered_map<size_t, std::vector<llvm::Value*>> saved_fetch_cache;
1343  };
1344 
1345  llvm::Value* spillDoubleElement(llvm::Value* elem_val, llvm::Type* elem_ty);
1346 
1347  std::unique_ptr<PlanState> plan_state_;
1348  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
1349 
1350  static const int max_gpu_count{16};
1352 
1353  static std::mutex gpu_active_modules_mutex_;
1356  // indicates whether this executor has been interrupted
1357  std::atomic<bool> interrupted_;
1358 
1359  mutable std::mutex str_dict_mutex_;
1360 
1361  mutable std::unique_ptr<llvm::TargetMachine> nvptx_target_machine_;
1362 
1363  static const size_t baseline_threshold{
1364  1000000}; // if a perfect hash needs more entries, use baseline
1365 
1366  unsigned block_size_x_;
1367  unsigned grid_size_x_;
1368  const size_t max_gpu_slab_size_;
1369  const std::string debug_dir_;
1370  const std::string debug_file_;
1371 
1375 
1378 
1379  // Singleton instance used for an execution unit which is a project with window
1380  // functions.
1381  std::unique_ptr<WindowProjectNodeContext> window_project_node_context_owned_;
1382  // The active window function.
1384 
1389  // a query session that this executor manages
1391  // a pair of <QuerySessionId, interrupted_flag>
1393  // a pair of <QuerySessionId, query_session_status>
1395  static std::map<int, std::shared_ptr<Executor>> executors_;
1396 
1397  // SQL queries take a shared lock, exclusive options (cache clear, memory clear) take a
1398  // write lock
1400 
1404  };
1406  ExecutorMutexHolder ret;
1408  // Only one unitary executor can run at a time
1410  } else {
1412  }
1413  return ret;
1414  }
1415 
1417 
1420  static std::unordered_map<std::string, size_t> cardinality_cache_;
1422 
1423  // a variable used for testing query plan DAG extractor when a query has a table
1424  // function
1426 
1427  public:
1428  static const int32_t ERR_DIV_BY_ZERO{1};
1429  static const int32_t ERR_OUT_OF_GPU_MEM{2};
1430  static const int32_t ERR_OUT_OF_SLOTS{3};
1431  static const int32_t ERR_UNSUPPORTED_SELF_JOIN{4};
1432  static const int32_t ERR_OUT_OF_RENDER_MEM{5};
1433  static const int32_t ERR_OUT_OF_CPU_MEM{6};
1434  static const int32_t ERR_OVERFLOW_OR_UNDERFLOW{7};
1435  static const int32_t ERR_OUT_OF_TIME{9};
1436  static const int32_t ERR_INTERRUPTED{10};
1437  static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED{11};
1438  static const int32_t ERR_TOO_MANY_LITERALS{12};
1439  static const int32_t ERR_STRING_CONST_IN_RESULTSET{13};
1441  static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES{15};
1442  static const int32_t ERR_GEOS{16};
1443  static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT{17};
1444 
1445  // Although compilation is Executor-local, an executor may trigger
1446  // threaded compilations (see executeWorkUnitPerFragment) that share
1447  // executor cgen_state and LLVM context, for instance.
1449 
1450  // Runtime extension function registration updates
1451  // extension_modules_ that needs to be kept blocked from codegen
1452  // until the update is complete.
1454  static std::mutex kernel_mutex_; // TODO: should this be executor-local mutex?
1455 
1457  friend class CodeGenerator;
1458  friend class ColumnFetcher;
1459  friend struct DiamondCodegen; // cgen_state_
1460  friend class ExecutionKernel;
1461  friend class KernelSubtask;
1462  friend class HashJoin; // cgen_state_
1464  friend class RangeJoinHashTable;
1465  friend class GroupByAndAggregate;
1471  friend class ResultSet;
1472  friend class InValuesBitmap;
1474  friend class LeafAggregator;
1475  friend class PerfectJoinHashTable;
1476  friend class QueryRewriter;
1478  friend class RelAlgExecutor;
1479  friend class TableOptimizer;
1483  friend struct TargetExprCodegen;
1485 };
1486 
1487 inline std::string get_null_check_suffix(const SQLTypeInfo& lhs_ti,
1488  const SQLTypeInfo& rhs_ti) {
1489  if (lhs_ti.get_notnull() && rhs_ti.get_notnull()) {
1490  return "";
1491  }
1492  std::string null_check_suffix{"_nullable"};
1493  if (lhs_ti.get_notnull()) {
1494  CHECK(!rhs_ti.get_notnull());
1495  null_check_suffix += "_rhs";
1496  } else if (rhs_ti.get_notnull()) {
1497  CHECK(!lhs_ti.get_notnull());
1498  null_check_suffix += "_lhs";
1499  }
1500  return null_check_suffix;
1501 }
1502 
1503 inline bool is_unnest(const Analyzer::Expr* expr) {
1504  return dynamic_cast<const Analyzer::UOper*>(expr) &&
1505  static_cast<const Analyzer::UOper*>(expr)->get_optype() == kUNNEST;
1506 }
1507 
1508 inline bool is_constructed_point(const Analyzer::Expr* expr) {
1509  auto uoper = dynamic_cast<const Analyzer::UOper*>(expr);
1510  auto oper = (uoper && uoper->get_optype() == kCAST) ? uoper->get_operand() : expr;
1511  auto arr = dynamic_cast<const Analyzer::ArrayExpr*>(oper);
1512  return (arr && arr->isLocalAlloc() && arr->get_type_info().is_fixlen_array());
1513 }
1514 
1515 size_t get_loop_join_size(const std::vector<InputTableInfo>& query_infos,
1516  const RelAlgExecutionUnit& ra_exe_unit);
1517 
1518 std::unordered_set<int> get_available_gpus(const Catalog_Namespace::Catalog& cat);
1519 
1520 size_t get_context_count(const ExecutorDeviceType device_type,
1521  const size_t cpu_count,
1522  const size_t gpu_count);
1523 
1524 extern "C" RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec,
1525  int8_t* buffer);
1526 
1528 
1529 inline std::string toString(const Executor::ExtModuleKinds& kind) {
1530  switch (kind) {
1532  return "template_module";
1534  return "rt_geos_module";
1536  return "rt_libdevice_module";
1538  return "udf_cpu_module";
1540  return "udf_gpu_module";
1542  return "rt_udf_cpu_module";
1544  return "rt_udf_gpu_module";
1545  }
1546  LOG(FATAL) << "Invalid LLVM module kind.";
1547  return "";
1548 }
1549 
1550 namespace foreign_storage {
1551 void populate_string_dictionary(int32_t table_id, int32_t col_id, int32_t db_id);
1552 }
1553 
1554 #endif // QUERYENGINE_EXECUTE_H
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
SQLTypeInfo getColumnType(const size_t col_idx) const
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:224
const std::string debug_dir_
Definition: Execute.h:1369
llvm::Value * translated_value
Definition: Execute.h:1121
void executeWorkUnitPerFragment(const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
Compiles and dispatches a work unit per fragment processing results with the per fragment callback...
Definition: Execute.cpp:1972
bool is_agg(const Analyzer::Expr *expr)
SqlWindowFrameBoundType
Definition: sqldefs.h:150
static void invalidateCachesByTable(size_t table_key)
AggregatedColRange computeColRangesCache(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:4349
void enableRuntimeQueryInterrupt(const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
Definition: Execute.cpp:4729
SQLAgg
Definition: sqldefs.h:73
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const QueryPlanDAG getLatestQueryPlanDagExtracted() const
Definition: Execute.cpp:4816
std::vector< std::unique_ptr< ExecutionKernel > > createKernels(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
Definition: Execute.cpp:2507
ExtModuleKinds
Definition: Execute.h:466
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
Definition: IRCodegen.cpp:1137
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1399
const std::unique_ptr< llvm::Module > & get_udf_module(bool is_gpu=false) const
Definition: Execute.h:483
static QuerySessionMap queries_session_map_
Definition: Execute.h:1394
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:754
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
Definition: Execute.h:1361
bool checkIsQuerySessionInterrupted(const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4709
bool has_libdevice_module() const
Definition: Execute.h:514
int64_t kernel_queue_time_ms_
Definition: Execute.h:1376
JoinType
Definition: sqldefs.h:165
size_t maxGpuSlabSize() const
Definition: Execute.cpp:3847
ExecutorMutexHolder acquireExecuteMutex()
Definition: Execute.h:1405
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1372
std::vector< size_t > getTableFragmentIndices(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< shared::TableKey, const TableFragments * > &selected_tables_fragments, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
Definition: Execute.cpp:2694
int64_t compilation_queue_time_ms_
Definition: Execute.h:1377
const std::string & getQuerySubmittedTime()
Definition: Execute.h:133
friend class ResultSet
Definition: Execute.h:1471
std::map< const ColumnDescriptor *, std::set< int32_t >> ColumnToFragmentsMap
Definition: Execute.h:314
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr(WindowFunctionContext *window_func_context) const
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
const std::unique_ptr< llvm::Module > & get_geos_module() const
Definition: Execute.h:493
std::string cat(Ts &&...args)
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, llvm::Value *current_row_pos_lv)
static void initialize_extension_module_sources()
Definition: Execute.cpp:266
void checkPendingQueryStatus(const QuerySessionId &query_session)
Definition: Execute.cpp:4490
const StringDictionaryProxy::IdMap * getJoinIntersectionStringProxyTranslationMap(const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
Definition: Execute.cpp:582
static void registerActiveModule(void *module, const int device_id)
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1436
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
Definition: ColumnIR.cpp:606
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
std::unordered_map< size_t, std::vector< llvm::Value * > > fetch_cache_
Definition: CgenState.h:378
std::vector< int8_t * > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:3709
std::unordered_map< shared::TableKey, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:43
FetchCacheAnchor(CgenState *cgen_state)
Definition: Execute.h:1336
heavyai::shared_lock< heavyai::shared_mutex > read_lock
const std::unique_ptr< llvm::Module > & get_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1319
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:668
StringDictionaryProxy * getLiteralDictionary() const override
Definition: Execute.h:334
std::atomic< bool > interrupted_
Definition: Execute.h:1357
static ResultSetRecyclerHolder resultset_recycler_holder_
Definition: Execute.h:1421
ExecutorDeviceType
Fragmenter_Namespace::RowDataProvider RowDataProvider
Definition: Execute.h:313
static const int max_gpu_count
Definition: Execute.h:1350
GroupColLLVMValue groupByColumnCodegen(Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
Definition: IRCodegen.cpp:1317
std::map< const QuerySessionId, std::map< std::string, QuerySessionStatus >> QuerySessionMap
Definition: Execute.h:154
size_t const getFragmentIndex() const
#define LOG(tag)
Definition: Logger.h:285
std::string QueryPlanDAG
bool isArchPascalOrLater(const ExecutorDeviceType dt) const
Definition: Execute.h:761
void AutoTrackBuffersInRuntimeIR()
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
bool is_fp() const
Definition: sqltypes.h:584
Cache for physical column ranges. Set by the aggregator on the leaves.
std::pair< QuerySessionId, std::string > CurrentQueryStatus
Definition: Execute.h:86
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1508
llvm::Value * codegenFrameBound(bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
heavyai::unique_lock< heavyai::shared_mutex > unique_lock
Definition: Execute.h:1403
std::function< llvm::BasicBlock *(llvm::BasicBlock *, llvm::BasicBlock *, const std::string &, llvm::Function *, CgenState *)> HoistedFiltersCallback
Definition: JoinLoop.h:61
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:734
static const size_t baseline_threshold
Definition: Execute.h:1363
void updateQuerySessionStatus(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
Definition: Execute.cpp:4530
const std::unique_ptr< llvm::Module > & get_rt_udf_module(bool is_gpu=false) const
Definition: Execute.h:487
std::unordered_set< int > get_available_gpus(const Data_Namespace::DataMgr *data_mgr)
Definition: Execute.cpp:1439
std::unordered_map< size_t, std::vector< llvm::Value * > > saved_fetch_cache
Definition: Execute.h:1342
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:3930
bool isArchPascalOrLater() const
Definition: CudaMgr.h:150
TableToFragmentIds fragments_with_deleted_rows
Definition: Execute.h:319
bool hasLazyFetchColumns(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:723
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:233
const ColumnDescriptor * get_metadata_for_column(const ::shared::ColumnKey &column_key)
llvm::Value * aggregateWindowStatePtr()
FetchResult fetchUnionChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
Definition: Execute.cpp:3102
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
static std::pair< int64_t, int32_t > reduceResults(const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
Definition: Execute.cpp:1054
Definition: sqldefs.h:48
Macros and functions for groupby buffer compaction.
const StringDictionaryProxy::IdMap * getStringProxyTranslationMap(const shared::StringDictKey &source_dict_key, const shared::StringDictKey &dest_dict_key, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
Definition: Execute.cpp:567
QuerySessionId current_query_session_
Definition: Execute.h:1390
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
Definition: Execute.h:201
heavyai::shared_mutex & getSessionLock()
Definition: Execute.cpp:4437
std::string getFramingFuncName(const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
const QuerySessionStatus::QueryStatus getQueryStatus()
Definition: Execute.h:134
static const int32_t ERR_GEOS
Definition: Execute.h:1442
const std::string query_str_
Definition: Execute.h:143
QuerySessionStatus(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time)
Definition: Execute.h:102
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1386
std::shared_ptr< ResultSet > ResultSetPtr
static void * gpu_active_modules_[max_gpu_count]
Definition: Execute.h:1355
heavyai::unique_lock< heavyai::shared_mutex > write_lock
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1317
static const int32_t ERR_TOO_MANY_LITERALS
Definition: Execute.h:1438
llvm::Value * original_value
Definition: Execute.h:1122
void enrollQuerySession(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
Definition: Execute.cpp:4546
ParseIRError(const std::string message)
Definition: Execute.h:301
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
Definition: Execute.cpp:3294
static uint32_t gpu_active_modules_device_mask_
Definition: Execute.h:1354
void launchKernels(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2641
TableUpdateMetadata executeUpdate(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:381
FragmentSkipStatus canSkipFragmentForFpQual(const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
Definition: Execute.cpp:4053
static void invalidateCaches()
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:1039
llvm::Value * castToIntPtrTyIn(llvm::Value *val, const size_t bit_width)
Definition: Execute.cpp:3883
void reset(bool discard_runtime_modules_only=false)
Definition: Execute.cpp:295
static std::mutex kernel_mutex_
Definition: Execute.h:1454
unsigned numBlocksPerMP() const
Definition: Execute.cpp:3816
StringDictionaryProxy * getStringDictionaryProxy(const shared::StringDictKey &dict_key, const bool with_generation) const
Definition: Execute.h:526
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1313
#define CHECK_GT(x, y)
Definition: Logger.h:305
Container for compilation results and assorted options for a single execution unit.
bool isCPUOnly() const
Definition: Execute.cpp:646
void resetGridSize()
Definition: Execute.cpp:3835
bool checkCurrentQuerySession(const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4446
void clearCaches(bool runtime_only=false)
std::unique_ptr< WindowProjectNodeContext > window_project_node_context_owned_
Definition: Execute.h:1381
void addTransientStringLiterals(const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
Definition: Execute.cpp:2132
std::vector< FragmentsPerTable > FragmentsList
bool is_time() const
Definition: sqltypes.h:586
bool needFetchAllFragments(const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
Definition: Execute.cpp:2874
const QuerySessionId query_session_
Definition: Execute.h:141
std::shared_ptr< HashJoin > hash_table
Definition: Execute.h:1083
std::string to_string(char const *&&v)
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4613
ResultSetPtr executeWorkUnit(size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1741
bool checkNonKernelTimeInterrupted() const
Definition: Execute.cpp:4797
static void clearMemory(const Data_Namespace::MemoryLevel memory_level)
Definition: Execute.cpp:497
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::string &submitted_time)
Definition: Execute.h:110
std::function< void(const UpdateLogForFragment &, TableUpdateMetadata &)> Callback
Definition: Execute.h:349
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4664
RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
std::unordered_map< shared::TableKey, const Analyzer::BinOper * > getInnerTabIdToJoinCond() const
Definition: Execute.cpp:2482
static const size_t high_scan_limit
Definition: Execute.h:603
const std::unique_ptr< llvm::Module > & get_libdevice_module() const
Definition: Execute.h:496
QueryMustRunOnCpu(const std::string &err)
Definition: Execute.h:296
static const int32_t ERR_STRING_CONST_IN_RESULTSET
Definition: Execute.h:1439
std::shared_lock< T > shared_lock
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
Definition: Execute.cpp:475
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:3744
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags(const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments)
Definition: Execute.cpp:2825
void setQueryStatus(const QuerySessionStatus::QueryStatus &status)
Definition: Execute.h:135
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY
Definition: Execute.h:1440
const ExecutorId executor_id_
Definition: Execute.h:1291
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4639
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED
Definition: Execute.h:1437
const ResultSetPtr & get_temporary_table(const TemporaryTables *temporary_tables, const int table_id)
Definition: Execute.h:225
int8_t warpSize() const
Definition: Execute.cpp:3799
std::map< QuerySessionId, bool > InterruptFlagMap
Definition: Execute.h:87
const size_t max_gpu_slab_size_
Definition: Execute.h:1368
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1412
ResultSetPtr collectAllDeviceResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
Definition: Execute.cpp:2317
const ColumnDescriptor * getPhysicalColumnDescriptor(const Analyzer::ColumnVar *, int) const
Definition: Execute.cpp:656
std::lock_guard< std::mutex > lock_
Definition: Execute.h:1312
static void unregisterActiveModule(const int device_id)
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:1428
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit(const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
TableIdToNodeMap table_id_to_node_map_
Definition: Execute.h:1374
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
std::vector< llvm::Value * > LLVMValueVector
Definition: Execute.h:359
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:1453
std::vector< TargetValue > getEntryAt(const size_t index) const override
int get_logical_size() const
Definition: sqltypes.h:403
decltype(FragmentInfoType::fragmentId) const getFragmentId() const
Definition: Execute.h:343
int64_t deviceCycles(int milliseconds) const
Definition: Execute.cpp:3851
std::string generatePTX(const std::string &) const
std::vector< llvm::Value * > prepareRangeModeFuncArgs(bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
std::mutex str_dict_mutex_
Definition: Execute.h:1359
bool is_integer() const
Definition: sqltypes.h:582
friend class PendingExecutionClosure
Definition: Execute.h:1477
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
Definition: Execute.h:220
Fragmenter_Namespace::TableInfo getTableInfo(const shared::TableKey &table_key) const
Definition: Execute.cpp:676
static const int32_t ERR_OUT_OF_RENDER_MEM
Definition: Execute.h:1432
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1306
const SQLTypeInfo getFirstOrderColTypeInfo(WindowFunctionContext *window_func_context) const
const std::string debug_file_
Definition: Execute.h:1370
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
Definition: Execute.cpp:2432
void populate_string_dictionary(int32_t table_id, int32_t col_id, int32_t db_id)
Definition: Execute.cpp:205
CachedCardinality getCachedCardinality(const std::string &cache_key)
Definition: Execute.cpp:4753
decltype(FragmentInfoType::physicalTableId) const getPhysicalTableId() const
Definition: Execute.h:340
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1348
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:86
void setColRangeCache(const AggregatedColRange &aggregated_col_range)
Definition: Execute.h:1176
bool containsLeftDeepOuterJoin() const
Definition: Execute.h:562
static QueryPlanDAG latest_query_plan_extracted_
Definition: Execute.h:1425
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const shared::TableKey &outer_table_key, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
Definition: Execute.cpp:3519
size_t getNumCurentSessionsEnrolled() const
Definition: Execute.cpp:4570
bool has_rt_module() const
Definition: Execute.h:500
std::shared_ptr< CompilationContext > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
const ColumnDescriptor * get_column_descriptor(const shared::ColumnKey &column_key)
Definition: Execute.h:192
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:1434
bool is_timeinterval() const
Definition: sqltypes.h:591
static std::unordered_map< std::string, size_t > cardinality_cache_
Definition: Execute.h:1420
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1392
FragmentInfoType const & getFragmentInfo() const
std::unique_lock< T > unique_lock
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1347
void insertErrorCodeChecker(llvm::Function *query_func, bool hoist_literals, bool allow_runtime_query_interrupt)
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1435
void initializeNVPTXBackend() const
std::map< int32_t, std::set< int32_t >> TableToFragmentIds
Definition: Execute.h:315
const std::string submitted_time_
Definition: Execute.h:144
ResultSetPtr executeTableFunction(const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo)
Compiles and dispatches a table function; that is, a function that takes as input one or more columns...
Definition: Execute.cpp:2052
llvm::Value * castToFP(llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
Definition: Execute.cpp:3856
size_t fragment_index_
Definition: Execute.h:355
std::pair< bool, size_t > CachedCardinality
Definition: Execute.h:1250
void setupCaching(const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< shared::TableKey > &phys_table_keys)
Definition: Execute.cpp:4415
const ColumnDescriptor * getColumnDescriptor(const Analyzer::ColumnVar *) const
Definition: Execute.cpp:651
static const int32_t ERR_UNSUPPORTED_SELF_JOIN
Definition: Execute.h:1431
std::string toString(const ExecutorDeviceType &device_type)
bool checkIsQuerySessionEnrolled(const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4720
specifies the content in-memory of a row in the column metadata table
bool is_boolean() const
Definition: sqltypes.h:587
std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const override
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
Definition: Execute.h:1441
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:1395
unsigned grid_size_x_
Definition: Execute.h:1367
QuerySessionStatus::QueryStatus getQuerySessionStatus(const QuerySessionId &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4456
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:1429
const TemporaryTables * getTemporaryTables()
Definition: Execute.h:521
std::string get_null_check_suffix(const SQLTypeInfo &lhs_ti, const SQLTypeInfo &rhs_ti)
Definition: Execute.h:1487
const std::unique_ptr< llvm::Module > & get_rt_module() const
Definition: Execute.h:480
static const ExecutorId INVALID_EXECUTOR_ID
Definition: Execute.h:374
#define RUNTIME_EXPORT
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:244
std::string dumpCache() const
Definition: Execute.cpp:4849
ColumnToFragmentsMap columns_for_metadata_update
Definition: Execute.h:318
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
const std::vector< size_t > getExecutorIdsRunningQuery(const QuerySessionId &interrupt_session) const
Definition: Execute.cpp:4781
#define CHECK_LT(x, y)
Definition: Logger.h:303
ResultSetPtr resultsUnion(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1280
void registerExtractedQueryPlanDag(const QueryPlanDAG &query_plan_dag)
Definition: Execute.cpp:4810
std::shared_ptr< ResultSet > rs_
Definition: Execute.h:356
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:3765
QuerySessionId & getCurrentQuerySession(heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4441
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3220
llvm::Value * codegenLoadCurrentValueFromColBuf(WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
static void addUdfIrToModule(const std::string &udf_ir_filename, const bool is_cuda_ir)
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
std::unique_ptr< QueryCompilationDescriptor > QueryCompilationDescriptorOwned
Definition: Execute.h:82
size_t ExecutorId
Definition: Execute.h:372
void setGridSize(unsigned grid_size)
Definition: Execute.cpp:3831
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:389
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:960
const std::string getOrderKeyTypeName(WindowFunctionContext *window_func_context) const
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1419
static void update_after_registration(bool update_runtime_modules_only=false)
Definition: Execute.h:1264
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:777
std::shared_ptr< CompilationContext > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1385
size_t getNumBytesForFetchedRow(const std::set< shared::TableKey > &table_keys_to_fetch) const
Definition: Execute.cpp:690
void setBlockSize(unsigned block_size)
Definition: Execute.cpp:3839
const Expr * get_operand() const
Definition: Analyzer.h:384
std::chrono::steady_clock::time_point lock_queue_clock_
Definition: Execute.h:1311
llvm::Value * codegenConditionalAggregateCondValSelector(llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
std::pair< bool, int64_t > skipFragment(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:4113
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange(const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
unsigned gridSize() const
Definition: Execute.cpp:3807
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
llvm::Value * spillDoubleElement(llvm::Value *elem_val, llvm::Type *elem_ty)
TableGenerations computeTableGenerations(const std::unordered_set< shared::TableKey > &phys_table_keys)
Definition: Execute.cpp:4403
friend class KernelSubtask
Definition: Execute.h:1461
PlanState * getPlanStatePtr() const
Definition: Execute.h:1259
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val)
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:476
StringDictionaryGenerations computeStringDictionaryGenerations(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:4377
unsigned block_size_x_
Definition: Execute.h:1366
bool has_udf_module(bool is_gpu=false) const
Definition: Execute.h:503
static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT
Definition: Execute.h:1443
Data_Namespace::DataMgr * getDataMgr() const
Definition: Execute.h:571
bool needLinearizeAllFragments(const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:2893
void setExecutorId(const size_t executor_id)
Definition: Execute.h:138
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1292
CgenState * getCgenStatePtr() const
Definition: Execute.h:1258
FragmentInfoType const & fragment_info_
Definition: Execute.h:354
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:3724
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1388
const std::string getQueryStr()
Definition: Execute.h:131
std::pair< bool, int64_t > skipFragmentInnerJoins(const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:4316
void buildSelectedFragsMapping(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3234
llvm::Value * codegenAggregateWindowState()
TableGenerations table_generations_
Definition: Execute.h:1387
llvm::Value * codegenWindowNavigationFunctionOnFrame(const CompilationOptions &co)
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:779
void resetInterrupt()
const size_t getExecutorId()
Definition: Execute.h:132
size_t executor_id_
Definition: Execute.h:142
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
void buildSelectedFragsMappingForUnion(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3265
static void registerExtensionFunctions(F register_extension_functions)
Definition: Execute.h:418
std::string QuerySessionId
Definition: Execute.h:85
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1119
ResultSetPtr reduceMultiDeviceResultSets(std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1352
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
void addToCardinalityCache(const std::string &cache_key, const size_t cache_value)
Definition: Execute.cpp:4744
#define CHECK(condition)
Definition: Logger.h:291
QueryPlanDagCache & getQueryPlanDagCache()
Definition: Execute.cpp:4429
bool has_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1328
static const int32_t ERR_OUT_OF_SLOTS
Definition: Execute.h:1430
void resetBlockSize()
Definition: Execute.cpp:3843
heavyai::shared_lock< heavyai::shared_mutex > shared_lock
Definition: Execute.h:1402
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
Definition: Execute.h:388
CgenStateManager(Executor &executor)
Definition: Execute.cpp:406
std::mutex compilation_mutex_
Definition: Execute.h:1448
void interrupt(const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
heavyai::shared_mutex & getDataRecyclerLock()
Definition: Execute.cpp:4425
bool has_rt_udf_module(bool is_gpu=false) const
Definition: Execute.h:507
std::vector< llvm::Value * > inlineHoistedLiterals()
static size_t getBaselineThreshold(bool for_count_distinct, ExecutorDeviceType device_type)
Definition: Execute.h:1270
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Definition: Execute.cpp:2184
void invalidateRunningQuerySession(heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4468
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:177
ExpressionRange getColRange(const PhysicalInput &) const
Definition: Execute.cpp:686
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:209
CurrentQueryStatus attachExecutorToQuerySession(const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
Definition: Execute.cpp:4473
bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2736
void redeclareFilterFunction()
Definition: IRCodegen.cpp:1020
SQLTypeInfo columnType
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::string &submitted_time, const QuerySessionStatus::QueryStatus &query_status)
Definition: Execute.h:119
UpdateLogForFragment(FragmentInfoType const &fragment_info, size_t const, const std::shared_ptr< ResultSet > &rs)
bool is_unnest(const Analyzer::Expr *expr)
Definition: Execute.h:1503
bool is_string() const
Definition: sqltypes.h:580
const TableGeneration & getTableGeneration(const shared::TableKey &table_key) const
Definition: Execute.cpp:681
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:544
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:901
auto getResultSet() const
Definition: Execute.h:351
unsigned blockSize() const
Definition: Execute.cpp:3821
string name
Definition: setup.in.py:72
std::shared_timed_mutex shared_mutex
static std::mutex register_runtime_extension_functions_mutex_
Definition: Execute.h:1453
size_t getOrderKeySize(WindowFunctionContext *window_func_context) const
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
Definition: IRCodegen.cpp:792
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:388
ExecutorId getExecutorId() const
Definition: Execute.h:1179
static size_t align(const size_t off_in, const size_t alignment)
Definition: Execute.h:1283
static heavyai::shared_mutex executors_cache_mutex_
Definition: Execute.h:1416
std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptorOwned
Definition: Execute.h:84
size_t const getRowCount() const override
const QuerySessionId getQuerySession()
Definition: Execute.h:130
void clearQuerySessionStatus(const QuerySessionId &query_session, const std::string &submitted_time_str)
Definition: Execute.cpp:4516
static const int32_t ERR_OUT_OF_CPU_MEM
Definition: Execute.h:1433
QuerySessionStatus::QueryStatus query_status_
Definition: Execute.h:151
bool is_decimal() const
Definition: sqltypes.h:583
void setQuerySessionAsInterrupted(const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4698
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:1047
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1806
std::vector< QuerySessionStatus > getQuerySessionInfo(const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4763
Descriptor for the fragments required for an execution kernel.
WindowPartitionBufferPtrs codegenLoadPartitionBuffers(WindowFunctionContext *window_func_context, llvm::Value *partition_index_lv) const
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
Fragmenter_Namespace::FragmentInfo FragmentInfoType
Definition: Execute.h:324
static size_t getArenaBlockSize()
Definition: Execute.cpp:523
const StringDictionaryProxy::TranslationMap< Datum > * getStringProxyNumericTranslationMap(const shared::StringDictKey &source_dict_key, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
Definition: Execute.cpp:601
std::mutex gpu_exec_mutex_[max_gpu_count]
Definition: Execute.h:1351
HashType
Definition: HashTable.h:19
llvm::LLVMContext & getContext()
Definition: Execute.h:1261
llvm::Value * codegenWindowFunction(const size_t target_index, const CompilationOptions &co)
std::vector< int > getTableChunkKey(const int getCurrentDBId) const
bool addToQuerySessionList(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:4575
SQLOps get_optype() const
Definition: Analyzer.h:383
static QueryPlanDagCache query_plan_dag_cache_
Definition: Execute.h:1418
WindowFunctionContext * active_window_function_
Definition: Execute.h:1383
static std::mutex gpu_active_modules_mutex_
Definition: Execute.h:1353
static void nukeCacheOfExecutors()
Definition: Execute.h:453
void clearMetaInfoCache()
Definition: Execute.cpp:771
FragmentSkipStatus
Definition: Execute.h:163
size_t const getEntryCount() const override
llvm::BasicBlock * codegenWindowResetStateControlFlow()
const TemporaryTables * temporary_tables_
Definition: Execute.h:1373
CompilationRetryNewScanLimit(const size_t new_scan_limit)
Definition: Execute.h:255
std::vector< llvm::Value * > prepareRowModeFuncArgs(bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const
WatchdogException(const std::string &cause)
Definition: Execute.h:160
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:373
bool has_geos_module() const
Definition: Execute.h:511
void update_extension_modules(bool update_runtime_modules_only=false)
Definition: Execute.cpp:318
ResultSetRecyclerHolder & getRecultSetRecyclerHolder()
Definition: Execute.cpp:4433
FetchResult fetchChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
Definition: Execute.cpp:2916
bool isArchMaxwell(const ExecutorDeviceType dt) const
size_t get_loop_join_size(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1561
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
Definition: Execute.cpp:2128
std::map< ExtModuleKinds, std::unique_ptr< llvm::Module > > extension_modules_
Definition: Execute.h:1332
bool isFragmentFullyDeleted(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &fragment)
Definition: Execute.cpp:4016