OmniSciDB  0b528656ed
Execute.h
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_EXECUTE_H
18 #define QUERYENGINE_EXECUTE_H
19 
20 #include "AggregatedColRange.h"
21 #include "BufferCompaction.h"
22 #include "CartesianProduct.h"
23 #include "CgenState.h"
24 #include "CodeCache.h"
25 #include "DateTimeUtils.h"
27 #include "GpuSharedMemoryContext.h"
28 #include "GroupByAndAggregate.h"
29 #include "JoinHashTable.h"
31 #include "NvidiaKernel.h"
32 #include "PlanState.h"
33 #include "RelAlgExecutionUnit.h"
34 #include "RelAlgTranslator.h"
36 #include "TableGenerations.h"
37 #include "TargetMetaInfo.h"
38 #include "WindowContext.h"
39 
40 #include "../Shared/Logger.h"
41 #include "../Shared/SystemParameters.h"
42 #include "../Shared/mapd_shared_mutex.h"
43 #include "../Shared/measure.h"
44 #include "../Shared/thread_count.h"
45 #include "../StringDictionary/LruCache.hpp"
46 #include "../StringDictionary/StringDictionary.h"
47 #include "../StringDictionary/StringDictionaryProxy.h"
48 #include "DataMgr/Chunk/Chunk.h"
50 
51 #include <llvm/IR/Function.h>
52 #include <llvm/IR/Value.h>
53 #include <llvm/Linker/Linker.h>
54 #include <llvm/Transforms/Utils/ValueMapper.h>
55 #include <rapidjson/document.h>
56 
57 #include <algorithm>
58 #include <atomic>
59 #include <condition_variable>
60 #include <cstddef>
61 #include <cstdlib>
62 #include <deque>
63 #include <functional>
64 #include <limits>
65 #include <map>
66 #include <mutex>
67 #include <stack>
68 #include <unordered_map>
69 #include <unordered_set>
70 
71 class QueryCompilationDescriptor;
72 using QueryCompilationDescriptorOwned = std::unique_ptr<QueryCompilationDescriptor>;
73 class QueryMemoryDescriptor;
74 using QueryMemoryDescriptorOwned = std::unique_ptr<QueryMemoryDescriptor>;
75 using InterruptFlagMap = std::map<std::string, bool>;
76 
77 extern void read_udf_gpu_module(const std::string& udf_ir_filename);
78 extern void read_udf_cpu_module(const std::string& udf_ir_filename);
79 extern bool is_udf_module_present(bool cpu_only = false);
80 extern void read_rt_udf_gpu_module(const std::string& udf_ir);
81 extern void read_rt_udf_cpu_module(const std::string& udf_ir);
82 extern bool is_rt_udf_module_present(bool cpu_only = false);
83 
84 class ColumnFetcher;
85 class ExecutionResult;
86 
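// Thrown when a query is aborted by the watchdog (e.g. a watchdog limit or timeout is hit).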
87 class WatchdogException : public std::runtime_error {
88  public:
89  WatchdogException(const std::string& cause) : std::runtime_error(cause) {}
90 };
91 
92 class Executor;
93 
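// Looks up an argument of `func` by name; the argument must exist (CHECK fails otherwise).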
94 inline llvm::Value* get_arg_by_name(llvm::Function* func, const std::string& name) {
95  for (auto& arg : func->args()) {
96  if (arg.getName() == name) {
97  return &arg;
98  }
99  }
100  CHECK(false);
101  return nullptr;
102 }
103 
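// Maps a power-of-two byte width to its log2, e.g. log2_bytes(8) == 3; any other width aborts.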
104 inline uint32_t log2_bytes(const uint32_t bytes) {
105  switch (bytes) {
106  case 1:
107  return 0;
108  case 2:
109  return 1;
110  case 4:
111  return 2;
112  case 8:
113  return 3;
114  default:
115  abort();
116  }
117 }
118 
119 inline const ColumnDescriptor* get_column_descriptor(
120  const int col_id,
121  const int table_id,
122  const Catalog_Namespace::Catalog& cat) {
123  CHECK_GT(table_id, 0);
124  const auto col_desc = cat.getMetadataForColumn(table_id, col_id);
125  CHECK(col_desc);
126  return col_desc;
127 }
128 
129 inline const Analyzer::Expr* extract_cast_arg(const Analyzer::Expr* expr) {
130  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr);
131  if (!cast_expr || cast_expr->get_optype() != kCAST) {
132  return expr;
133  }
134  return cast_expr->get_operand();
135 }
136 
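// Returns the scalar type name used during code generation: "int<N>_t" for integral-like
// types (including dictionary-encoded strings) and "float"/"double" for floating point.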
137 inline std::string numeric_type_name(const SQLTypeInfo& ti) {
138  CHECK(ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
139  ti.is_fp() || (ti.is_string() && ti.get_compression() == kENCODING_DICT) ||
140  ti.is_timeinterval());
141  if (ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
142  ti.is_string() || ti.is_timeinterval()) {
143  return "int" + std::to_string(ti.get_logical_size() * 8) + "_t";
144  }
145  return ti.get_type() == kDOUBLE ? "double" : "float";
146 }
147 
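// Like get_column_descriptor(), but returns nullptr for temporary tables (negative table ids).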
148 inline const ColumnDescriptor* get_column_descriptor_maybe(
149  const int col_id,
150  const int table_id,
151  const Catalog_Namespace::Catalog& cat) {
152  CHECK(table_id);
153  return table_id > 0 ? get_column_descriptor(col_id, table_id, cat) : nullptr;
154 }
155 
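// Temporary (intermediate) tables are keyed by negative table ids; the entry must already exist.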
156 inline const ResultSetPtr& get_temporary_table(const TemporaryTables* temporary_tables,
157  const int table_id) {
158  CHECK_LT(table_id, 0);
159  const auto it = temporary_tables->find(table_id);
160  CHECK(it != temporary_tables->end());
161  return it->second;
162 }
163 
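// Resolves a column's type either from its catalog descriptor or, for intermediate results,
// from the temporary table's result set.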
164 inline const SQLTypeInfo get_column_type(const int col_id,
165  const int table_id,
166  const ColumnDescriptor* cd,
167  const TemporaryTables* temporary_tables) {
168  CHECK(cd || temporary_tables);
169  if (cd) {
170  CHECK_EQ(col_id, cd->columnId);
171  CHECK_EQ(table_id, cd->tableId);
172  return cd->columnType;
173  }
174  const auto& temp = get_temporary_table(temporary_tables, table_id);
175  return temp->getColType(col_id);
176 }
177 
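// Materializes a row-wise result in columnar form, widening each column to its logical type.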
178 template <typename PtrTy>
179 const ColumnarResults* rows_to_columnar_results(
180  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
181  const PtrTy& result,
182  const int number) {
183  std::vector<SQLTypeInfo> col_types;
184  for (size_t i = 0; i < result->colCount(); ++i) {
185  col_types.push_back(get_logical_type_info(result->getColType(i)));
186  }
187  return new ColumnarResults(row_set_mem_owner, *result, number, col_types);
188 }
189 
190 // TODO(alex): Adjust interfaces downstream and make this not needed.
191 inline std::vector<Analyzer::Expr*> get_exprs_not_owned(
192  const std::vector<std::shared_ptr<Analyzer::Expr>>& exprs) {
193  std::vector<Analyzer::Expr*> exprs_not_owned;
194  for (const auto& expr : exprs) {
195  exprs_not_owned.push_back(expr.get());
196  }
197  return exprs_not_owned;
198 }
199 
200 inline const ColumnarResults* columnarize_result(
201  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
202  const ResultSetPtr& result,
203  const int frag_id) {
205  CHECK_EQ(0, frag_id);
206  return rows_to_columnar_results(row_set_mem_owner, result, result->colCount());
207 }
208 
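// Exceptions used to retry query compilation/execution with adjusted settings
// (no GPU lazy fetch, a new scan limit, no compaction), or to signal conditions such as
// too many literals or a query that must run on CPU.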
209 class CompilationRetryNoLazyFetch : public std::runtime_error {
210  public:
211  CompilationRetryNoLazyFetch()
212  : std::runtime_error("Retry query compilation with no GPU lazy fetch.") {}
213 };
214 
215 class CompilationRetryNewScanLimit : public std::runtime_error {
216  public:
217  CompilationRetryNewScanLimit(const size_t new_scan_limit)
218  : std::runtime_error("Retry query compilation with new scan limit.")
219  , new_scan_limit_(new_scan_limit) {}
220 
221  size_t new_scan_limit_;
222 };
223 
224 class TooManyLiterals : public std::runtime_error {
225  public:
226  TooManyLiterals() : std::runtime_error("Too many literals in the query") {}
227 };
228 
229 class CompilationRetryNoCompaction : public std::runtime_error {
230  public:
231  CompilationRetryNoCompaction()
232  : std::runtime_error("Retry query compilation with no compaction.") {}
233 };
234 
235 class QueryMustRunOnCpu : public std::runtime_error {
236  public:
237  QueryMustRunOnCpu() : std::runtime_error("Query must run in cpu mode.") {}
238 };
239 
240 class SringConstInResultSet : public std::runtime_error {
241  public:
242  SringConstInResultSet()
243  : std::runtime_error(
244  "NONE ENCODED String types are not supported as input result set.") {}
245 };
246 
247 class ExtensionFunction;
248 
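// Hash support for std::vector<int> and std::pair<int, int> keys used in unordered containers.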
249 namespace std {
250 template <>
251 struct hash<std::vector<int>> {
252  size_t operator()(const std::vector<int>& vec) const {
253  return vec.size() ^ boost::hash_range(vec.begin(), vec.end());
254  }
255 };
256 
257 template <>
258 struct hash<std::pair<int, int>> {
259  size_t operator()(const std::pair<int, int>& p) const {
260  return boost::hash<std::pair<int, int>>()(p);
261  }
262 };
263 
264 } // namespace std
265 
266 using RowDataProvider = Fragmenter_Namespace::RowDataProvider;
267 
268 class UpdateLogForFragment : public RowDataProvider {
269  public:
270  using FragmentInfoType = Fragmenter_Namespace::FragmentInfo;
271 
272  UpdateLogForFragment(FragmentInfoType const& fragment_info,
273  size_t const,
274  const std::shared_ptr<ResultSet>& rs);
275 
276  std::vector<TargetValue> getEntryAt(const size_t index) const override;
277  std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const override;
278 
279  size_t const getRowCount() const override;
280  StringDictionaryProxy* getLiteralDictionary() const override {
281  return rs_->getRowSetMemOwner()->getLiteralStringDictProxy();
282  }
283  size_t const getEntryCount() const override;
284  size_t const getFragmentIndex() const;
285  FragmentInfoType const& getFragmentInfo() const;
286  decltype(FragmentInfoType::physicalTableId) const getPhysicalTableId() const {
287  return fragment_info_.physicalTableId;
288  }
289  decltype(FragmentInfoType::fragmentId) const getFragmentId() const {
290  return fragment_info_.fragmentId;
291  }
292 
293  SQLTypeInfo getColumnType(const size_t col_idx) const;
294 
295  using Callback = std::function<void(const UpdateLogForFragment&)>;
296 
297  auto getResultSet() const { return rs_; }
298 
299  private:
300  FragmentInfoType const& fragment_info_;
301  size_t fragment_index_;
302  std::shared_ptr<ResultSet> rs_;
303 };
304 
305 using LLVMValueVector = std::vector<llvm::Value*>;
306 
308 
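// Per-kernel fetch output: column buffers plus per-fragment row counts and offsets.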
309 struct FetchResult {
310  std::vector<std::vector<const int8_t*>> col_buffers;
311  std::vector<std::vector<int64_t>> num_rows;
312  std::vector<std::vector<uint64_t>> frag_offsets;
313 };
314 
315 std::ostream& operator<<(std::ostream&, FetchResult const&);
316 
317 class Executor {
318  static_assert(sizeof(float) == 4 && sizeof(double) == 8,
319  "Host hardware not supported, unexpected size of float / double.");
320  static_assert(sizeof(time_t) == 8,
321  "Host hardware not supported, 64-bit time support is required.");
322 
323  public:
324  using ExecutorId = size_t;
325  static const ExecutorId UNITARY_EXECUTOR_ID = 0;
326 
327  Executor(const ExecutorId id,
328  const size_t block_size_x,
329  const size_t grid_size_x,
330  const size_t max_gpu_slab_size,
331  const std::string& debug_dir,
332  const std::string& debug_file);
333 
334  static std::shared_ptr<Executor> getExecutor(
335  const ExecutorId id,
336  const std::string& debug_dir = "",
337  const std::string& debug_file = "",
338  const SystemParameters system_parameters = SystemParameters());
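 // Executors are cached by id (see executors_ / nukeCacheOfExecutors()), so repeated
 // calls with the same ExecutorId return the same instance. Minimal usage sketch:
 //   auto executor = Executor::getExecutor(Executor::UNITARY_EXECUTOR_ID);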
339 
340  static void nukeCacheOfExecutors() {
341  mapd_unique_lock<mapd_shared_mutex> flush_lock(
342  execute_mutex_); // don't want native code to vanish while executing
343  mapd_unique_lock<mapd_shared_mutex> lock(executors_cache_mutex_);
344  (decltype(executors_){}).swap(executors_);
345  }
346 
347  static void clearMemory(const Data_Namespace::MemoryLevel memory_level);
348 
349  static size_t getArenaBlockSize();
350 
351  StringDictionaryProxy* getStringDictionaryProxy(
352  const int dictId,
353  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
354  const bool with_generation) const;
355 
356  bool isCPUOnly() const;
357 
358  bool isArchMaxwell(const ExecutorDeviceType dt) const;
359 
360  bool containsLeftDeepOuterJoin() const {
361  return cgen_state_->contains_left_deep_outer_join_;
362  }
363 
364  const ColumnDescriptor* getColumnDescriptor(const Analyzer::ColumnVar*) const;
365 
366  const ColumnDescriptor* getPhysicalColumnDescriptor(const Analyzer::ColumnVar*,
367  int) const;
368 
369  const Catalog_Namespace::Catalog* getCatalog() const;
370  void setCatalog(const Catalog_Namespace::Catalog* catalog);
371 
372  const std::shared_ptr<RowSetMemoryOwner> getRowSetMemoryOwner() const;
373 
374  const TemporaryTables* getTemporaryTables() const;
375 
376  Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const;
377 
378  const TableGeneration& getTableGeneration(const int table_id) const;
379 
380  ExpressionRange getColRange(const PhysicalInput&) const;
381 
382  size_t getNumBytesForFetchedRow(const std::set<int>& table_ids_to_fetch) const;
383 
384  std::vector<ColumnLazyFetchInfo> getColLazyFetchInfo(
385  const std::vector<Analyzer::Expr*>& target_exprs) const;
386 
387  void registerActiveModule(void* module, const int device_id) const;
388  void unregisterActiveModule(void* module, const int device_id) const;
389  void interrupt(const std::string& query_session = "",
390  const std::string& interrupt_session = "");
391  void resetInterrupt();
392 
393  // for testing only
394  void enableRuntimeQueryInterrupt(const unsigned interrupt_freq) const;
395 
396  static const size_t high_scan_limit{32000000};
397 
398  int8_t warpSize() const;
399  unsigned gridSize() const;
400  unsigned numBlocksPerMP() const;
401  unsigned blockSize() const;
402  size_t maxGpuSlabSize() const;
403 
404  private:
405  void clearMetaInfoCache();
406 
407  int deviceCount(const ExecutorDeviceType) const;
408  int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const;
409 
410  // Generate code for a window function target.
411  llvm::Value* codegenWindowFunction(const size_t target_index,
412  const CompilationOptions& co);
413 
414  // Generate code for an aggregate window function target.
415  llvm::Value* codegenWindowFunctionAggregate(const CompilationOptions& co);
416 
417  // The aggregate state requires a state reset when starting a new partition. Generate
418  // the new partition check and return the continuation basic block.
419  llvm::BasicBlock* codegenWindowResetStateControlFlow();
420 
421  // Generate code for initializing the state of a window aggregate.
422  void codegenWindowFunctionStateInit(llvm::Value* aggregate_state);
423 
424  // Generates the required calls for an aggregate window function and returns the final
425  // result.
426  llvm::Value* codegenWindowFunctionAggregateCalls(llvm::Value* aggregate_state,
427  const CompilationOptions& co);
428 
429  // The AVG window function requires some post-processing: the sum is divided by count
430  // and the result is stored back for the current row.
431  void codegenWindowAvgEpilogue(llvm::Value* crt_val,
432  llvm::Value* window_func_null_val,
433  llvm::Value* multiplicity_lv);
434 
435  // Generates code which loads the current aggregate value for the window context.
436  llvm::Value* codegenAggregateWindowState();
437 
438  llvm::Value* aggregateWindowStatePtr();
439 
440  struct CompilationResult {
441  std::shared_ptr<CompilationContext> generated_code;
442  std::unordered_map<int, CgenState::LiteralValues> literal_values;
444  std::string llvm_ir;
445  GpuSharedMemoryContext gpu_smem_context;
446  };
447 
448  bool isArchPascalOrLater(const ExecutorDeviceType dt) const {
449  if (dt == ExecutorDeviceType::GPU) {
450  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
451  LOG_IF(FATAL, cuda_mgr == nullptr)
452  << "No CudaMgr instantiated, unable to check device architecture";
453  return cuda_mgr->isArchPascalOrLater();
454  }
455  return false;
456  }
457 
458  bool needFetchAllFragments(const InputColDescriptor& col_desc,
459  const RelAlgExecutionUnit& ra_exe_unit,
460  const FragmentsList& selected_fragments) const;
461 
462  class ExecutionDispatch {
463  private:
464  Executor* executor_;
465  const RelAlgExecutionUnit& ra_exe_unit_;
466  const std::vector<InputTableInfo>& query_infos_;
467  const Catalog_Namespace::Catalog& cat_;
468  mutable std::vector<uint64_t> all_frag_row_offsets_;
469  mutable std::mutex all_frag_row_offsets_mutex_;
470  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
471  RenderInfo* render_info_;
472  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>> all_fragment_results_;
473  std::atomic_flag dynamic_watchdog_set_ = ATOMIC_FLAG_INIT;
474  static std::mutex reduce_mutex_;
475 
476  void runImpl(const ExecutorDeviceType chosen_device_type,
477  int chosen_device_id,
478  const ExecutionOptions& eo,
479  const ColumnFetcher& column_fetcher,
480  const QueryCompilationDescriptor& query_comp_desc,
481  const QueryMemoryDescriptor& query_mem_desc,
482  const FragmentsList& frag_list,
483  const ExecutorDispatchMode kernel_dispatch_mode,
484  const int64_t rowid_lookup_key);
485 
486  public:
487  ExecutionDispatch(Executor* executor,
488  const RelAlgExecutionUnit& ra_exe_unit,
489  const std::vector<InputTableInfo>& query_infos,
490  const Catalog_Namespace::Catalog& cat,
491  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
492  RenderInfo* render_info);
493 
494  ExecutionDispatch(const ExecutionDispatch&) = delete;
495 
496  ExecutionDispatch& operator=(const ExecutionDispatch&) = delete;
497 
498  ExecutionDispatch(ExecutionDispatch&&) = delete;
499 
500  ExecutionDispatch& operator=(ExecutionDispatch&&) = delete;
501 
502  std::tuple<QueryCompilationDescriptorOwned, QueryMemoryDescriptorOwned> compile(
503  const size_t max_groups_buffer_entry_guess,
504  const int8_t crt_min_byte_width,
505  const CompilationOptions& co,
506  const ExecutionOptions& eo,
507  const ColumnFetcher& column_fetcher,
508  const bool has_cardinality_estimation);
509 
510  void run(const ExecutorDeviceType chosen_device_type,
511  int chosen_device_id,
512  const ExecutionOptions& eo,
513  const ColumnFetcher& column_fetcher,
514  const QueryCompilationDescriptor& query_comp_desc,
515  const QueryMemoryDescriptor& query_mem_desc,
516  const FragmentsList& frag_ids,
517  const ExecutorDispatchMode kernel_dispatch_mode,
518  const int64_t rowid_lookup_key);
519 
520  const RelAlgExecutionUnit& getExecutionUnit() const;
521 
522  const std::vector<uint64_t>& getFragOffsets() const;
523 
524  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& getFragmentResults();
525 
527  };
528 
529  ResultSetPtr executeWorkUnit(size_t& max_groups_buffer_entry_guess,
530  const bool is_agg,
531  const std::vector<InputTableInfo>&,
532  const RelAlgExecutionUnit&,
533  const CompilationOptions&,
534  const ExecutionOptions& options,
535  const Catalog_Namespace::Catalog&,
536  std::shared_ptr<RowSetMemoryOwner>,
537  RenderInfo* render_info,
538  const bool has_cardinality_estimation,
539  ColumnCacheMap& column_cache);
540 
541  void executeUpdate(const RelAlgExecutionUnit& ra_exe_unit,
542  const std::vector<InputTableInfo>& table_infos,
543  const CompilationOptions& co,
544  const ExecutionOptions& eo,
545  const Catalog_Namespace::Catalog& cat,
546  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
547  const UpdateLogForFragment::Callback& cb,
548  const bool is_agg);
549 
550  using PerFragmentCallBack =
551  std::function<void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo&)>;
552 
558  void executeWorkUnitPerFragment(const RelAlgExecutionUnit& ra_exe_unit,
559  const InputTableInfo& table_info,
560  const CompilationOptions& co,
561  const ExecutionOptions& eo,
562  const Catalog_Namespace::Catalog& cat,
563  PerFragmentCallBack& cb);
564 
565  ResultSetPtr executeExplain(const QueryCompilationDescriptor&);
566 
572  ResultSetPtr executeTableFunction(const TableFunctionExecutionUnit exe_unit,
573  const std::vector<InputTableInfo>& table_infos,
574  const CompilationOptions& co,
575  const ExecutionOptions& eo,
576  const Catalog_Namespace::Catalog& cat);
577 
578  // TODO(alex): remove
579  ExecutorDeviceType getDeviceTypeForTargets(
580  const RelAlgExecutionUnit& ra_exe_unit,
581  const ExecutorDeviceType requested_device_type);
582 
583  ResultSetPtr collectAllDeviceResults(
584  ExecutionDispatch& execution_dispatch,
585  const std::vector<Analyzer::Expr*>& target_exprs,
586  const QueryMemoryDescriptor& query_mem_desc,
587  const ExecutorDeviceType device_type,
588  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
589 
590  ResultSetPtr collectAllDeviceShardedTopResults(
591  ExecutionDispatch& execution_dispatch) const;
592 
593  std::unordered_map<int, const Analyzer::BinOper*> getInnerTabIdToJoinCond() const;
594 
595  template <typename THREAD_POOL>
596  void dispatchFragments(
597  const std::function<void(const ExecutorDeviceType chosen_device_type,
598  int chosen_device_id,
599  const QueryCompilationDescriptor& query_comp_desc,
600  const QueryMemoryDescriptor& query_mem_desc,
601  const FragmentsList& frag_list,
602  const ExecutorDispatchMode kernel_dispatch_mode,
603  const int64_t rowid_lookup_key)> dispatch,
604  const ExecutionDispatch& execution_dispatch,
605  const std::vector<InputTableInfo>& table_infos,
606  const ExecutionOptions& eo,
607  const bool is_agg,
608  const bool allow_single_frag_table_opt,
609  const size_t context_count,
610  const QueryCompilationDescriptor& query_comp_desc,
611  const QueryMemoryDescriptor& query_mem_desc,
612  std::unordered_set<int>& available_gpus,
613  int& available_cpus);
614 
615  std::vector<size_t> getTableFragmentIndices(
616  const RelAlgExecutionUnit& ra_exe_unit,
617  const ExecutorDeviceType device_type,
618  const size_t table_idx,
619  const size_t outer_frag_idx,
620  std::map<int, const TableFragments*>& selected_tables_fragments,
621  const std::unordered_map<int, const Analyzer::BinOper*>&
622  inner_table_id_to_join_condition);
623 
624  bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo& outer_fragment_info,
625  const Fragmenter_Namespace::FragmentInfo& inner_fragment_info,
626  const int inner_table_id,
627  const std::unordered_map<int, const Analyzer::BinOper*>&
628  inner_table_id_to_join_condition,
629  const RelAlgExecutionUnit& ra_exe_unit,
630  const ExecutorDeviceType device_type);
631 
632  FetchResult fetchChunks(const ColumnFetcher&,
633  const RelAlgExecutionUnit& ra_exe_unit,
634  const int device_id,
636  const std::map<int, const TableFragments*>&,
637  const FragmentsList& selected_fragments,
639  std::list<ChunkIter>&,
640  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
641  DeviceAllocator* device_allocator);
642 
643  FetchResult fetchUnionChunks(const ColumnFetcher&,
644  const RelAlgExecutionUnit& ra_exe_unit,
645  const int device_id,
647  const std::map<int, const TableFragments*>&,
648  const FragmentsList& selected_fragments,
650  std::list<ChunkIter>&,
651  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
652  DeviceAllocator* device_allocator);
653 
654  std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
655  getRowCountAndOffsetForAllFrags(
656  const RelAlgExecutionUnit& ra_exe_unit,
657  const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
658  const std::vector<InputDescriptor>& input_descs,
659  const std::map<int, const TableFragments*>& all_tables_fragments);
660 
661  void buildSelectedFragsMapping(
662  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
663  std::vector<size_t>& local_col_to_frag_pos,
664  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
665  const FragmentsList& selected_fragments,
666  const RelAlgExecutionUnit& ra_exe_unit);
667 
668  void buildSelectedFragsMappingForUnion(
669  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
670  std::vector<size_t>& local_col_to_frag_pos,
671  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
672  const FragmentsList& selected_fragments,
673  const RelAlgExecutionUnit& ra_exe_unit);
674 
675  std::vector<size_t> getFragmentCount(const FragmentsList& selected_fragments,
676  const size_t scan_idx,
677  const RelAlgExecutionUnit& ra_exe_unit);
678 
679  int32_t executePlanWithGroupBy(const RelAlgExecutionUnit& ra_exe_unit,
680  const CompilationResult&,
681  const bool hoist_literals,
682  ResultSetPtr& results,
683  const ExecutorDeviceType device_type,
684  std::vector<std::vector<const int8_t*>>& col_buffers,
685  const std::vector<size_t> outer_tab_frag_ids,
686  QueryExecutionContext* query_exe_context,
687  const std::vector<std::vector<int64_t>>& num_rows,
688  const std::vector<std::vector<uint64_t>>& frag_offsets,
689  Data_Namespace::DataMgr* data_mgr,
690  const int device_id,
691  const int outer_table_id,
692  const int64_t limit,
693  const uint32_t start_rowid,
694  const uint32_t num_tables,
695  RenderInfo* render_info);
696  int32_t executePlanWithoutGroupBy(
697  const RelAlgExecutionUnit& ra_exe_unit,
698  const CompilationResult&,
699  const bool hoist_literals,
700  ResultSetPtr& results,
701  const std::vector<Analyzer::Expr*>& target_exprs,
702  const ExecutorDeviceType device_type,
703  std::vector<std::vector<const int8_t*>>& col_buffers,
704  QueryExecutionContext* query_exe_context,
705  const std::vector<std::vector<int64_t>>& num_rows,
706  const std::vector<std::vector<uint64_t>>& frag_offsets,
707  Data_Namespace::DataMgr* data_mgr,
708  const int device_id,
709  const uint32_t start_rowid,
710  const uint32_t num_tables,
711  RenderInfo* render_info);
712 
713  public: // Temporary, ask saman about this
714  static std::pair<int64_t, int32_t> reduceResults(const SQLAgg agg,
715  const SQLTypeInfo& ti,
716  const int64_t agg_init_val,
717  const int8_t out_byte_width,
718  const int64_t* out_vec,
719  const size_t out_vec_sz,
720  const bool is_group_by,
721  const bool float_argument_input);
722 
723  static void addCodeToCache(const CodeCacheKey&,
724  std::shared_ptr<CompilationContext>,
725  llvm::Module*,
726  CodeCache&);
727 
728  private:
729  ResultSetPtr resultsUnion(ExecutionDispatch& execution_dispatch);
730  std::vector<int64_t> getJoinHashTablePtrs(const ExecutorDeviceType device_type,
731  const int device_id);
732  ResultSetPtr reduceMultiDeviceResults(
733  const RelAlgExecutionUnit&,
734  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
735  std::shared_ptr<RowSetMemoryOwner>,
736  const QueryMemoryDescriptor&) const;
737  ResultSetPtr reduceMultiDeviceResultSets(
738  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
739  std::shared_ptr<RowSetMemoryOwner>,
740  const QueryMemoryDescriptor&) const;
741  ResultSetPtr reduceSpeculativeTopN(
742  const RelAlgExecutionUnit&,
743  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
744  std::shared_ptr<RowSetMemoryOwner>,
745  const QueryMemoryDescriptor&) const;
746 
747  ResultSetPtr executeWorkUnitImpl(size_t& max_groups_buffer_entry_guess,
748  const bool is_agg,
749  const bool allow_single_frag_table_opt,
750  const std::vector<InputTableInfo>&,
751  const RelAlgExecutionUnit&,
752  const CompilationOptions&,
753  const ExecutionOptions& options,
755  std::shared_ptr<RowSetMemoryOwner>,
756  RenderInfo* render_info,
757  const bool has_cardinality_estimation,
758  ColumnCacheMap& column_cache);
759 
760  std::vector<llvm::Value*> inlineHoistedLiterals();
761 
762  std::tuple<Executor::CompilationResult, std::unique_ptr<QueryMemoryDescriptor>>
763  compileWorkUnit(const std::vector<InputTableInfo>& query_infos,
764  const RelAlgExecutionUnit& ra_exe_unit,
765  const CompilationOptions& co,
766  const ExecutionOptions& eo,
767  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
768  const bool allow_lazy_fetch,
769  std::shared_ptr<RowSetMemoryOwner>,
770  const size_t max_groups_buffer_entry_count,
771  const int8_t crt_min_byte_width,
772  const bool has_cardinality_estimation,
773  ColumnCacheMap& column_cache,
774  RenderInfo* render_info = nullptr);
775  // Generate code to skip the deleted rows in the outermost table.
776  llvm::BasicBlock* codegenSkipDeletedOuterTableRow(
777  const RelAlgExecutionUnit& ra_exe_unit,
778  const CompilationOptions& co);
779  std::vector<JoinLoop> buildJoinLoops(RelAlgExecutionUnit& ra_exe_unit,
780  const CompilationOptions& co,
781  const ExecutionOptions& eo,
782  const std::vector<InputTableInfo>& query_infos,
783  ColumnCacheMap& column_cache);
784  // Creates a callback which generates code returning true iff the row at the given
785  // join level is deleted.
786  std::function<llvm::Value*(const std::vector<llvm::Value*>&, llvm::Value*)>
787  buildIsDeletedCb(const RelAlgExecutionUnit& ra_exe_unit,
788  const size_t level_idx,
789  const CompilationOptions& co);
790  // Builds a join hash table for the provided conditions on the current level.
791  // Returns null on failure and reports the reasons in `fail_reasons`.
792  std::shared_ptr<JoinHashTableInterface> buildCurrentLevelHashTable(
793  const JoinCondition& current_level_join_conditions,
794  RelAlgExecutionUnit& ra_exe_unit,
795  const CompilationOptions& co,
796  const std::vector<InputTableInfo>& query_infos,
797  ColumnCacheMap& column_cache,
798  std::vector<std::string>& fail_reasons);
799  llvm::Value* addJoinLoopIterator(const std::vector<llvm::Value*>& prev_iters,
800  const size_t level_idx);
801  void codegenJoinLoops(const std::vector<JoinLoop>& join_loops,
802  const RelAlgExecutionUnit& ra_exe_unit,
803  GroupByAndAggregate& group_by_and_aggregate,
804  llvm::Function* query_func,
805  llvm::BasicBlock* entry_bb,
806  const QueryMemoryDescriptor& query_mem_desc,
807  const CompilationOptions& co,
808  const ExecutionOptions& eo);
809  bool compileBody(const RelAlgExecutionUnit& ra_exe_unit,
810  GroupByAndAggregate& group_by_and_aggregate,
811  const QueryMemoryDescriptor& query_mem_desc,
812  const CompilationOptions& co,
813  const GpuSharedMemoryContext& gpu_smem_context = {});
814 
815  void createErrorCheckControlFlow(llvm::Function* query_func,
816  bool run_with_dynamic_watchdog,
817  bool run_with_allowing_runtime_interrupt,
818  ExecutorDeviceType device_type);
819 
820  void preloadFragOffsets(const std::vector<InputDescriptor>& input_descs,
821  const std::vector<InputTableInfo>& query_infos);
822 
823  struct JoinHashTableOrError {
824  std::shared_ptr<JoinHashTableInterface> hash_table;
825  std::string fail_reason;
826  };
827 
828  JoinHashTableOrError buildHashTableForQualifier(
829  const std::shared_ptr<Analyzer::BinOper>& qual_bin_oper,
830  const std::vector<InputTableInfo>& query_infos,
831  const MemoryLevel memory_level,
832  const JoinHashTableInterface::HashType preferred_hash_type,
833  ColumnCacheMap& column_cache);
834  void nukeOldState(const bool allow_lazy_fetch,
835  const std::vector<InputTableInfo>& query_infos,
836  const RelAlgExecutionUnit* ra_exe_unit);
837 
838  std::shared_ptr<CompilationContext> optimizeAndCodegenCPU(
839  llvm::Function*,
840  llvm::Function*,
841  const std::unordered_set<llvm::Function*>&,
842  const CompilationOptions&);
843  std::shared_ptr<CompilationContext> optimizeAndCodegenGPU(
844  llvm::Function*,
845  llvm::Function*,
846  std::unordered_set<llvm::Function*>&,
847  const bool no_inline,
848  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
849  const CompilationOptions&);
850  std::string generatePTX(const std::string&) const;
851  void initializeNVPTXBackend() const;
852 
853  int64_t deviceCycles(int milliseconds) const;
854 
856  llvm::Value* translated_value;
857  llvm::Value* original_value;
858  };
859 
860  GroupColLLVMValue groupByColumnCodegen(Analyzer::Expr* group_by_col,
861  const size_t col_width,
862  const CompilationOptions&,
863  const bool translate_null_val,
864  const int64_t translated_null_val,
866  std::stack<llvm::BasicBlock*>&,
867  const bool thread_mem_shared);
868 
869  llvm::Value* castToFP(llvm::Value* val);
870  llvm::Value* castToIntPtrTyIn(llvm::Value* val, const size_t bit_width);
871 
872  RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit& ra_exe_unit,
873  const CompilationOptions& co);
874 
875  std::pair<bool, int64_t> skipFragment(
876  const InputDescriptor& table_desc,
877  const Fragmenter_Namespace::FragmentInfo& frag_info,
878  const std::list<std::shared_ptr<Analyzer::Expr>>& simple_quals,
879  const std::vector<uint64_t>& frag_offsets,
880  const size_t frag_idx);
881 
882  std::pair<bool, int64_t> skipFragmentInnerJoins(
883  const InputDescriptor& table_desc,
884  const RelAlgExecutionUnit& ra_exe_unit,
885  const Fragmenter_Namespace::FragmentInfo& fragment,
886  const std::vector<uint64_t>& frag_offsets,
887  const size_t frag_idx);
888 
889  AggregatedColRange computeColRangesCache(
890  const std::unordered_set<PhysicalInput>& phys_inputs);
891  StringDictionaryGenerations computeStringDictionaryGenerations(
892  const std::unordered_set<PhysicalInput>& phys_inputs);
893  TableGenerations computeTableGenerations(std::unordered_set<int> phys_table_ids);
894 
895  public:
896  void setupCaching(const std::unordered_set<PhysicalInput>& phys_inputs,
897  const std::unordered_set<int>& phys_table_ids);
898 
899  template <typename SESSION_MAP_LOCK>
900  void setCurrentQuerySession(const std::string& query_session,
901  SESSION_MAP_LOCK& write_lock);
902  template <typename SESSION_MAP_LOCK>
903  std::string& getCurrentQuerySession(SESSION_MAP_LOCK& read_lock);
904  template <typename SESSION_MAP_LOCK>
905  bool checkCurrentQuerySession(const std::string& candidate_query_session,
906  SESSION_MAP_LOCK& read_lock);
907  template <typename SESSION_MAP_LOCK>
908  void invalidateQuerySession(SESSION_MAP_LOCK& write_lock);
909  template <typename SESSION_MAP_LOCK>
910  bool addToQuerySessionList(const std::string& query_session,
911  SESSION_MAP_LOCK& write_lock);
912  template <typename SESSION_MAP_LOCK>
913  bool removeFromQuerySessionList(const std::string& query_session,
914  SESSION_MAP_LOCK& write_lock);
915  template <typename SESSION_MAP_LOCK>
916  void setQuerySessionAsInterrupted(const std::string& query_session,
917  SESSION_MAP_LOCK& write_lock);
918  template <typename SESSION_MAP_LOCK>
919  bool checkIsQuerySessionInterrupted(const std::string& query_session,
920  SESSION_MAP_LOCK& read_lock);
921  mapd_shared_mutex& getSessionLock();
922 
923  // pair of <whether a cached cardinality was found, the cached cardinality>
924  using CachedCardinality = std::pair<bool, size_t>;
925  void addToCardinalityCache(const std::string& cache_key, const size_t cache_value);
926  CachedCardinality getCachedCardinality(const std::string& cache_key);
927 
928  private:
929  std::shared_ptr<CompilationContext> getCodeFromCache(const CodeCacheKey&,
930  const CodeCache&);
931 
932  std::vector<int8_t> serializeLiterals(
933  const std::unordered_map<int, CgenState::LiteralValues>& literals,
934  const int device_id);
935 
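// Rounds `off_in` up to the next multiple of `alignment`.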
936  static size_t align(const size_t off_in, const size_t alignment) {
937  size_t off = off_in;
938  if (off % alignment != 0) {
939  off += (alignment - off % alignment);
940  }
941  return off;
942  }
943 
944  std::unique_ptr<CgenState> cgen_state_;
945 
946  class FetchCacheAnchor {
947  public:
948  FetchCacheAnchor(CgenState* cgen_state)
949  : cgen_state_(cgen_state), saved_fetch_cache(cgen_state_->fetch_cache_) {}
950  ~FetchCacheAnchor() { cgen_state_->fetch_cache_.swap(saved_fetch_cache); }
951 
952  private:
953  CgenState* cgen_state_;
954  std::unordered_map<int, std::vector<llvm::Value*>> saved_fetch_cache;
955  };
956 
957  llvm::Value* spillDoubleElement(llvm::Value* elem_val, llvm::Type* elem_ty);
958 
959  std::unique_ptr<PlanState> plan_state_;
960  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
961 
962  static const int max_gpu_count{16};
963  std::mutex gpu_exec_mutex_[max_gpu_count];
964 
965  mutable std::mutex gpu_active_modules_mutex_;
966  mutable uint32_t gpu_active_modules_device_mask_;
967  mutable void* gpu_active_modules_[max_gpu_count];
968  std::atomic<bool> interrupted_;
969 
970  mutable std::shared_ptr<StringDictionaryProxy> lit_str_dict_proxy_;
971  mutable std::mutex str_dict_mutex_;
972 
973  mutable std::unique_ptr<llvm::TargetMachine> nvptx_target_machine_;
974 
975  CodeCache cpu_code_cache_;
976  CodeCache gpu_code_cache_;
977 
978  static const size_t baseline_threshold{
979  1000000}; // if a perfect hash needs more entries, use baseline
980  static const size_t code_cache_size{1000};
981 
982  const unsigned block_size_x_;
983  const unsigned grid_size_x_;
984  const size_t max_gpu_slab_size_;
985  const std::string debug_dir_;
986  const std::string debug_file_;
987 
988  const ExecutorId executor_id_;
989  const Catalog_Namespace::Catalog* catalog_;
990  const TemporaryTables* temporary_tables_;
991 
992  int64_t kernel_queue_time_ms_ = 0;
993  int64_t compilation_queue_time_ms_ = 0;
994 
995  // Singleton instance used for an execution unit which is a project with window
996  // functions.
997  std::unique_ptr<WindowProjectNodeContext> window_project_node_context_owned_;
998  // The active window function.
999  WindowFunctionContext* active_window_function_{nullptr};
1000 
1001  InputTableInfoCache input_table_info_cache_;
1002  AggregatedColRange agg_col_range_cache_;
1003  StringDictionaryGenerations string_dictionary_generations_;
1004  TableGenerations table_generations_;
1005  static mapd_shared_mutex executor_session_mutex_;
1006  static std::string current_query_session_;
1007  // a map of <query_session, interrupted_flag>
1008  static InterruptFlagMap queries_interrupt_flag_;
1009 
1010  static std::map<int, std::shared_ptr<Executor>> executors_;
1011  static std::atomic_flag execute_spin_lock_;
1012 
1013  // SQL queries take a shared lock; exclusive operations (cache clear, memory clear)
1014  // take a write lock
1015  static mapd_shared_mutex execute_mutex_;
1016  static mapd_shared_mutex executors_cache_mutex_;
1017 
1018  // for now we use recycler_mutex_ only for cardinality_cache_,
1019  // and will expand its coverage to more caches used during query execution
1020  static mapd_shared_mutex recycler_mutex_;
1021  static std::unordered_map<std::string, size_t> cardinality_cache_;
1022 
1023  public:
1024  static const int32_t ERR_DIV_BY_ZERO{1};
1025  static const int32_t ERR_OUT_OF_GPU_MEM{2};
1026  static const int32_t ERR_OUT_OF_SLOTS{3};
1027  static const int32_t ERR_UNSUPPORTED_SELF_JOIN{4};
1028  static const int32_t ERR_OUT_OF_RENDER_MEM{5};
1029  static const int32_t ERR_OUT_OF_CPU_MEM{6};
1030  static const int32_t ERR_OVERFLOW_OR_UNDERFLOW{7};
1031  static const int32_t ERR_OUT_OF_TIME{9};
1032  static const int32_t ERR_INTERRUPTED{10};
1033  static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED{11};
1034  static const int32_t ERR_TOO_MANY_LITERALS{12};
1035  static const int32_t ERR_STRING_CONST_IN_RESULTSET{13};
1036  static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY{14};
1037  static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES{15};
1038  static const int32_t ERR_GEOS{16};
1039 
1040  static std::mutex compilation_mutex_;
1041  static std::mutex kernel_mutex_;
1042 
1044  friend class CodeGenerator;
1045  friend class ColumnFetcher;
1047  friend class GroupByAndAggregate;
1053  friend class ResultSet;
1054  friend class InValuesBitmap;
1055  friend class JoinHashTable;
1056  friend class LeafAggregator;
1057  friend class QueryRewriter;
1058  friend class PendingExecutionClosure;
1059  friend class RelAlgExecutor;
1060  friend class TableOptimizer;
1064  friend struct TargetExprCodegen;
1066 };
1067 
1068 inline std::string get_null_check_suffix(const SQLTypeInfo& lhs_ti,
1069  const SQLTypeInfo& rhs_ti) {
1070  if (lhs_ti.get_notnull() && rhs_ti.get_notnull()) {
1071  return "";
1072  }
1073  std::string null_check_suffix{"_nullable"};
1074  if (lhs_ti.get_notnull()) {
1075  CHECK(!rhs_ti.get_notnull());
1076  null_check_suffix += "_rhs";
1077  } else if (rhs_ti.get_notnull()) {
1078  CHECK(!lhs_ti.get_notnull());
1079  null_check_suffix += "_lhs";
1080  }
1081  return null_check_suffix;
1082 }
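// For example, a nullable LHS compared with a NOT NULL RHS yields "_nullable_lhs"; the
// suffix selects the runtime function variant that performs the required null checks.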
1083 
1084 inline bool is_unnest(const Analyzer::Expr* expr) {
1085  return dynamic_cast<const Analyzer::UOper*>(expr) &&
1086  static_cast<const Analyzer::UOper*>(expr)->get_optype() == kUNNEST;
1087 }
1088 
1089 bool is_trivial_loop_join(const std::vector<InputTableInfo>& query_infos,
1090  const RelAlgExecutionUnit& ra_exe_unit);
1091 
1092 std::unordered_set<int> get_available_gpus(const Catalog_Namespace::Catalog& cat);
1093 
1094 size_t get_context_count(const ExecutorDeviceType device_type,
1095  const size_t cpu_count,
1096  const size_t gpu_count);
1097 
1098 extern "C" void register_buffer_with_executor_rsm(int64_t exec, int8_t* buffer);
1099 
1100 const Analyzer::Expr* remove_cast_to_int(const Analyzer::Expr* expr);
1101 
1102 #endif // QUERYENGINE_EXECUTE_H