OmniSciDB  8a228a1076
Execute.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_EXECUTE_H
18 #define QUERYENGINE_EXECUTE_H
19 
20 #include "AggregatedColRange.h"
21 #include "BufferCompaction.h"
22 #include "CartesianProduct.h"
23 #include "CgenState.h"
24 #include "CodeCache.h"
25 #include "DateTimeUtils.h"
27 #include "ExecutionKernel.h"
28 #include "GpuSharedMemoryContext.h"
29 #include "GroupByAndAggregate.h"
32 #include "NvidiaKernel.h"
33 #include "PlanState.h"
34 #include "RelAlgExecutionUnit.h"
35 #include "RelAlgTranslator.h"
37 #include "TableGenerations.h"
38 #include "TargetMetaInfo.h"
39 #include "WindowContext.h"
40 
42 
43 #include "../Logger/Logger.h"
44 #include "../Shared/SystemParameters.h"
45 #include "../Shared/mapd_shared_mutex.h"
46 #include "../Shared/measure.h"
47 #include "../Shared/thread_count.h"
48 #include "../StringDictionary/LruCache.hpp"
49 #include "../StringDictionary/StringDictionary.h"
50 #include "../StringDictionary/StringDictionaryProxy.h"
51 #include "DataMgr/Chunk/Chunk.h"
53 
54 #include <llvm/IR/Function.h>
55 #include <llvm/IR/Value.h>
56 #include <llvm/Linker/Linker.h>
57 #include <llvm/Transforms/Utils/ValueMapper.h>
58 #include <rapidjson/document.h>
59 
60 #include <algorithm>
61 #include <atomic>
62 #include <condition_variable>
63 #include <cstddef>
64 #include <cstdlib>
65 #include <deque>
66 #include <functional>
67 #include <limits>
68 #include <map>
69 #include <mutex>
70 #include <stack>
71 #include <unordered_map>
72 #include <unordered_set>
73 
74 using QueryCompilationDescriptorOwned = std::unique_ptr<QueryCompilationDescriptor>;
76 using QueryMemoryDescriptorOwned = std::unique_ptr<QueryMemoryDescriptor>;
77 using InterruptFlagMap = std::map<std::string, bool>;
78 
79 extern void read_udf_gpu_module(const std::string& udf_ir_filename);
80 extern void read_udf_cpu_module(const std::string& udf_ir_filename);
81 extern bool is_udf_module_present(bool cpu_only = false);
82 extern void read_rt_udf_gpu_module(const std::string& udf_ir);
83 extern void read_rt_udf_cpu_module(const std::string& udf_ir);
84 extern bool is_rt_udf_module_present(bool cpu_only = false);
85 
86 class ColumnFetcher;
87 
// Runtime error whose what() text is the caller-supplied `cause`.
class WatchdogException : public std::runtime_error {
 public:
  WatchdogException(const std::string& cause) : std::runtime_error{cause} {}
};
92 
93 class Executor;
94 
95 inline llvm::Value* get_arg_by_name(llvm::Function* func, const std::string& name) {
96  for (auto& arg : func->args()) {
97  if (arg.getName() == name) {
98  return &arg;
99  }
100  }
101  CHECK(false);
102  return nullptr;
103 }
104 
// Maps a byte width of 1, 2, 4 or 8 to its base-2 logarithm (0, 1, 2, 3).
// Any other width aborts the process.
inline uint32_t log2_bytes(const uint32_t bytes) {
  for (uint32_t shift = 0; shift < 4; ++shift) {
    if (bytes == (uint32_t{1} << shift)) {
      return shift;
    }
  }
  abort();
}
119 
121  const int col_id,
122  const int table_id,
124  CHECK_GT(table_id, 0);
125  const auto col_desc = cat.getMetadataForColumn(table_id, col_id);
126  CHECK(col_desc);
127  return col_desc;
128 }
129 
130 inline const Analyzer::Expr* extract_cast_arg(const Analyzer::Expr* expr) {
131  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr);
132  if (!cast_expr || cast_expr->get_optype() != kCAST) {
133  return expr;
134  }
135  return cast_expr->get_operand();
136 }
137 
138 inline std::string numeric_type_name(const SQLTypeInfo& ti) {
139  CHECK(ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
140  ti.is_fp() || (ti.is_string() && ti.get_compression() == kENCODING_DICT) ||
141  ti.is_timeinterval());
142  if (ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
143  ti.is_string() || ti.is_timeinterval()) {
144  return "int" + std::to_string(ti.get_logical_size() * 8) + "_t";
145  }
146  return ti.get_type() == kDOUBLE ? "double" : "float";
147 }
148 
150  const int col_id,
151  const int table_id,
153  CHECK(table_id);
154  return table_id > 0 ? get_column_descriptor(col_id, table_id, cat) : nullptr;
155 }
156 
157 inline const ResultSetPtr& get_temporary_table(const TemporaryTables* temporary_tables,
158  const int table_id) {
159  CHECK_LT(table_id, 0);
160  const auto it = temporary_tables->find(table_id);
161  CHECK(it != temporary_tables->end());
162  return it->second;
163 }
164 
165 inline const SQLTypeInfo get_column_type(const int col_id,
166  const int table_id,
167  const ColumnDescriptor* cd,
168  const TemporaryTables* temporary_tables) {
169  CHECK(cd || temporary_tables);
170  if (cd) {
171  CHECK_EQ(col_id, cd->columnId);
172  CHECK_EQ(table_id, cd->tableId);
173  return cd->columnType;
174  }
175  const auto& temp = get_temporary_table(temporary_tables, table_id);
176  return temp->getColType(col_id);
177 }
178 
179 template <typename PtrTy>
181  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
182  const PtrTy& result,
183  const int number) {
184  std::vector<SQLTypeInfo> col_types;
185  for (size_t i = 0; i < result->colCount(); ++i) {
186  col_types.push_back(get_logical_type_info(result->getColType(i)));
187  }
188  return new ColumnarResults(row_set_mem_owner, *result, number, col_types);
189 }
190 
191 // TODO(alex): Adjust interfaces downstream and make this not needed.
192 inline std::vector<Analyzer::Expr*> get_exprs_not_owned(
193  const std::vector<std::shared_ptr<Analyzer::Expr>>& exprs) {
194  std::vector<Analyzer::Expr*> exprs_not_owned;
195  for (const auto& expr : exprs) {
196  exprs_not_owned.push_back(expr.get());
197  }
198  return exprs_not_owned;
199 }
200 
202  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
203  const ResultSetPtr& result,
204  const int frag_id) {
206  CHECK_EQ(0, frag_id);
207  return rows_to_columnar_results(row_set_mem_owner, result, result->colCount());
208 }
209 
210 class CompilationRetryNoLazyFetch : public std::runtime_error {
211  public:
213  : std::runtime_error("Retry query compilation with no GPU lazy fetch.") {}
214 };
215 
216 class CompilationRetryNewScanLimit : public std::runtime_error {
217  public:
218  CompilationRetryNewScanLimit(const size_t new_scan_limit)
219  : std::runtime_error("Retry query compilation with new scan limit.")
220  , new_scan_limit_(new_scan_limit) {}
221 
223 };
224 
// Runtime error with the fixed message "Too many literals in the query".
class TooManyLiterals : public std::runtime_error {
 public:
  TooManyLiterals() : std::runtime_error{"Too many literals in the query"} {}
};
229 
230 class CompilationRetryNoCompaction : public std::runtime_error {
231  public:
233  : std::runtime_error("Retry query compilation with no compaction.") {}
234 };
235 
// Runtime error with the fixed message "Query must run in cpu mode.".
class QueryMustRunOnCpu : public std::runtime_error {
 public:
  QueryMustRunOnCpu() : std::runtime_error{"Query must run in cpu mode."} {}
};
240 
241 class SringConstInResultSet : public std::runtime_error {
242  public:
244  : std::runtime_error(
245  "NONE ENCODED String types are not supported as input result set.") {}
246 };
247 
248 class ExtensionFunction;
249 
251 
253  public:
255 
256  UpdateLogForFragment(FragmentInfoType const& fragment_info,
257  size_t const,
258  const std::shared_ptr<ResultSet>& rs);
259 
260  std::vector<TargetValue> getEntryAt(const size_t index) const override;
261  std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const override;
262 
263  size_t const getRowCount() const override;
265  return rs_->getRowSetMemOwner()->getLiteralStringDictProxy();
266  }
267  size_t const getEntryCount() const override;
268  size_t const getFragmentIndex() const;
269  FragmentInfoType const& getFragmentInfo() const;
270  decltype(FragmentInfoType::physicalTableId) const getPhysicalTableId() const {
271  return fragment_info_.physicalTableId;
272  }
273  decltype(FragmentInfoType::fragmentId) const getFragmentId() const {
274  return fragment_info_.fragmentId;
275  }
276 
277  SQLTypeInfo getColumnType(const size_t col_idx) const;
278 
279  using Callback = std::function<void(const UpdateLogForFragment&)>;
280 
281  auto getResultSet() const { return rs_; }
282 
283  private:
286  std::shared_ptr<ResultSet> rs_;
287 };
288 
289 using LLVMValueVector = std::vector<llvm::Value*>;
290 
292 
293 std::ostream& operator<<(std::ostream&, FetchResult const&);
294 
295 class Executor {
296  static_assert(sizeof(float) == 4 && sizeof(double) == 8,
297  "Host hardware not supported, unexpected size of float / double.");
298  static_assert(sizeof(time_t) == 8,
299  "Host hardware not supported, 64-bit time support is required.");
300 
301  public:
302  using ExecutorId = size_t;
303  static const ExecutorId UNITARY_EXECUTOR_ID = 0;
304 
305  Executor(const ExecutorId id,
306  const size_t block_size_x,
307  const size_t grid_size_x,
308  const size_t max_gpu_slab_size,
309  const std::string& debug_dir,
310  const std::string& debug_file);
311 
312  static std::shared_ptr<Executor> getExecutor(
313  const ExecutorId id,
314  const std::string& debug_dir = "",
315  const std::string& debug_file = "",
316  const SystemParameters system_parameters = SystemParameters());
317 
318  static void nukeCacheOfExecutors() {
319  mapd_unique_lock<mapd_shared_mutex> flush_lock(
320  execute_mutex_); // don't want native code to vanish while executing
321  mapd_unique_lock<mapd_shared_mutex> lock(executors_cache_mutex_);
322  (decltype(executors_){}).swap(executors_);
323  }
324 
325  static void clearMemory(const Data_Namespace::MemoryLevel memory_level);
326 
327  static size_t getArenaBlockSize();
328 
329  StringDictionaryProxy* getStringDictionaryProxy(
330  const int dictId,
331  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
332  const bool with_generation) const;
333 
334  bool isCPUOnly() const;
335 
336  bool isArchMaxwell(const ExecutorDeviceType dt) const;
337 
339  return cgen_state_->contains_left_deep_outer_join_;
340  }
341 
342  const ColumnDescriptor* getColumnDescriptor(const Analyzer::ColumnVar*) const;
343 
344  const ColumnDescriptor* getPhysicalColumnDescriptor(const Analyzer::ColumnVar*,
345  int) const;
346 
347  const Catalog_Namespace::Catalog* getCatalog() const;
348  void setCatalog(const Catalog_Namespace::Catalog* catalog);
349 
350  const std::shared_ptr<RowSetMemoryOwner> getRowSetMemoryOwner() const;
351 
352  const TemporaryTables* getTemporaryTables() const;
353 
354  Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const;
355 
356  const TableGeneration& getTableGeneration(const int table_id) const;
357 
358  ExpressionRange getColRange(const PhysicalInput&) const;
359 
360  size_t getNumBytesForFetchedRow(const std::set<int>& table_ids_to_fetch) const;
361 
362  std::vector<ColumnLazyFetchInfo> getColLazyFetchInfo(
363  const std::vector<Analyzer::Expr*>& target_exprs) const;
364 
365  void registerActiveModule(void* module, const int device_id) const;
366  void unregisterActiveModule(void* module, const int device_id) const;
367  void interrupt(const std::string& query_session = "",
368  const std::string& interrupt_session = "");
369  void resetInterrupt();
370 
371  // only for testing usage
372  void enableRuntimeQueryInterrupt(const unsigned interrupt_freq) const;
373 
374  static const size_t high_scan_limit{32000000};
375 
376  int8_t warpSize() const;
377  unsigned gridSize() const;
378  unsigned numBlocksPerMP() const;
379  unsigned blockSize() const;
380  size_t maxGpuSlabSize() const;
381 
382  ResultSetPtr executeWorkUnit(size_t& max_groups_buffer_entry_guess,
383  const bool is_agg,
384  const std::vector<InputTableInfo>&,
385  const RelAlgExecutionUnit&,
386  const CompilationOptions&,
387  const ExecutionOptions& options,
389  RenderInfo* render_info,
390  const bool has_cardinality_estimation,
391  ColumnCacheMap& column_cache);
392 
393  void executeUpdate(const RelAlgExecutionUnit& ra_exe_unit,
394  const std::vector<InputTableInfo>& table_infos,
395  const CompilationOptions& co,
396  const ExecutionOptions& eo,
398  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
400  const bool is_agg);
401 
402  private:
403  void clearMetaInfoCache();
404 
405  int deviceCount(const ExecutorDeviceType) const;
406  int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const;
407 
408  // Generate code for a window function target.
409  llvm::Value* codegenWindowFunction(const size_t target_index,
410  const CompilationOptions& co);
411 
412  // Generate code for an aggregate window function target.
413  llvm::Value* codegenWindowFunctionAggregate(const CompilationOptions& co);
414 
415  // The aggregate state requires a state reset when starting a new partition. Generate
416  // the new partition check and return the continuation basic block.
417  llvm::BasicBlock* codegenWindowResetStateControlFlow();
418 
419  // Generate code for initializing the state of a window aggregate.
420  void codegenWindowFunctionStateInit(llvm::Value* aggregate_state);
421 
422  // Generates the required calls for an aggregate window function and returns the final
423  // result.
424  llvm::Value* codegenWindowFunctionAggregateCalls(llvm::Value* aggregate_state,
425  const CompilationOptions& co);
426 
427  // The AVG window function requires some post-processing: the sum is divided by count
428  // and the result is stored back for the current row.
429  void codegenWindowAvgEpilogue(llvm::Value* crt_val,
430  llvm::Value* window_func_null_val,
431  llvm::Value* multiplicity_lv);
432 
433  // Generates code which loads the current aggregate value for the window context.
434  llvm::Value* codegenAggregateWindowState();
435 
436  llvm::Value* aggregateWindowStatePtr();
437 
439  if (dt == ExecutorDeviceType::GPU) {
440  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
441  LOG_IF(FATAL, cuda_mgr == nullptr)
442  << "No CudaMgr instantiated, unable to check device architecture";
443  return cuda_mgr->isArchPascalOrLater();
444  }
445  return false;
446  }
447 
448  bool needFetchAllFragments(const InputColDescriptor& col_desc,
449  const RelAlgExecutionUnit& ra_exe_unit,
450  const FragmentsList& selected_fragments) const;
451 
452  using PerFragmentCallBack =
453  std::function<void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo&)>;
454 
460  void executeWorkUnitPerFragment(const RelAlgExecutionUnit& ra_exe_unit,
461  const InputTableInfo& table_info,
462  const CompilationOptions& co,
463  const ExecutionOptions& eo,
464  const Catalog_Namespace::Catalog& cat,
465  PerFragmentCallBack& cb);
466 
467  ResultSetPtr executeExplain(const QueryCompilationDescriptor&);
468 
474  ResultSetPtr executeTableFunction(const TableFunctionExecutionUnit exe_unit,
475  const std::vector<InputTableInfo>& table_infos,
476  const CompilationOptions& co,
477  const ExecutionOptions& eo,
478  const Catalog_Namespace::Catalog& cat);
479 
480  ExecutorDeviceType getDeviceTypeForTargets(
481  const RelAlgExecutionUnit& ra_exe_unit,
482  const ExecutorDeviceType requested_device_type);
483 
484  ResultSetPtr collectAllDeviceResults(
485  SharedKernelContext& shared_context,
486  const RelAlgExecutionUnit& ra_exe_unit,
487  const QueryMemoryDescriptor& query_mem_desc,
488  const ExecutorDeviceType device_type,
489  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
490 
491  ResultSetPtr collectAllDeviceShardedTopResults(
492  SharedKernelContext& shared_context,
493  const RelAlgExecutionUnit& ra_exe_unit) const;
494 
495  std::unordered_map<int, const Analyzer::BinOper*> getInnerTabIdToJoinCond() const;
496 
501  std::vector<std::unique_ptr<ExecutionKernel>> createKernels(
502  SharedKernelContext& shared_context,
503  const RelAlgExecutionUnit& ra_exe_unit,
504  ColumnFetcher& column_fetcher,
505  const std::vector<InputTableInfo>& table_infos,
506  const ExecutionOptions& eo,
507  const bool is_agg,
508  const bool allow_single_frag_table_opt,
509  const size_t context_count,
510  const QueryCompilationDescriptor& query_comp_desc,
511  const QueryMemoryDescriptor& query_mem_desc,
512  RenderInfo* render_info,
513  std::unordered_set<int>& available_gpus,
514  int& available_cpus);
515 
520  template <typename THREAD_POOL>
521  void launchKernels(SharedKernelContext& shared_context,
522  std::vector<std::unique_ptr<ExecutionKernel>>&& kernels);
523 
524  std::vector<size_t> getTableFragmentIndices(
525  const RelAlgExecutionUnit& ra_exe_unit,
526  const ExecutorDeviceType device_type,
527  const size_t table_idx,
528  const size_t outer_frag_idx,
529  std::map<int, const TableFragments*>& selected_tables_fragments,
530  const std::unordered_map<int, const Analyzer::BinOper*>&
531  inner_table_id_to_join_condition);
532 
533  bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo& outer_fragment_info,
534  const Fragmenter_Namespace::FragmentInfo& inner_fragment_info,
535  const int inner_table_id,
536  const std::unordered_map<int, const Analyzer::BinOper*>&
537  inner_table_id_to_join_condition,
538  const RelAlgExecutionUnit& ra_exe_unit,
539  const ExecutorDeviceType device_type);
540 
541  FetchResult fetchChunks(const ColumnFetcher&,
542  const RelAlgExecutionUnit& ra_exe_unit,
543  const int device_id,
545  const std::map<int, const TableFragments*>&,
546  const FragmentsList& selected_fragments,
548  std::list<ChunkIter>&,
549  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
550  DeviceAllocator* device_allocator);
551 
552  FetchResult fetchUnionChunks(const ColumnFetcher&,
553  const RelAlgExecutionUnit& ra_exe_unit,
554  const int device_id,
556  const std::map<int, const TableFragments*>&,
557  const FragmentsList& selected_fragments,
559  std::list<ChunkIter>&,
560  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
561  DeviceAllocator* device_allocator);
562 
563  std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
564  getRowCountAndOffsetForAllFrags(
565  const RelAlgExecutionUnit& ra_exe_unit,
566  const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
567  const std::vector<InputDescriptor>& input_descs,
568  const std::map<int, const TableFragments*>& all_tables_fragments);
569 
570  void buildSelectedFragsMapping(
571  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
572  std::vector<size_t>& local_col_to_frag_pos,
573  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
574  const FragmentsList& selected_fragments,
575  const RelAlgExecutionUnit& ra_exe_unit);
576 
577  void buildSelectedFragsMappingForUnion(
578  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
579  std::vector<size_t>& local_col_to_frag_pos,
580  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
581  const FragmentsList& selected_fragments,
582  const RelAlgExecutionUnit& ra_exe_unit);
583 
584  std::vector<size_t> getFragmentCount(const FragmentsList& selected_fragments,
585  const size_t scan_idx,
586  const RelAlgExecutionUnit& ra_exe_unit);
587 
588  int32_t executePlanWithGroupBy(const RelAlgExecutionUnit& ra_exe_unit,
589  const CompilationResult&,
590  const bool hoist_literals,
591  ResultSetPtr& results,
592  const ExecutorDeviceType device_type,
593  std::vector<std::vector<const int8_t*>>& col_buffers,
594  const std::vector<size_t> outer_tab_frag_ids,
596  const std::vector<std::vector<int64_t>>& num_rows,
597  const std::vector<std::vector<uint64_t>>& frag_offsets,
599  const int device_id,
600  const int outer_table_id,
601  const int64_t limit,
602  const uint32_t start_rowid,
603  const uint32_t num_tables,
604  RenderInfo* render_info);
605  int32_t executePlanWithoutGroupBy(
606  const RelAlgExecutionUnit& ra_exe_unit,
607  const CompilationResult&,
608  const bool hoist_literals,
609  ResultSetPtr& results,
610  const std::vector<Analyzer::Expr*>& target_exprs,
611  const ExecutorDeviceType device_type,
612  std::vector<std::vector<const int8_t*>>& col_buffers,
613  QueryExecutionContext* query_exe_context,
614  const std::vector<std::vector<int64_t>>& num_rows,
615  const std::vector<std::vector<uint64_t>>& frag_offsets,
616  Data_Namespace::DataMgr* data_mgr,
617  const int device_id,
618  const uint32_t start_rowid,
619  const uint32_t num_tables,
620  RenderInfo* render_info);
621 
622  public: // Temporary, ask saman about this
623  static std::pair<int64_t, int32_t> reduceResults(const SQLAgg agg,
624  const SQLTypeInfo& ti,
625  const int64_t agg_init_val,
626  const int8_t out_byte_width,
627  const int64_t* out_vec,
628  const size_t out_vec_sz,
629  const bool is_group_by,
630  const bool float_argument_input);
631 
632  static void addCodeToCache(const CodeCacheKey&,
633  std::shared_ptr<CompilationContext>,
634  llvm::Module*,
635  CodeCache&);
636 
637  private:
638  ResultSetPtr resultsUnion(SharedKernelContext& shared_context,
639  const RelAlgExecutionUnit& ra_exe_unit);
640  std::vector<int64_t> getJoinHashTablePtrs(const ExecutorDeviceType device_type,
641  const int device_id);
642  ResultSetPtr reduceMultiDeviceResults(
643  const RelAlgExecutionUnit&,
644  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
645  std::shared_ptr<RowSetMemoryOwner>,
646  const QueryMemoryDescriptor&) const;
647  ResultSetPtr reduceMultiDeviceResultSets(
648  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
649  std::shared_ptr<RowSetMemoryOwner>,
650  const QueryMemoryDescriptor&) const;
651  ResultSetPtr reduceSpeculativeTopN(
652  const RelAlgExecutionUnit&,
653  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
654  std::shared_ptr<RowSetMemoryOwner>,
655  const QueryMemoryDescriptor&) const;
656 
657  ResultSetPtr executeWorkUnitImpl(size_t& max_groups_buffer_entry_guess,
658  const bool is_agg,
659  const bool allow_single_frag_table_opt,
660  const std::vector<InputTableInfo>&,
661  const RelAlgExecutionUnit&,
662  const CompilationOptions&,
663  const ExecutionOptions& options,
665  std::shared_ptr<RowSetMemoryOwner>,
666  RenderInfo* render_info,
667  const bool has_cardinality_estimation,
668  ColumnCacheMap& column_cache);
669 
670  std::vector<llvm::Value*> inlineHoistedLiterals();
671 
672  std::tuple<CompilationResult, std::unique_ptr<QueryMemoryDescriptor>> compileWorkUnit(
673  const std::vector<InputTableInfo>& query_infos,
674  const RelAlgExecutionUnit& ra_exe_unit,
675  const CompilationOptions& co,
676  const ExecutionOptions& eo,
677  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
678  const bool allow_lazy_fetch,
679  std::shared_ptr<RowSetMemoryOwner>,
680  const size_t max_groups_buffer_entry_count,
681  const int8_t crt_min_byte_width,
682  const bool has_cardinality_estimation,
683  ColumnCacheMap& column_cache,
684  RenderInfo* render_info = nullptr);
685  // Generate code to skip the deleted rows in the outermost table.
686  llvm::BasicBlock* codegenSkipDeletedOuterTableRow(
687  const RelAlgExecutionUnit& ra_exe_unit,
688  const CompilationOptions& co);
689  std::vector<JoinLoop> buildJoinLoops(RelAlgExecutionUnit& ra_exe_unit,
690  const CompilationOptions& co,
691  const ExecutionOptions& eo,
692  const std::vector<InputTableInfo>& query_infos,
693  ColumnCacheMap& column_cache);
694  // Create a callback which generates code which returns true iff the row on the given
695  // level is deleted.
696  std::function<llvm::Value*(const std::vector<llvm::Value*>&, llvm::Value*)>
697  buildIsDeletedCb(const RelAlgExecutionUnit& ra_exe_unit,
698  const size_t level_idx,
699  const CompilationOptions& co);
700  // Builds a join hash table for the provided conditions on the current level.
701  // Returns null iff it fails and provides the failure reasons in `fail_reasons`.
702  std::shared_ptr<JoinHashTableInterface> buildCurrentLevelHashTable(
703  const JoinCondition& current_level_join_conditions,
704  RelAlgExecutionUnit& ra_exe_unit,
705  const CompilationOptions& co,
706  const std::vector<InputTableInfo>& query_infos,
707  ColumnCacheMap& column_cache,
708  std::vector<std::string>& fail_reasons);
709  llvm::Value* addJoinLoopIterator(const std::vector<llvm::Value*>& prev_iters,
710  const size_t level_idx);
711  void codegenJoinLoops(const std::vector<JoinLoop>& join_loops,
712  const RelAlgExecutionUnit& ra_exe_unit,
713  GroupByAndAggregate& group_by_and_aggregate,
714  llvm::Function* query_func,
715  llvm::BasicBlock* entry_bb,
716  const QueryMemoryDescriptor& query_mem_desc,
717  const CompilationOptions& co,
718  const ExecutionOptions& eo);
719  bool compileBody(const RelAlgExecutionUnit& ra_exe_unit,
720  GroupByAndAggregate& group_by_and_aggregate,
721  const QueryMemoryDescriptor& query_mem_desc,
722  const CompilationOptions& co,
723  const GpuSharedMemoryContext& gpu_smem_context = {});
724 
725  void createErrorCheckControlFlow(llvm::Function* query_func,
726  bool run_with_dynamic_watchdog,
727  bool run_with_allowing_runtime_interrupt,
728  ExecutorDeviceType device_type);
729 
730  void preloadFragOffsets(const std::vector<InputDescriptor>& input_descs,
731  const std::vector<InputTableInfo>& query_infos);
732 
734  std::shared_ptr<JoinHashTableInterface> hash_table;
735  std::string fail_reason;
736  };
737 
738  JoinHashTableOrError buildHashTableForQualifier(
739  const std::shared_ptr<Analyzer::BinOper>& qual_bin_oper,
740  const std::vector<InputTableInfo>& query_infos,
741  const MemoryLevel memory_level,
742  const JoinHashTableInterface::HashType preferred_hash_type,
743  ColumnCacheMap& column_cache);
744  void nukeOldState(const bool allow_lazy_fetch,
745  const std::vector<InputTableInfo>& query_infos,
746  const RelAlgExecutionUnit* ra_exe_unit);
747 
748  std::shared_ptr<CompilationContext> optimizeAndCodegenCPU(
749  llvm::Function*,
750  llvm::Function*,
751  const std::unordered_set<llvm::Function*>&,
752  const CompilationOptions&);
753  std::shared_ptr<CompilationContext> optimizeAndCodegenGPU(
754  llvm::Function*,
755  llvm::Function*,
756  std::unordered_set<llvm::Function*>&,
757  const bool no_inline,
758  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
759  const CompilationOptions&);
760  std::string generatePTX(const std::string&) const;
761  void initializeNVPTXBackend() const;
762 
763  int64_t deviceCycles(int milliseconds) const;
764 
766  llvm::Value* translated_value;
767  llvm::Value* original_value;
768  };
769 
770  GroupColLLVMValue groupByColumnCodegen(Analyzer::Expr* group_by_col,
771  const size_t col_width,
772  const CompilationOptions&,
773  const bool translate_null_val,
774  const int64_t translated_null_val,
776  std::stack<llvm::BasicBlock*>&,
777  const bool thread_mem_shared);
778 
779  llvm::Value* castToFP(llvm::Value* val);
780  llvm::Value* castToIntPtrTyIn(llvm::Value* val, const size_t bit_width);
781 
782  RelAlgExecutionUnit addDeletedColumn(const RelAlgExecutionUnit& ra_exe_unit,
783  const CompilationOptions& co);
784 
785  std::pair<bool, int64_t> skipFragment(
786  const InputDescriptor& table_desc,
787  const Fragmenter_Namespace::FragmentInfo& frag_info,
788  const std::list<std::shared_ptr<Analyzer::Expr>>& simple_quals,
789  const std::vector<uint64_t>& frag_offsets,
790  const size_t frag_idx);
791 
792  std::pair<bool, int64_t> skipFragmentInnerJoins(
793  const InputDescriptor& table_desc,
794  const RelAlgExecutionUnit& ra_exe_unit,
795  const Fragmenter_Namespace::FragmentInfo& fragment,
796  const std::vector<uint64_t>& frag_offsets,
797  const size_t frag_idx);
798 
799  AggregatedColRange computeColRangesCache(
800  const std::unordered_set<PhysicalInput>& phys_inputs);
801  StringDictionaryGenerations computeStringDictionaryGenerations(
802  const std::unordered_set<PhysicalInput>& phys_inputs);
803  TableGenerations computeTableGenerations(std::unordered_set<int> phys_table_ids);
804 
805  public:
806  void setupCaching(const std::unordered_set<PhysicalInput>& phys_inputs,
807  const std::unordered_set<int>& phys_table_ids);
808  void setColRangeCache(const AggregatedColRange& aggregated_col_range) {
809  agg_col_range_cache_ = aggregated_col_range;
810  }
811 
812  template <typename SESSION_MAP_LOCK>
813  void setCurrentQuerySession(const std::string& query_session,
814  SESSION_MAP_LOCK& write_lock);
815  template <typename SESSION_MAP_LOCK>
816  std::string& getCurrentQuerySession(SESSION_MAP_LOCK& read_lock);
817  template <typename SESSION_MAP_LOCK>
818  bool checkCurrentQuerySession(const std::string& candidate_query_session,
819  SESSION_MAP_LOCK& read_lock);
820  template <typename SESSION_MAP_LOCK>
821  void invalidateQuerySession(SESSION_MAP_LOCK& write_lock);
822  template <typename SESSION_MAP_LOCK>
823  bool addToQuerySessionList(const std::string& query_session,
824  SESSION_MAP_LOCK& write_lock);
825  template <typename SESSION_MAP_LOCK>
826  bool removeFromQuerySessionList(const std::string& query_session,
827  SESSION_MAP_LOCK& write_lock);
828  template <typename SESSION_MAP_LOCK>
829  void setQuerySessionAsInterrupted(const std::string& query_session,
830  SESSION_MAP_LOCK& write_lock);
831  template <typename SESSION_MAP_LOCK>
832  bool checkIsQuerySessionInterrupted(const std::string& query_session,
833  SESSION_MAP_LOCK& read_lock);
834  mapd_shared_mutex& getSessionLock();
835 
836  // true when we have matched cardinality, and false otherwise
837  using CachedCardinality = std::pair<bool, size_t>;
838  void addToCardinalityCache(const std::string& cache_key, const size_t cache_value);
839  CachedCardinality getCachedCardinality(const std::string& cache_key);
840 
841  private:
842  std::shared_ptr<CompilationContext> getCodeFromCache(const CodeCacheKey&,
843  const CodeCache&);
844 
845  std::vector<int8_t> serializeLiterals(
846  const std::unordered_map<int, CgenState::LiteralValues>& literals,
847  const int device_id);
848 
// Rounds `off_in` up to the nearest multiple of `alignment`; an offset that is
// already a multiple is returned unchanged. `alignment` must be non-zero.
static size_t align(const size_t off_in, const size_t alignment) {
  const size_t remainder = off_in % alignment;
  return remainder == 0 ? off_in : off_in + (alignment - remainder);
}
856 
857  std::unique_ptr<CgenState> cgen_state_;
858 
860  public:
862  : cgen_state_(cgen_state), saved_fetch_cache(cgen_state_->fetch_cache_) {}
863  ~FetchCacheAnchor() { cgen_state_->fetch_cache_.swap(saved_fetch_cache); }
864 
865  private:
867  std::unordered_map<int, std::vector<llvm::Value*>> saved_fetch_cache;
868  };
869 
870  llvm::Value* spillDoubleElement(llvm::Value* elem_val, llvm::Type* elem_ty);
871 
872  std::unique_ptr<PlanState> plan_state_;
873  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
874 
875  static const int max_gpu_count{16};
876  std::mutex gpu_exec_mutex_[max_gpu_count];
877 
878  static std::mutex gpu_active_modules_mutex_;
880  static void* gpu_active_modules_[max_gpu_count];
881  static std::atomic<bool> interrupted_;
882 
883  mutable std::shared_ptr<StringDictionaryProxy> lit_str_dict_proxy_;
884  mutable std::mutex str_dict_mutex_;
885 
886  mutable std::unique_ptr<llvm::TargetMachine> nvptx_target_machine_;
887 
890 
891  static const size_t baseline_threshold{
892  1000000}; // if a perfect hash needs more entries, use baseline
893  static const size_t code_cache_size{1000};
894 
895  const unsigned block_size_x_;
896  const unsigned grid_size_x_;
897  const size_t max_gpu_slab_size_;
898  const std::string debug_dir_;
899  const std::string debug_file_;
900 
904 
905  int64_t kernel_queue_time_ms_ = 0;
906  int64_t compilation_queue_time_ms_ = 0;
907 
908  // Singleton instance used for an execution unit which is a project with window
909  // functions.
910  std::unique_ptr<WindowProjectNodeContext> window_project_node_context_owned_;
911  // The active window function.
912  WindowFunctionContext* active_window_function_{nullptr};
913 
919  static std::string current_query_session_;
920  // a pair of <query_session, interrupted_flag>
922 
923  static std::map<int, std::shared_ptr<Executor>> executors_;
924  static std::atomic_flag execute_spin_lock_;
925 
926  // SQL queries take a shared lock, exclusive options (cache clear, memory clear) take a
927  // write lock
930 
931  // for now we use recycler_mutex only for cardinality_cache_
932  // and will expand its coverage for more interesting caches for query execution
934  static std::unordered_map<std::string, size_t> cardinality_cache_;
935 
936  public:
937  static const int32_t ERR_DIV_BY_ZERO{1};
938  static const int32_t ERR_OUT_OF_GPU_MEM{2};
939  static const int32_t ERR_OUT_OF_SLOTS{3};
940  static const int32_t ERR_UNSUPPORTED_SELF_JOIN{4};
941  static const int32_t ERR_OUT_OF_RENDER_MEM{5};
942  static const int32_t ERR_OUT_OF_CPU_MEM{6};
943  static const int32_t ERR_OVERFLOW_OR_UNDERFLOW{7};
944  static const int32_t ERR_OUT_OF_TIME{9};
945  static const int32_t ERR_INTERRUPTED{10};
946  static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED{11};
947  static const int32_t ERR_TOO_MANY_LITERALS{12};
948  static const int32_t ERR_STRING_CONST_IN_RESULTSET{13};
949  static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY{14};
950  static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES{15};
951  static const int32_t ERR_GEOS{16};
952 
953  static std::mutex compilation_mutex_;
954  static std::mutex kernel_mutex_;
955 
956  friend class BaselineJoinHashTable;
957  friend class CodeGenerator;
958  friend class ColumnFetcher;
959  friend class ExecutionKernel;
960  friend class OverlapsJoinHashTable;
961  friend class GroupByAndAggregate;
963  friend class QueryMemoryDescriptor;
966  friend class QueryExecutionContext;
967  friend class ResultSet;
968  friend class InValuesBitmap;
969  friend class JoinHashTable;
970  friend class LeafAggregator;
971  friend class QueryRewriter;
972  friend class PendingExecutionClosure;
973  friend class RelAlgExecutor;
974  friend class TableOptimizer;
978  friend struct TargetExprCodegen;
980 };
981 
982 inline std::string get_null_check_suffix(const SQLTypeInfo& lhs_ti,
983  const SQLTypeInfo& rhs_ti) {
984  if (lhs_ti.get_notnull() && rhs_ti.get_notnull()) {
985  return "";
986  }
987  std::string null_check_suffix{"_nullable"};
988  if (lhs_ti.get_notnull()) {
989  CHECK(!rhs_ti.get_notnull());
990  null_check_suffix += "_rhs";
991  } else if (rhs_ti.get_notnull()) {
992  CHECK(!lhs_ti.get_notnull());
993  null_check_suffix += "_lhs";
994  }
995  return null_check_suffix;
996 }
997 
998 inline bool is_unnest(const Analyzer::Expr* expr) {
999  return dynamic_cast<const Analyzer::UOper*>(expr) &&
1000  static_cast<const Analyzer::UOper*>(expr)->get_optype() == kUNNEST;
1001 }
1002 
1003 bool is_trivial_loop_join(const std::vector<InputTableInfo>& query_infos,
1004  const RelAlgExecutionUnit& ra_exe_unit);
1005 
1006 std::unordered_set<int> get_available_gpus(const Catalog_Namespace::Catalog& cat);
1007 
1008 size_t get_context_count(const ExecutorDeviceType device_type,
1009  const size_t cpu_count,
1010  const size_t gpu_count);
1011 
1012 extern "C" void register_buffer_with_executor_rsm(int64_t exec, int8_t* buffer);
1013 
1015 
1016 #endif // QUERYENGINE_EXECUTE_H
void read_rt_udf_gpu_module(const std::string &udf_ir)
const std::string debug_dir_
Definition: Execute.h:898
llvm::Value * translated_value
Definition: Execute.h:766
bool is_agg(const Analyzer::Expr *expr)
static mapd_shared_mutex executor_session_mutex_
Definition: Execute.h:918
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::ostream & operator<<(std::ostream &, FetchResult const &)
Definition: Execute.cpp:2292
static mapd_shared_mutex execute_mutex_
Definition: Execute.h:928
bool is_time() const
Definition: sqltypes.h:422
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
Definition: Execute.h:886
bool is_string() const
Definition: sqltypes.h:416
bool is_boolean() const
Definition: sqltypes.h:423
const int8_t const int64_t * num_rows
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:86
std::shared_ptr< StringDictionaryProxy > lit_str_dict_proxy_
Definition: Execute.h:883
FetchCacheAnchor(CgenState *cgen_state)
Definition: Execute.h:861
StringDictionaryProxy * getLiteralDictionary() const override
Definition: Execute.h:264
bool is_integer() const
Definition: sqltypes.h:418
ExecutorDeviceType
void read_rt_udf_cpu_module(const std::string &udf_ir)
Fragmenter_Namespace::RowDataProvider RowDataProvider
Definition: Execute.h:250
decltype(FragmentInfoType::physicalTableId) const getPhysicalTableId() const
Definition: Execute.h:270
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > >> ColumnCacheMap
static std::atomic_flag execute_spin_lock_
Definition: Execute.h:924
static mapd_shared_mutex executors_cache_mutex_
Definition: Execute.h:929
bool is_udf_module_present(bool cpu_only=false)
bool containsLeftDeepOuterJoin() const
Definition: Execute.h:338
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:165
void read_udf_cpu_module(const std::string &udf_ir_filename)
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:819
void read_udf_gpu_module(const std::string &udf_ir_filename)
Definition: sqldefs.h:49
std::vector< std::string > CodeCacheKey
Definition: CodeCache.h:25
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
Definition: Execute.h:130
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
const ColumnarResults * rows_to_columnar_results(std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const PtrTy &result, const int number)
Definition: Execute.h:180
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
Definition: ColumnIR.cpp:543
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:915
std::shared_ptr< ResultSet > ResultSetPtr
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:857
llvm::Value * original_value
Definition: Execute.h:767
std::vector< Analyzer::Expr * > get_exprs_not_owned(const std::vector< std::shared_ptr< Analyzer::Expr >> &exprs)
Definition: Execute.h:192
static uint32_t gpu_active_modules_device_mask_
Definition: Execute.h:879
bool is_timeinterval() const
Definition: sqltypes.h:427
bool isArchPascalOrLater(const ExecutorDeviceType dt) const
Definition: Execute.h:438
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
static std::mutex kernel_mutex_
Definition: Execute.h:954
#define CHECK_GT(x, y)
Definition: Logger.h:209
Container for compilation results and assorted options for a single execution unit.
bool is_decimal() const
Definition: sqltypes.h:419
std::unique_ptr< WindowProjectNodeContext > window_project_node_context_owned_
Definition: Execute.h:910
std::vector< FragmentsPerTable > FragmentsList
std::string to_string(char const *&&v)
std::function< void(const UpdateLogForFragment &)> Callback
Definition: Execute.h:279
#define LOG_IF(severity, condition)
Definition: Logger.h:287
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const Executor *executor)
Definition: ResultSet.cpp:104
CodeCache gpu_code_cache_
Definition: Execute.h:889
int get_logical_size() const
Definition: sqltypes.h:270
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
const ExecutorId executor_id_
Definition: Execute.h:901
const ResultSetPtr & get_temporary_table(const TemporaryTables *temporary_tables, const int table_id)
Definition: Execute.h:157
const size_t max_gpu_slab_size_
Definition: Execute.h:897
decltype(FragmentInfoType::fragmentId) const getFragmentId() const
Definition: Execute.h:273
std::string cat(Ts &&... args)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:95
std::unordered_set< int > get_available_gpus(const Catalog_Namespace::Catalog &cat)
Definition: Execute.cpp:995
std::map< std::string, bool > InterruptFlagMap
Definition: Execute.h:77
std::vector< llvm::Value * > LLVMValueVector
Definition: Execute.h:289
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:149
std::mutex str_dict_mutex_
Definition: Execute.h:884
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:902
#define INJECT_TIMER(DESC)
Definition: measure.h:91
std::shared_timed_mutex mapd_shared_mutex
const std::string debug_file_
Definition: Execute.h:899
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:873
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:78
void setColRangeCache(const AggregatedColRange &aggregated_col_range)
Definition: Execute.h:808
static std::unordered_map< std::string, size_t > cardinality_cache_
Definition: Execute.h:934
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:921
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:872
size_t fragment_index_
Definition: Execute.h:285
std::pair< bool, size_t > CachedCardinality
Definition: Execute.h:837
const unsigned block_size_x_
Definition: Execute.h:895
const unsigned grid_size_x_
Definition: Execute.h:896
specifies the content in-memory of a row in the column metadata table
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:923
std::string get_null_check_suffix(const SQLTypeInfo &lhs_ti, const SQLTypeInfo &rhs_ti)
Definition: Execute.h:982
std::shared_ptr< JoinHashTableInterface > hash_table
Definition: Execute.h:734
#define CHECK_LT(x, y)
Definition: Logger.h:207
static std::string current_query_session_
Definition: Execute.h:919
std::shared_ptr< ResultSet > rs_
Definition: Execute.h:286
std::unique_ptr< QueryCompilationDescriptor > QueryCompilationDescriptorOwned
Definition: Execute.h:74
size_t ExecutorId
Definition: Execute.h:302
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:1007
const ColumnarResults * columnarize_result(std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSetPtr &result, const int frag_id)
Definition: Execute.h:201
StringDictionaryGenerations string_dictionary_generations_
Definition: Execute.h:916
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:914
CodeCache cpu_code_cache_
Definition: Execute.h:888
const int64_t const uint32_t const uint32_t const uint32_t const bool const bool const int32_t frag_idx
FragmentInfoType const & fragment_info_
Definition: Execute.h:284
TableGenerations table_generations_
Definition: Execute.h:917
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:453
mapd_shared_lock< mapd_shared_mutex > read_lock
static std::atomic< bool > interrupted_
Definition: Execute.h:881
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
const int8_t * literals
bool is_trivial_loop_join(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1115
static std::mutex compilation_mutex_
Definition: Execute.h:953
Executor(const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:129
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:105
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:138
mapd_unique_lock< mapd_shared_mutex > write_lock
SQLTypeInfo columnType
bool is_unnest(const Analyzer::Expr *expr)
Definition: Execute.h:998
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...
std::unordered_map< int, std::vector< llvm::Value * > > saved_fetch_cache
Definition: Execute.h:867
CgenState * cgen_state_
Definition: Execute.h:866
static size_t align(const size_t off_in, const size_t alignment)
Definition: Execute.h:849
std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptorOwned
Definition: Execute.h:76
Descriptor for the fragments required for an execution kernel.
bool is_rt_udf_module_present(bool cpu_only=false)
static mapd_shared_mutex recycler_mutex_
Definition: Execute.h:933
void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
auto getResultSet() const
Definition: Execute.h:281
static std::mutex gpu_active_modules_mutex_
Definition: Execute.h:878
SQLOps get_optype() const
Definition: Analyzer.h:371
static void nukeCacheOfExecutors()
Definition: Execute.h:318
const ColumnDescriptor * get_column_descriptor(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:120
bool is_fp() const
Definition: sqltypes.h:420
const TemporaryTables * temporary_tables_
Definition: Execute.h:903
CompilationRetryNewScanLimit(const size_t new_scan_limit)
Definition: Execute.h:218
WatchdogException(const std::string &cause)
Definition: Execute.h:90