OmniSciDB  a575cb28ea
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Execute.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_EXECUTE_H
18 #define QUERYENGINE_EXECUTE_H
19 
20 #include "AggregatedColRange.h"
21 #include "BufferCompaction.h"
22 #include "CartesianProduct.h"
23 #include "CgenState.h"
24 #include "CodeCache.h"
25 #include "DateTimeUtils.h"
27 #include "ExecutionKernel.h"
28 #include "GpuSharedMemoryContext.h"
29 #include "GroupByAndAggregate.h"
31 #include "NvidiaKernel.h"
32 #include "PlanState.h"
33 #include "RelAlgExecutionUnit.h"
34 #include "RelAlgTranslator.h"
36 #include "TableGenerations.h"
37 #include "TargetMetaInfo.h"
38 #include "WindowContext.h"
39 
42 
43 #include "../Logger/Logger.h"
44 #include "../Shared/SystemParameters.h"
45 #include "../Shared/mapd_shared_mutex.h"
46 #include "../Shared/measure.h"
47 #include "../Shared/thread_count.h"
48 #include "../Shared/toString.h"
49 #include "../StringDictionary/LruCache.hpp"
50 #include "../StringDictionary/StringDictionary.h"
51 #include "../StringDictionary/StringDictionaryProxy.h"
52 #include "DataMgr/Chunk/Chunk.h"
54 
55 #include <llvm/IR/Function.h>
56 #include <llvm/IR/Value.h>
57 #include <llvm/Linker/Linker.h>
58 #include <llvm/Transforms/Utils/ValueMapper.h>
59 #include <rapidjson/document.h>
60 
61 #include <algorithm>
62 #include <atomic>
63 #include <condition_variable>
64 #include <cstddef>
65 #include <cstdlib>
66 #include <deque>
67 #include <functional>
68 #include <limits>
69 #include <map>
70 #include <mutex>
71 #include <stack>
72 #include <unordered_map>
73 #include <unordered_set>
74 
75 using QueryCompilationDescriptorOwned = std::unique_ptr<QueryCompilationDescriptor>;
77 using QueryMemoryDescriptorOwned = std::unique_ptr<QueryMemoryDescriptor>;
78 using QuerySessionId = std::string;
79 using CurrentQueryStatus = std::pair<QuerySessionId, std::string>;
80 using InterruptFlagMap = std::map<QuerySessionId, bool>;
82  // A class that is used to describe the query session's info
83  public:
85 
87  const QuerySessionId& query_session,
88  const std::string& query_str,
89  const std::chrono::time_point<std::chrono::system_clock> submitted_time)
90  : query_session_(query_session)
91  , executor_id_(0)
92  , query_str_(query_str)
93  , submitted_time_(submitted_time)
96  const QuerySessionId& query_session,
97  const size_t executor_id,
98  const std::string& query_str,
99  const std::chrono::time_point<std::chrono::system_clock> submitted_time)
100  : query_session_(query_session)
101  , executor_id_(executor_id)
102  , query_str_(query_str)
103  , submitted_time_(submitted_time)
106  const QuerySessionId& query_session,
107  const size_t executor_id,
108  const std::string& query_str,
109  const std::chrono::time_point<std::chrono::system_clock> submitted_time,
110  const QuerySessionStatus::QueryStatus& query_status)
111  : query_session_(query_session)
112  , executor_id_(executor_id)
113  , query_str_(query_str)
114  , submitted_time_(submitted_time)
115  , query_status_(query_status) {}
116 
118  const std::string getQueryStr() { return query_str_; }
119  const size_t getExecutorId() { return executor_id_; }
120  const std::chrono::time_point<std::chrono::system_clock> getQuerySubmittedTime() {
121  return submitted_time_;
122  }
125  query_status_ = status;
126  }
127  void setExecutorId(const size_t executor_id) { executor_id_ = executor_id; }
129  query_status_ = QuerySessionStatus::QueryStatus::RUNNING;
130  }
131 
132  private:
134  size_t executor_id_;
135  const std::string query_str_;
136  const std::chrono::time_point<std::chrono::system_clock> submitted_time_;
137  // Currently we use three query statuses:
138  // 1) PENDING_IN_QUEUE: a task is submitted to the dispatch_queue but hangs due to no
139  // existing worker (= executor). 2) PENDING_IN_EXECUTOR: a task is assigned to the
140  // specific executor but waits to get the resource to run. 3) RUNNING: a task is assigned
141  // to the specific executor and its execution has been successfully started.
143 };
144 using QuerySessionMap =
145  std::map<const QuerySessionId, std::map<std::string, QuerySessionStatus>>;
146 extern void read_udf_gpu_module(const std::string& udf_ir_filename);
147 extern void read_udf_cpu_module(const std::string& udf_ir_filename);
148 extern bool is_udf_module_present(bool cpu_only = false);
149 extern void read_rt_udf_gpu_module(const std::string& udf_ir);
150 extern void read_rt_udf_cpu_module(const std::string& udf_ir);
151 extern bool is_rt_udf_module_present(bool cpu_only = false);
152 
153 class ColumnFetcher;
154 
/// Runtime error whose what() text is the caller-supplied `cause`.
/// NOTE(review): presumably thrown by the query watchdog; the throw sites are not
/// visible in this header.
class WatchdogException : public std::runtime_error {
 public:
  /// @param cause Message forwarded unchanged to std::runtime_error.
  /// Marked explicit so a std::string is never silently converted into an exception.
  explicit WatchdogException(const std::string& cause) : std::runtime_error(cause) {}
};
159 
160 class Executor;
161 
162 inline llvm::Value* get_arg_by_name(llvm::Function* func, const std::string& name) {
163  for (auto& arg : func->args()) {
164  if (arg.getName() == name) {
165  return &arg;
166  }
167  }
168  CHECK(false);
169  return nullptr;
170 }
171 
/// Maps a byte width of 1, 2, 4 or 8 to its base-2 logarithm (0, 1, 2 or 3).
/// Any other width calls abort().
inline uint32_t log2_bytes(const uint32_t bytes) {
  if (bytes == 1) {
    return 0;
  }
  if (bytes == 2) {
    return 1;
  }
  if (bytes == 4) {
    return 2;
  }
  if (bytes == 8) {
    return 3;
  }
  // Unsupported width: same hard stop as an unmatched switch case.
  abort();
}
186 
188  const int col_id,
189  const int table_id,
191  CHECK_GT(table_id, 0);
192  const auto col_desc = cat.getMetadataForColumn(table_id, col_id);
193  CHECK(col_desc);
194  return col_desc;
195 }
196 
197 inline const Analyzer::Expr* extract_cast_arg(const Analyzer::Expr* expr) {
198  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr);
199  if (!cast_expr || cast_expr->get_optype() != kCAST) {
200  return expr;
201  }
202  return cast_expr->get_operand();
203 }
204 
205 inline std::string numeric_type_name(const SQLTypeInfo& ti) {
206  CHECK(ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
207  ti.is_fp() || (ti.is_string() && ti.get_compression() == kENCODING_DICT) ||
208  ti.is_timeinterval());
209  if (ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
210  ti.is_string() || ti.is_timeinterval()) {
211  return "int" + std::to_string(ti.get_logical_size() * 8) + "_t";
212  }
213  return ti.get_type() == kDOUBLE ? "double" : "float";
214 }
215 
217  const int col_id,
218  const int table_id,
220  CHECK(table_id);
221  return table_id > 0 ? get_column_descriptor(col_id, table_id, cat) : nullptr;
222 }
223 
224 inline const ResultSetPtr& get_temporary_table(const TemporaryTables* temporary_tables,
225  const int table_id) {
226  CHECK_LT(table_id, 0);
227  const auto it = temporary_tables->find(table_id);
228  CHECK(it != temporary_tables->end());
229  return it->second;
230 }
231 
232 inline const SQLTypeInfo get_column_type(const int col_id,
233  const int table_id,
234  const ColumnDescriptor* cd,
235  const TemporaryTables* temporary_tables) {
236  CHECK(cd || temporary_tables);
237  if (cd) {
238  CHECK_EQ(col_id, cd->columnId);
239  CHECK_EQ(table_id, cd->tableId);
240  return cd->columnType;
241  }
242  const auto& temp = get_temporary_table(temporary_tables, table_id);
243  return temp->getColType(col_id);
244 }
245 
246 template <typename PtrTy>
248  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
249  const PtrTy& result,
250  const int number) {
251  std::vector<SQLTypeInfo> col_types;
252  for (size_t i = 0; i < result->colCount(); ++i) {
253  col_types.push_back(get_logical_type_info(result->getColType(i)));
254  }
255  return new ColumnarResults(row_set_mem_owner, *result, number, col_types);
256 }
257 
258 // TODO(alex): Adjust interfaces downstream and make this not needed.
259 inline std::vector<Analyzer::Expr*> get_exprs_not_owned(
260  const std::vector<std::shared_ptr<Analyzer::Expr>>& exprs) {
261  std::vector<Analyzer::Expr*> exprs_not_owned;
262  for (const auto& expr : exprs) {
263  exprs_not_owned.push_back(expr.get());
264  }
265  return exprs_not_owned;
266 }
267 
269  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
270  const ResultSetPtr& result,
271  const int frag_id) {
273  CHECK_EQ(0, frag_id);
274  return rows_to_columnar_results(row_set_mem_owner, result, result->colCount());
275 }
276 
277 class CompilationRetryNoLazyFetch : public std::runtime_error {
278  public:
280  : std::runtime_error("Retry query compilation with no GPU lazy fetch.") {}
281 };
282 
283 class CompilationRetryNewScanLimit : public std::runtime_error {
284  public:
285  CompilationRetryNewScanLimit(const size_t new_scan_limit)
286  : std::runtime_error("Retry query compilation with new scan limit.")
287  , new_scan_limit_(new_scan_limit) {}
288 
290 };
291 
/// Runtime error with the fixed message "Too many literals in the query".
class TooManyLiterals : public std::runtime_error {
 public:
  TooManyLiterals() : std::runtime_error{"Too many literals in the query"} {}
};
296 
297 class CompilationRetryNoCompaction : public std::runtime_error {
298  public:
300  : std::runtime_error("Retry query compilation with no compaction.") {}
301 };
302 
/// Runtime error with the fixed message "Query must run in cpu mode.".
class QueryMustRunOnCpu : public std::runtime_error {
 public:
  QueryMustRunOnCpu() : std::runtime_error{"Query must run in cpu mode."} {}
};
307 
/// Runtime error whose what() text is the caller-supplied `message`.
/// NOTE(review): presumably raised when parsing LLVM IR fails; the throw sites are
/// not visible in this header.
class ParseIRError : public std::runtime_error {
 public:
  /// @param message Text forwarded unchanged to std::runtime_error.
  /// Taken by const reference to avoid copying the string at the call boundary, and
  /// explicit so a std::string is never silently converted into an exception.
  explicit ParseIRError(const std::string& message) : std::runtime_error(message) {}
};
312 
313 class SringConstInResultSet : public std::runtime_error {
314  public:
316  : std::runtime_error(
317  "NONE ENCODED String types are not supported as input result set.") {}
318 };
319 
320 class ExtensionFunction;
321 
323 using ColumnToFragmentsMap = std::map<const ColumnDescriptor*, std::set<int>>;
324 
326  public:
328 
329  UpdateLogForFragment(FragmentInfoType const& fragment_info,
330  size_t const,
331  const std::shared_ptr<ResultSet>& rs);
332 
333  std::vector<TargetValue> getEntryAt(const size_t index) const override;
334  std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const override;
335 
336  size_t const getRowCount() const override;
338  return rs_->getRowSetMemOwner()->getLiteralStringDictProxy();
339  }
340  size_t const getEntryCount() const override;
341  size_t const getFragmentIndex() const;
342  FragmentInfoType const& getFragmentInfo() const;
345  }
346  decltype(FragmentInfoType::fragmentId) const getFragmentId() const {
347  return fragment_info_.fragmentId;
348  }
349 
350  SQLTypeInfo getColumnType(const size_t col_idx) const;
351 
352  using Callback =
353  std::function<void(const UpdateLogForFragment&, ColumnToFragmentsMap&)>;
354 
355  auto getResultSet() const { return rs_; }
356 
357  private:
360  std::shared_ptr<ResultSet> rs_;
361 };
362 
363 using LLVMValueVector = std::vector<llvm::Value*>;
364 
366 
367 std::ostream& operator<<(std::ostream&, FetchResult const&);
368 
369 class Executor {
370  static_assert(sizeof(float) == 4 && sizeof(double) == 8,
371  "Host hardware not supported, unexpected size of float / double.");
372  static_assert(sizeof(time_t) == 8,
373  "Host hardware not supported, 64-bit time support is required.");
374 
375  public:
376  using ExecutorId = size_t;
377  static const ExecutorId UNITARY_EXECUTOR_ID = 0;
378 
379  Executor(const ExecutorId id,
380  const size_t block_size_x,
381  const size_t grid_size_x,
382  const size_t max_gpu_slab_size,
383  const std::string& debug_dir,
384  const std::string& debug_file);
385 
386  static std::shared_ptr<Executor> getExecutor(
387  const ExecutorId id,
388  const std::string& debug_dir = "",
389  const std::string& debug_file = "",
390  const SystemParameters system_parameters = SystemParameters());
391 
392  static void nukeCacheOfExecutors() {
393  mapd_unique_lock<mapd_shared_mutex> flush_lock(
394  execute_mutex_); // don't want native code to vanish while executing
395  mapd_unique_lock<mapd_shared_mutex> lock(executors_cache_mutex_);
396  (decltype(executors_){}).swap(executors_);
397  }
398 
399  static void clearMemory(const Data_Namespace::MemoryLevel memory_level);
400 
401  static size_t getArenaBlockSize();
402 
407 
412  const bool with_generation) const {
414  return getStringDictionaryProxy(dict_id, row_set_mem_owner_, with_generation);
415  }
416 
418  const int dictId,
419  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
420  const bool with_generation) const;
421 
422  bool isCPUOnly() const;
423 
424  bool isArchMaxwell(const ExecutorDeviceType dt) const;
425 
427  return cgen_state_->contains_left_deep_outer_join_;
428  }
429 
431 
433  int) const;
434 
435  const Catalog_Namespace::Catalog* getCatalog() const;
436  void setCatalog(const Catalog_Namespace::Catalog* catalog);
437 
438  const std::shared_ptr<RowSetMemoryOwner> getRowSetMemoryOwner() const;
439 
440  const TemporaryTables* getTemporaryTables() const;
441 
442  Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const;
443 
444  const TableGeneration& getTableGeneration(const int table_id) const;
445 
447 
448  size_t getNumBytesForFetchedRow(const std::set<int>& table_ids_to_fetch) const;
449 
450  std::vector<ColumnLazyFetchInfo> getColLazyFetchInfo(
451  const std::vector<Analyzer::Expr*>& target_exprs) const;
452 
453  void registerActiveModule(void* module, const int device_id) const;
454  void unregisterActiveModule(void* module, const int device_id) const;
455  void interrupt(const std::string& query_session = "",
456  const std::string& interrupt_session = "");
457  void resetInterrupt();
458 
459  // only for testing usage
460  void enableRuntimeQueryInterrupt(const double runtime_query_check_freq,
461  const unsigned pending_query_check_freq) const;
462 
463  static const size_t high_scan_limit{32000000};
464 
465  int8_t warpSize() const;
466  unsigned gridSize() const;
467  unsigned numBlocksPerMP() const;
468  unsigned blockSize() const;
469  size_t maxGpuSlabSize() const;
470 
471  ResultSetPtr executeWorkUnit(size_t& max_groups_buffer_entry_guess,
472  const bool is_agg,
473  const std::vector<InputTableInfo>&,
474  const RelAlgExecutionUnit&,
475  const CompilationOptions&,
476  const ExecutionOptions& options,
478  RenderInfo* render_info,
479  const bool has_cardinality_estimation,
480  ColumnCacheMap& column_cache);
481 
482  void executeUpdate(const RelAlgExecutionUnit& ra_exe_unit,
483  const std::vector<InputTableInfo>& table_infos,
484  const CompilationOptions& co,
485  const ExecutionOptions& eo,
487  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
489  const bool is_agg);
490 
491  private:
492  void clearMetaInfoCache();
493 
494  int deviceCount(const ExecutorDeviceType) const;
495  int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const;
496 
497  // Generate code for a window function target.
498  llvm::Value* codegenWindowFunction(const size_t target_index,
499  const CompilationOptions& co);
500 
501  // Generate code for an aggregate window function target.
502  llvm::Value* codegenWindowFunctionAggregate(const CompilationOptions& co);
503 
504  // The aggregate state requires a state reset when starting a new partition. Generate
505  // the new partition check and return the continuation basic block.
506  llvm::BasicBlock* codegenWindowResetStateControlFlow();
507 
508  // Generate code for initializing the state of a window aggregate.
509  void codegenWindowFunctionStateInit(llvm::Value* aggregate_state);
510 
511  // Generates the required calls for an aggregate window function and returns the final
512  // result.
513  llvm::Value* codegenWindowFunctionAggregateCalls(llvm::Value* aggregate_state,
514  const CompilationOptions& co);
515 
516  // The AVG window function requires some post-processing: the sum is divided by count
517  // and the result is stored back for the current row.
518  void codegenWindowAvgEpilogue(llvm::Value* crt_val,
519  llvm::Value* window_func_null_val,
520  llvm::Value* multiplicity_lv);
521 
522  // Generates code which loads the current aggregate value for the window context.
523  llvm::Value* codegenAggregateWindowState();
524 
525  llvm::Value* aggregateWindowStatePtr();
526 
528  if (dt == ExecutorDeviceType::GPU) {
529  const auto cuda_mgr = catalog_->getDataMgr().getCudaMgr();
530  LOG_IF(FATAL, cuda_mgr == nullptr)
531  << "No CudaMgr instantiated, unable to check device architecture";
532  return cuda_mgr->isArchPascalOrLater();
533  }
534  return false;
535  }
536 
537  bool needFetchAllFragments(const InputColDescriptor& col_desc,
538  const RelAlgExecutionUnit& ra_exe_unit,
539  const FragmentsList& selected_fragments) const;
540 
541  using PerFragmentCallBack =
542  std::function<void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo&)>;
543 
549  void executeWorkUnitPerFragment(const RelAlgExecutionUnit& ra_exe_unit,
550  const InputTableInfo& table_info,
551  const CompilationOptions& co,
552  const ExecutionOptions& eo,
555  const std::set<int>& fragment_ids);
556 
558 
565  const std::vector<InputTableInfo>& table_infos,
566  const CompilationOptions& co,
567  const ExecutionOptions& eo,
569 
571  const RelAlgExecutionUnit& ra_exe_unit,
572  const ExecutorDeviceType requested_device_type);
573 
575  SharedKernelContext& shared_context,
576  const RelAlgExecutionUnit& ra_exe_unit,
578  const ExecutorDeviceType device_type,
579  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
580 
582  SharedKernelContext& shared_context,
583  const RelAlgExecutionUnit& ra_exe_unit) const;
584 
585  std::unordered_map<int, const Analyzer::BinOper*> getInnerTabIdToJoinCond() const;
586 
591  std::vector<std::unique_ptr<ExecutionKernel>> createKernels(
592  SharedKernelContext& shared_context,
593  const RelAlgExecutionUnit& ra_exe_unit,
594  ColumnFetcher& column_fetcher,
595  const std::vector<InputTableInfo>& table_infos,
596  const ExecutionOptions& eo,
597  const bool is_agg,
598  const bool allow_single_frag_table_opt,
599  const size_t context_count,
600  const QueryCompilationDescriptor& query_comp_desc,
602  RenderInfo* render_info,
603  std::unordered_set<int>& available_gpus,
604  int& available_cpus);
605 
610  template <typename THREAD_POOL>
611  void launchKernels(SharedKernelContext& shared_context,
612  std::vector<std::unique_ptr<ExecutionKernel>>&& kernels);
613 
614  std::vector<size_t> getTableFragmentIndices(
615  const RelAlgExecutionUnit& ra_exe_unit,
616  const ExecutorDeviceType device_type,
617  const size_t table_idx,
618  const size_t outer_frag_idx,
619  std::map<int, const TableFragments*>& selected_tables_fragments,
620  const std::unordered_map<int, const Analyzer::BinOper*>&
621  inner_table_id_to_join_condition);
622 
623  bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo& outer_fragment_info,
624  const Fragmenter_Namespace::FragmentInfo& inner_fragment_info,
625  const int inner_table_id,
626  const std::unordered_map<int, const Analyzer::BinOper*>&
627  inner_table_id_to_join_condition,
628  const RelAlgExecutionUnit& ra_exe_unit,
629  const ExecutorDeviceType device_type);
630 
632  const RelAlgExecutionUnit& ra_exe_unit,
633  const int device_id,
635  const std::map<int, const TableFragments*>&,
636  const FragmentsList& selected_fragments,
638  std::list<ChunkIter>&,
639  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
640  DeviceAllocator* device_allocator);
641 
643  const RelAlgExecutionUnit& ra_exe_unit,
644  const int device_id,
646  const std::map<int, const TableFragments*>&,
647  const FragmentsList& selected_fragments,
649  std::list<ChunkIter>&,
650  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
651  DeviceAllocator* device_allocator);
652 
653  std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
655  const RelAlgExecutionUnit& ra_exe_unit,
656  const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
657  const std::vector<InputDescriptor>& input_descs,
658  const std::map<int, const TableFragments*>& all_tables_fragments);
659 
661  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
662  std::vector<size_t>& local_col_to_frag_pos,
663  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
664  const FragmentsList& selected_fragments,
665  const RelAlgExecutionUnit& ra_exe_unit);
666 
668  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
669  std::vector<size_t>& local_col_to_frag_pos,
670  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
671  const FragmentsList& selected_fragments,
672  const RelAlgExecutionUnit& ra_exe_unit);
673 
674  std::vector<size_t> getFragmentCount(const FragmentsList& selected_fragments,
675  const size_t scan_idx,
676  const RelAlgExecutionUnit& ra_exe_unit);
677 
678  int32_t executePlanWithGroupBy(const RelAlgExecutionUnit& ra_exe_unit,
679  const CompilationResult&,
680  const bool hoist_literals,
681  ResultSetPtr& results,
682  const ExecutorDeviceType device_type,
683  std::vector<std::vector<const int8_t*>>& col_buffers,
684  const std::vector<size_t> outer_tab_frag_ids,
686  const std::vector<std::vector<int64_t>>& num_rows,
687  const std::vector<std::vector<uint64_t>>& frag_offsets,
689  const int device_id,
690  const int outer_table_id,
691  const int64_t limit,
692  const uint32_t start_rowid,
693  const uint32_t num_tables,
694  RenderInfo* render_info);
696  const RelAlgExecutionUnit& ra_exe_unit,
697  const CompilationResult&,
698  const bool hoist_literals,
699  ResultSetPtr& results,
700  const std::vector<Analyzer::Expr*>& target_exprs,
701  const ExecutorDeviceType device_type,
702  std::vector<std::vector<const int8_t*>>& col_buffers,
703  QueryExecutionContext* query_exe_context,
704  const std::vector<std::vector<int64_t>>& num_rows,
705  const std::vector<std::vector<uint64_t>>& frag_offsets,
706  Data_Namespace::DataMgr* data_mgr,
707  const int device_id,
708  const uint32_t start_rowid,
709  const uint32_t num_tables,
710  RenderInfo* render_info);
711 
712  public: // Temporary, ask saman about this
713  static std::pair<int64_t, int32_t> reduceResults(const SQLAgg agg,
714  const SQLTypeInfo& ti,
715  const int64_t agg_init_val,
716  const int8_t out_byte_width,
717  const int64_t* out_vec,
718  const size_t out_vec_sz,
719  const bool is_group_by,
720  const bool float_argument_input);
721 
722  static void addCodeToCache(const CodeCacheKey&,
723  std::shared_ptr<CompilationContext>,
724  llvm::Module*,
725  CodeCache&);
726 
727  private:
729  const RelAlgExecutionUnit& ra_exe_unit);
730  std::vector<int64_t> getJoinHashTablePtrs(const ExecutorDeviceType device_type,
731  const int device_id);
733  const RelAlgExecutionUnit&,
734  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
735  std::shared_ptr<RowSetMemoryOwner>,
736  const QueryMemoryDescriptor&) const;
738  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
739  std::shared_ptr<RowSetMemoryOwner>,
740  const QueryMemoryDescriptor&) const;
742  const RelAlgExecutionUnit&,
743  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
744  std::shared_ptr<RowSetMemoryOwner>,
745  const QueryMemoryDescriptor&) const;
746 
747  ResultSetPtr executeWorkUnitImpl(size_t& max_groups_buffer_entry_guess,
748  const bool is_agg,
749  const bool allow_single_frag_table_opt,
750  const std::vector<InputTableInfo>&,
751  const RelAlgExecutionUnit&,
752  const CompilationOptions&,
753  const ExecutionOptions& options,
755  std::shared_ptr<RowSetMemoryOwner>,
756  RenderInfo* render_info,
757  const bool has_cardinality_estimation,
758  ColumnCacheMap& column_cache);
759 
760  std::vector<llvm::Value*> inlineHoistedLiterals();
761 
762  std::tuple<CompilationResult, std::unique_ptr<QueryMemoryDescriptor>> compileWorkUnit(
763  const std::vector<InputTableInfo>& query_infos,
764  const PlanState::DeletedColumnsMap& deleted_cols_map,
765  const RelAlgExecutionUnit& ra_exe_unit,
766  const CompilationOptions& co,
767  const ExecutionOptions& eo,
768  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
769  const bool allow_lazy_fetch,
770  std::shared_ptr<RowSetMemoryOwner>,
771  const size_t max_groups_buffer_entry_count,
772  const int8_t crt_min_byte_width,
773  const bool has_cardinality_estimation,
774  ColumnCacheMap& column_cache,
775  RenderInfo* render_info = nullptr);
776  // Generate code to skip the deleted rows in the outermost table.
777  llvm::BasicBlock* codegenSkipDeletedOuterTableRow(
778  const RelAlgExecutionUnit& ra_exe_unit,
779  const CompilationOptions& co);
780  std::vector<JoinLoop> buildJoinLoops(RelAlgExecutionUnit& ra_exe_unit,
781  const CompilationOptions& co,
782  const ExecutionOptions& eo,
783  const std::vector<InputTableInfo>& query_infos,
784  ColumnCacheMap& column_cache);
785  // Create a callback which generates code which returns true iff the row on the given
786  // level is deleted.
787  std::function<llvm::Value*(const std::vector<llvm::Value*>&, llvm::Value*)>
788  buildIsDeletedCb(const RelAlgExecutionUnit& ra_exe_unit,
789  const size_t level_idx,
790  const CompilationOptions& co);
791  // Builds a join hash table for the provided conditions on the current level.
792  // Returns null on failure and provides the reasons in `fail_reasons`.
793  std::shared_ptr<HashJoin> buildCurrentLevelHashTable(
794  const JoinCondition& current_level_join_conditions,
795  RelAlgExecutionUnit& ra_exe_unit,
796  const CompilationOptions& co,
797  const std::vector<InputTableInfo>& query_infos,
798  ColumnCacheMap& column_cache,
799  std::vector<std::string>& fail_reasons);
801  llvm::Value* addJoinLoopIterator(const std::vector<llvm::Value*>& prev_iters,
802  const size_t level_idx);
803  void codegenJoinLoops(const std::vector<JoinLoop>& join_loops,
804  const RelAlgExecutionUnit& ra_exe_unit,
805  GroupByAndAggregate& group_by_and_aggregate,
806  llvm::Function* query_func,
807  llvm::BasicBlock* entry_bb,
809  const CompilationOptions& co,
810  const ExecutionOptions& eo);
811  bool compileBody(const RelAlgExecutionUnit& ra_exe_unit,
812  GroupByAndAggregate& group_by_and_aggregate,
814  const CompilationOptions& co,
815  const GpuSharedMemoryContext& gpu_smem_context = {});
816 
817  void createErrorCheckControlFlow(llvm::Function* query_func,
818  bool run_with_dynamic_watchdog,
819  bool run_with_allowing_runtime_interrupt,
820  ExecutorDeviceType device_type,
821  const std::vector<InputTableInfo>& input_table_infos);
822 
823  void insertErrorCodeChecker(llvm::Function* query_func, bool hoist_literals);
824 
825  void preloadFragOffsets(const std::vector<InputDescriptor>& input_descs,
826  const std::vector<InputTableInfo>& query_infos);
827 
829  std::shared_ptr<HashJoin> hash_table;
830  std::string fail_reason;
831  };
832 
834  const std::shared_ptr<Analyzer::BinOper>& qual_bin_oper,
835  const std::vector<InputTableInfo>& query_infos,
836  const MemoryLevel memory_level,
837  const HashType preferred_hash_type,
838  ColumnCacheMap& column_cache,
839  const QueryHint& query_hint);
840  void nukeOldState(const bool allow_lazy_fetch,
841  const std::vector<InputTableInfo>& query_infos,
842  const PlanState::DeletedColumnsMap& deleted_cols_map,
843  const RelAlgExecutionUnit* ra_exe_unit);
844 
845  std::shared_ptr<CompilationContext> optimizeAndCodegenCPU(
846  llvm::Function*,
847  llvm::Function*,
848  const std::unordered_set<llvm::Function*>&,
849  const CompilationOptions&);
850  std::shared_ptr<CompilationContext> optimizeAndCodegenGPU(
851  llvm::Function*,
852  llvm::Function*,
853  std::unordered_set<llvm::Function*>&,
854  const bool no_inline,
855  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
856  const CompilationOptions&);
857  std::string generatePTX(const std::string&) const;
858  void initializeNVPTXBackend() const;
859 
860  int64_t deviceCycles(int milliseconds) const;
861 
863  llvm::Value* translated_value;
864  llvm::Value* original_value;
865  };
866 
868  const size_t col_width,
869  const CompilationOptions&,
870  const bool translate_null_val,
871  const int64_t translated_null_val,
873  std::stack<llvm::BasicBlock*>&,
874  const bool thread_mem_shared);
875 
876  llvm::Value* castToFP(llvm::Value*,
877  SQLTypeInfo const& from_ti,
878  SQLTypeInfo const& to_ti);
879  llvm::Value* castToIntPtrTyIn(llvm::Value* val, const size_t bit_width);
880 
881  std::tuple<RelAlgExecutionUnit, PlanState::DeletedColumnsMap> addDeletedColumn(
882  const RelAlgExecutionUnit& ra_exe_unit,
883  const CompilationOptions& co);
884 
885  bool isFragmentFullyDeleted(const int table_id,
886  const Fragmenter_Namespace::FragmentInfo& fragment);
887 
888  std::pair<bool, int64_t> skipFragment(
889  const InputDescriptor& table_desc,
890  const Fragmenter_Namespace::FragmentInfo& frag_info,
891  const std::list<std::shared_ptr<Analyzer::Expr>>& simple_quals,
892  const std::vector<uint64_t>& frag_offsets,
893  const size_t frag_idx);
894 
895  std::pair<bool, int64_t> skipFragmentInnerJoins(
896  const InputDescriptor& table_desc,
897  const RelAlgExecutionUnit& ra_exe_unit,
898  const Fragmenter_Namespace::FragmentInfo& fragment,
899  const std::vector<uint64_t>& frag_offsets,
900  const size_t frag_idx);
901 
903  const std::unordered_set<PhysicalInput>& phys_inputs);
905  const std::unordered_set<PhysicalInput>& phys_inputs);
906  TableGenerations computeTableGenerations(std::unordered_set<int> phys_table_ids);
907 
908  public:
909  void setupCaching(const std::unordered_set<PhysicalInput>& phys_inputs,
910  const std::unordered_set<int>& phys_table_ids);
911  void setColRangeCache(const AggregatedColRange& aggregated_col_range) {
912  agg_col_range_cache_ = aggregated_col_range;
913  }
914 
915  QuerySessionId& getCurrentQuerySession(mapd_shared_lock<mapd_shared_mutex>& read_lock);
916  size_t getRunningExecutorId(mapd_shared_lock<mapd_shared_mutex>& read_lock);
917  bool checkCurrentQuerySession(const std::string& candidate_query_session,
918  mapd_shared_lock<mapd_shared_mutex>& read_lock);
919  void invalidateRunningQuerySession(mapd_unique_lock<mapd_shared_mutex>& write_lock);
921  const QuerySessionId& query_session,
922  const std::string& query_str,
923  const std::chrono::time_point<std::chrono::system_clock> submitted,
924  const size_t executor_id,
925  const QuerySessionStatus::QueryStatus query_status,
926  mapd_unique_lock<mapd_shared_mutex>& write_lock);
928  const QuerySessionId& query_session,
929  const std::chrono::time_point<std::chrono::system_clock> submitted,
930  mapd_unique_lock<mapd_shared_mutex>& write_lock);
931  void setQuerySessionAsInterrupted(const QuerySessionId& query_session,
932  mapd_unique_lock<mapd_shared_mutex>& write_lock);
933  bool checkIsQuerySessionInterrupted(const QuerySessionId& query_session,
934  mapd_shared_lock<mapd_shared_mutex>& read_lock);
935  bool checkIsQuerySessionEnrolled(const QuerySessionId& query_session,
936  mapd_shared_lock<mapd_shared_mutex>& read_lock);
938  const QuerySessionId& query_session,
939  const std::chrono::time_point<std::chrono::system_clock> submitted,
940  const QuerySessionStatus::QueryStatus updated_query_status,
941  mapd_unique_lock<mapd_shared_mutex>& write_lock);
943  const QuerySessionId& query_session,
944  const std::chrono::time_point<std::chrono::system_clock> submitted,
945  const size_t executor_id,
946  mapd_unique_lock<mapd_shared_mutex>& write_lock);
947  std::vector<QuerySessionStatus> getQuerySessionInfo(
948  const QuerySessionId& query_session,
949  mapd_shared_lock<mapd_shared_mutex>& read_lock);
950 
953  std::shared_ptr<const query_state::QueryState>& query_state);
954  void checkPendingQueryStatus(const QuerySessionId& query_session);
956  const QuerySessionId& query_session,
957  const std::chrono::time_point<std::chrono::system_clock> submitted,
958  bool acquire_spin_lock);
960  std::shared_ptr<const query_state::QueryState>& query_state,
961  const QuerySessionStatus::QueryStatus new_query_status);
963  const QuerySessionId& query_session,
964  const std::chrono::time_point<std::chrono::system_clock> submitted,
965  const QuerySessionStatus::QueryStatus new_query_status);
966  void enrollQuerySession(
967  const QuerySessionId& query_session,
968  const std::string& query_str,
969  const std::chrono::time_point<std::chrono::system_clock> submitted,
970  const size_t executor_id,
971  const QuerySessionStatus::QueryStatus query_session_status);
972 
973  // true when we have matched cardinality, and false otherwise
974  using CachedCardinality = std::pair<bool, size_t>;
975  void addToCardinalityCache(const std::string& cache_key, const size_t cache_value);
976  CachedCardinality getCachedCardinality(const std::string& cache_key);
977 
978  private:
979  std::shared_ptr<CompilationContext> getCodeFromCache(const CodeCacheKey&,
980  const CodeCache&);
981 
982  std::vector<int8_t> serializeLiterals(
983  const std::unordered_map<int, CgenState::LiteralValues>& literals,
984  const int device_id);
985 
/**
 * Rounds an offset up to the next multiple of an alignment.
 *
 * @param off_in     offset to align
 * @param alignment  required alignment; it does not have to be a power of two
 * @return the smallest multiple of alignment that is >= off_in, or off_in
 *         unchanged when alignment is 0 (there is nothing to align to)
 */
static size_t align(const size_t off_in, const size_t alignment) {
  // A zero alignment would make the modulo below undefined behaviour, so treat
  // it as "no alignment requested".
  if (alignment == 0) {
    return off_in;
  }
  const size_t remainder = off_in % alignment;
  // Already aligned offsets are returned as-is; otherwise pad up to the next
  // multiple.
  return remainder == 0 ? off_in : off_in + (alignment - remainder);
}
993 
994  std::unique_ptr<CgenState> cgen_state_;
995 
997  public:
999  : cgen_state_(cgen_state), saved_fetch_cache(cgen_state_->fetch_cache_) {}
1001 
1002  private:
1004  std::unordered_map<int, std::vector<llvm::Value*>> saved_fetch_cache;
1005  };
1006 
1007  llvm::Value* spillDoubleElement(llvm::Value* elem_val, llvm::Type* elem_ty);
1008 
1009  std::unique_ptr<PlanState> plan_state_;
1010  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
1011 
1012  static const int max_gpu_count{16};
1014 
1015  static std::mutex gpu_active_modules_mutex_;
1018  static std::atomic<bool> interrupted_;
1019 
1020  mutable std::mutex str_dict_mutex_;
1021 
1022  mutable std::unique_ptr<llvm::TargetMachine> nvptx_target_machine_;
1023 
1026 
1027  static const size_t baseline_threshold{
1028  1000000}; // if a perfect hash needs more entries, use baseline
1029  static const size_t code_cache_size{1000};
1030 
1031  const unsigned block_size_x_;
1032  const unsigned grid_size_x_;
1033  const size_t max_gpu_slab_size_;
1034  const std::string debug_dir_;
1035  const std::string debug_file_;
1036 
1040 
1043 
1044  // Singleton instance used for an execution unit which is a project with window
1045  // functions.
1046  std::unique_ptr<WindowProjectNodeContext> window_project_node_context_owned_;
1047  // The active window function.
1049 
1054  // a query session that currently is running
1056  // an executor's id that executes the running query
1058  // a pair of <QuerySessionId, interrupted_flag>
1060  // a pair of <QuerySessionId, query_session_status>
1062 
1063  static std::map<int, std::shared_ptr<Executor>> executors_;
1064  static std::atomic_flag execute_spin_lock_;
1065 
1066  // SQL queries take a shared lock; exclusive operations (cache clear, memory clear)
1067  // take a write lock.
1069 
1071  mapd_shared_lock<mapd_shared_mutex> shared_lock;
1072  mapd_unique_lock<mapd_shared_mutex> unique_lock;
1073  };
1075  ExecutorMutexHolder ret;
1077  // Only one unitary executor can run at a time
1078  ret.unique_lock = mapd_unique_lock<mapd_shared_mutex>(execute_mutex_);
1079  } else {
1080  ret.shared_lock = mapd_shared_lock<mapd_shared_mutex>(execute_mutex_);
1081  }
1082  return ret;
1083  }
1084 
1086 
1087  // For now recycler_mutex is used only for cardinality_cache_; its coverage will
1088  // be expanded to other caches that are useful for query execution.
1090  static std::unordered_map<std::string, size_t> cardinality_cache_;
1091 
1092  public:
1093  static const int32_t ERR_DIV_BY_ZERO{1};
1094  static const int32_t ERR_OUT_OF_GPU_MEM{2};
1095  static const int32_t ERR_OUT_OF_SLOTS{3};
1096  static const int32_t ERR_UNSUPPORTED_SELF_JOIN{4};
1097  static const int32_t ERR_OUT_OF_RENDER_MEM{5};
1098  static const int32_t ERR_OUT_OF_CPU_MEM{6};
1099  static const int32_t ERR_OVERFLOW_OR_UNDERFLOW{7};
1100  static const int32_t ERR_OUT_OF_TIME{9};
1101  static const int32_t ERR_INTERRUPTED{10};
1102  static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED{11};
1103  static const int32_t ERR_TOO_MANY_LITERALS{12};
1104  static const int32_t ERR_STRING_CONST_IN_RESULTSET{13};
1106  static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES{15};
1107  static const int32_t ERR_GEOS{16};
1108 
1109  static std::mutex compilation_mutex_;
1110  static std::mutex kernel_mutex_;
1111 
1113  friend class CodeGenerator;
1114  friend class ColumnFetcher;
1115  friend class ExecutionKernel;
1116  friend class HashJoin; // cgen_state_
1118  friend class GroupByAndAggregate;
1124  friend class ResultSet;
1125  friend class InValuesBitmap;
1126  friend class LeafAggregator;
1127  friend class PerfectJoinHashTable;
1128  friend class QueryRewriter;
1130  friend class RelAlgExecutor;
1131  friend class TableOptimizer;
1135  friend struct TargetExprCodegen;
1137 };
1138 
1139 inline std::string get_null_check_suffix(const SQLTypeInfo& lhs_ti,
1140  const SQLTypeInfo& rhs_ti) {
1141  if (lhs_ti.get_notnull() && rhs_ti.get_notnull()) {
1142  return "";
1143  }
1144  std::string null_check_suffix{"_nullable"};
1145  if (lhs_ti.get_notnull()) {
1146  CHECK(!rhs_ti.get_notnull());
1147  null_check_suffix += "_rhs";
1148  } else if (rhs_ti.get_notnull()) {
1149  CHECK(!lhs_ti.get_notnull());
1150  null_check_suffix += "_lhs";
1151  }
1152  return null_check_suffix;
1153 }
1154 
1155 inline bool is_unnest(const Analyzer::Expr* expr) {
1156  return dynamic_cast<const Analyzer::UOper*>(expr) &&
1157  static_cast<const Analyzer::UOper*>(expr)->get_optype() == kUNNEST;
1158 }
1159 
1160 bool is_trivial_loop_join(const std::vector<InputTableInfo>& query_infos,
1161  const RelAlgExecutionUnit& ra_exe_unit);
1162 
1163 std::unordered_set<int> get_available_gpus(const Catalog_Namespace::Catalog& cat);
1164 
1165 size_t get_context_count(const ExecutorDeviceType device_type,
1166  const size_t cpu_count,
1167  const size_t gpu_count);
1168 
1169 extern "C" void register_buffer_with_executor_rsm(int64_t exec, int8_t* buffer);
1170 
1172 
1173 #endif // QUERYENGINE_EXECUTE_H
void read_rt_udf_gpu_module(const std::string &udf_ir)
SQLTypeInfo getColumnType(const size_t col_idx) const
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:206
const std::string debug_dir_
Definition: Execute.h:1034
llvm::Value * translated_value
Definition: Execute.h:863
bool is_agg(const Analyzer::Expr *expr)
static mapd_shared_mutex executor_session_mutex_
Definition: Execute.h:1053
AggregatedColRange computeColRangesCache(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:3590
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:490
void enableRuntimeQueryInterrupt(const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
Definition: Execute.cpp:3964
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< std::unique_ptr< ExecutionKernel > > createKernels(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
Definition: Execute.cpp:1998
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, const size_t executor_id, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:3879
static QuerySessionMap queries_session_map_
Definition: Execute.h:1061
static mapd_shared_mutex execute_mutex_
Definition: Execute.h:1068
FetchResult fetchUnionChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator)
Definition: Execute.cpp:2459
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
Definition: Execute.h:1022
CurrentQueryStatus attachExecutorToQuerySession(std::shared_ptr< const query_state::QueryState > &query_state)
Definition: Execute.cpp:3686
QuerySessionStatus(const QuerySessionId &query_session, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted_time)
Definition: Execute.h:86
int64_t kernel_queue_time_ms_
Definition: Execute.h:1041
size_t maxGpuSlabSize() const
Definition: Execute.cpp:3196
ExecutorMutexHolder acquireExecuteMutex()
Definition: Execute.h:1074
int64_t compilation_queue_time_ms_
Definition: Execute.h:1042
friend class ResultSet
Definition: Execute.h:1124
std::map< const ColumnDescriptor *, std::set< int >> ColumnToFragmentsMap
Definition: Execute.h:323
std::string cat(Ts &&...args)
void invalidateRunningQuerySession(mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:3680
void checkPendingQueryStatus(const QuerySessionId &query_session)
Definition: Execute.cpp:3713
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1101
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
Definition: ColumnIR.cpp:538
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:101
bool addToQuerySessionList(const QuerySessionId &query_session, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:3818
bool is_trivial_loop_join(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1148
FetchCacheAnchor(CgenState *cgen_state)
Definition: Execute.h:998
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
Definition: IRCodegen.cpp:654
const std::chrono::time_point< std::chrono::system_clock > getQuerySubmittedTime()
Definition: Execute.h:120
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:280
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const QueryHint &query_hint)
Definition: Execute.cpp:3129
StringDictionaryProxy * getLiteralDictionary() const override
Definition: Execute.h:337
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted_time, const QuerySessionStatus::QueryStatus &query_status)
Definition: Execute.h:105
std::vector< size_t > getTableFragmentIndices(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< int, const TableFragments * > &selected_tables_fragments, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
Definition: Execute.cpp:2154
ExecutorDeviceType
void clearQuerySessionStatus(const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, bool acquire_spin_lock)
Definition: Execute.cpp:3739
ResultSetPtr executeTableFunction(const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat)
Compiles and dispatches a table function; that is, a function that takes as input one or more columns...
Definition: Execute.cpp:1650
void read_rt_udf_cpu_module(const std::string &udf_ir)
Fragmenter_Namespace::RowDataProvider RowDataProvider
Definition: Execute.h:322
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:222
std::unordered_set< int > get_available_gpus(const Catalog_Namespace::Catalog &cat)
Definition: Execute.cpp:1028
static const int max_gpu_count
Definition: Execute.h:1012
std::map< const QuerySessionId, std::map< std::string, QuerySessionStatus >> QuerySessionMap
Definition: Execute.h:145
size_t const getFragmentIndex() const
static const size_t code_cache_size
Definition: Execute.h:1029
void setQueryStatusAsRunning()
Definition: Execute.h:128
bool isArchPascalOrLater(const ExecutorDeviceType dt) const
Definition: Execute.h:527
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:53
static std::atomic_flag execute_spin_lock_
Definition: Execute.h:1064
bool is_fp() const
Definition: sqltypes.h:482
std::pair< QuerySessionId, std::string > CurrentQueryStatus
Definition: Execute.h:79
static mapd_shared_mutex executors_cache_mutex_
Definition: Execute.h:1085
bool is_udf_module_present(bool cpu_only=false)
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:330
static const size_t baseline_threshold
Definition: Execute.h:1027
void registerActiveModule(void *module, const int device_id) const
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:3279
TableGenerations computeTableGenerations(std::unordered_set< int > phys_table_ids)
Definition: Execute.cpp:3637
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:232
llvm::Value * aggregateWindowStatePtr()
void read_udf_cpu_module(const std::string &udf_ir_filename)
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:893
static std::pair< int64_t, int32_t > reduceResults(const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
Definition: Execute.cpp:656
void read_udf_gpu_module(const std::string &udf_ir_filename)
Definition: sqldefs.h:49
std::vector< std::string > CodeCacheKey
Definition: CodeCache.h:25
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
Definition: Execute.h:197
bool checkCurrentQuerySession(const std::string &candidate_query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:3672
const ColumnarResults * rows_to_columnar_results(std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const PtrTy &result, const int number)
Definition: Execute.h:247
const QuerySessionStatus::QueryStatus getQueryStatus()
Definition: Execute.h:123
static const int32_t ERR_GEOS
Definition: Execute.h:1107
const std::string query_str_
Definition: Execute.h:135
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters system_parameters=SystemParameters())
Definition: Execute.cpp:157
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1051
std::shared_ptr< ResultSet > ResultSetPtr
static void * gpu_active_modules_[max_gpu_count]
Definition: Execute.h:1017
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:994
FetchResult fetchChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< int, const TableFragments * > &, const FragmentsList &selected_fragments, const Catalog_Namespace::Catalog &, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator)
Definition: Execute.cpp:2360
static const int32_t ERR_TOO_MANY_LITERALS
Definition: Execute.h:1103
llvm::Value * original_value
Definition: Execute.h:864
ParseIRError(const std::string message)
Definition: Execute.h:310
std::vector< Analyzer::Expr * > get_exprs_not_owned(const std::vector< std::shared_ptr< Analyzer::Expr >> &exprs)
Definition: Execute.h:259
static uint32_t gpu_active_modules_device_mask_
Definition: Execute.h:1016
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:311
void buildSelectedFragsMappingForUnion(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2654
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:639
llvm::Value * castToIntPtrTyIn(llvm::Value *val, const size_t bit_width)
Definition: Execute.cpp:3235
size_t getNumBytesForFetchedRow(const std::set< int > &table_ids_to_fetch) const
Definition: Execute.cpp:300
static std::mutex kernel_mutex_
Definition: Execute.h:1110
unsigned numBlocksPerMP() const
Definition: Execute.cpp:3178
#define CHECK_GT(x, y)
Definition: Logger.h:209
Container for compilation results and assorted options for a single execution unit.
bool isCPUOnly() const
Definition: Execute.cpp:250
std::unique_ptr< WindowProjectNodeContext > window_project_node_context_owned_
Definition: Execute.h:1046
std::vector< QuerySessionStatus > getQuerySessionInfo(const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:3998
std::vector< FragmentsPerTable > FragmentsList
bool is_time() const
Definition: sqltypes.h:484
bool needFetchAllFragments(const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
Definition: Execute.cpp:2335
const QuerySessionId query_session_
Definition: Execute.h:133
std::shared_ptr< HashJoin > hash_table
Definition: Execute.h:829
std::string to_string(char const *&&v)
mapd_shared_mutex & getSessionLock()
Definition: Execute.cpp:3659
#define LOG_IF(severity, condition)
Definition: Logger.h:287
static void clearMemory(const Data_Namespace::MemoryLevel memory_level)
Definition: Execute.cpp:179
Fragmenter_Namespace::TableInfo getTableInfo(const int table_id) const
Definition: Execute.cpp:288
std::shared_ptr< CompilationContext > getCodeFromCache(const CodeCacheKey &, const CodeCache &)
static const size_t high_scan_limit
Definition: Execute.h:463
CodeCache gpu_code_cache_
Definition: Execute.h:1025
static const int32_t ERR_STRING_CONST_IN_RESULTSET
Definition: Execute.h:1104
void codegenWindowAvgEpilogue(llvm::Value *crt_val, llvm::Value *window_func_null_val, llvm::Value *multiplicity_lv)
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const int outer_table_id, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2908
std::vector< int64_t > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:3074
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:3108
QuerySessionId & getCurrentQuerySession(mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:3663
bool isFragmentFullyDeleted(const int table_id, const Fragmenter_Namespace::FragmentInfo &fragment)
Definition: Execute.cpp:3358
void insertErrorCodeChecker(llvm::Function *query_func, bool hoist_literals)
bool checkIsQuerySessionEnrolled(const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:3958
void setQueryStatus(const QuerySessionStatus::QueryStatus &status)
Definition: Execute.h:124
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY
Definition: Execute.h:1105
const ExecutorId executor_id_
Definition: Execute.h:1037
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED
Definition: Execute.h:1102
const ResultSetPtr & get_temporary_table(const TemporaryTables *temporary_tables, const int table_id)
Definition: Execute.h:224
int8_t warpSize() const
Definition: Execute.cpp:3160
std::map< QuerySessionId, bool > InterruptFlagMap
Definition: Execute.h:80
const size_t max_gpu_slab_size_
Definition: Execute.h:1033
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1001
ResultSetPtr collectAllDeviceResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
Definition: Execute.cpp:1807
const ColumnDescriptor * getPhysicalColumnDescriptor(const Analyzer::ColumnVar *, int) const
Definition: Execute.cpp:261
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
static const int32_t ERR_DIV_BY_ZERO
Definition: Execute.h:1093
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit(const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:162
std::shared_ptr< CompilationContext > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const CompilationOptions &)
std::vector< llvm::Value * > LLVMValueVector
Definition: Execute.h:363
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:1040
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:216
std::vector< TargetValue > getEntryAt(const size_t index) const override
int get_logical_size() const
Definition: sqltypes.h:322
std::unordered_map< int, std::vector< llvm::Value * > > fetch_cache_
Definition: CgenState.h:330
decltype(FragmentInfoType::fragmentId) const getFragmentId() const
Definition: Execute.h:346
int64_t deviceCycles(int milliseconds) const
Definition: Execute.cpp:3200
std::string generatePTX(const std::string &) const
std::mutex str_dict_mutex_
Definition: Execute.h:1020
bool is_integer() const
Definition: sqltypes.h:480
const Catalog_Namespace::Catalog * catalog_
Definition: Execute.h:1038
#define INJECT_TIMER(DESC)
Definition: measure.h:93
friend class PendingExecutionClosure
Definition: Execute.h:1129
void setQuerySessionAsInterrupted(const QuerySessionId &query_session, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:3939
static const int32_t ERR_OUT_OF_RENDER_MEM
Definition: Execute.h:1097
std::unordered_map< TableId, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:894
std::shared_timed_mutex mapd_shared_mutex
const std::string debug_file_
Definition: Execute.h:1035
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit) const
Definition: Execute.cpp:1922
CachedCardinality getCachedCardinality(const std::string &cache_key)
Definition: Execute.cpp:3988
decltype(FragmentInfoType::physicalTableId) const getPhysicalTableId() const
Definition: Execute.h:343
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1010
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:78
void setColRangeCache(const AggregatedColRange &aggregated_col_range)
Definition: Execute.h:911
bool containsLeftDeepOuterJoin() const
Definition: Execute.h:426
void setCatalog(const Catalog_Namespace::Catalog *catalog)
Definition: Execute.cpp:276
StringDictionaryProxy * getStringDictionaryProxy(const int dict_id, const bool with_generation) const
Definition: Execute.h:411
std::shared_ptr< CompilationContext > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
Definition: Execute.h:1099
bool is_timeinterval() const
Definition: sqltypes.h:489
static std::unordered_map< std::string, size_t > cardinality_cache_
Definition: Execute.h:1090
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1059
FragmentInfoType const & getFragmentInfo() const
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1009
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, const QuerySessionStatus::QueryStatus updated_query_status, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:3851
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1100
void initializeNVPTXBackend() const
mapd_unique_lock< mapd_shared_mutex > unique_lock
Definition: Execute.h:1072
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::chrono::time_point< std::chrono::system_clock > submitted, mapd_unique_lock< mapd_shared_mutex > &write_lock)
Definition: Execute.cpp:3903
const TableGeneration & getTableGeneration(const int table_id) const
Definition: Execute.cpp:292
llvm::Value * castToFP(llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
Definition: Execute.cpp:3208
size_t fragment_index_
Definition: Execute.h:359
std::pair< bool, size_t > CachedCardinality
Definition: Execute.h:974
void interrupt(const std::string &query_session="", const std::string &interrupt_session="")
const ColumnDescriptor * getColumnDescriptor(const Analyzer::ColumnVar *) const
Definition: Execute.cpp:255
static const int32_t ERR_UNSUPPORTED_SELF_JOIN
Definition: Execute.h:1096
const unsigned block_size_x_
Definition: Execute.h:1031
const unsigned grid_size_x_
Definition: Execute.h:1032
specifies the content in-memory of a row in the column metadata table
bool is_boolean() const
Definition: sqltypes.h:485
std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const override
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
Definition: Execute.h:1106
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:1063
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:1094
const TemporaryTables * getTemporaryTables()
Definition: Execute.h:406
std::string get_null_check_suffix(const SQLTypeInfo &lhs_ti, const SQLTypeInfo &rhs_ti)
Definition: Execute.h:1139
void executeWorkUnitPerFragment(const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< int > &fragment_ids)
Compiles and dispatches a work unit per fragment processing results with the per fragment callback...
Definition: Execute.cpp:1568
static void addCodeToCache(const CodeCacheKey &, std::shared_ptr< CompilationContext >, llvm::Module *, CodeCache &)
const Catalog_Namespace::Catalog * getCatalog() const
Definition: Execute.cpp:272
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted_time)
Definition: Execute.h:95
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr &results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, RenderInfo *render_info)
Definition: Execute.cpp:2707
void enrollQuerySession(const QuerySessionId &query_session, const std::string &query_str, const std::chrono::time_point< std::chrono::system_clock > submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
Definition: Execute.cpp:3799
#define CHECK_LT(x, y)
Definition: Logger.h:207
ResultSetPtr resultsUnion(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:866
static QuerySessionId current_query_session_
Definition: Execute.h:1055
std::shared_ptr< ResultSet > rs_
Definition: Execute.h:360
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2609
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
std::unique_ptr< QueryCompilationDescriptor > QueryCompilationDescriptorOwned
Definition: Execute.h:75
size_t ExecutorId
Definition: Execute.h:376
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:319
const ColumnarResults * columnarize_result(std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSetPtr &result, const int frag_id)
Definition: Execute.h:268
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:377
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1050
void codegenWindowFunctionStateInit(llvm::Value *aggregate_state)
void launchKernels(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels)
CodeCache cpu_code_cache_
Definition: Execute.h:1024
bool checkIsQuerySessionInterrupted(const QuerySessionId &query_session, mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:3950
std::pair< bool, int64_t > skipFragment(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:3391
unsigned gridSize() const
Definition: Execute.cpp:3169
llvm::Value * spillDoubleElement(llvm::Value *elem_val, llvm::Type *elem_ty)
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags(const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< int, const TableFragments * > &all_tables_fragments)
Definition: Execute.cpp:2286
StringDictionaryGenerations computeStringDictionaryGenerations(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:3616
void updateQuerySessionStatus(std::shared_ptr< const query_state::QueryState > &query_state, const QuerySessionStatus::QueryStatus new_query_status)
Definition: Execute.cpp:3761
void setExecutorId(const size_t executor_id)
Definition: Execute.h:127
FragmentInfoType const & fragment_info_
Definition: Execute.h:358
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:3089
const std::string getQueryStr()
Definition: Execute.h:118
std::pair< bool, int64_t > skipFragmentInnerJoins(const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:3557
void buildSelectedFragsMapping(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:2623
llvm::Value * codegenAggregateWindowState()
TableGenerations table_generations_
Definition: Execute.h:1052
void unregisterActiveModule(void *module, const int device_id) const
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:542
void resetInterrupt()
mapd_shared_lock< mapd_shared_mutex > read_lock
const size_t getExecutorId()
Definition: Execute.h:119
size_t executor_id_
Definition: Execute.h:134
size_t getRunningExecutorId(mapd_shared_lock< mapd_shared_mutex > &read_lock)
Definition: Execute.cpp:3668
std::string QuerySessionId
Definition: Execute.h:78
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:636
ResultSetPtr reduceMultiDeviceResultSets(std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:941
llvm::Value * codegenWindowFunctionAggregate(const CompilationOptions &co)
void addToCardinalityCache(const std::string &cache_key, const size_t cache_value)
Definition: Execute.cpp:3979
static std::atomic< bool > interrupted_
Definition: Execute.h:1018
#define CHECK(condition)
Definition: Logger.h:197
static const int32_t ERR_OUT_OF_SLOTS
Definition: Execute.h:1095
static std::mutex compilation_mutex_
Definition: Execute.h:1109
std::vector< llvm::Value * > inlineHoistedLiterals()
mapd_shared_lock< mapd_shared_mutex > shared_lock
Definition: Execute.h:1071
static size_t running_query_executor_id_
Definition: Execute.h:1057
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1411
std::unordered_map< int, const Analyzer::BinOper * > getInnerTabIdToJoinCond() const
Definition: Execute.cpp:1972
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Definition: Execute.cpp:1675
Executor(const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:138
GroupColLLVMValue groupByColumnCodegen(Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, GroupByAndAggregate::DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
Definition: IRCodegen.cpp:704
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:172
ExpressionRange getColRange(const PhysicalInput &) const
Definition: Execute.cpp:296
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:205
mapd_unique_lock< mapd_shared_mutex > write_lock
void redeclareFilterFunction()
Definition: IRCodegen.cpp:538
SQLTypeInfo columnType
UpdateLogForFragment(FragmentInfoType const &fragment_info, size_t const, const std::shared_ptr< ResultSet > &rs)
bool is_unnest(const Analyzer::Expr *expr)
Definition: Execute.h:1155
bool is_string() const
Definition: sqltypes.h:478
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:260
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:431
auto getResultSet() const
Definition: Execute.h:355
unsigned blockSize() const
Definition: Execute.cpp:3186
string name
Definition: setup.py:44
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:318
std::unordered_map< int, std::vector< llvm::Value * > > saved_fetch_cache
Definition: Execute.h:1004
static size_t align(const size_t off_in, const size_t alignment)
Definition: Execute.h:986
std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptorOwned
Definition: Execute.h:77
size_t const getRowCount() const override
const std::chrono::time_point< std::chrono::system_clock > submitted_time_
Definition: Execute.h:136
const QuerySessionId getQuerySession()
Definition: Execute.h:117
static const int32_t ERR_OUT_OF_CPU_MEM
Definition: Execute.h:1098
QuerySessionStatus::QueryStatus query_status_
Definition: Execute.h:142
bool is_decimal() const
Definition: sqltypes.h:481
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:649
Descriptor for the fragments required for an execution kernel.
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
Fragmenter_Namespace::FragmentInfo FragmentInfoType
Definition: Execute.h:327
bool is_rt_udf_module_present(bool cpu_only=false)
static size_t getArenaBlockSize()
Definition: Execute.cpp:204
ResultSetPtr executeWorkUnit(size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, const Catalog_Namespace::Catalog &, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:1344
std::mutex gpu_exec_mutex_[max_gpu_count]
Definition: Execute.h:1013
HashType
Definition: HashTable.h:19
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
static mapd_shared_mutex recycler_mutex_
Definition: Execute.h:1089
void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
llvm::Value * codegenWindowFunction(const size_t target_index, const CompilationOptions &co)
SQLOps get_optype() const
Definition: Analyzer.h:370
WindowFunctionContext * active_window_function_
Definition: Execute.h:1048
static std::mutex gpu_active_modules_mutex_
Definition: Execute.h:1015
void setupCaching(const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< int > &phys_table_ids)
Definition: Execute.cpp:3649
static void nukeCacheOfExecutors()
Definition: Execute.h:392
void clearMetaInfoCache()
Definition: Execute.cpp:371
std::function< void(const UpdateLogForFragment &, ColumnToFragmentsMap &)> Callback
Definition: Execute.h:353
const ColumnDescriptor * get_column_descriptor(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:187
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
size_t const getEntryCount() const override
llvm::BasicBlock * codegenWindowResetStateControlFlow()
const TemporaryTables * temporary_tables_
Definition: Execute.h:1039
void executeUpdate(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
CompilationRetryNewScanLimit(const size_t new_scan_limit)
Definition: Execute.h:285
WatchdogException(const std::string &cause)
Definition: Execute.h:157
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:377
bool isArchMaxwell(const ExecutorDeviceType dt) const
bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< int, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
Definition: Execute.cpp:2196
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
Definition: Execute.cpp:1671