OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ExecutionKernel Class Reference

#include <ExecutionKernel.h>

+ Collaboration diagram for ExecutionKernel:

Public Member Functions

 ExecutionKernel (const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType chosen_device_type, int chosen_device_id, const ExecutionOptions &eo, const ColumnFetcher &column_fetcher, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, const FragmentsList &frag_list, const ExecutorDispatchMode kernel_dispatch_mode, RenderInfo *render_info, const int64_t rowid_lookup_key)
 
void run (Executor *executor, SharedKernelContext &shared_context)
 

Private Member Functions

void runImpl (Executor *executor, SharedKernelContext &shared_context)
 

Private Attributes

const RelAlgExecutionUnit & ra_exe_unit_
 
const ExecutorDeviceType chosen_device_type
 
int chosen_device_id
 
const ExecutionOptions & eo
 
const ColumnFetcher & column_fetcher
 
const QueryCompilationDescriptor & query_comp_desc
 
const QueryMemoryDescriptor & query_mem_desc
 
const FragmentsList frag_list
 
const ExecutorDispatchMode kernel_dispatch_mode
 
RenderInfo * render_info_
 
const int64_t rowid_lookup_key
 
ResultSetPtr device_results_
 

Detailed Description

Definition at line 48 of file ExecutionKernel.h.

Constructor & Destructor Documentation

ExecutionKernel::ExecutionKernel ( const RelAlgExecutionUnit & ra_exe_unit,
const ExecutorDeviceType  chosen_device_type,
int  chosen_device_id,
const ExecutionOptions & eo,
const ColumnFetcher & column_fetcher,
const QueryCompilationDescriptor & query_comp_desc,
const QueryMemoryDescriptor & query_mem_desc,
const FragmentsList & frag_list,
const ExecutorDispatchMode  kernel_dispatch_mode,
RenderInfo * render_info,
const int64_t  rowid_lookup_key 
)
inline

Definition at line 50 of file ExecutionKernel.h.

61  : ra_exe_unit_(ra_exe_unit)
64  , eo(eo)
65  , column_fetcher(column_fetcher)
66  , query_comp_desc(query_comp_desc)
67  , query_mem_desc(query_mem_desc)
70  , render_info_(render_info)
const ExecutionOptions & eo
const ExecutorDispatchMode kernel_dispatch_mode
const RelAlgExecutionUnit & ra_exe_unit_
const int64_t rowid_lookup_key
const ExecutorDeviceType chosen_device_type
RenderInfo * render_info_
const QueryMemoryDescriptor & query_mem_desc
const QueryCompilationDescriptor & query_comp_desc
const FragmentsList frag_list
const ColumnFetcher & column_fetcher

Member Function Documentation

void ExecutionKernel::run ( Executor * executor,
SharedKernelContext & shared_context 
)

Definition at line 90 of file ExecutionKernel.cpp.

References DEBUG_TIMER, Executor::ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED, Executor::ERR_OUT_OF_CPU_MEM, Executor::ERR_OUT_OF_GPU_MEM, Executor::ERR_OUT_OF_RENDER_MEM, Executor::ERR_STRING_CONST_IN_RESULTSET, Executor::ERR_TOO_MANY_LITERALS, QueryMemoryDescriptor::getQueryDescriptionType(), INJECT_TIMER, kernel_dispatch_mode, MultifragmentKernel, query_mem_desc, runImpl(), and OutOfHostMemory::what().

Referenced by Executor::executeUpdate(), Executor::executeWorkUnitPerFragment(), and Executor::launchKernels().

90  {
91  DEBUG_TIMER("ExecutionKernel::run");
92  INJECT_TIMER(kernel_run);
93  try {
94  runImpl(executor, shared_context);
95  } catch (const OutOfHostMemory& e) {
97  } catch (const std::bad_alloc& e) {
99  } catch (const OutOfRenderMemory& e) {
101  } catch (const OutOfMemory& e) {
102  throw QueryExecutionError(
104  e.what(),
108  } catch (const ColumnarConversionNotSupported& e) {
110  } catch (const TooManyLiterals& e) {
112  } catch (const SringConstInResultSet& e) {
114  } catch (const QueryExecutionError& e) {
115  throw e;
116  }
117 }
virtual const char * what() const noexcept final
Definition: checked_alloc.h:38
const ExecutorDispatchMode kernel_dispatch_mode
void runImpl(Executor *executor, SharedKernelContext &shared_context)
static const int32_t ERR_TOO_MANY_LITERALS
Definition: Execute.h:991
static const int32_t ERR_STRING_CONST_IN_RESULTSET
Definition: Execute.h:992
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED
Definition: Execute.h:990
#define INJECT_TIMER(DESC)
Definition: measure.h:93
static const int32_t ERR_OUT_OF_RENDER_MEM
Definition: Execute.h:985
const QueryMemoryDescriptor & query_mem_desc
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:982
QueryDescriptionType getQueryDescriptionType() const
#define DEBUG_TIMER(name)
Definition: Logger.h:313
static const int32_t ERR_OUT_OF_CPU_MEM
Definition: Execute.h:986

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ExecutionKernel::runImpl ( Executor * executor,
SharedKernelContext & shared_context 
)
private

Definition at line 119 of file ExecutionKernel.cpp.

References SharedKernelContext::addDeviceResults(), CHECK, CHECK_EQ, CHECK_GE, CHECK_GT, CHECK_LT, chosen_device_id, chosen_device_type, FetchResult::col_buffers, column_fetcher, QueryFragmentDescriptor::computeAllTablesFragments(), CPU, Data_Namespace::CPU_LEVEL, device_results_, dynamic_watchdog_init(), SharedKernelContext::dynamic_watchdog_set, ExecutionOptions::dynamic_watchdog_time_limit, eo, Executor::ERR_OUT_OF_CPU_MEM, Executor::ERR_OUT_OF_GPU_MEM, ExecutionOptions::executor_type, Extern, frag_list, FetchResult::frag_offsets, QueryCompilationDescriptor::getCompilationResult(), SharedKernelContext::getFragOffsets(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getQueryExecutionContext(), SharedKernelContext::getQueryInfos(), GPU, Data_Namespace::GPU_LEVEL, RelAlgExecutionUnit::groupby_exprs, QueryCompilationDescriptor::hoistLiterals(), logger::INFO, RelAlgExecutionUnit::input_descs, RenderInfo::isPotentialInSituRender(), kernel_dispatch_mode, KernelPerFragment, LOG, Executor::max_gpu_count, MultifragmentKernel, Native, anonymous_namespace{ExecutionKernel.cpp}::need_to_hold_chunk(), FetchResult::num_rows, CompilationResult::output_columnar, Projection, query_comp_desc, anonymous_namespace{ExecutionKernel.cpp}::query_has_inner_join(), query_mem_desc, ra_exe_unit_, render_info_, rowid_lookup_key, run_query_external(), RelAlgExecutionUnit::scan_limit, serialize_to_sql(), QueryMemoryDescriptor::sortOnGpu(), RelAlgExecutionUnit::target_exprs, target_exprs_to_infos(), to_string(), RelAlgExecutionUnit::union_all, VLOG, and ExecutionOptions::with_dynamic_watchdog.

Referenced by run().

119  {
120  CHECK(executor);
121  const auto memory_level = chosen_device_type == ExecutorDeviceType::GPU
124  CHECK_GE(frag_list.size(), size_t(1));
125  // frag_list[0].table_id is how we tell which query we are running for UNION ALL.
126  const int outer_table_id = ra_exe_unit_.union_all
127  ? frag_list[0].table_id
128  : ra_exe_unit_.input_descs[0].getTableId();
129  CHECK_EQ(frag_list[0].table_id, outer_table_id);
130  const auto& outer_tab_frag_ids = frag_list[0].fragment_ids;
131 
134 
135  auto catalog = executor->getCatalog();
136  CHECK(catalog);
137 
138  // need to own them while query executes
139  auto chunk_iterators_ptr = std::make_shared<std::list<ChunkIter>>();
140  std::list<std::shared_ptr<Chunk_NS::Chunk>> chunks;
141  std::unique_ptr<std::lock_guard<std::mutex>> gpu_lock;
142  std::unique_ptr<CudaAllocator> device_allocator;
144  gpu_lock.reset(
145  new std::lock_guard<std::mutex>(executor->gpu_exec_mutex_[chosen_device_id]));
146  device_allocator =
147  std::make_unique<CudaAllocator>(&catalog->getDataMgr(), chosen_device_id);
148  }
149  FetchResult fetch_result;
150  try {
151  std::map<int, const TableFragments*> all_tables_fragments;
153  all_tables_fragments, ra_exe_unit_, shared_context.getQueryInfos());
154 
155  fetch_result = ra_exe_unit_.union_all
156  ? executor->fetchUnionChunks(column_fetcher,
157  ra_exe_unit_,
159  memory_level,
160  all_tables_fragments,
161  frag_list,
162  *catalog,
163  *chunk_iterators_ptr,
164  chunks,
165  device_allocator.get())
166  : executor->fetchChunks(column_fetcher,
167  ra_exe_unit_,
169  memory_level,
170  all_tables_fragments,
171  frag_list,
172  *catalog,
173  *chunk_iterators_ptr,
174  chunks,
175  device_allocator.get());
176  if (fetch_result.num_rows.empty()) {
177  return;
178  }
180  !shared_context.dynamic_watchdog_set.test_and_set(std::memory_order_acquire)) {
183  LOG(INFO) << "Dynamic Watchdog budget: CPU: "
185  << std::to_string(cycle_budget) << " cycles";
186  }
187  } catch (const OutOfMemory&) {
188  throw QueryExecutionError(
194  return;
195  }
196 
198  if (ra_exe_unit_.input_descs.size() > 1) {
199  throw std::runtime_error("Joins not supported through external execution");
200  }
201  const auto query = serialize_to_sql(&ra_exe_unit_, catalog);
202  GroupByAndAggregate group_by_and_aggregate(executor,
204  ra_exe_unit_,
205  shared_context.getQueryInfos(),
206  executor->row_set_mem_owner_,
207  std::nullopt);
208  const auto query_mem_desc =
209  group_by_and_aggregate.initQueryMemoryDescriptor(false, 0, 8, nullptr, false);
211  query,
212  fetch_result,
213  executor->plan_state_.get(),
217  executor});
218  shared_context.addDeviceResults(std::move(device_results_), outer_tab_frag_ids);
219  return;
220  }
221  const CompilationResult& compilation_result = query_comp_desc.getCompilationResult();
222  std::unique_ptr<QueryExecutionContext> query_exe_context_owned;
223  const bool do_render = render_info_ && render_info_->isPotentialInSituRender();
224 
225  int64_t total_num_input_rows{-1};
227  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
228  total_num_input_rows = 0;
229  std::for_each(fetch_result.num_rows.begin(),
230  fetch_result.num_rows.end(),
231  [&total_num_input_rows](const std::vector<int64_t>& frag_row_count) {
232  total_num_input_rows = std::accumulate(frag_row_count.begin(),
233  frag_row_count.end(),
234  total_num_input_rows);
235  });
236  VLOG(2) << "total_num_input_rows=" << total_num_input_rows;
237  // TODO(adb): we may want to take this early out for all queries, but we are most
238  // likely to see this query pattern on the kernel per fragment path (e.g. with HAVING
239  // 0=1)
240  if (total_num_input_rows == 0) {
241  return;
242  }
243 
245  total_num_input_rows *= ra_exe_unit_.input_descs.size();
246  }
247  }
248 
250  try {
251  query_exe_context_owned =
252  query_mem_desc.getQueryExecutionContext(ra_exe_unit_,
253  executor,
257  total_num_input_rows,
258  fetch_result.col_buffers,
259  fetch_result.frag_offsets,
260  executor->getRowSetMemoryOwner(),
261  compilation_result.output_columnar,
262  query_mem_desc.sortOnGpu(),
263  do_render ? render_info_ : nullptr);
264  } catch (const OutOfHostMemory& e) {
265  throw QueryExecutionError(Executor::ERR_OUT_OF_CPU_MEM);
266  }
267  }
268  QueryExecutionContext* query_exe_context{query_exe_context_owned.get()};
269  CHECK(query_exe_context);
270  int32_t err{0};
271  uint32_t start_rowid{0};
272  if (rowid_lookup_key >= 0) {
273  if (!frag_list.empty()) {
274  const auto& all_frag_row_offsets = shared_context.getFragOffsets();
275  start_rowid = rowid_lookup_key -
276  all_frag_row_offsets[frag_list.begin()->fragment_ids.front()];
277  }
278  }
279 
280  if (ra_exe_unit_.groupby_exprs.empty()) {
281  err = executor->executePlanWithoutGroupBy(ra_exe_unit_,
282  compilation_result,
287  fetch_result.col_buffers,
288  query_exe_context,
289  fetch_result.num_rows,
290  fetch_result.frag_offsets,
291  &catalog->getDataMgr(),
293  start_rowid,
294  ra_exe_unit_.input_descs.size(),
295  do_render ? render_info_ : nullptr);
296  } else {
297  if (ra_exe_unit_.union_all) {
298  VLOG(1) << "outer_table_id=" << outer_table_id
299  << " ra_exe_unit_.scan_limit=" << ra_exe_unit_.scan_limit;
300  }
301  err = executor->executePlanWithGroupBy(ra_exe_unit_,
302  compilation_result,
306  fetch_result.col_buffers,
307  outer_tab_frag_ids,
308  query_exe_context,
309  fetch_result.num_rows,
310  fetch_result.frag_offsets,
311  &catalog->getDataMgr(),
313  outer_table_id,
315  start_rowid,
316  ra_exe_unit_.input_descs.size(),
317  do_render ? render_info_ : nullptr);
318  }
319  if (device_results_) {
320  std::list<std::shared_ptr<Chunk_NS::Chunk>> chunks_to_hold;
321  for (const auto& chunk : chunks) {
322  if (need_to_hold_chunk(chunk.get(), ra_exe_unit_)) {
323  chunks_to_hold.push_back(chunk);
324  }
325  }
326  device_results_->holdChunks(chunks_to_hold);
327  device_results_->holdChunkIterators(chunk_iterators_ptr);
328  } else {
329  VLOG(1) << "null device_results.";
330  }
331  if (err) {
332  throw QueryExecutionError(err);
333  }
334  shared_context.addDeviceResults(std::move(device_results_), outer_tab_frag_ids);
335 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::atomic_flag dynamic_watchdog_set
const ExecutionOptions & eo
const std::vector< uint64_t > & getFragOffsets()
static const int max_gpu_count
Definition: Execute.h:917
const std::optional< bool > union_all
#define LOG(tag)
Definition: Logger.h:188
const ExecutorDispatchMode kernel_dispatch_mode
const RelAlgExecutionUnit & ra_exe_unit_
const int64_t rowid_lookup_key
void addDeviceResults(ResultSetPtr &&device_results, std::vector< size_t > outer_table_fragment_ids)
std::vector< InputDescriptor > input_descs
const ExecutorDeviceType chosen_device_type
#define CHECK_GE(x, y)
Definition: Logger.h:210
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::unique_ptr< ResultSet > run_query_external(const ExecutionUnitSql &sql, const FetchResult &fetch_result, const PlanState *plan_state, const ExternalQueryOutputSpec &output_spec)
RenderInfo * render_info_
#define CHECK_GT(x, y)
Definition: Logger.h:209
std::string to_string(char const *&&v)
ExecutorType executor_type
const bool with_dynamic_watchdog
const QueryMemoryDescriptor & query_mem_desc
const QueryCompilationDescriptor & query_comp_desc
uint64_t dynamic_watchdog_init(unsigned ms_budget)
static const int32_t ERR_OUT_OF_GPU_MEM
Definition: Execute.h:982
bool need_to_hold_chunk(const Chunk_NS::Chunk *chunk, const RelAlgExecutionUnit &ra_exe_unit)
QueryDescriptionType getQueryDescriptionType() const
#define CHECK_LT(x, y)
Definition: Logger.h:207
const FragmentsList frag_list
bool query_has_inner_join(const RelAlgExecutionUnit &ra_exe_unit)
const std::vector< InputTableInfo > & getQueryInfos() const
ResultSetPtr device_results_
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:64
#define CHECK(condition)
Definition: Logger.h:197
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
ExecutionUnitSql serialize_to_sql(const RelAlgExecutionUnit *ra_exe_unit, const Catalog_Namespace::Catalog *catalog)
const unsigned dynamic_watchdog_time_limit
static const int32_t ERR_OUT_OF_CPU_MEM
Definition: Execute.h:986
static void computeAllTablesFragments(std::map< int, const TableFragments * > &all_tables_fragments, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos)
#define VLOG(n)
Definition: Logger.h:291
const ColumnFetcher & column_fetcher

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

int ExecutionKernel::chosen_device_id
private

Definition at line 78 of file ExecutionKernel.h.

Referenced by runImpl().

const ExecutorDeviceType ExecutionKernel::chosen_device_type
private

Definition at line 77 of file ExecutionKernel.h.

Referenced by runImpl().

const ColumnFetcher& ExecutionKernel::column_fetcher
private

Definition at line 80 of file ExecutionKernel.h.

Referenced by runImpl().

ResultSetPtr ExecutionKernel::device_results_
private

Definition at line 88 of file ExecutionKernel.h.

Referenced by runImpl().

const ExecutionOptions& ExecutionKernel::eo
private

Definition at line 79 of file ExecutionKernel.h.

Referenced by runImpl().

const FragmentsList ExecutionKernel::frag_list
private

Definition at line 83 of file ExecutionKernel.h.

Referenced by runImpl().

const ExecutorDispatchMode ExecutionKernel::kernel_dispatch_mode
private

Definition at line 84 of file ExecutionKernel.h.

Referenced by run(), and runImpl().

const QueryCompilationDescriptor& ExecutionKernel::query_comp_desc
private

Definition at line 81 of file ExecutionKernel.h.

Referenced by runImpl().

const QueryMemoryDescriptor& ExecutionKernel::query_mem_desc
private

Definition at line 82 of file ExecutionKernel.h.

Referenced by run(), and runImpl().

const RelAlgExecutionUnit& ExecutionKernel::ra_exe_unit_
private

Definition at line 76 of file ExecutionKernel.h.

Referenced by runImpl().

RenderInfo* ExecutionKernel::render_info_
private

Definition at line 85 of file ExecutionKernel.h.

Referenced by runImpl().

const int64_t ExecutionKernel::rowid_lookup_key
private

Definition at line 86 of file ExecutionKernel.h.

Referenced by runImpl().


The documentation for this class was generated from the following files: