OmniSciDB  ab4938a6a3
QueryFragmentDescriptor Class Reference

#include <QueryFragmentDescriptor.h>

Public Member Functions

 QueryFragmentDescriptor (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const std::vector< Data_Namespace::MemoryInfo > &gpu_mem_infos, const double gpu_input_mem_limit_percent, const std::vector< size_t > allowed_outer_fragment_indices)
 
void buildFragmentKernelMap (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
 
template<typename DISPATCH_FCN >
void assignFragsToMultiDispatch (DISPATCH_FCN f) const
 
template<typename DISPATCH_FCN >
void assignFragsToKernelDispatch (DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const
 
bool shouldCheckWorkUnitWatchdog () const
 

Static Public Member Functions

static void computeAllTablesFragments (std::map< int, const TableFragments *> &all_tables_fragments, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos)
 

Protected Member Functions

void buildFragmentPerKernelMapForUnion (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, Executor *executor)
 
void buildFragmentPerKernelMap (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, Executor *executor)
 
void buildMultifragKernelMap (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_inner_join_fragment_skipping, Executor *executor)
 
bool terminateDispatchMaybe (size_t &tuple_count, const RelAlgExecutionUnit &ra_exe_unit, const ExecutionKernel &kernel) const
 
void checkDeviceMemoryUsage (const Fragmenter_Namespace::FragmentInfo &fragment, const int device_id, const size_t num_cols)
 

Protected Attributes

std::vector< size_t > allowed_outer_fragment_indices_
 
size_t outer_fragments_size_ = 0
 
int64_t rowid_lookup_key_ = -1
 
std::map< int, const TableFragments * > selected_tables_fragments_
 
std::map< int, std::vector< ExecutionKernel > > execution_kernels_per_device_
 
double gpu_input_mem_limit_percent_
 
std::map< size_t, size_t > tuple_count_per_device_
 
std::map< size_t, size_t > available_gpu_mem_bytes_
 

Detailed Description

Definition at line 65 of file QueryFragmentDescriptor.h.

Constructor & Destructor Documentation

◆ QueryFragmentDescriptor()

QueryFragmentDescriptor::QueryFragmentDescriptor ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const std::vector< Data_Namespace::MemoryInfo > &  gpu_mem_infos,
const double  gpu_input_mem_limit_percent,
const std::vector< size_t >  allowed_outer_fragment_indices 
)

Definition at line 23 of file QueryFragmentDescriptor.cpp.

References available_gpu_mem_bytes_, CHECK_EQ, RelAlgExecutionUnit::input_descs, and selected_tables_fragments_.

    : allowed_outer_fragment_indices_(allowed_outer_fragment_indices)
    , gpu_input_mem_limit_percent_(gpu_input_mem_limit_percent) {
  const size_t input_desc_count{ra_exe_unit.input_descs.size()};
  CHECK_EQ(query_infos.size(), input_desc_count);
  for (size_t table_idx = 0; table_idx < input_desc_count; ++table_idx) {
    const auto table_id = ra_exe_unit.input_descs[table_idx].getTableId();
    if (!selected_tables_fragments_.count(table_id)) {
      selected_tables_fragments_[ra_exe_unit.input_descs[table_idx].getTableId()] =
          &query_infos[table_idx].info.fragments;
    }
  }

  for (size_t device_id = 0; device_id < gpu_mem_infos.size(); device_id++) {
    const auto& gpu_mem_info = gpu_mem_infos[device_id];
    available_gpu_mem_bytes_[device_id] =
        gpu_mem_info.maxNumPages * gpu_mem_info.pageSize;
  }
}

Member Function Documentation

◆ assignFragsToKernelDispatch()

template<typename DISPATCH_FCN >
void QueryFragmentDescriptor::assignFragsToKernelDispatch ( DISPATCH_FCN  f,
const RelAlgExecutionUnit &  ra_exe_unit 
) const
inline

Dispatch one fragment for each device. Iterate the device map and dispatch one kernel for each device per iteration. This allows balanced dispatch as well as early termination if the number of rows passing the kernel can be computed at dispatch time and the scan limit is reached.

Definition at line 108 of file QueryFragmentDescriptor.h.

References CHECK.

Referenced by Executor::dispatchFragments().

{
  if (execution_kernels_per_device_.empty()) {
    return;
  }

  size_t tuple_count = 0;

  std::unordered_map<int, size_t> execution_kernel_index;
  for (const auto& device_itr : execution_kernels_per_device_) {
    CHECK(execution_kernel_index.insert(std::make_pair(device_itr.first, size_t(0)))
              .second);
  }

  bool dispatch_finished = false;
  while (!dispatch_finished) {
    dispatch_finished = true;
    for (const auto& device_itr : execution_kernels_per_device_) {
      auto& kernel_idx = execution_kernel_index[device_itr.first];
      if (kernel_idx < device_itr.second.size()) {
        dispatch_finished = false;
        const auto& execution_kernel = device_itr.second[kernel_idx++];
        f(device_itr.first, execution_kernel.fragments, rowid_lookup_key_);
        if (terminateDispatchMaybe(tuple_count, ra_exe_unit, execution_kernel)) {
          return;
        }
      }
    }
  }
}
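A hedged caller-side sketch of the dispatch callable's shape: it receives the device id, the fragments list of one ExecutionKernel, and the current rowid lookup key. launch_one_kernel is a hypothetical placeholder for the caller's per-kernel work; in the engine the real callable is supplied by Executor::dispatchFragments.

  // Sketch only: assumes fragment_descriptor has been populated via
  // buildFragmentKernelMap(); launch_one_kernel is a hypothetical helper.
  fragment_descriptor.assignFragsToKernelDispatch(
      [&](const int device_id,
          const FragmentsList& frag_list,
          const int64_t rowid_lookup_key) {
        launch_one_kernel(device_id, frag_list, rowid_lookup_key);
      },
      ra_exe_unit);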

◆ assignFragsToMultiDispatch()

template<typename DISPATCH_FCN >
void QueryFragmentDescriptor::assignFragsToMultiDispatch ( DISPATCH_FCN  f) const
inline

Dispatch multi-fragment kernels. Currently GPU only. Each GPU should have only one kernel, with multiple fragments in its fragments list.

Definition at line 91 of file QueryFragmentDescriptor.h.

References CHECK_EQ.

Referenced by Executor::dispatchFragments().

{
  for (const auto& device_itr : execution_kernels_per_device_) {
    const auto& execution_kernels = device_itr.second;
    CHECK_EQ(execution_kernels.size(), size_t(1));

    const auto& fragments_list = execution_kernels.front().fragments;
    f(device_itr.first, fragments_list, rowid_lookup_key_);
  }
}
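The multi-fragment variant takes a callable of the same shape but invokes it once per device, since each device holds a single kernel whose fragments list covers every fragment assigned to it. A hedged sketch (launch_multifrag_kernel is a hypothetical placeholder, not a real API):

  // Sketch only: one invocation per device; frag_list spans every fragment
  // assigned to device_id. launch_multifrag_kernel is a hypothetical helper.
  fragment_descriptor.assignFragsToMultiDispatch(
      [&](const int device_id,
          const FragmentsList& frag_list,
          const int64_t rowid_lookup_key) {
        launch_multifrag_kernel(device_id, frag_list, rowid_lookup_key);
      });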

◆ buildFragmentKernelMap()

void QueryFragmentDescriptor::buildFragmentKernelMap ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
const bool  enable_multifrag_kernels,
const bool  enable_inner_join_fragment_skipping,
Executor *  executor 
)

Definition at line 62 of file QueryFragmentDescriptor.cpp.

References buildFragmentPerKernelMap(), buildFragmentPerKernelMapForUnion(), buildMultifragKernelMap(), and RelAlgExecutionUnit::union_all.

Referenced by Executor::dispatchFragments().

{
  if (ra_exe_unit.union_all) {
    buildFragmentPerKernelMapForUnion(
        ra_exe_unit, frag_offsets, device_count, device_type, executor);
  } else if (enable_multifrag_kernels) {
    buildMultifragKernelMap(ra_exe_unit,
                            frag_offsets,
                            device_count,
                            device_type,
                            enable_inner_join_fragment_skipping,
                            executor);
  } else {
    buildFragmentPerKernelMap(
        ra_exe_unit, frag_offsets, device_count, device_type, executor);
  }
}

◆ buildFragmentPerKernelMap()

void QueryFragmentDescriptor::buildFragmentPerKernelMap ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
Executor *  executor 
)
protected

Definition at line 197 of file QueryFragmentDescriptor.cpp.

References allowed_outer_fragment_indices_, CHECK, CHECK_GT, checkDeviceMemoryUsage(), anonymous_namespace{QueryFragmentDescriptor.cpp}::compute_fragment_tuple_count(), CPU, Data_Namespace::CPU_LEVEL, execution_kernels_per_device_, GPU, Data_Namespace::GPU_LEVEL, RelAlgExecutionUnit::input_descs, outer_fragments_size_, rowid_lookup_key_, selected_tables_fragments_, and RelAlgExecutionUnit::simple_quals.

Referenced by buildFragmentKernelMap().

{
  const auto& outer_table_desc = ra_exe_unit.input_descs.front();
  const int outer_table_id = outer_table_desc.getTableId();
  auto it = selected_tables_fragments_.find(outer_table_id);
  CHECK(it != selected_tables_fragments_.end());
  const auto outer_fragments = it->second;
  outer_fragments_size_ = outer_fragments->size();

  const auto num_bytes_for_row = executor->getNumBytesForFetchedRow();

  const ColumnDescriptor* deleted_cd{nullptr};
  if (outer_table_id > 0) {
    // Intermediate tables will not have a table descriptor and will also not have
    // deleted rows.
    const auto& catalog = executor->getCatalog();
    const auto td = catalog->getMetadataForTable(outer_table_id);
    CHECK(td);
    deleted_cd = catalog->getDeletedColumnIfRowsDeleted(td);
  }

  for (size_t i = 0; i < outer_fragments->size(); ++i) {
    if (!allowed_outer_fragment_indices_.empty()) {
      if (std::find(allowed_outer_fragment_indices_.begin(),
                    allowed_outer_fragment_indices_.end(),
                    i) == allowed_outer_fragment_indices_.end()) {
        continue;
      }
    }

    const auto& fragment = (*outer_fragments)[i];
    const auto skip_frag = executor->skipFragment(
        outer_table_desc, fragment, ra_exe_unit.simple_quals, frag_offsets, i);
    if (skip_frag.first) {
      continue;
    }
    rowid_lookup_key_ = std::max(rowid_lookup_key_, skip_frag.second);
    const int chosen_device_count =
        device_type == ExecutorDeviceType::CPU ? 1 : device_count;
    CHECK_GT(chosen_device_count, 0);
    const auto memory_level = device_type == ExecutorDeviceType::GPU
                                  ? Data_Namespace::GPU_LEVEL
                                  : Data_Namespace::CPU_LEVEL;
    int device_id = (device_type == ExecutorDeviceType::CPU || fragment.shard == -1)
                        ? fragment.deviceIds[static_cast<int>(memory_level)]
                        : fragment.shard % chosen_device_count;

    if (device_type == ExecutorDeviceType::GPU) {
      checkDeviceMemoryUsage(fragment, device_id, num_bytes_for_row);
    }

    ExecutionKernel execution_kernel{
        device_id, {}, compute_fragment_tuple_count(fragment, deleted_cd)};
    for (size_t j = 0; j < ra_exe_unit.input_descs.size(); ++j) {
      const auto frag_ids =
          executor->getTableFragmentIndices(ra_exe_unit,
                                            device_type,
                                            j,
                                            i,
                                            selected_tables_fragments_,
                                            executor->getInnerTabIdToJoinCond());
      const auto table_id = ra_exe_unit.input_descs[j].getTableId();
      auto table_frags_it = selected_tables_fragments_.find(table_id);
      CHECK(table_frags_it != selected_tables_fragments_.end());

      execution_kernel.fragments.emplace_back(FragmentsPerTable{table_id, frag_ids});
    }

    if (execution_kernels_per_device_.find(device_id) ==
        execution_kernels_per_device_.end()) {
      CHECK(execution_kernels_per_device_
                .insert(std::make_pair(device_id,
                                       std::vector<ExecutionKernel>{execution_kernel}))
                .second);
    } else {
      execution_kernels_per_device_[device_id].emplace_back(execution_kernel);
    }
  }
}

◆ buildFragmentPerKernelMapForUnion()

void QueryFragmentDescriptor::buildFragmentPerKernelMapForUnion ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
Executor *  executor 
)
protected

Definition at line 99 of file QueryFragmentDescriptor.cpp.

References CHECK, CHECK_GT, checkDeviceMemoryUsage(), anonymous_namespace{QueryFragmentDescriptor.cpp}::compute_fragment_tuple_count(), CPU, Data_Namespace::CPU_LEVEL, execution_kernels_per_device_, GPU, Data_Namespace::GPU_LEVEL, RelAlgExecutionUnit::input_descs, shared::printContainer(), rowid_lookup_key_, selected_tables_fragments_, RelAlgExecutionUnit::simple_quals, and VLOG.

Referenced by buildFragmentKernelMap().

{
  for (size_t j = 0; j < ra_exe_unit.input_descs.size(); ++j) {
    auto const& table_desc = ra_exe_unit.input_descs[j];
    int const table_id = table_desc.getTableId();
    TableFragments const* fragments = selected_tables_fragments_.at(table_id);

    const auto num_bytes_for_row = executor->getNumBytesForFetchedRow();

    const ColumnDescriptor* deleted_cd{nullptr};
    if (table_id > 0) {
      // Temporary tables will not have a table descriptor and not have deleted rows.
      const auto& catalog = executor->getCatalog();
      const auto td = catalog->getMetadataForTable(table_id);
      CHECK(td);
      deleted_cd = catalog->getDeletedColumnIfRowsDeleted(td);
    }
    VLOG(1) << "table_id=" << table_id << " fragments->size()=" << fragments->size()
            << " fragments->front().physicalTableId="
            << fragments->front().physicalTableId
            << " fragments->front().getNumTuples()=" << fragments->front().getNumTuples()
            << " fragments->front().getPhysicalNumTuples()="
            << fragments->front().getPhysicalNumTuples();

    for (size_t i = 0; i < fragments->size(); ++i) {
      const auto& fragment = (*fragments)[i];
      const auto skip_frag = executor->skipFragment(
          table_desc, fragment, ra_exe_unit.simple_quals, frag_offsets, i);
      if (skip_frag.first) {
        continue;
      }
      rowid_lookup_key_ = std::max(rowid_lookup_key_, skip_frag.second);
      const int chosen_device_count =
          device_type == ExecutorDeviceType::CPU ? 1 : device_count;
      CHECK_GT(chosen_device_count, 0);
      const auto memory_level = device_type == ExecutorDeviceType::GPU
                                    ? Data_Namespace::GPU_LEVEL
                                    : Data_Namespace::CPU_LEVEL;

      int device_id = (device_type == ExecutorDeviceType::CPU || fragment.shard == -1)
                          ? fragment.deviceIds[static_cast<int>(memory_level)]
                          : fragment.shard % chosen_device_count;

      VLOG(1) << "device_type_is_cpu=" << (device_type == ExecutorDeviceType::CPU)
              << " chosen_device_count=" << chosen_device_count
              << " fragment.shard=" << fragment.shard
              << " fragment.deviceIds.size()=" << fragment.deviceIds.size()
              << " int(memory_level)=" << int(memory_level) << " device_id=" << device_id;

      if (device_type == ExecutorDeviceType::GPU) {
        checkDeviceMemoryUsage(fragment, device_id, num_bytes_for_row);
      }

      const auto frag_ids =
          executor->getTableFragmentIndices(ra_exe_unit,
                                            device_type,
                                            j,
                                            i,
                                            selected_tables_fragments_,
                                            executor->getInnerTabIdToJoinCond());

      VLOG(1) << "table_id=" << table_id << " frag_ids.size()=" << frag_ids.size()
              << " frag_ids.front()=" << frag_ids.front();
      ExecutionKernel execution_kernel{
          device_id,
          {FragmentsPerTable{table_id, frag_ids}},
          compute_fragment_tuple_count(fragment, deleted_cd)};

      auto itr = execution_kernels_per_device_.find(device_id);
      if (itr == execution_kernels_per_device_.end()) {
        auto const pair = execution_kernels_per_device_.insert(std::make_pair(
            device_id, std::vector<ExecutionKernel>{std::move(execution_kernel)}));
        CHECK(pair.second);
      } else {
        itr->second.emplace_back(std::move(execution_kernel));
      }
    }
    std::vector<int> table_ids =
        std::accumulate(execution_kernels_per_device_[0].begin(),
                        execution_kernels_per_device_[0].end(),
                        std::vector<int>(),
                        [](auto&& vec, auto& exe_kern) {
                          vec.push_back(exe_kern.fragments[0].table_id);
                          return vec;
                        });
    VLOG(1) << "execution_kernels_per_device_.size()="
            << execution_kernels_per_device_.size()
            << " execution_kernels_per_device_[0].size()="
            << execution_kernels_per_device_[0].size()
            << " execution_kernels_per_device_[0][*].fragments[0].table_id="
            << shared::printContainer(table_ids);
  }
}

◆ buildMultifragKernelMap()

void QueryFragmentDescriptor::buildMultifragKernelMap ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
const bool  enable_inner_join_fragment_skipping,
Executor *  executor 
)
protected

Definition at line 281 of file QueryFragmentDescriptor.cpp.

References allowed_outer_fragment_indices_, CHECK, CHECK_EQ, checkDeviceMemoryUsage(), execution_kernels_per_device_, GPU, Data_Namespace::GPU_LEVEL, RelAlgExecutionUnit::input_descs, outer_fragments_size_, rowid_lookup_key_, selected_tables_fragments_, and RelAlgExecutionUnit::simple_quals.

Referenced by buildFragmentKernelMap().

{
  // Allocate all the fragments of the tables involved in the query to available
  // devices. The basic idea: the device is decided by the outer table in the
  // query (the first table in a join) and we need to broadcast the fragments
  // in the inner table to each device. Sharding will change this model.
  const auto& outer_table_desc = ra_exe_unit.input_descs.front();
  const int outer_table_id = outer_table_desc.getTableId();
  auto it = selected_tables_fragments_.find(outer_table_id);
  CHECK(it != selected_tables_fragments_.end());
  const auto outer_fragments = it->second;
  outer_fragments_size_ = outer_fragments->size();

  const auto inner_table_id_to_join_condition = executor->getInnerTabIdToJoinCond();
  const auto num_bytes_for_row = executor->getNumBytesForFetchedRow();

  for (size_t outer_frag_id = 0; outer_frag_id < outer_fragments->size();
       ++outer_frag_id) {
    if (!allowed_outer_fragment_indices_.empty()) {
      if (std::find(allowed_outer_fragment_indices_.begin(),
                    allowed_outer_fragment_indices_.end(),
                    outer_frag_id) == allowed_outer_fragment_indices_.end()) {
        continue;
      }
    }

    const auto& fragment = (*outer_fragments)[outer_frag_id];
    auto skip_frag = executor->skipFragment(outer_table_desc,
                                            fragment,
                                            ra_exe_unit.simple_quals,
                                            frag_offsets,
                                            outer_frag_id);
    if (enable_inner_join_fragment_skipping &&
        (skip_frag == std::pair<bool, int64_t>(false, -1))) {
      skip_frag = executor->skipFragmentInnerJoins(
          outer_table_desc, ra_exe_unit, fragment, frag_offsets, outer_frag_id);
    }
    if (skip_frag.first) {
      continue;
    }
    const int device_id =
        fragment.shard == -1
            ? fragment.deviceIds[static_cast<int>(Data_Namespace::GPU_LEVEL)]
            : fragment.shard % device_count;
    if (device_type == ExecutorDeviceType::GPU) {
      checkDeviceMemoryUsage(fragment, device_id, num_bytes_for_row);
    }
    for (size_t j = 0; j < ra_exe_unit.input_descs.size(); ++j) {
      const auto table_id = ra_exe_unit.input_descs[j].getTableId();
      auto table_frags_it = selected_tables_fragments_.find(table_id);
      CHECK(table_frags_it != selected_tables_fragments_.end());
      const auto frag_ids =
          executor->getTableFragmentIndices(ra_exe_unit,
                                            device_type,
                                            j,
                                            outer_frag_id,
                                            selected_tables_fragments_,
                                            inner_table_id_to_join_condition);

      if (execution_kernels_per_device_.find(device_id) ==
          execution_kernels_per_device_.end()) {
        std::vector<ExecutionKernel> kernels{
            ExecutionKernel{device_id, FragmentsList{}, std::nullopt}};
        CHECK(execution_kernels_per_device_.insert(std::make_pair(device_id, kernels))
                  .second);
      }

      // Multifrag kernels only have one execution kernel per device. Grab the execution
      // kernel object and push back into its fragments list.
      CHECK_EQ(execution_kernels_per_device_[device_id].size(), size_t(1));
      auto& execution_kernel = execution_kernels_per_device_[device_id].front();

      auto& kernel_frag_list = execution_kernel.fragments;
      if (kernel_frag_list.size() < j + 1) {
        kernel_frag_list.emplace_back(FragmentsPerTable{table_id, frag_ids});
      } else {
        CHECK_EQ(kernel_frag_list[j].table_id, table_id);
        auto& curr_frag_ids = kernel_frag_list[j].fragment_ids;
        for (const int frag_id : frag_ids) {
          if (std::find(curr_frag_ids.begin(), curr_frag_ids.end(), frag_id) ==
              curr_frag_ids.end()) {
            curr_frag_ids.push_back(frag_id);
          }
        }
      }
    }
    rowid_lookup_key_ = std::max(rowid_lookup_key_, skip_frag.second);
  }
}

◆ checkDeviceMemoryUsage()

void QueryFragmentDescriptor::checkDeviceMemoryUsage ( const Fragmenter_Namespace::FragmentInfo &  fragment,
const int  device_id,
const size_t  num_cols 
)
protected

Definition at line 410 of file QueryFragmentDescriptor.cpp.

References available_gpu_mem_bytes_, CHECK_GE, g_cluster, Fragmenter_Namespace::FragmentInfo::getNumTuples(), gpu_input_mem_limit_percent_, LOG, tuple_count_per_device_, and logger::WARNING.

Referenced by buildFragmentPerKernelMap(), buildFragmentPerKernelMapForUnion(), and buildMultifragKernelMap().

{
  if (g_cluster) {
    // Disabled in distributed mode for now
    return;
  }
  CHECK_GE(device_id, 0);
  tuple_count_per_device_[device_id] += fragment.getNumTuples();
  const size_t gpu_bytes_limit =
      available_gpu_mem_bytes_.at(device_id) * gpu_input_mem_limit_percent_;
  if (tuple_count_per_device_[device_id] * num_bytes_for_row > gpu_bytes_limit) {
    LOG(WARNING) << "Not enough memory on device " << device_id
                 << " for input chunks totaling "
                 << tuple_count_per_device_[device_id] * num_bytes_for_row
                 << " bytes (available device memory: " << gpu_bytes_limit << " bytes)";
    throw QueryMustRunOnCpu();
  }
}
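For intuition, the check is simple arithmetic: the running per-device tuple count times the per-row byte width is compared against the configured fraction of the device's available memory. A standalone sketch with illustrative numbers (not OmniSciDB code):

  // Standalone sketch with made-up numbers; mirrors the limit arithmetic only.
  #include <cstdint>
  #include <iostream>

  int main() {
    const std::size_t available_gpu_mem_bytes = 8ull * 1024 * 1024 * 1024;  // 8 GB
    const double gpu_input_mem_limit_percent = 0.2;   // illustrative fraction
    const std::size_t num_bytes_for_row = 64;         // bytes fetched per row
    const std::size_t tuple_count_on_device = 30000000;  // rows queued so far

    const std::size_t gpu_bytes_limit =
        available_gpu_mem_bytes * gpu_input_mem_limit_percent;   // 1.6 GB
    if (tuple_count_on_device * num_bytes_for_row > gpu_bytes_limit) {
      // ~1.92 GB of input exceeds the limit, so the query would be punted to CPU.
      std::cout << "would throw QueryMustRunOnCpu\n";
    }
    return 0;
  }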

◆ computeAllTablesFragments()

void QueryFragmentDescriptor::computeAllTablesFragments ( std::map< int, const TableFragments *> &  all_tables_fragments,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos 
)
static

Definition at line 48 of file QueryFragmentDescriptor.cpp.

References CHECK_EQ, and RelAlgExecutionUnit::input_descs.

Referenced by Executor::ExecutionDispatch::runImpl().

{
  for (size_t tab_idx = 0; tab_idx < ra_exe_unit.input_descs.size(); ++tab_idx) {
    int table_id = ra_exe_unit.input_descs[tab_idx].getTableId();
    CHECK_EQ(query_infos[tab_idx].table_id, table_id);
    const auto& fragments = query_infos[tab_idx].info.fragments;
    if (!all_tables_fragments.count(table_id)) {
      all_tables_fragments.insert(std::make_pair(table_id, &fragments));
    }
  }
}

◆ shouldCheckWorkUnitWatchdog()

bool QueryFragmentDescriptor::shouldCheckWorkUnitWatchdog ( ) const
inline

Definition at line 139 of file QueryFragmentDescriptor.h.

Referenced by Executor::dispatchFragments().

{
  return rowid_lookup_key_ < 0 && !execution_kernels_per_device_.empty();
}

◆ terminateDispatchMaybe()

bool QueryFragmentDescriptor::terminateDispatchMaybe ( size_t &  tuple_count,
const RelAlgExecutionUnit &  ra_exe_unit,
const ExecutionKernel &  kernel 
) const
protected

Definition at line 392 of file QueryFragmentDescriptor.cpp.

References anonymous_namespace{QueryFragmentDescriptor.cpp}::is_sample_query(), SortInfo::limit, SortInfo::offset, ExecutionKernel::outer_tuple_count, and RelAlgExecutionUnit::sort_info.

{
  const auto sample_query_limit =
      ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
  if (!kernel.outer_tuple_count) {
    return false;
  } else {
    tuple_count += *kernel.outer_tuple_count;
    if (is_sample_query(ra_exe_unit) && sample_query_limit > 0 &&
        tuple_count >= sample_query_limit) {
      return true;
    }
  }
  return false;
}

Member Data Documentation

◆ allowed_outer_fragment_indices_

std::vector<size_t> QueryFragmentDescriptor::allowed_outer_fragment_indices_
protected

◆ available_gpu_mem_bytes_

std::map<size_t, size_t> QueryFragmentDescriptor::available_gpu_mem_bytes_
protected

Definition at line 154 of file QueryFragmentDescriptor.h.

Referenced by checkDeviceMemoryUsage(), and QueryFragmentDescriptor().

◆ execution_kernels_per_device_

std::map<int, std::vector<ExecutionKernel> > QueryFragmentDescriptor::execution_kernels_per_device_
protected

◆ gpu_input_mem_limit_percent_

double QueryFragmentDescriptor::gpu_input_mem_limit_percent_
protected

Definition at line 152 of file QueryFragmentDescriptor.h.

Referenced by checkDeviceMemoryUsage().

◆ outer_fragments_size_

size_t QueryFragmentDescriptor::outer_fragments_size_ = 0
protected

◆ rowid_lookup_key_

int64_t QueryFragmentDescriptor::rowid_lookup_key_ = -1
protected

◆ selected_tables_fragments_

std::map<int, const TableFragments*> QueryFragmentDescriptor::selected_tables_fragments_
protected

◆ tuple_count_per_device_

std::map<size_t, size_t> QueryFragmentDescriptor::tuple_count_per_device_
protected

Definition at line 153 of file QueryFragmentDescriptor.h.

Referenced by checkDeviceMemoryUsage().


The documentation for this class was generated from the following files:

QueryFragmentDescriptor.h
QueryFragmentDescriptor.cpp