OmniSciDB  04ee39c94c
QueryFragmentDescriptor Class Reference

#include <QueryFragmentDescriptor.h>

Public Member Functions

 QueryFragmentDescriptor (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const std::vector< Data_Namespace::MemoryInfo > &gpu_mem_infos, const double gpu_input_mem_limit_percent)
 
void buildFragmentKernelMap (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
 
template<typename DISPATCH_FCN >
void assignFragsToMultiDispatch (DISPATCH_FCN f) const
 
template<typename DISPATCH_FCN >
void assignFragsToKernelDispatch (DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const
 
bool shouldCheckWorkUnitWatchdog () const
 

Static Public Member Functions

static void computeAllTablesFragments (std::map< int, const TableFragments *> &all_tables_fragments, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos)
 

Protected Member Functions

void buildFragmentPerKernelMap (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, Executor *executor)
 
void buildMultifragKernelMap (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< uint64_t > &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_inner_join_fragment_skipping, Executor *executor)
 
const size_t getOuterFragmentTupleSize (const size_t frag_index) const
 
bool terminateDispatchMaybe (const RelAlgExecutionUnit &ra_exe_unit, const size_t kernel_id) const
 
void checkDeviceMemoryUsage (const Fragmenter_Namespace::FragmentInfo &fragment, const int device_id, const size_t num_cols)
 

Protected Attributes

size_t outer_fragments_size_ = 0
 
int64_t rowid_lookup_key_ = -1
 
std::map< int, const TableFragments * > selected_tables_fragments_
 
std::vector< FragmentsList > fragments_per_kernel_
 
std::map< int, std::set< size_t > > kernels_per_device_
 
std::vector< size_t > outer_fragment_tuple_sizes_
 
double gpu_input_mem_limit_percent_
 
std::map< size_t, size_t > tuple_count_per_device_
 
std::map< size_t, size_t > available_gpu_mem_bytes_
 

Detailed Description

Definition at line 55 of file QueryFragmentDescriptor.h.
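
The descriptor is typically driven in three steps: construct it over the query's input tables, build the fragment-to-kernel mapping, then hand each kernel to a dispatch callable (see Executor::dispatchFragments, the caller recorded below). A minimal usage sketch; ra_exe_unit, query_infos, gpu_mem_infos, frag_offsets, device_count, device_type, and executor are assumed to be prepared by the surrounding executor code:

  // Sketch only: argument values come from the caller's query plan.
  QueryFragmentDescriptor fragment_descriptor(
      ra_exe_unit, query_infos, gpu_mem_infos,
      /*gpu_input_mem_limit_percent=*/0.9);

  fragment_descriptor.buildFragmentKernelMap(
      ra_exe_unit,
      frag_offsets,
      device_count,
      device_type,
      /*enable_multifrag_kernels=*/false,
      /*enable_inner_join_fragment_skipping=*/false,
      executor);

  // Kernels are then dispatched with assignFragsToKernelDispatch() or, when
  // multi-fragment kernels are enabled, assignFragsToMultiDispatch(); see the
  // sketches under those members below.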

Constructor & Destructor Documentation

◆ QueryFragmentDescriptor()

QueryFragmentDescriptor::QueryFragmentDescriptor ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const std::vector< Data_Namespace::MemoryInfo > &  gpu_mem_infos,
const double  gpu_input_mem_limit_percent 
)

Definition at line 22 of file QueryFragmentDescriptor.cpp.

References available_gpu_mem_bytes_, CHECK_EQ, RelAlgExecutionUnit::input_descs, and selected_tables_fragments_.

27  : gpu_input_mem_limit_percent_(gpu_input_mem_limit_percent) {
28  const size_t input_desc_count{ra_exe_unit.input_descs.size()};
29  CHECK_EQ(query_infos.size(), input_desc_count);
30  for (size_t table_idx = 0; table_idx < input_desc_count; ++table_idx) {
31  const auto table_id = ra_exe_unit.input_descs[table_idx].getTableId();
32  if (!selected_tables_fragments_.count(table_id)) {
33  selected_tables_fragments_[ra_exe_unit.input_descs[table_idx].getTableId()] =
34  &query_infos[table_idx].info.fragments;
35  }
36  }
37 
38  for (size_t device_id = 0; device_id < gpu_mem_infos.size(); device_id++) {
39  const auto& gpu_mem_info = gpu_mem_infos[device_id];
40  available_gpu_mem_bytes_[device_id] =
41  gpu_mem_info.maxNumPages * gpu_mem_info.pageSize;
42  }
43 }

Member Function Documentation

◆ assignFragsToKernelDispatch()

template<typename DISPATCH_FCN >
void QueryFragmentDescriptor::assignFragsToKernelDispatch ( DISPATCH_FCN  f,
const RelAlgExecutionUnit &  ra_exe_unit 
) const
inline

Definition at line 87 of file QueryFragmentDescriptor.h.

References CHECK_LT.

Referenced by Executor::dispatchFragments().

88  {
89  for (const auto& kv : kernels_per_device_) {
90  for (const auto& kernel_id : kv.second) {
91  CHECK_LT(kernel_id, fragments_per_kernel_.size());
92 
93  const auto frag_list = fragments_per_kernel_[kernel_id];
94  f(kv.first, frag_list, rowid_lookup_key_);
95 
96  if (terminateDispatchMaybe(ra_exe_unit, kernel_id)) {
97  return;
98  }
99  }
100  }
101  }
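The DISPATCH_FCN callable is invoked as f(device_id, fragment_list, rowid_lookup_key), per the loop body above. A conforming callable might look like the following sketch; launch_kernel is a hypothetical placeholder for the caller's launch routine:

  auto dispatch = [&](const int device_id,
                      const FragmentsList& frag_list,
                      const int64_t rowid_lookup_key) {
    // Placeholder: run one kernel over frag_list on device_id.
    launch_kernel(device_id, frag_list, rowid_lookup_key);
  };
  fragment_descriptor.assignFragsToKernelDispatch(dispatch, ra_exe_unit);

Dispatch stops early if terminateDispatchMaybe() reports that a sample query's limit is already satisfied.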

◆ assignFragsToMultiDispatch()

template<typename DISPATCH_FCN >
void QueryFragmentDescriptor::assignFragsToMultiDispatch ( DISPATCH_FCN  f) const
inline

Definition at line 76 of file QueryFragmentDescriptor.h.

References CHECK_EQ, and CHECK_LT.

Referenced by Executor::dispatchFragments().

76  {
77  for (const auto& kv : kernels_per_device_) {
78  CHECK_EQ(kv.second.size(), size_t(1));
79  const auto kernel_id = *kv.second.begin();
80  CHECK_LT(kernel_id, fragments_per_kernel_.size());
81 
82  f(kv.first, fragments_per_kernel_[kernel_id], rowid_lookup_key_);
83  }
84  }
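Note the CHECK_EQ: this path assumes exactly one (multi-fragment) kernel per device, i.e. a kernel map built with enable_multifrag_kernels set. The callable takes the same three arguments as in assignFragsToKernelDispatch(); a sketch, again with a hypothetical launch routine:

  fragment_descriptor.assignFragsToMultiDispatch(
      [&](const int device_id,
          const FragmentsList& frag_list,
          const int64_t rowid_lookup_key) {
        // Placeholder: one launch per device over all of its fragments.
        launch_multifrag_kernel(device_id, frag_list, rowid_lookup_key);
      });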

◆ buildFragmentKernelMap()

void QueryFragmentDescriptor::buildFragmentKernelMap ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
const bool  enable_multifrag_kernels,
const bool  enable_inner_join_fragment_skipping,
Executor *  executor 
)

Definition at line 59 of file QueryFragmentDescriptor.cpp.

References buildFragmentPerKernelMap(), and buildMultifragKernelMap().

Referenced by Executor::dispatchFragments().

66  {
67  if (enable_multifrag_kernels) {
68  buildMultifragKernelMap(ra_exe_unit,
69  frag_offsets,
70  device_count,
71  device_type,
72  enable_inner_join_fragment_skipping,
73  executor);
74  } else {
75  buildFragmentPerKernelMap(
76  ra_exe_unit, frag_offsets, device_count, device_type, executor);
77  }
78 }

◆ buildFragmentPerKernelMap()

void QueryFragmentDescriptor::buildFragmentPerKernelMap ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
Executor *  executor 
)
protected

Definition at line 80 of file QueryFragmentDescriptor.cpp.

References CHECK, CHECK_GT, CHECK_LE, checkDeviceMemoryUsage(), CPU, Data_Namespace::CPU_LEVEL, fragments_per_kernel_, GPU, Data_Namespace::GPU_LEVEL, RelAlgExecutionUnit::input_descs, kernels_per_device_, outer_fragment_tuple_sizes_, outer_fragments_size_, rowid_lookup_key_, selected_tables_fragments_, and RelAlgExecutionUnit::simple_quals.

Referenced by buildFragmentKernelMap().

85  {
86  const auto& outer_table_desc = ra_exe_unit.input_descs.front();
87  const int outer_table_id = outer_table_desc.getTableId();
88  auto it = selected_tables_fragments_.find(outer_table_id);
89  CHECK(it != selected_tables_fragments_.end());
90  const auto outer_fragments = it->second;
91  outer_fragments_size_ = outer_fragments->size();
92 
93  const auto num_bytes_for_row = executor->getNumBytesForFetchedRow();
94 
95  for (size_t i = 0; i < outer_fragments->size(); ++i) {
96  const auto& fragment = (*outer_fragments)[i];
97  const auto skip_frag = executor->skipFragment(
98  outer_table_desc, fragment, ra_exe_unit.simple_quals, frag_offsets, i);
99  if (skip_frag.first) {
100  continue;
101  }
102  // NOTE: Using kernel index instead of frag index now
103  outer_fragment_tuple_sizes_.push_back(fragment.getNumTuples());
104  rowid_lookup_key_ = std::max(rowid_lookup_key_, skip_frag.second);
105  const int chosen_device_count =
106  device_type == ExecutorDeviceType::CPU ? 1 : device_count;
107  CHECK_GT(chosen_device_count, 0);
108  const auto memory_level = device_type == ExecutorDeviceType::GPU
109  ? Data_Namespace::GPU_LEVEL
110  : Data_Namespace::CPU_LEVEL;
111  int device_id = (device_type == ExecutorDeviceType::CPU || fragment.shard == -1)
112  ? fragment.deviceIds[static_cast<int>(memory_level)]
113  : fragment.shard % chosen_device_count;
114 
115  if (device_type == ExecutorDeviceType::GPU) {
116  checkDeviceMemoryUsage(fragment, device_id, num_bytes_for_row);
117  }
118  // Since we may have skipped fragments, the fragments_per_kernel_ vector may be
119  // smaller than the outer_fragments size
120  CHECK_LE(fragments_per_kernel_.size(), i);
121  fragments_per_kernel_.emplace_back(FragmentsList{});
122  const auto kernel_id = fragments_per_kernel_.size() - 1;
123  CHECK(kernels_per_device_[device_id].insert(kernel_id).second);
124 
125  for (size_t j = 0; j < ra_exe_unit.input_descs.size(); ++j) {
126  const auto frag_ids =
127  executor->getTableFragmentIndices(ra_exe_unit,
128  device_type,
129  j,
130  i,
131  selected_tables_fragments_,
132  executor->getInnerTabIdToJoinCond());
133  const auto table_id = ra_exe_unit.input_descs[j].getTableId();
134  auto table_frags_it = selected_tables_fragments_.find(table_id);
135  CHECK(table_frags_it != selected_tables_fragments_.end());
136 
137  fragments_per_kernel_[kernel_id].emplace_back(
138  FragmentsPerTable{table_id, frag_ids});
139  }
140  }
141 }
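The device placement rule in the loop can be read in isolation: sharded fragments are pinned to a device by shard number, while unsharded fragments (shard == -1), and all CPU execution, use the fragment's stored device id for the chosen memory level. A standalone restatement of that rule (not a member of this class; names mirror the snippet):

  int choose_device(const Fragmenter_Namespace::FragmentInfo& fragment,
                    const ExecutorDeviceType device_type,
                    const int chosen_device_count,
                    const Data_Namespace::MemoryLevel memory_level) {
    return (device_type == ExecutorDeviceType::CPU || fragment.shard == -1)
               ? fragment.deviceIds[static_cast<int>(memory_level)]
               : fragment.shard % chosen_device_count;
  }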

◆ buildMultifragKernelMap()

void QueryFragmentDescriptor::buildMultifragKernelMap ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< uint64_t > &  frag_offsets,
const int  device_count,
const ExecutorDeviceType &  device_type,
const bool  enable_inner_join_fragment_skipping,
Executor *  executor 
)
protected

Definition at line 143 of file QueryFragmentDescriptor.cpp.

References CHECK, CHECK_EQ, CHECK_LT, checkDeviceMemoryUsage(), fragments_per_kernel_, GPU, Data_Namespace::GPU_LEVEL, RelAlgExecutionUnit::input_descs, kernels_per_device_, outer_fragments_size_, rowid_lookup_key_, selected_tables_fragments_, and RelAlgExecutionUnit::simple_quals.

Referenced by buildFragmentKernelMap().

149  {
150  // Allocate all the fragments of the tables involved in the query to available
151  // devices. The basic idea: the device is decided by the outer table in the
152  // query (the first table in a join) and we need to broadcast the fragments
153  // in the inner table to each device. Sharding will change this model.
154  const auto& outer_table_desc = ra_exe_unit.input_descs.front();
155  const int outer_table_id = outer_table_desc.getTableId();
156  auto it = selected_tables_fragments_.find(outer_table_id);
157  CHECK(it != selected_tables_fragments_.end());
158  const auto outer_fragments = it->second;
159  outer_fragments_size_ = outer_fragments->size();
160 
161  const auto inner_table_id_to_join_condition = executor->getInnerTabIdToJoinCond();
162  const auto num_bytes_for_row = executor->getNumBytesForFetchedRow();
163 
164  for (size_t outer_frag_id = 0; outer_frag_id < outer_fragments->size();
165  ++outer_frag_id) {
166  const auto& fragment = (*outer_fragments)[outer_frag_id];
167  auto skip_frag = executor->skipFragment(outer_table_desc,
168  fragment,
169  ra_exe_unit.simple_quals,
170  frag_offsets,
171  outer_frag_id);
172  if (enable_inner_join_fragment_skipping &&
173  (skip_frag == std::pair<bool, int64_t>(false, -1))) {
174  skip_frag = executor->skipFragmentInnerJoins(
175  outer_table_desc, ra_exe_unit, fragment, frag_offsets, outer_frag_id);
176  }
177  if (skip_frag.first) {
178  continue;
179  }
180  const int device_id =
181  fragment.shard == -1
182  ? fragment.deviceIds[static_cast<int>(Data_Namespace::GPU_LEVEL)]
183  : fragment.shard % device_count;
184  if (device_type == ExecutorDeviceType::GPU) {
185  checkDeviceMemoryUsage(fragment, device_id, num_bytes_for_row);
186  }
187  for (size_t j = 0; j < ra_exe_unit.input_descs.size(); ++j) {
188  const auto table_id = ra_exe_unit.input_descs[j].getTableId();
189  auto table_frags_it = selected_tables_fragments_.find(table_id);
190  CHECK(table_frags_it != selected_tables_fragments_.end());
191  const auto frag_ids =
192  executor->getTableFragmentIndices(ra_exe_unit,
193  device_type,
194  j,
195  outer_frag_id,
196  selected_tables_fragments_,
197  inner_table_id_to_join_condition);
198 
199  if (kernels_per_device_.find(device_id) == kernels_per_device_.end()) {
200  fragments_per_kernel_.emplace_back(FragmentsList{});
201  kernels_per_device_.insert(std::make_pair(
202  device_id, std::set<size_t>({fragments_per_kernel_.size() - 1})));
203  }
204  const auto kernel_id = *kernels_per_device_[device_id].begin();
205  CHECK_LT(kernel_id, fragments_per_kernel_.size());
206  if (fragments_per_kernel_[kernel_id].size() < j + 1) {
207  fragments_per_kernel_[kernel_id].emplace_back(
208  FragmentsPerTable{table_id, frag_ids});
209  } else {
210  CHECK_EQ(fragments_per_kernel_[kernel_id][j].table_id, table_id);
211  auto& curr_frag_ids = fragments_per_kernel_[kernel_id][j].fragment_ids;
212  for (const int frag_id : frag_ids) {
213  if (std::find(curr_frag_ids.begin(), curr_frag_ids.end(), frag_id) ==
214  curr_frag_ids.end()) {
215  curr_frag_ids.push_back(frag_id);
216  }
217  }
218  }
219  }
220  rowid_lookup_key_ = std::max(rowid_lookup_key_, skip_frag.second);
221  }
222 }
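Each device's kernel accumulates fragment ids per input table without duplicates; the merge at the end of the inner loop is equivalent to this standalone helper (a restatement, assuming the id lists are std::vector<int>):

  #include <algorithm>
  #include <vector>

  void merge_fragment_ids(std::vector<int>& curr_frag_ids,
                          const std::vector<int>& frag_ids) {
    for (const int frag_id : frag_ids) {
      // Linear scan preserves insertion order and skips duplicate ids.
      if (std::find(curr_frag_ids.begin(), curr_frag_ids.end(), frag_id) ==
          curr_frag_ids.end()) {
        curr_frag_ids.push_back(frag_id);
      }
    }
  }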

◆ checkDeviceMemoryUsage()

void QueryFragmentDescriptor::checkDeviceMemoryUsage ( const Fragmenter_Namespace::FragmentInfo &  fragment,
const int  device_id,
const size_t  num_cols 
)
protected

Definition at line 252 of file QueryFragmentDescriptor.cpp.

References available_gpu_mem_bytes_, CHECK_GE, g_cluster, Fragmenter_Namespace::FragmentInfo::getNumTuples(), gpu_input_mem_limit_percent_, LOG, tuple_count_per_device_, and logger::WARNING.

Referenced by buildFragmentPerKernelMap(), and buildMultifragKernelMap().

255  {
256  if (g_cluster) {
257  // Disabled in distributed mode for now
258  return;
259  }
260  CHECK_GE(device_id, 0);
261  tuple_count_per_device_[device_id] += fragment.getNumTuples();
262  const size_t gpu_bytes_limit =
263  available_gpu_mem_bytes_[device_id] * gpu_input_mem_limit_percent_;
264  if (tuple_count_per_device_[device_id] * num_bytes_for_row > gpu_bytes_limit) {
265  LOG(WARNING) << "Not enough memory on device " << device_id
266  << " for input chunks totaling "
267  << tuple_count_per_device_[device_id] * num_bytes_for_row
268  << " bytes (available device memory: " << gpu_bytes_limit << " bytes)";
269  throw QueryMustRunOnCpu();
270  }
271 }
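A worked example with hypothetical numbers: a device reporting 8 GiB of available memory with gpu_input_mem_limit_percent_ = 0.9 has an input-chunk budget of roughly 7.7e9 bytes; once the tuples assigned to that device would exceed it, QueryMustRunOnCpu is thrown.

  // Hypothetical numbers, mirroring the check above.
  const size_t available_bytes = 8ULL * 1024 * 1024 * 1024;  // 8 GiB
  const double limit_percent = 0.9;        // gpu_input_mem_limit_percent_
  const size_t gpu_bytes_limit =
      static_cast<size_t>(available_bytes * limit_percent);  // ~7.7e9 bytes
  // 100M tuples * 80 bytes/row = 8.0e9 bytes > gpu_bytes_limit -> throw.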

◆ computeAllTablesFragments()

void QueryFragmentDescriptor::computeAllTablesFragments ( std::map< int, const TableFragments *> &  all_tables_fragments,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos 
)
static

Definition at line 45 of file QueryFragmentDescriptor.cpp.

References CHECK_EQ, and RelAlgExecutionUnit::input_descs.

Referenced by Executor::ExecutionDispatch::runImpl().

48  {
49  for (size_t tab_idx = 0; tab_idx < ra_exe_unit.input_descs.size(); ++tab_idx) {
50  int table_id = ra_exe_unit.input_descs[tab_idx].getTableId();
51  CHECK_EQ(query_infos[tab_idx].table_id, table_id);
52  const auto& fragments = query_infos[tab_idx].info.fragments;
53  if (!all_tables_fragments.count(table_id)) {
54  all_tables_fragments.insert(std::make_pair(table_id, &fragments));
55  }
56  }
57 }

◆ getOuterFragmentTupleSize()

const size_t QueryFragmentDescriptor::getOuterFragmentTupleSize ( const size_t  frag_index) const
inline protected

Definition at line 134 of file QueryFragmentDescriptor.h.

Referenced by terminateDispatchMaybe().

134  {
135  if (frag_index < outer_fragment_tuple_sizes_.size()) {
136  return outer_fragment_tuple_sizes_[frag_index];
137  } else {
138  return 0;
139  }
140  }

◆ shouldCheckWorkUnitWatchdog()

bool QueryFragmentDescriptor::shouldCheckWorkUnitWatchdog ( ) const
inline

Definition at line 103 of file QueryFragmentDescriptor.h.

Referenced by Executor::dispatchFragments().

103  {
104  return rowid_lookup_key_ < 0 && fragments_per_kernel_.size() > 0;
105  }

◆ terminateDispatchMaybe()

bool QueryFragmentDescriptor::terminateDispatchMaybe ( const RelAlgExecutionUnit &  ra_exe_unit,
const size_t  kernel_id 
) const
protected

Definition at line 240 of file QueryFragmentDescriptor.cpp.

References getOuterFragmentTupleSize(), anonymous_namespace{QueryFragmentDescriptor.cpp}::is_sample_query(), SortInfo::limit, SortInfo::offset, and RelAlgExecutionUnit::sort_info.

242  {
243  const auto sample_query_limit =
244  ra_exe_unit.sort_info.limit + ra_exe_unit.sort_info.offset;
245  if (is_sample_query(ra_exe_unit) && sample_query_limit > 0 &&
246  getOuterFragmentTupleSize(kernel_id) >= sample_query_limit) {
247  return true;
248  }
249  return false;
250 }
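For example, a sample query with LIMIT 100 and OFFSET 20 has sample_query_limit = 120, so dispatch stops as soon as the current kernel's outer fragment already holds at least 120 tuples; the remaining kernels are never dispatched.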

Member Data Documentation

◆ available_gpu_mem_bytes_

std::map<size_t, size_t> QueryFragmentDescriptor::available_gpu_mem_bytes_
protected

Definition at line 119 of file QueryFragmentDescriptor.h.

Referenced by checkDeviceMemoryUsage(), and QueryFragmentDescriptor().

◆ fragments_per_kernel_

std::vector<FragmentsList> QueryFragmentDescriptor::fragments_per_kernel_
protected

◆ gpu_input_mem_limit_percent_

double QueryFragmentDescriptor::gpu_input_mem_limit_percent_
protected

Definition at line 117 of file QueryFragmentDescriptor.h.

Referenced by checkDeviceMemoryUsage().

◆ kernels_per_device_

std::map<int, std::set<size_t> > QueryFragmentDescriptor::kernels_per_device_
protected

◆ outer_fragment_tuple_sizes_

std::vector<size_t> QueryFragmentDescriptor::outer_fragment_tuple_sizes_
protected

Definition at line 115 of file QueryFragmentDescriptor.h.

Referenced by buildFragmentPerKernelMap().

◆ outer_fragments_size_

size_t QueryFragmentDescriptor::outer_fragments_size_ = 0
protected

◆ rowid_lookup_key_

int64_t QueryFragmentDescriptor::rowid_lookup_key_ = -1
protected

◆ selected_tables_fragments_

std::map<int, const TableFragments*> QueryFragmentDescriptor::selected_tables_fragments_
protected

◆ tuple_count_per_device_

std::map<size_t, size_t> QueryFragmentDescriptor::tuple_count_per_device_
protected

Definition at line 118 of file QueryFragmentDescriptor.h.

Referenced by checkDeviceMemoryUsage().


The documentation for this class was generated from the following files:

QueryFragmentDescriptor.h
QueryFragmentDescriptor.cpp