QueryMemoryDescriptor.cpp
1 /*
2  * Copyright 2018 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "QueryMemoryDescriptor.h"
18 
19 #include "../Execute.h"
20 #include "../ExpressionRewrite.h"
21 #include "../GroupByAndAggregate.h"
22 #include "../StreamingTopN.h"
23 #include "../UsedColumnsVisitor.h"
24 #include "ColSlotContext.h"
25 
26 #include <boost/algorithm/cxx11/any_of.hpp>
27 
28 bool g_enable_smem_group_by{true};
29 extern bool g_enable_columnar_output;
30 
31 namespace {
32 
33 bool is_int_and_no_bigger_than(const SQLTypeInfo& ti, const size_t byte_width) {
34  if (!ti.is_integer()) {
35  return false;
36  }
37  return get_bit_width(ti) <= (byte_width * 8);
38 }
39 
40 bool is_valid_int32_range(const ExpressionRange& range) {
41  return range.getIntMin() > INT32_MIN && range.getIntMax() < EMPTY_KEY_32 - 1;
42 }
43 
44 std::vector<int64_t> target_expr_group_by_indices(
45  const std::list<std::shared_ptr<Analyzer::Expr>>& groupby_exprs,
46  const std::vector<Analyzer::Expr*>& target_exprs) {
47  std::vector<int64_t> indices(target_exprs.size(), -1);
48  for (size_t target_idx = 0; target_idx < target_exprs.size(); ++target_idx) {
49  const auto target_expr = target_exprs[target_idx];
50  if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
51  continue;
52  }
53  const auto var_expr = dynamic_cast<const Analyzer::Var*>(target_expr);
54  if (var_expr && var_expr->get_which_row() == Analyzer::Var::kGROUPBY) {
55  indices[target_idx] = var_expr->get_varno() - 1;
56  continue;
57  }
58  }
59  return indices;
60 }
61 
62 std::vector<int64_t> target_expr_proj_indices(const RelAlgExecutionUnit& ra_exe_unit,
63  const Catalog_Namespace::Catalog& cat) {
64  if (ra_exe_unit.input_descs.size() > 1 ||
65  !ra_exe_unit.sort_info.order_entries.empty()) {
66  return {};
67  }
68  std::vector<int64_t> target_indices(ra_exe_unit.target_exprs.size(), -1);
69  UsedColumnsVisitor columns_visitor;
70  std::unordered_set<int> used_columns;
71  for (const auto& simple_qual : ra_exe_unit.simple_quals) {
72  const auto crt_used_columns = columns_visitor.visit(simple_qual.get());
73  used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
74  }
75  for (const auto& qual : ra_exe_unit.quals) {
76  const auto crt_used_columns = columns_visitor.visit(qual.get());
77  used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
78  }
79  for (const auto& target : ra_exe_unit.target_exprs) {
80  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target);
81  if (col_var) {
82  const auto cd = get_column_descriptor_maybe(
83  col_var->get_column_id(), col_var->get_table_id(), cat);
84  if (!cd || !cd->isVirtualCol) {
85  continue;
86  }
87  }
88  const auto crt_used_columns = columns_visitor.visit(target);
89  used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
90  }
91  for (size_t target_idx = 0; target_idx < ra_exe_unit.target_exprs.size();
92  ++target_idx) {
93  const auto target_expr = ra_exe_unit.target_exprs[target_idx];
94  CHECK(target_expr);
95  const auto& ti = target_expr->get_type_info();
96  // TODO: add proper lazy fetch for varlen types in result set
97  if (ti.is_varlen()) {
98  continue;
99  }
100  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
101  if (!col_var) {
102  continue;
103  }
104  if (!ti.is_varlen() &&
105  used_columns.find(col_var->get_column_id()) == used_columns.end()) {
106  // setting target index to be zero so that later it can be decoded properly (in lazy
107  // fetch, the zeroth target index indicates the corresponding rowid column for the
108  // projected entry)
109  target_indices[target_idx] = 0;
110  }
111  }
112  return target_indices;
113 }
114 
115 int8_t pick_baseline_key_component_width(const ExpressionRange& range,
116  const size_t group_col_width) {
117  if (range.getType() == ExpressionRangeType::Invalid) {
118  return sizeof(int64_t);
119  }
120  switch (range.getType()) {
121  case ExpressionRangeType::Integer:
122  if (group_col_width == sizeof(int64_t) && range.hasNulls()) {
123  return sizeof(int64_t);
124  }
125  return is_valid_int32_range(range) ? sizeof(int32_t) : sizeof(int64_t);
126  case ExpressionRangeType::Float:
127  case ExpressionRangeType::Double:
128  return sizeof(int64_t); // No compaction for floating point yet.
129  default:
130  UNREACHABLE();
131  }
132  return sizeof(int64_t);
133 }
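// Illustrative note (added for this listing, not part of the original source):
// the helper above narrows a baseline-hash key component to 4 bytes only when the
// group-by expression's integer range provably fits below EMPTY_KEY_32 - 1. Assuming
// the usual ExpressionRange::makeIntRange(min, max, bucket, has_nulls) factory:
//
//   const auto r = ExpressionRange::makeIntRange(0, 100000, 0, /*has_nulls=*/false);
//   pick_baseline_key_component_width(r, /*group_col_width=*/4);  // -> sizeof(int32_t)
//   pick_baseline_key_component_width(ExpressionRange::makeInvalidRange(), 4);
//                                                                  // -> sizeof(int64_t)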
134 
135 // TODO(miyu): make sure following setting of compact width is correct in all cases.
136 int8_t pick_baseline_key_width(const RelAlgExecutionUnit& ra_exe_unit,
137  const std::vector<InputTableInfo>& query_infos,
138  const Executor* executor) {
139  int8_t compact_width{4};
140  for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
141  const auto expr_range = getExpressionRange(groupby_expr.get(), query_infos, executor);
142  compact_width = std::max(compact_width,
143  pick_baseline_key_component_width(
144  expr_range, groupby_expr->get_type_info().get_size()));
145  }
146  return compact_width;
147 }
148 
149 bool use_streaming_top_n(const RelAlgExecutionUnit& ra_exe_unit,
150  const bool output_columnar) {
151  if (g_cluster) {
152  return false; // TODO(miyu)
153  }
154 
155  for (const auto target_expr : ra_exe_unit.target_exprs) {
156  if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
157  return false;
158  }
159  if (dynamic_cast<const Analyzer::WindowFunction*>(target_expr)) {
160  return false;
161  }
162  }
163 
164  // TODO: Allow streaming top n for columnar output
165  if (!output_columnar && ra_exe_unit.sort_info.order_entries.size() == 1 &&
166  ra_exe_unit.sort_info.limit &&
167  ra_exe_unit.sort_info.algorithm == SortAlgorithm::StreamingTopN) {
168  const auto only_order_entry = ra_exe_unit.sort_info.order_entries.front();
169  CHECK_GT(only_order_entry.tle_no, int(0));
170  CHECK_LE(static_cast<size_t>(only_order_entry.tle_no),
171  ra_exe_unit.target_exprs.size());
172  const auto order_entry_expr = ra_exe_unit.target_exprs[only_order_entry.tle_no - 1];
173  const auto n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
174  if ((order_entry_expr->get_type_info().is_number() ||
175  order_entry_expr->get_type_info().is_time()) &&
176  n <= 100000) { // TODO(miyu): relax?
177  return true;
178  }
179  }
180 
181  return false;
182 }
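// Illustrative note (added for this listing, not part of the original source):
// a query shaped like
//   SELECT x FROM t ORDER BY x LIMIT 10;
// satisfies use_streaming_top_n(): no aggregate or window-function targets, a single
// order entry over a numeric/time column, a limit with offset + limit <= 100000, and
// row-wise output. Adding an aggregate target or requesting columnar output sends the
// query down the regular projection-then-sort path instead.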
183 
184 template <class T>
185 inline std::vector<int8_t> get_col_byte_widths(const T& col_expr_list) {
186  std::vector<int8_t> col_widths;
187  size_t col_expr_idx = 0;
188  for (const auto& col_expr : col_expr_list) {
189  if (!col_expr) {
190  // row index
191  col_widths.push_back(sizeof(int64_t));
192  } else {
193  bool is_varlen_projection{false};
194  if constexpr (std::is_same<T, std::list<std::shared_ptr<Analyzer::Expr>>>::value) {
195  is_varlen_projection =
196  !(std::dynamic_pointer_cast<const Analyzer::GeoExpr>(col_expr) == nullptr);
197  } else {
198  is_varlen_projection =
199  !(dynamic_cast<const Analyzer::GeoExpr*>(col_expr) == nullptr);
200  }
201 
202  if (is_varlen_projection) {
203  col_widths.push_back(sizeof(int64_t));
204  ++col_expr_idx;
205  continue;
206  }
207  const auto agg_info = get_target_info(col_expr, g_bigint_count);
208  const auto chosen_type = get_compact_type(agg_info);
209  if ((chosen_type.is_string() && chosen_type.get_compression() == kENCODING_NONE) ||
210  chosen_type.is_array()) {
211  col_widths.push_back(sizeof(int64_t));
212  col_widths.push_back(sizeof(int64_t));
213  ++col_expr_idx;
214  continue;
215  }
216  if (chosen_type.is_geometry()) {
217  for (auto i = 0; i < chosen_type.get_physical_coord_cols(); ++i) {
218  col_widths.push_back(sizeof(int64_t));
219  col_widths.push_back(sizeof(int64_t));
220  }
221  ++col_expr_idx;
222  continue;
223  }
224  const auto col_expr_bitwidth = get_bit_width(chosen_type);
225  CHECK_EQ(size_t(0), col_expr_bitwidth % 8);
226  col_widths.push_back(static_cast<int8_t>(col_expr_bitwidth >> 3));
227  // for average, we'll need to keep the count as well
228  if (agg_info.agg_kind == kAVG) {
229  CHECK(agg_info.is_agg);
230  col_widths.push_back(sizeof(int64_t));
231  }
232  }
233  ++col_expr_idx;
234  }
235  return col_widths;
236 }
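// Illustrative note (added for this listing, not part of the original source):
// for a target list of SUM(bigint_col), AVG(double_col), and a none-encoded TEXT
// column, get_col_byte_widths() returns {8, 8, 8, 8, 8}: one 8-byte slot for the sum,
// two 8-byte slots for the average (sum and count), and two 8-byte slots (pointer and
// length) for the varlen string.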
237 
238 } // namespace
239 
240 std::unique_ptr<QueryMemoryDescriptor> QueryMemoryDescriptor::init(
241  const Executor* executor,
242  const RelAlgExecutionUnit& ra_exe_unit,
243  const std::vector<InputTableInfo>& query_infos,
244  const ColRangeInfo& col_range_info,
245  const KeylessInfo& keyless_info,
246  const bool allow_multifrag,
247  const ExecutorDeviceType device_type,
248  const int8_t crt_min_byte_width,
249  const bool sort_on_gpu_hint,
250  const size_t shard_count,
251  const size_t max_groups_buffer_entry_count,
252  RenderInfo* render_info,
253  const CountDistinctDescriptors count_distinct_descriptors,
254  const bool must_use_baseline_sort,
255  const bool output_columnar_hint,
256  const bool streaming_top_n_hint) {
257  auto group_col_widths = get_col_byte_widths(ra_exe_unit.groupby_exprs);
258  const bool is_group_by{!group_col_widths.empty()};
259 
260  auto col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, {});
261 
262  const auto min_slot_size = QueryMemoryDescriptor::pick_target_compact_width(
263  ra_exe_unit, query_infos, crt_min_byte_width);
264 
265  col_slot_context.setAllSlotsPaddedSize(min_slot_size);
266  col_slot_context.validate();
267 
268  if (!is_group_by) {
269  CHECK(!must_use_baseline_sort);
270 
271  return std::make_unique<QueryMemoryDescriptor>(
272  executor,
273  ra_exe_unit,
274  query_infos,
275  allow_multifrag,
276  false,
277  false,
278  -1,
279  ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
280  : QueryDescriptionType::NonGroupedAggregate,
281  0,
282  0,
283  0,
284  false},
285  col_slot_context,
286  std::vector<int8_t>{},
287  /*group_col_compact_width=*/0,
288  std::vector<int64_t>{},
289  /*entry_count=*/1,
290  count_distinct_descriptors,
291  false,
292  output_columnar_hint,
293  render_info && render_info->isPotentialInSituRender(),
294  must_use_baseline_sort,
295  /*use_streaming_top_n=*/false);
296  }
297 
298  size_t entry_count = 1;
299  auto actual_col_range_info = col_range_info;
300  bool interleaved_bins_on_gpu = false;
301  bool keyless_hash = false;
302  bool streaming_top_n = false;
303  int8_t group_col_compact_width = 0;
304  int32_t idx_target_as_key = -1;
305  auto output_columnar = output_columnar_hint;
306  std::vector<int64_t> target_groupby_indices;
307 
308  switch (col_range_info.hash_type_) {
309  case QueryDescriptionType::GroupByPerfectHash: {
310  if (render_info) {
311  render_info->setInSituDataIfUnset(false);
312  }
313  // keyless hash: whether or not group columns are stored at the beginning of the
314  // output buffer
315  keyless_hash =
316  (!sort_on_gpu_hint ||
317  !QueryMemoryDescriptor::many_entries(
318  col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
319  !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;
320 
321  // if keyless, then this target index indicates whether an entry is empty or not
322  // (acts as a key)
323  idx_target_as_key = keyless_info.target_index;
324 
325  if (group_col_widths.size() > 1) {
326  // col range info max contains the expected cardinality of the output
327  entry_count = static_cast<size_t>(actual_col_range_info.max);
328  actual_col_range_info.bucket = 0;
329  } else {
330  // single column perfect hash
331  entry_count = std::max(
332  GroupByAndAggregate::getBucketedCardinality(col_range_info), int64_t(1));
333  const size_t interleaved_max_threshold{512};
334 
335  if (must_use_baseline_sort) {
336  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
337  ra_exe_unit.target_exprs);
338  col_slot_context =
339  ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
340  }
341 
342  bool has_varlen_sample_agg = false;
343  for (const auto& target_expr : ra_exe_unit.target_exprs) {
344  if (target_expr->get_contains_agg()) {
345  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
346  CHECK(agg_expr);
347  if (agg_expr->get_aggtype() == kSAMPLE &&
348  agg_expr->get_type_info().is_varlen()) {
349  has_varlen_sample_agg = true;
350  break;
351  }
352  }
353  }
354 
355  interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
356  (entry_count <= interleaved_max_threshold) &&
357  (device_type == ExecutorDeviceType::GPU) &&
358  QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
359  count_distinct_descriptors) &&
360  !output_columnar;
361  }
362  break;
363  }
364  case QueryDescriptionType::GroupByBaselineHash: {
365  if (render_info) {
366  render_info->setInSituDataIfUnset(false);
367  }
368  entry_count = shard_count
369  ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
370  : max_groups_buffer_entry_count;
371  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
372  ra_exe_unit.target_exprs);
373  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
374 
375  group_col_compact_width =
376  output_columnar ? 8
377  : pick_baseline_key_width(ra_exe_unit, query_infos, executor);
378 
379  actual_col_range_info =
380  {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
381  break;
382  }
383  case QueryDescriptionType::Projection: {
384  CHECK(!must_use_baseline_sort);
385 
386  if (streaming_top_n_hint && use_streaming_top_n(ra_exe_unit, output_columnar)) {
387  streaming_top_n = true;
388  entry_count = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
389  } else {
390  if (ra_exe_unit.use_bump_allocator) {
391  output_columnar = false;
392  entry_count = 0;
393  } else {
394  entry_count = ra_exe_unit.scan_limit
395  ? static_cast<size_t>(ra_exe_unit.scan_limit)
396  : max_groups_buffer_entry_count;
397  }
398  }
399 
400  const auto catalog = executor->getCatalog();
401  CHECK(catalog);
402  target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
403  ? target_expr_proj_indices(ra_exe_unit, *catalog)
404  : std::vector<int64_t>{};
405 
406  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
407  break;
408  }
409  default:
410  UNREACHABLE() << "Unknown query type";
411  }
412 
413  return std::make_unique<QueryMemoryDescriptor>(
414  executor,
415  ra_exe_unit,
416  query_infos,
417  allow_multifrag,
418  keyless_hash,
419  interleaved_bins_on_gpu,
420  idx_target_as_key,
421  actual_col_range_info,
422  col_slot_context,
423  group_col_widths,
424  group_col_compact_width,
425  target_groupby_indices,
426  entry_count,
427  count_distinct_descriptors,
428  sort_on_gpu_hint,
429  output_columnar,
430  render_info && render_info->isPotentialInSituRender(),
431  must_use_baseline_sort,
432  streaming_top_n);
433 }
434 
435 namespace {
436 bool anyOf(std::vector<Analyzer::Expr*> const& target_exprs, SQLAgg const agg_kind) {
437  return boost::algorithm::any_of(target_exprs, [agg_kind](Analyzer::Expr const* expr) {
438  auto const* const agg = dynamic_cast<Analyzer::AggExpr const*>(expr);
439  return agg && agg->get_aggtype() == agg_kind;
440  });
441 }
442 } // namespace
443 
444 QueryMemoryDescriptor::QueryMemoryDescriptor(
445  const Executor* executor,
446  const RelAlgExecutionUnit& ra_exe_unit,
447  const std::vector<InputTableInfo>& query_infos,
448  const bool allow_multifrag,
449  const bool keyless_hash,
450  const bool interleaved_bins_on_gpu,
451  const int32_t idx_target_as_key,
452  const ColRangeInfo& col_range_info,
453  const ColSlotContext& col_slot_context,
454  const std::vector<int8_t>& group_col_widths,
455  const int8_t group_col_compact_width,
456  const std::vector<int64_t>& target_groupby_indices,
457  const size_t entry_count,
458  const CountDistinctDescriptors count_distinct_descriptors,
459  const bool sort_on_gpu_hint,
460  const bool output_columnar_hint,
461  const bool render_output,
462  const bool must_use_baseline_sort,
463  const bool use_streaming_top_n)
464  : executor_(executor)
465  , allow_multifrag_(allow_multifrag)
466  , query_desc_type_(col_range_info.hash_type_)
467  , keyless_hash_(keyless_hash)
468  , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
469  , idx_target_as_key_(idx_target_as_key)
470  , group_col_widths_(group_col_widths)
471  , group_col_compact_width_(group_col_compact_width)
472  , target_groupby_indices_(target_groupby_indices)
473  , entry_count_(entry_count)
474  , min_val_(col_range_info.min)
475  , max_val_(col_range_info.max)
476  , bucket_(col_range_info.bucket)
477  , has_nulls_(col_range_info.has_nulls)
478  , count_distinct_descriptors_(count_distinct_descriptors)
479  , output_columnar_(false)
480  , render_output_(render_output)
481  , must_use_baseline_sort_(must_use_baseline_sort)
482  , is_table_function_(false)
483  , use_streaming_top_n_(use_streaming_top_n)
484  , force_4byte_float_(false)
485  , col_slot_context_(col_slot_context) {
486  col_slot_context_.setAllUnsetSlotsPaddedSize(8);
487  col_slot_context_.validate();
488 
489  sort_on_gpu_ = sort_on_gpu_hint && canOutputColumnar() && !keyless_hash_;
490 
491  if (sort_on_gpu_) {
492  CHECK(!ra_exe_unit.use_bump_allocator);
493  output_columnar_ = true;
494  } else {
495  switch (query_desc_type_) {
496  case QueryDescriptionType::Projection:
497  output_columnar_ = output_columnar_hint;
498  break;
499  case QueryDescriptionType::GroupByPerfectHash:
500  output_columnar_ = output_columnar_hint &&
501  QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
502  count_distinct_descriptors_) &&
503  !anyOf(ra_exe_unit.target_exprs, kAPPROX_QUANTILE);
504  break;
505  case QueryDescriptionType::GroupByBaselineHash:
506  output_columnar_ = output_columnar_hint;
507  break;
508  case QueryDescriptionType::NonGroupedAggregate:
509  output_columnar_ = output_columnar_hint &&
510  QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
511  count_distinct_descriptors_) &&
512  !anyOf(ra_exe_unit.target_exprs, kAPPROX_QUANTILE);
513  break;
514  default:
515  output_columnar_ = false;
516  break;
517  }
518  }
519 
520  if (isLogicalSizedColumnsAllowed()) {
521  // TODO(adb): Ensure fixed size buffer allocations are correct with all logical column
522  // sizes
523  CHECK(!ra_exe_unit.use_bump_allocator);
524  col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
525  col_slot_context_.validate();
526  }
527 
528 #ifdef HAVE_CUDA
529  // Check Streaming Top N heap usage, bail if > max slab size, CUDA ONLY
530  if (use_streaming_top_n_ && executor->getDataMgr()->gpusPresent()) {
531  const auto thread_count = executor->blockSize() * executor->gridSize();
532  const auto total_buff_size =
533  streaming_top_n::get_heap_size(getRowSize(), getEntryCount(), thread_count);
534  if (total_buff_size > executor_->maxGpuSlabSize()) {
535  throw StreamingTopNOOM(total_buff_size);
536  }
537  }
538 #endif
539 }
540 
541 QueryMemoryDescriptor::QueryMemoryDescriptor()
542  : executor_(nullptr)
543  , allow_multifrag_(false)
544  , query_desc_type_(QueryDescriptionType::Projection)
545  , keyless_hash_(false)
546  , interleaved_bins_on_gpu_(false)
547  , idx_target_as_key_(0)
548  , group_col_compact_width_(0)
549  , entry_count_(0)
550  , min_val_(0)
551  , max_val_(0)
552  , bucket_(0)
553  , has_nulls_(false)
554  , sort_on_gpu_(false)
555  , output_columnar_(false)
556  , render_output_(false)
557  , must_use_baseline_sort_(false)
558  , is_table_function_(false)
559  , use_streaming_top_n_(false)
560  , force_4byte_float_(false) {}
561 
562 QueryMemoryDescriptor::QueryMemoryDescriptor(const Executor* executor,
563  const size_t entry_count,
564  const QueryDescriptionType query_desc_type,
565  const bool is_table_function)
566  : executor_(executor)
567  , allow_multifrag_(false)
568  , query_desc_type_(query_desc_type)
569  , keyless_hash_(false)
570  , interleaved_bins_on_gpu_(false)
571  , idx_target_as_key_(0)
572  , group_col_compact_width_(0)
573  , entry_count_(entry_count)
574  , min_val_(0)
575  , max_val_(0)
576  , bucket_(0)
577  , has_nulls_(false)
578  , sort_on_gpu_(false)
579  , output_columnar_(false)
580  , render_output_(false)
581  , must_use_baseline_sort_(false)
582  , is_table_function_(is_table_function)
583  , use_streaming_top_n_(false)
584  , force_4byte_float_(false) {}
585 
586 QueryMemoryDescriptor::QueryMemoryDescriptor(const QueryDescriptionType query_desc_type,
587  const int64_t min_val,
588  const int64_t max_val,
589  const bool has_nulls,
590  const std::vector<int8_t>& group_col_widths)
591  : executor_(nullptr)
592  , allow_multifrag_(false)
593  , query_desc_type_(query_desc_type)
594  , keyless_hash_(false)
595  , interleaved_bins_on_gpu_(false)
596  , idx_target_as_key_(0)
597  , group_col_widths_(group_col_widths)
598  , group_col_compact_width_(0)
599  , entry_count_(0)
600  , min_val_(min_val)
601  , max_val_(max_val)
602  , bucket_(0)
603  , has_nulls_(false)
604  , sort_on_gpu_(false)
605  , output_columnar_(false)
606  , render_output_(false)
607  , must_use_baseline_sort_(false)
608  , is_table_function_(false)
609  , use_streaming_top_n_(false)
610  , force_4byte_float_(false) {}
611 
612 bool QueryMemoryDescriptor::operator==(const QueryMemoryDescriptor& other) const {
613  // Note that this method does not check ptr reference members (e.g. executor_) or
614  // entry_count_
615  if (query_desc_type_ != other.query_desc_type_) {
616  return false;
617  }
618  if (keyless_hash_ != other.keyless_hash_) {
619  return false;
620  }
621  if (interleaved_bins_on_gpu_ != other.interleaved_bins_on_gpu_) {
622  return false;
623  }
624  if (idx_target_as_key_ != other.idx_target_as_key_) {
625  return false;
626  }
627  if (force_4byte_float_ != other.force_4byte_float_) {
628  return false;
629  }
630  if (group_col_widths_ != other.group_col_widths_) {
631  return false;
632  }
633  if (group_col_compact_width_ != other.group_col_compact_width_) {
634  return false;
635  }
636  if (target_groupby_indices_ != other.target_groupby_indices_) {
637  return false;
638  }
639  if (min_val_ != other.min_val_) {
640  return false;
641  }
642  if (max_val_ != other.max_val_) {
643  return false;
644  }
645  if (bucket_ != other.bucket_) {
646  return false;
647  }
648  if (has_nulls_ != other.has_nulls_) {
649  return false;
650  }
651  if (count_distinct_descriptors_.size() != other.count_distinct_descriptors_.size()) {
652  return false;
653  } else {
654  // Count distinct descriptors can legitimately differ in device only.
655  for (size_t i = 0; i < count_distinct_descriptors_.size(); ++i) {
656  auto ref_count_distinct_desc = other.count_distinct_descriptors_[i];
657  auto count_distinct_desc = count_distinct_descriptors_[i];
658  count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
659  if (ref_count_distinct_desc != count_distinct_desc) {
660  return false;
661  }
662  }
663  }
664  if (sort_on_gpu_ != other.sort_on_gpu_) {
665  return false;
666  }
667  if (output_columnar_ != other.output_columnar_) {
668  return false;
669  }
670  if (col_slot_context_ != other.col_slot_context_) {
671  return false;
672  }
673  return true;
674 }
675 
676 std::unique_ptr<QueryExecutionContext> QueryMemoryDescriptor::getQueryExecutionContext(
677  const RelAlgExecutionUnit& ra_exe_unit,
678  const Executor* executor,
679  const ExecutorDeviceType device_type,
680  const ExecutorDispatchMode dispatch_mode,
681  const int device_id,
682  const int64_t num_rows,
683  const std::vector<std::vector<const int8_t*>>& col_buffers,
684  const std::vector<std::vector<uint64_t>>& frag_offsets,
685  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
686  const bool output_columnar,
687  const bool sort_on_gpu,
688  const size_t thread_idx,
689  RenderInfo* render_info) const {
690  auto timer = DEBUG_TIMER(__func__);
691  if (frag_offsets.empty()) {
692  return nullptr;
693  }
694  return std::unique_ptr<QueryExecutionContext>(
695  new QueryExecutionContext(ra_exe_unit,
696  *this,
697  executor,
698  device_type,
699  dispatch_mode,
700  device_id,
701  num_rows,
702  col_buffers,
703  frag_offsets,
704  row_set_mem_owner,
705  output_columnar,
706  sort_on_gpu,
707  thread_idx,
708  render_info));
709 }
710 
711 int8_t QueryMemoryDescriptor::pick_target_compact_width(
712  const RelAlgExecutionUnit& ra_exe_unit,
713  const std::vector<InputTableInfo>& query_infos,
714  const int8_t crt_min_byte_width) {
715  if (g_bigint_count) {
716  return sizeof(int64_t);
717  }
718  int8_t compact_width{0};
719  auto col_it = ra_exe_unit.input_col_descs.begin();
720  int unnest_array_col_id{std::numeric_limits<int>::min()};
721  for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
722  const auto uoper = dynamic_cast<Analyzer::UOper*>(groupby_expr.get());
723  if (uoper && uoper->get_optype() == kUNNEST) {
724  const auto& arg_ti = uoper->get_operand()->get_type_info();
725  CHECK(arg_ti.is_array());
726  const auto& elem_ti = arg_ti.get_elem_type();
727  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
728  unnest_array_col_id = (*col_it)->getColId();
729  } else {
730  compact_width = crt_min_byte_width;
731  break;
732  }
733  }
734  ++col_it;
735  }
736  if (!compact_width &&
737  (ra_exe_unit.groupby_exprs.size() != 1 || !ra_exe_unit.groupby_exprs.front())) {
738  compact_width = crt_min_byte_width;
739  }
740  if (!compact_width) {
741  col_it = ra_exe_unit.input_col_descs.begin();
742  std::advance(col_it, ra_exe_unit.groupby_exprs.size());
743  for (const auto target : ra_exe_unit.target_exprs) {
744  const auto& ti = target->get_type_info();
745  const auto agg = dynamic_cast<const Analyzer::AggExpr*>(target);
746  if (agg && agg->get_arg()) {
747  compact_width = crt_min_byte_width;
748  break;
749  }
750 
751  if (agg) {
752  CHECK_EQ(kCOUNT, agg->get_aggtype());
753  CHECK(!agg->get_is_distinct());
754  ++col_it;
755  continue;
756  }
757 
758  if (is_int_and_no_bigger_than(ti, 4) ||
759  (ti.is_string() && ti.get_compression() == kENCODING_DICT)) {
760  ++col_it;
761  continue;
762  }
763 
764  const auto uoper = dynamic_cast<Analyzer::UOper*>(target);
765  if (uoper && uoper->get_optype() == kUNNEST &&
766  (*col_it)->getColId() == unnest_array_col_id) {
767  const auto arg_ti = uoper->get_operand()->get_type_info();
768  CHECK(arg_ti.is_array());
769  const auto& elem_ti = arg_ti.get_elem_type();
770  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
771  ++col_it;
772  continue;
773  }
774  }
775 
776  compact_width = crt_min_byte_width;
777  break;
778  }
779  }
780  if (!compact_width) {
781  size_t total_tuples{0};
782  for (const auto& qi : query_infos) {
783  total_tuples += qi.info.getNumTuples();
784  }
785  return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
786  unnest_array_col_id != std::numeric_limits<int>::min()
787  ? 4
788  : crt_min_byte_width;
789  } else {
790  // TODO(miyu): relax this condition to allow more cases just w/o padding
791  for (auto wid : get_col_byte_widths(ra_exe_unit.target_exprs)) {
792  compact_width = std::max(compact_width, wid);
793  }
794  return compact_width;
795  }
796 }
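// Illustrative note (added for this listing, not part of the original source):
// pick_target_compact_width() short-circuits to 8 when g_bigint_count is set. Otherwise
// a group-by whose targets are plain COUNTs, 32-bit-or-narrower integers, or
// dictionary-encoded strings can keep 4-byte slots as long as the inputs hold at most
// ~4 billion tuples; any wider or aggregated argument falls back to crt_min_byte_width,
// and the chosen width is never smaller than the widest target column.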
797 
798 size_t QueryMemoryDescriptor::getColsSize() const {
799  return col_slot_context_.getAllSlotsAlignedPaddedSize();
800 }
801 
802 size_t QueryMemoryDescriptor::getRowSize() const {
803  CHECK(!output_columnar_);
804  size_t total_bytes{0};
805  if (keyless_hash_) {
806  // ignore, there's no group column in the output buffer
807  CHECK(getGroupbyColCount() == 1);
808  } else {
809  total_bytes += group_col_widths_.size() * getEffectiveKeyWidth();
810  total_bytes = align_to_int64(total_bytes);
811  }
812  total_bytes += getColsSize();
813  return align_to_int64(total_bytes);
814 }
815 
816 size_t QueryMemoryDescriptor::getWarpCount() const {
817  return (interleaved_bins_on_gpu_ ? executor_->warpSize() : 1);
818 }
819 
820 size_t QueryMemoryDescriptor::getCompactByteWidth() const {
821  return col_slot_context_.getCompactByteWidth();
822 }
823 
824 
825 /*
826  * Returns the maximum total number of bytes (including required paddings) to store
827  * all non-lazy columns' results for columnar cases.
828  */
829 size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers() const {
830  CHECK(output_columnar_);
831  return col_slot_context_.getTotalBytesOfColumnarBuffers(entry_count_);
832 }
833 
838 size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers(
839  const size_t num_entries_per_column) const {
840  return col_slot_context_.getTotalBytesOfColumnarBuffers(num_entries_per_column);
841 }
842 
852 size_t QueryMemoryDescriptor::getTotalBytesOfColumnarProjections(
853  const size_t projection_count) const {
854  constexpr size_t row_index_width = sizeof(int64_t);
855  return getTotalBytesOfColumnarBuffers(projection_count) +
856  row_index_width * projection_count;
857 }
858 
859 size_t QueryMemoryDescriptor::getColOnlyOffInBytes(const size_t col_idx) const {
860  return col_slot_context_.getColOnlyOffInBytes(col_idx);
861 }
862 
863 /*
864  * Returns the memory offset in bytes for a specific agg column in the output
865  * memory buffer. Depending on the query type, there may be some extra portion
866  * of memory prepended at the beginning of the buffer. A brief description of
867  * the memory layout is as follows:
868  * 1. projections: index column (64bit) + all target columns
869  * 2. group by: all group columns (64-bit each) + all agg columns
870  * 2a. if keyless, there is no prepended group column stored at the beginning
871  */
872 size_t QueryMemoryDescriptor::getColOffInBytes(const size_t col_idx) const {
873  const auto warp_count = getWarpCount();
874  if (output_columnar_) {
875  CHECK_EQ(size_t(1), warp_count);
876  size_t offset{0};
877  if (!keyless_hash_) {
878  offset = getPrependedGroupBufferSizeInBytes();
879  }
880  for (size_t index = 0; index < col_idx; ++index) {
881  offset += align_to_int64(getPaddedSlotWidthBytes(index) * entry_count_);
882  }
883  return offset;
884  }
885 
886  size_t offset{0};
887  if (keyless_hash_) {
888  // ignore, there's no group column in the output buffer
889  CHECK(getGroupbyColCount() == 1);
890  } else {
891  offset += group_col_widths_.size() * getEffectiveKeyWidth();
892  offset = align_to_int64(offset);
893  }
894  offset += getColOnlyOffInBytes(col_idx);
895  return offset;
896 }
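// Worked example (added for this listing, not part of the original source):
// row-wise layout, two group-by keys at the default 8-byte effective key width, and
// 8-byte aggregate slots:
//   getColOffInBytes(0) == align_to_int64(2 * 8) + 0 == 16
//   getColOffInBytes(1) == 16 + getColOnlyOffInBytes(1) == 24
// With keyless_hash_ set, the 16-byte key prefix is omitted and offsets start at 0.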
897 
898 /*
899  * Returns the memory offset for a particular group column in the prepended group
900  * columns portion of the memory.
901  */
902 size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes(
903  const size_t group_idx) const {
904  CHECK(output_columnar_);
905  CHECK(group_idx < getGroupbyColCount());
906  size_t offset{0};
907  for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
908  // TODO(Saman): relax that int64_bit part immediately
909  offset += align_to_int64(
910  std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
911  getEntryCount());
912  }
913  return offset;
914 }
915 
916 /*
917  * Returns total amount of memory prepended at the beginning of the output memory
918  * buffer.
919  */
920 size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes() const {
921  CHECK(output_columnar_);
922  size_t buffer_size{0};
923  for (size_t group_idx = 0; group_idx < getGroupbyColCount(); group_idx++) {
924  buffer_size += align_to_int64(
925  std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *
926  getEntryCount());
927  }
928  return buffer_size;
929 }
930 
931 size_t QueryMemoryDescriptor::getColOffInBytesInNextBin(const size_t col_idx) const {
932  auto warp_count = getWarpCount();
933  if (output_columnar_) {
934  CHECK_EQ(size_t(1), group_col_widths_.size());
935  CHECK_EQ(size_t(1), warp_count);
936  return getPaddedSlotWidthBytes(col_idx);
937  }
938 
939  return warp_count * getRowSize();
940 }
941 
942 size_t QueryMemoryDescriptor::getNextColOffInBytes(const int8_t* col_ptr,
943  const size_t bin,
944  const size_t col_idx) const {
945  CHECK(!output_columnar_ || bin < entry_count_);
946  size_t offset{0};
947  auto warp_count = getWarpCount();
948  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
949  const auto total_slot_count = getSlotCount();
950  if (col_idx + 1 == total_slot_count) {
951  if (output_columnar_) {
952  return (entry_count_ - bin) * chosen_bytes;
953  } else {
954  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
955  }
956  }
957 
958  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
959  if (output_columnar_) {
960  CHECK_EQ(size_t(1), group_col_widths_.size());
961  CHECK_EQ(size_t(1), warp_count);
962 
963  offset = align_to_int64(entry_count_ * chosen_bytes);
964 
965  offset += bin * (next_chosen_bytes - chosen_bytes);
966  return offset;
967  }
968 
969  if (next_chosen_bytes == sizeof(int64_t)) {
970  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
971  } else {
972  return chosen_bytes;
973  }
974 }
975 
976 size_t QueryMemoryDescriptor::getNextColOffInBytesRowOnly(const int8_t* col_ptr,
977  const size_t col_idx) const {
978  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
979  const auto total_slot_count = getSlotCount();
980  if (col_idx + 1 == total_slot_count) {
981  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
982  }
983 
984  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
985 
986  if (next_chosen_bytes == sizeof(int64_t)) {
987  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
988  } else {
989  return chosen_bytes;
990  }
991 }
992 
993 size_t QueryMemoryDescriptor::getBufferSizeBytes(
994  const RelAlgExecutionUnit& ra_exe_unit,
995  const unsigned thread_count,
996  const ExecutorDeviceType device_type) const {
997  if (use_streaming_top_n_) {
998  const size_t n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
999  return streaming_top_n::get_heap_size(getRowSize(), n, thread_count);
1000  }
1001  return getBufferSizeBytes(device_type, entry_count_);
1002 }
1003 
1015 size_t QueryMemoryDescriptor::getBufferSizeBytes(const ExecutorDeviceType device_type,
1016  const size_t entry_count) const {
1017  if (keyless_hash_ && !output_columnar_) {
1018  CHECK_GE(group_col_widths_.size(), size_t(1));
1019  auto row_bytes = align_to_int64(getColsSize());
1020 
1021  return (interleavedBins(device_type) ? executor_->warpSize() : 1) * entry_count *
1022  row_bytes;
1023  }
1024 
1025  constexpr size_t row_index_width = sizeof(int64_t);
1026  size_t total_bytes{0};
1027  if (output_columnar_) {
1028  total_bytes = (query_desc_type_ == QueryDescriptionType::Projection
1029  ? row_index_width * entry_count
1030  : sizeof(int64_t) * group_col_widths_.size() * entry_count) +
1031  getTotalBytesOfColumnarBuffers(entry_count);
1032  } else {
1033  total_bytes = getRowSize() * entry_count;
1034  }
1035 
1036  return total_bytes;
1037 }
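// Worked example (added for this listing, not part of the original source):
// for a row-wise group-by buffer with getRowSize() == 24 and entry_count == 10000,
// this returns 24 * 10000 = 240000 bytes. For a columnar projection with three 4-byte
// non-lazy target slots and entry_count == 1000, it returns
// 8 * 1000 (row indices) + 3 * align_to_int64(4 * 1000) = 20000 bytes.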
1038 
1039 size_t QueryMemoryDescriptor::getBufferSizeBytes(
1040  const ExecutorDeviceType device_type) const {
1041  return getBufferSizeBytes(device_type, entry_count_);
1042 }
1043 
1044 void QueryMemoryDescriptor::setOutputColumnar(const bool val) {
1045  output_columnar_ = val;
1046  if (isLogicalSizedColumnsAllowed()) {
1047  col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
1048  }
1049 }
1050 
1051 /*
1052  * Indicates the query types that are currently allowed to use the logical
1053  * sized columns instead of padded sized ones.
1054  */
1055 bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed() const {
1056  // In distributed mode, result sets are serialized using rowwise iterators, so we use
1057  // consistent slot widths for now
1058  return output_columnar_ && !g_cluster &&
1059  query_desc_type_ == QueryDescriptionType::Projection;
1060 }
1061 
1062 size_t QueryMemoryDescriptor::getBufferColSlotCount() const {
1063  size_t total_slot_count = col_slot_context_.getSlotCount();
1064 
1065  if (target_groupby_indices_.empty()) {
1066  return total_slot_count;
1067  }
1068  return total_slot_count - std::count_if(target_groupby_indices_.begin(),
1069  target_groupby_indices_.end(),
1070  [](const int64_t i) { return i >= 0; });
1071 }
1072 
1073 bool QueryMemoryDescriptor::usesGetGroupValueFast() const {
1074  return (query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
1075  getGroupbyColCount() == 1);
1076 }
1077 
1078 bool QueryMemoryDescriptor::threadsShareMemory() const {
1079  return query_desc_type_ != QueryDescriptionType::NonGroupedAggregate;
1080 }
1081 
1082 bool QueryMemoryDescriptor::blocksShareMemory() const {
1083  if (g_cluster || is_table_function_) {
1084  return true;
1085  }
1086  if (!countDescriptorsLogicallyEmpty(count_distinct_descriptors_)) {
1087  return true;
1088  }
1089  if (executor_->isCPUOnly() || render_output_ ||
1093  getGroupbyColCount() > 1)) {
1094  return true;
1095  }
1096  return query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
1097  many_entries(max_val_, min_val_, bucket_);
1098 }
1099 
1100 bool QueryMemoryDescriptor::lazyInitGroups(const ExecutorDeviceType device_type) const {
1101  return device_type == ExecutorDeviceType::GPU && !render_output_ &&
1102  countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
1103 }
1104 
1106  return interleaved_bins_on_gpu_ && device_type == ExecutorDeviceType::GPU;
1107 }
1108 
1109 // TODO(Saman): an implementation detail, so move this out of QMD
1110 bool QueryMemoryDescriptor::isWarpSyncRequired(
1111  const ExecutorDeviceType device_type) const {
1112  if (device_type == ExecutorDeviceType::GPU) {
1113  return executor_->cudaMgr()->isArchVoltaOrGreaterForAll();
1114  }
1115  return false;
1116 }
1117 
1118 size_t QueryMemoryDescriptor::getColCount() const {
1119  return col_slot_context_.getColCount();
1120 }
1121 
1122 size_t QueryMemoryDescriptor::getSlotCount() const {
1123  return col_slot_context_.getSlotCount();
1124 }
1125 
1126 const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes(const size_t slot_idx) const {
1127  return col_slot_context_.getSlotInfo(slot_idx).padded_size;
1128 }
1129 
1130 const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes(
1131  const size_t slot_idx) const {
1132  return col_slot_context_.getSlotInfo(slot_idx).logical_size;
1133 }
1134 
1135 const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol(
1136  const size_t col_idx) const {
1137  const auto& col_slots = col_slot_context_.getSlotsForCol(col_idx);
1138  CHECK_EQ(col_slots.size(), size_t(1));
1139  return col_slots.front();
1140 }
1141 
1142 void QueryMemoryDescriptor::useConsistentSlotWidthSize(const int8_t slot_width_size) {
1143  col_slot_context_.setAllSlotsSize(slot_width_size);
1144 }
1145 
1146 size_t QueryMemoryDescriptor::getRowWidth() const {
1147  // Note: Actual row size may include padding (see ResultSetBufferAccessors.h)
1148  return col_slot_context_.getAllSlotsPaddedSize();
1149 }
1150 
1151 int8_t QueryMemoryDescriptor::updateActualMinByteWidth(
1152  const int8_t actual_min_byte_width) const {
1153  return col_slot_context_.getMinPaddedByteSize(actual_min_byte_width);
1154 }
1155 
1156 void QueryMemoryDescriptor::addColSlotInfo(
1157  const std::vector<std::tuple<int8_t, int8_t>>& slots_for_col) {
1158  col_slot_context_.addColumn(slots_for_col);
1159 }
1160 
1161 void QueryMemoryDescriptor::clearSlotInfo() {
1162  col_slot_context_.clear();
1163 }
1164 
1165 void QueryMemoryDescriptor::alignPaddedSlots() {
1166  col_slot_context_.alignPaddedSlots(sortOnGpu());
1167 }
1168 
1169 bool QueryMemoryDescriptor::canOutputColumnar() const {
1170  return usesGetGroupValueFast() && threadsShareMemory() && blocksShareMemory() &&
1171  !interleavedBins(ExecutorDeviceType::GPU) &&
1172  countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
1173 }
1174 
1175 std::string QueryMemoryDescriptor::queryDescTypeToString() const {
1176  switch (query_desc_type_) {
1177  case QueryDescriptionType::GroupByPerfectHash:
1178  return "Perfect Hash";
1179  case QueryDescriptionType::GroupByBaselineHash:
1180  return "Baseline Hash";
1181  case QueryDescriptionType::Projection:
1182  return "Projection";
1183  case QueryDescriptionType::NonGroupedAggregate:
1184  return "Non-grouped Aggregate";
1185  case QueryDescriptionType::Estimator:
1186  return "Estimator";
1187  default:
1188  UNREACHABLE();
1189  }
1190  return "";
1191 }
1192 
1193 std::string QueryMemoryDescriptor::toString() const {
1194  auto str = reductionKey();
1195  str += "\tAllow Multifrag: " + ::toString(allow_multifrag_) + "\n";
1196  str += "\tInterleaved Bins on GPU: " + ::toString(interleaved_bins_on_gpu_) + "\n";
1197  str += "\tBlocks Share Memory: " + ::toString(blocksShareMemory()) + "\n";
1198  str += "\tThreads Share Memory: " + ::toString(threadsShareMemory()) + "\n";
1199  str += "\tUses Fast Group Values: " + ::toString(usesGetGroupValueFast()) + "\n";
1200  str +=
1201  "\tLazy Init Groups (GPU): " + ::toString(lazyInitGroups(ExecutorDeviceType::GPU)) +
1202  "\n";
1203  str += "\tEntry Count: " + std::to_string(entry_count_) + "\n";
1204  str += "\tMin Val (perfect hash only): " + std::to_string(min_val_) + "\n";
1205  str += "\tMax Val (perfect hash only): " + std::to_string(max_val_) + "\n";
1206  str += "\tBucket Val (perfect hash only): " + std::to_string(bucket_) + "\n";
1207  str += "\tSort on GPU: " + ::toString(sort_on_gpu_) + "\n";
1208  str += "\tUse Streaming Top N: " + ::toString(use_streaming_top_n_) + "\n";
1209  str += "\tOutput Columnar: " + ::toString(output_columnar_) + "\n";
1210  str += "\tRender Output: " + ::toString(render_output_) + "\n";
1211  str += "\tUse Baseline Sort: " + ::toString(must_use_baseline_sort_) + "\n";
1212  str += "\tIs Table Function: " + ::toString(is_table_function_) + "\n";
1213  return str;
1214 }
1215 
1216 std::string QueryMemoryDescriptor::reductionKey() const {
1217  std::string str;
1218  str += "Query Memory Descriptor State\n";
1219  str += "\tQuery Type: " + queryDescTypeToString() + "\n";
1220  str +=
1221  "\tKeyless Hash: " + ::toString(keyless_hash_) +
1222  (keyless_hash_ ? ", target index for key: " + std::to_string(getTargetIdxForKey())
1223  : "") +
1224  "\n";
1225  str += "\tEffective key width: " + std::to_string(getEffectiveKeyWidth()) + "\n";
1226  str += "\tNumber of group columns: " + std::to_string(getGroupbyColCount()) + "\n";
1227  const auto group_indices_size = targetGroupbyIndicesSize();
1228  if (group_indices_size) {
1229  std::vector<std::string> group_indices_strings;
1230  for (size_t target_idx = 0; target_idx < group_indices_size; ++target_idx) {
1231  group_indices_strings.push_back(std::to_string(getTargetGroupbyIndex(target_idx)));
1232  }
1233  str += "\tTarget group by indices: " +
1234  boost::algorithm::join(group_indices_strings, ",") + "\n";
1235  }
1236  str += "\t" + col_slot_context_.toString();
1237  return str;
1238 }
1239 
1240 std::vector<TargetInfo> target_exprs_to_infos(
1241  const std::vector<Analyzer::Expr*>& targets,
1242  const QueryMemoryDescriptor& query_mem_desc) {
1243  std::vector<TargetInfo> target_infos;
1244  for (const auto target_expr : targets) {
1245  auto target = get_target_info(target_expr, g_bigint_count);
1246  if (query_mem_desc.getQueryDescriptionType() ==
1247  QueryDescriptionType::NonGroupedAggregate) {
1248  set_notnull(target, false);
1249  target.sql_type.set_notnull(false);
1250  }
1251  target_infos.push_back(target);
1252  }
1253  return target_infos;
1254 }
1255 
1256 std::optional<size_t> QueryMemoryDescriptor::varlenOutputBufferElemSize() const {
1257  int64_t buffer_element_size{0};
1258  for (size_t i = 0; i < col_slot_context_.getSlotCount(); i++) {
1259  try {
1260  const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
1261  if (slot_element_size < 0) {
1262  return std::nullopt;
1263  }
1264  buffer_element_size += slot_element_size;
1265  } catch (...) {
1266  continue;
1267  }
1268  }
1269  return buffer_element_size;
1270 }
1271 
1272 size_t QueryMemoryDescriptor::varlenOutputRowSizeToSlot(const size_t slot_idx) const {
1273  int64_t buffer_element_size{0};
1274  CHECK_LT(slot_idx, col_slot_context_.getSlotCount());
1275  for (size_t i = 0; i < slot_idx; i++) {
1276  try {
1277  const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
1278  if (slot_element_size < 0) {
1279  continue;
1280  }
1281  buffer_element_size += slot_element_size;
1282  } catch (...) {
1283  continue;
1284  }
1285  }
1286  return buffer_element_size;
1287 }