OmniSciDB  c0231cc57d
QueryMemoryDescriptor.cpp
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "QueryMemoryDescriptor.h"
18 
19 #include "../Execute.h"
20 #include "../ExpressionRewrite.h"
21 #include "../GroupByAndAggregate.h"
22 #include "../StreamingTopN.h"
23 #include "../UsedColumnsVisitor.h"
24 #include "ColSlotContext.h"
25 
26 #include <boost/algorithm/cxx11/any_of.hpp>
27 
29 extern bool g_enable_columnar_output;
30 extern size_t g_streaming_topn_max;
31 
32 namespace {
33 
34 bool is_int_and_no_bigger_than(const SQLTypeInfo& ti, const size_t byte_width) {
35  if (!ti.is_integer()) {
36  return false;
37  }
38  return get_bit_width(ti) <= (byte_width * 8);
39 }
40 
41 bool is_valid_int32_range(const ExpressionRange& range) {
42  return range.getIntMin() > INT32_MIN && range.getIntMax() < EMPTY_KEY_32 - 1;
43 }
44 
45 std::vector<int64_t> target_expr_group_by_indices(
46  const std::list<std::shared_ptr<Analyzer::Expr>>& groupby_exprs,
47  const std::vector<Analyzer::Expr*>& target_exprs) {
48  std::vector<int64_t> indices(target_exprs.size(), -1);
49  for (size_t target_idx = 0; target_idx < target_exprs.size(); ++target_idx) {
50  const auto target_expr = target_exprs[target_idx];
51  if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
52  continue;
53  }
54  const auto var_expr = dynamic_cast<const Analyzer::Var*>(target_expr);
55  if (var_expr && var_expr->get_which_row() == Analyzer::Var::kGROUPBY) {
56  indices[target_idx] = var_expr->get_varno() - 1;
57  continue;
58  }
59  }
60  return indices;
61 }
62 
63 std::vector<int64_t> target_expr_proj_indices(const RelAlgExecutionUnit& ra_exe_unit,
64  const Catalog_Namespace::Catalog& cat) {
65  if (ra_exe_unit.input_descs.size() > 1 ||
66  !ra_exe_unit.sort_info.order_entries.empty()) {
67  return {};
68  }
69  std::vector<int64_t> target_indices(ra_exe_unit.target_exprs.size(), -1);
70  UsedColumnsVisitor columns_visitor;
71  std::unordered_set<int> used_columns;
72  for (const auto& simple_qual : ra_exe_unit.simple_quals) {
73  const auto crt_used_columns = columns_visitor.visit(simple_qual.get());
74  used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
75  }
76  for (const auto& qual : ra_exe_unit.quals) {
77  const auto crt_used_columns = columns_visitor.visit(qual.get());
78  used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
79  }
80  for (const auto& target : ra_exe_unit.target_exprs) {
81  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target);
82  if (col_var) {
83  const auto cd = get_column_descriptor_maybe(
84  col_var->get_column_id(), col_var->get_table_id(), cat);
85  if (!cd || !cd->isVirtualCol) {
86  continue;
87  }
88  }
89  const auto crt_used_columns = columns_visitor.visit(target);
90  used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
91  }
92  for (size_t target_idx = 0; target_idx < ra_exe_unit.target_exprs.size();
93  ++target_idx) {
94  const auto target_expr = ra_exe_unit.target_exprs[target_idx];
95  CHECK(target_expr);
96  const auto& ti = target_expr->get_type_info();
97  // TODO: add proper lazy fetch for varlen types in result set
98  if (ti.is_varlen()) {
99  continue;
100  }
101  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
102  if (!col_var) {
103  continue;
104  }
105  if (!ti.is_varlen() &&
106  used_columns.find(col_var->get_column_id()) == used_columns.end()) {
107  // setting target index to be zero so that later it can be decoded properly (in lazy
108  // fetch, the zeroth target index indicates the corresponding rowid column for the
109  // projected entry)
110  target_indices[target_idx] = 0;
111  }
112  }
113  return target_indices;
114 }
115 
116 int8_t pick_baseline_key_component_width(const ExpressionRange& range,
117  const size_t group_col_width) {
118  if (range.getType() == ExpressionRangeType::Invalid) {
119  return sizeof(int64_t);
120  }
121  switch (range.getType()) {
122  case ExpressionRangeType::Integer:
123  if (group_col_width == sizeof(int64_t) && range.hasNulls()) {
124  return sizeof(int64_t);
125  }
126  return is_valid_int32_range(range) ? sizeof(int32_t) : sizeof(int64_t);
127  case ExpressionRangeType::Float:
128  case ExpressionRangeType::Double:
129  return sizeof(int64_t); // No compaction for floating point yet.
130  default:
131  UNREACHABLE();
132  }
133  return sizeof(int64_t);
134 }
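// Example for pick_baseline_key_component_width (illustrative): an integer group key whose
// range fits strictly inside INT32 (leaving room for the EMPTY_KEY_32 sentinel) is compacted
// to a 4-byte key component; a nullable 8-byte column or a floating-point key keeps 8 bytes.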
135 
136 // TODO(miyu): make sure following setting of compact width is correct in all cases.
137 int8_t pick_baseline_key_width(const RelAlgExecutionUnit& ra_exe_unit,
138  const std::vector<InputTableInfo>& query_infos,
139  const Executor* executor) {
140  int8_t compact_width{4};
141  for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
142  const auto expr_range = getExpressionRange(groupby_expr.get(), query_infos, executor);
143  compact_width = std::max(compact_width,
144  pick_baseline_key_component_width(
145  expr_range, groupby_expr->get_type_info().get_size()));
146  }
147  return compact_width;
148 }
149 
150 bool use_streaming_top_n(const RelAlgExecutionUnit& ra_exe_unit,
151  const bool output_columnar) {
152  if (g_cluster) {
153  return false; // TODO(miyu)
154  }
155 
156  for (const auto target_expr : ra_exe_unit.target_exprs) {
157  if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
158  return false;
159  }
160  if (dynamic_cast<const Analyzer::WindowFunction*>(target_expr)) {
161  return false;
162  }
163  }
164 
165  // TODO: Allow streaming top n for columnar output
166  if (!output_columnar && ra_exe_unit.sort_info.order_entries.size() == 1 &&
167  ra_exe_unit.sort_info.limit &&
168  ra_exe_unit.sort_info.algorithm == SortAlgorithm::StreamingTopN) {
169  const auto only_order_entry = ra_exe_unit.sort_info.order_entries.front();
170  CHECK_GT(only_order_entry.tle_no, int(0));
171  CHECK_LE(static_cast<size_t>(only_order_entry.tle_no),
172  ra_exe_unit.target_exprs.size());
173  const auto order_entry_expr = ra_exe_unit.target_exprs[only_order_entry.tle_no - 1];
174  const auto n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
175  if ((order_entry_expr->get_type_info().is_number() ||
176  order_entry_expr->get_type_info().is_time()) &&
177  n <= g_streaming_topn_max) {
178  return true;
179  }
180  }
181 
182  return false;
183 }
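// Example (illustrative): a query such as SELECT x FROM t ORDER BY x LIMIT 100, with a single
// numeric or time order entry, no aggregates or window functions, row-wise output, and
// offset + limit within g_streaming_topn_max, is eligible for the streaming top-n path.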
184 
185 template <class T>
186 inline std::vector<int8_t> get_col_byte_widths(const T& col_expr_list) {
187  std::vector<int8_t> col_widths;
188  size_t col_expr_idx = 0;
189  for (const auto& col_expr : col_expr_list) {
190  if (!col_expr) {
191  // row index
192  col_widths.push_back(sizeof(int64_t));
193  } else {
194  bool is_varlen_projection{false};
195  if constexpr (std::is_same<T, std::list<std::shared_ptr<Analyzer::Expr>>>::value) {
196  is_varlen_projection =
197  !(std::dynamic_pointer_cast<const Analyzer::GeoExpr>(col_expr) == nullptr);
198  } else {
199  is_varlen_projection =
200  !(dynamic_cast<const Analyzer::GeoExpr*>(col_expr) == nullptr);
201  }
202 
203  if (is_varlen_projection) {
204  col_widths.push_back(sizeof(int64_t));
205  ++col_expr_idx;
206  continue;
207  }
208  const auto agg_info = get_target_info(col_expr, g_bigint_count);
209  const auto chosen_type = get_compact_type(agg_info);
210  if ((chosen_type.is_string() && chosen_type.get_compression() == kENCODING_NONE) ||
211  chosen_type.is_array()) {
212  col_widths.push_back(sizeof(int64_t));
213  col_widths.push_back(sizeof(int64_t));
214  ++col_expr_idx;
215  continue;
216  }
217  if (chosen_type.is_geometry()) {
218  for (auto i = 0; i < chosen_type.get_physical_coord_cols(); ++i) {
219  col_widths.push_back(sizeof(int64_t));
220  col_widths.push_back(sizeof(int64_t));
221  }
222  ++col_expr_idx;
223  continue;
224  }
225  const auto col_expr_bitwidth = get_bit_width(chosen_type);
226  CHECK_EQ(size_t(0), col_expr_bitwidth % 8);
227  col_widths.push_back(static_cast<int8_t>(col_expr_bitwidth >> 3));
228  // for average, we'll need to keep the count as well
229  if (agg_info.agg_kind == kAVG) {
230  CHECK(agg_info.is_agg);
231  col_widths.push_back(sizeof(int64_t));
232  }
233  }
234  ++col_expr_idx;
235  }
236  return col_widths;
237 }
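// Note (summary of the slot widths chosen above, illustrative): a varlen (geo) projection takes
// a single 8-byte slot, none-encoded strings and arrays take two 8-byte slots (pointer and
// length), geometry takes two 8-byte slots per physical coord column, and kAVG reserves an
// extra 8-byte slot for its count.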
238 
239 } // namespace
240 
241 std::unique_ptr<QueryMemoryDescriptor> QueryMemoryDescriptor::init(
242  const Executor* executor,
243  const RelAlgExecutionUnit& ra_exe_unit,
244  const std::vector<InputTableInfo>& query_infos,
245  const ColRangeInfo& col_range_info,
246  const KeylessInfo& keyless_info,
247  const bool allow_multifrag,
248  const ExecutorDeviceType device_type,
249  const int8_t crt_min_byte_width,
250  const bool sort_on_gpu_hint,
251  const size_t shard_count,
252  const size_t max_groups_buffer_entry_count,
253  RenderInfo* render_info,
254  const CountDistinctDescriptors count_distinct_descriptors,
255  const bool must_use_baseline_sort,
256  const bool output_columnar_hint,
257  const bool streaming_top_n_hint) {
258  auto group_col_widths = get_col_byte_widths(ra_exe_unit.groupby_exprs);
259  const bool is_group_by{!group_col_widths.empty()};
260 
261  auto col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, {});
262 
263  const auto min_slot_size = QueryMemoryDescriptor::pick_target_compact_width(
264  ra_exe_unit, query_infos, crt_min_byte_width);
265 
266  col_slot_context.setAllSlotsPaddedSize(min_slot_size);
267  col_slot_context.validate();
268 
269  if (!is_group_by) {
270  CHECK(!must_use_baseline_sort);
271 
272  return std::make_unique<QueryMemoryDescriptor>(
273  executor,
274  ra_exe_unit,
275  query_infos,
276  allow_multifrag,
277  false,
278  false,
279  -1,
280  ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
281  : QueryDescriptionType::NonGroupedAggregate,
282  0,
283  0,
284  0,
285  false},
286  col_slot_context,
287  std::vector<int8_t>{},
288  /*group_col_compact_width=*/0,
289  std::vector<int64_t>{},
290  /*entry_count=*/1,
291  count_distinct_descriptors,
292  false,
293  output_columnar_hint,
294  render_info && render_info->isInSitu(),
295  must_use_baseline_sort,
296  /*use_streaming_top_n=*/false);
297  }
298 
299  size_t entry_count = 1;
300  auto actual_col_range_info = col_range_info;
301  bool interleaved_bins_on_gpu = false;
302  bool keyless_hash = false;
303  bool streaming_top_n = false;
304  int8_t group_col_compact_width = 0;
305  int32_t idx_target_as_key = -1;
306  auto output_columnar = output_columnar_hint;
307  std::vector<int64_t> target_groupby_indices;
308 
309  switch (col_range_info.hash_type_) {
310  case QueryDescriptionType::GroupByPerfectHash: {
311  if (render_info) {
312  // TODO(croot): this can be removed now thanks to the more centralized
313  // NonInsituQueryClassifier code, but keeping it just in case
314  render_info->setNonInSitu();
315  }
316  // keyless hash: whether or not group columns are stored at the beginning of the
317  // output buffer
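 // (when keyless, the group key is implied by the entry's position in the buffer, so only
 // aggregate slots are materialized per entry)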
318  keyless_hash =
319  (!sort_on_gpu_hint ||
320  !QueryMemoryDescriptor::many_entries(
321  col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
322  !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;
323 
324  // if keyless, then this target index indicates whether an entry is empty or not
325  // (acts as a key)
326  idx_target_as_key = keyless_info.target_index;
327 
328  if (group_col_widths.size() > 1) {
329  // col range info max contains the expected cardinality of the output
330  entry_count = static_cast<size_t>(actual_col_range_info.max);
331  actual_col_range_info.bucket = 0;
332  } else {
333  // single column perfect hash
334  entry_count = std::max(
335  GroupByAndAggregate::getBucketedCardinality(col_range_info), int64_t(1));
336  const size_t interleaved_max_threshold{512};
337 
338  if (must_use_baseline_sort) {
339  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
340  ra_exe_unit.target_exprs);
341  col_slot_context =
342  ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
343  }
344 
345  bool has_varlen_sample_agg = false;
346  for (const auto& target_expr : ra_exe_unit.target_exprs) {
347  if (target_expr->get_contains_agg()) {
348  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
349  CHECK(agg_expr);
350  if (agg_expr->get_aggtype() == kSAMPLE &&
351  agg_expr->get_type_info().is_varlen()) {
352  has_varlen_sample_agg = true;
353  break;
354  }
355  }
356  }
357 
358  interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
359  (entry_count <= interleaved_max_threshold) &&
360  (device_type == ExecutorDeviceType::GPU) &&
361  QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
362  count_distinct_descriptors) &&
363  !output_columnar;
364  }
365  break;
366  }
367  case QueryDescriptionType::GroupByBaselineHash: {
368  if (render_info) {
369  // TODO(croot): this can be removed now thanks to the more centralized
370  // NonInsituQueryClassifier code, but keeping it just in case
371  render_info->setNonInSitu();
372  }
373  entry_count = shard_count
374  ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
375  : max_groups_buffer_entry_count;
376  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
377  ra_exe_unit.target_exprs);
378  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
379 
380  group_col_compact_width =
381  output_columnar ? 8
382  : pick_baseline_key_width(ra_exe_unit, query_infos, executor);
383 
384  actual_col_range_info =
385  ColRangeInfo{QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
386  break;
387  }
388  case QueryDescriptionType::Projection: {
389  CHECK(!must_use_baseline_sort);
390 
391  if (streaming_top_n_hint && use_streaming_top_n(ra_exe_unit, output_columnar)) {
392  streaming_top_n = true;
393  entry_count = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
394  } else {
395  if (ra_exe_unit.use_bump_allocator) {
396  output_columnar = false;
397  entry_count = 0;
398  } else {
399  entry_count = ra_exe_unit.scan_limit
400  ? static_cast<size_t>(ra_exe_unit.scan_limit)
401  : max_groups_buffer_entry_count;
402  }
403  }
404 
405  const auto catalog = executor->getCatalog();
406  CHECK(catalog);
407  target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
408  ? target_expr_proj_indices(ra_exe_unit, *catalog)
409  : std::vector<int64_t>{};
410 
411  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
412  break;
413  }
414  default:
415  UNREACHABLE() << "Unknown query type";
416  }
417 
418  return std::make_unique<QueryMemoryDescriptor>(executor,
419  ra_exe_unit,
420  query_infos,
421  allow_multifrag,
422  keyless_hash,
423  interleaved_bins_on_gpu,
424  idx_target_as_key,
425  actual_col_range_info,
426  col_slot_context,
427  group_col_widths,
428  group_col_compact_width,
429  target_groupby_indices,
430  entry_count,
431  count_distinct_descriptors,
432  sort_on_gpu_hint,
433  output_columnar,
434  render_info && render_info->isInSitu(),
435  must_use_baseline_sort,
436  streaming_top_n);
437 }
438 
439 namespace {
440 bool anyOf(std::vector<Analyzer::Expr*> const& target_exprs, SQLAgg const agg_kind) {
441  return boost::algorithm::any_of(target_exprs, [agg_kind](Analyzer::Expr const* expr) {
442  auto const* const agg = dynamic_cast<Analyzer::AggExpr const*>(expr);
443  return agg && agg->get_aggtype() == agg_kind;
444  });
445 }
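// anyOf() is used by the constructor below to keep columnar output disabled whenever any
// target is a kAPPROX_QUANTILE aggregate.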
446 } // namespace
447 
448 QueryMemoryDescriptor::QueryMemoryDescriptor(
449  const Executor* executor,
450  const RelAlgExecutionUnit& ra_exe_unit,
451  const std::vector<InputTableInfo>& query_infos,
452  const bool allow_multifrag,
453  const bool keyless_hash,
454  const bool interleaved_bins_on_gpu,
455  const int32_t idx_target_as_key,
456  const ColRangeInfo& col_range_info,
457  const ColSlotContext& col_slot_context,
458  const std::vector<int8_t>& group_col_widths,
459  const int8_t group_col_compact_width,
460  const std::vector<int64_t>& target_groupby_indices,
461  const size_t entry_count,
462  const CountDistinctDescriptors count_distinct_descriptors,
463  const bool sort_on_gpu_hint,
464  const bool output_columnar_hint,
465  const bool render_output,
466  const bool must_use_baseline_sort,
467  const bool use_streaming_top_n)
468  : executor_(executor)
469  , allow_multifrag_(allow_multifrag)
470  , query_desc_type_(col_range_info.hash_type_)
471  , keyless_hash_(keyless_hash)
472  , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
473  , idx_target_as_key_(idx_target_as_key)
474  , group_col_widths_(group_col_widths)
475  , group_col_compact_width_(group_col_compact_width)
476  , target_groupby_indices_(target_groupby_indices)
477  , entry_count_(entry_count)
478  , min_val_(col_range_info.min)
479  , max_val_(col_range_info.max)
480  , bucket_(col_range_info.bucket)
481  , has_nulls_(col_range_info.has_nulls)
482  , count_distinct_descriptors_(count_distinct_descriptors)
483  , output_columnar_(false)
484  , render_output_(render_output)
485  , must_use_baseline_sort_(must_use_baseline_sort)
486  , is_table_function_(false)
487  , use_streaming_top_n_(use_streaming_top_n)
488  , force_4byte_float_(false)
489  , col_slot_context_(col_slot_context) {
490  col_slot_context_.setAllUnsetSlotsPaddedSize(8);
491  col_slot_context_.validate();
493 
494  sort_on_gpu_ = sort_on_gpu_hint && canOutputColumnar() && !keyless_hash_;
495  if (sort_on_gpu_) {
496  CHECK(!ra_exe_unit.use_bump_allocator);
497  output_columnar_ = true;
498  } else {
499  switch (query_desc_type_) {
500  case QueryDescriptionType::Projection:
501  output_columnar_ = output_columnar_hint;
502  break;
503  case QueryDescriptionType::GroupByPerfectHash:
504  output_columnar_ = output_columnar_hint &&
505  QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
506  count_distinct_descriptors_) &&
507  !anyOf(ra_exe_unit.target_exprs, kAPPROX_QUANTILE);
508  break;
509  case QueryDescriptionType::GroupByBaselineHash:
510  output_columnar_ = output_columnar_hint;
511  break;
512  case QueryDescriptionType::NonGroupedAggregate:
513  output_columnar_ = output_columnar_hint &&
514  QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
515  count_distinct_descriptors_) &&
516  !anyOf(ra_exe_unit.target_exprs, kAPPROX_QUANTILE);
517  break;
518  default:
519  output_columnar_ = false;
520  break;
521  }
522  }
523 
524  if (isLogicalSizedColumnsAllowed()) {
525  // TODO(adb): Ensure fixed size buffer allocations are correct with all logical column
526  // sizes
527  CHECK(!ra_exe_unit.use_bump_allocator);
528  col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
529  col_slot_context_.validate();
530  }
531 
532 #ifdef HAVE_CUDA
533  // Check Streaming Top N heap usage, bail if > max slab size, CUDA ONLY
534  if (use_streaming_top_n_ && executor->getDataMgr()->gpusPresent()) {
535  const auto thread_count = executor->blockSize() * executor->gridSize();
536  const auto total_buff_size =
537  streaming_top_n::get_heap_size(getRowSize(), getEntryCount(), thread_count);
538  if (total_buff_size > executor_->maxGpuSlabSize()) {
539  throw StreamingTopNOOM(total_buff_size);
540  }
541  }
542 #endif
543 }
544 
545 QueryMemoryDescriptor::QueryMemoryDescriptor()
546  : executor_(nullptr)
547  , allow_multifrag_(false)
548  , query_desc_type_(QueryDescriptionType::Projection)
549  , keyless_hash_(false)
550  , interleaved_bins_on_gpu_(false)
551  , idx_target_as_key_(0)
552  , group_col_compact_width_(0)
553  , entry_count_(0)
554  , min_val_(0)
555  , max_val_(0)
556  , bucket_(0)
557  , has_nulls_(false)
558  , sort_on_gpu_(false)
559  , output_columnar_(false)
560  , render_output_(false)
561  , must_use_baseline_sort_(false)
562  , is_table_function_(false)
563  , use_streaming_top_n_(false)
564  , force_4byte_float_(false) {}
565 
566 QueryMemoryDescriptor::QueryMemoryDescriptor(const Executor* executor,
567  const size_t entry_count,
568  const QueryDescriptionType query_desc_type,
569  const bool is_table_function)
570  : executor_(executor)
571  , allow_multifrag_(false)
572  , query_desc_type_(query_desc_type)
573  , keyless_hash_(false)
574  , interleaved_bins_on_gpu_(false)
575  , idx_target_as_key_(0)
576  , group_col_compact_width_(0)
577  , entry_count_(entry_count)
578  , min_val_(0)
579  , max_val_(0)
580  , bucket_(0)
581  , has_nulls_(false)
582  , sort_on_gpu_(false)
583  , output_columnar_(false)
584  , render_output_(false)
585  , must_use_baseline_sort_(false)
586  , is_table_function_(is_table_function)
587  , use_streaming_top_n_(false)
588  , force_4byte_float_(false) {}
589 
590 QueryMemoryDescriptor::QueryMemoryDescriptor(const QueryDescriptionType query_desc_type,
591  const int64_t min_val,
592  const int64_t max_val,
593  const bool has_nulls,
594  const std::vector<int8_t>& group_col_widths)
595  : executor_(nullptr)
596  , allow_multifrag_(false)
597  , query_desc_type_(query_desc_type)
598  , keyless_hash_(false)
599  , interleaved_bins_on_gpu_(false)
600  , idx_target_as_key_(0)
601  , group_col_widths_(group_col_widths)
602  , group_col_compact_width_(0)
603  , entry_count_(0)
604  , min_val_(min_val)
605  , max_val_(max_val)
606  , bucket_(0)
607  , has_nulls_(false)
608  , sort_on_gpu_(false)
609  , output_columnar_(false)
610  , render_output_(false)
611  , must_use_baseline_sort_(false)
612  , is_table_function_(false)
613  , use_streaming_top_n_(false)
614  , force_4byte_float_(false) {}
615 
616 bool QueryMemoryDescriptor::operator==(const QueryMemoryDescriptor& other) const {
617  // Note that this method does not check ptr reference members (e.g. executor_) or
618  // entry_count_
619  if (query_desc_type_ != other.query_desc_type_) {
620  return false;
621  }
622  if (keyless_hash_ != other.keyless_hash_) {
623  return false;
624  }
625  if (interleaved_bins_on_gpu_ != other.interleaved_bins_on_gpu_) {
626  return false;
627  }
628  if (idx_target_as_key_ != other.idx_target_as_key_) {
629  return false;
630  }
631  if (force_4byte_float_ != other.force_4byte_float_) {
632  return false;
633  }
634  if (group_col_widths_ != other.group_col_widths_) {
635  return false;
636  }
637  if (group_col_compact_width_ != other.group_col_compact_width_) {
638  return false;
639  }
640  if (target_groupby_indices_ != other.target_groupby_indices_) {
641  return false;
642  }
643  if (min_val_ != other.min_val_) {
644  return false;
645  }
646  if (max_val_ != other.max_val_) {
647  return false;
648  }
649  if (bucket_ != other.bucket_) {
650  return false;
651  }
652  if (has_nulls_ != other.has_nulls_) {
653  return false;
654  }
656  return false;
657  } else {
658  // Count distinct descriptors can legitimately differ in device only.
659  for (size_t i = 0; i < count_distinct_descriptors_.size(); ++i) {
660  auto ref_count_distinct_desc = other.count_distinct_descriptors_[i];
661  auto count_distinct_desc = count_distinct_descriptors_[i];
662  count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
663  if (ref_count_distinct_desc != count_distinct_desc) {
664  return false;
665  }
666  }
667  }
668  if (sort_on_gpu_ != other.sort_on_gpu_) {
669  return false;
670  }
671  if (output_columnar_ != other.output_columnar_) {
672  return false;
673  }
674  if (col_slot_context_ != other.col_slot_context_) {
675  return false;
676  }
677  return true;
678 }
679 
680 std::unique_ptr<QueryExecutionContext> QueryMemoryDescriptor::getQueryExecutionContext(
681  const RelAlgExecutionUnit& ra_exe_unit,
682  const Executor* executor,
683  const ExecutorDeviceType device_type,
684  const ExecutorDispatchMode dispatch_mode,
685  const int device_id,
686  const int outer_table_id,
687  const int64_t num_rows,
688  const std::vector<std::vector<const int8_t*>>& col_buffers,
689  const std::vector<std::vector<uint64_t>>& frag_offsets,
690  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
691  const bool output_columnar,
692  const bool sort_on_gpu,
693  const size_t thread_idx,
694  RenderInfo* render_info) const {
695  auto timer = DEBUG_TIMER(__func__);
696  if (frag_offsets.empty()) {
697  return nullptr;
698  }
699  return std::unique_ptr<QueryExecutionContext>(
700  new QueryExecutionContext(ra_exe_unit,
701  *this,
702  executor,
703  device_type,
704  dispatch_mode,
705  device_id,
706  outer_table_id,
707  num_rows,
708  col_buffers,
709  frag_offsets,
710  row_set_mem_owner,
711  output_columnar,
712  sort_on_gpu,
713  thread_idx,
714  render_info));
715 }
716 
717 int8_t QueryMemoryDescriptor::pick_target_compact_width(
718  const RelAlgExecutionUnit& ra_exe_unit,
719  const std::vector<InputTableInfo>& query_infos,
720  const int8_t crt_min_byte_width) {
721  if (g_bigint_count) {
722  return sizeof(int64_t);
723  }
724  int8_t compact_width{0};
725  auto col_it = ra_exe_unit.input_col_descs.begin();
726  auto const end = ra_exe_unit.input_col_descs.end();
727  int unnest_array_col_id{std::numeric_limits<int>::min()};
728  for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
729  const auto uoper = dynamic_cast<Analyzer::UOper*>(groupby_expr.get());
730  if (uoper && uoper->get_optype() == kUNNEST) {
731  const auto& arg_ti = uoper->get_operand()->get_type_info();
732  CHECK(arg_ti.is_array());
733  const auto& elem_ti = arg_ti.get_elem_type();
734  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
735  unnest_array_col_id = (*col_it)->getColId();
736  } else {
737  compact_width = crt_min_byte_width;
738  break;
739  }
740  }
741  if (col_it != end) {
742  ++col_it;
743  }
744  }
745  if (!compact_width &&
746  (ra_exe_unit.groupby_exprs.size() != 1 || !ra_exe_unit.groupby_exprs.front())) {
747  compact_width = crt_min_byte_width;
748  }
749  if (!compact_width) {
750  col_it = ra_exe_unit.input_col_descs.begin();
751  std::advance(col_it, ra_exe_unit.groupby_exprs.size());
752  for (const auto target : ra_exe_unit.target_exprs) {
753  const auto& ti = target->get_type_info();
754  const auto agg = dynamic_cast<const Analyzer::AggExpr*>(target);
755  if (agg && agg->get_arg()) {
756  compact_width = crt_min_byte_width;
757  break;
758  }
759 
760  if (agg) {
761  CHECK_EQ(kCOUNT, agg->get_aggtype());
762  CHECK(!agg->get_is_distinct());
763  if (col_it != end) {
764  ++col_it;
765  }
766  continue;
767  }
768 
769  if (is_int_and_no_bigger_than(ti, 4) ||
770  (ti.is_string() && ti.get_compression() == kENCODING_DICT)) {
771  if (col_it != end) {
772  ++col_it;
773  }
774  continue;
775  }
776 
777  const auto uoper = dynamic_cast<Analyzer::UOper*>(target);
778  if (uoper && uoper->get_optype() == kUNNEST &&
779  (*col_it)->getColId() == unnest_array_col_id) {
780  const auto arg_ti = uoper->get_operand()->get_type_info();
781  CHECK(arg_ti.is_array());
782  const auto& elem_ti = arg_ti.get_elem_type();
783  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
784  if (col_it != end) {
785  ++col_it;
786  }
787  continue;
788  }
789  }
790 
791  compact_width = crt_min_byte_width;
792  break;
793  }
794  }
795  if (!compact_width) {
796  size_t total_tuples{0};
797  for (const auto& qi : query_infos) {
798  total_tuples += qi.info.getNumTuples();
799  }
800  return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
801  unnest_array_col_id != std::numeric_limits<int>::min()
802  ? 4
803  : crt_min_byte_width;
804  } else {
805  // TODO(miyu): relax this condition to allow more cases just w/o padding
806  for (auto wid : get_col_byte_widths(ra_exe_unit.target_exprs)) {
807  compact_width = std::max(compact_width, wid);
808  }
809  return compact_width;
810  }
811 }
812 
813 size_t QueryMemoryDescriptor::getColsSize() const {
814  return col_slot_context_.getAllSlotsAlignedPaddedSize();
815 }
816 
817 size_t QueryMemoryDescriptor::getRowSize() const {
818  CHECK(!output_columnar_);
819  size_t total_bytes{0};
820  if (keyless_hash_) {
821  // ignore, there's no group column in the output buffer
822  CHECK(query_desc_type_ == QueryDescriptionType::GroupByPerfectHash);
823  } else {
824  total_bytes += group_col_widths_.size() * getEffectiveKeyWidth();
825  total_bytes = align_to_int64(total_bytes);
826  }
827  total_bytes += getColsSize();
828  return align_to_int64(total_bytes);
829 }
830 
831 size_t QueryMemoryDescriptor::getWarpCount() const {
832  return (interleaved_bins_on_gpu_ ? executor_->warpSize() : 1);
833 }
834 
835 size_t QueryMemoryDescriptor::getCompactByteWidth() const {
836  return col_slot_context_.getCompactByteWidth();
837 }
838 
847 }
848 
853 size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers(
854  const size_t num_entries_per_column) const {
855  return col_slot_context_.getTotalBytesOfColumnarBuffers(num_entries_per_column);
856 }
857 
867 size_t QueryMemoryDescriptor::getTotalBytesOfColumnarProjections(
868  const size_t projection_count) const {
869  constexpr size_t row_index_width = sizeof(int64_t);
870  return getTotalBytesOfColumnarBuffers(projection_count) +
871  row_index_width * projection_count;
872 }
873 
874 size_t QueryMemoryDescriptor::getColOnlyOffInBytes(const size_t col_idx) const {
875  return col_slot_context_.getColOnlyOffInBytes(col_idx);
876 }
877 
878 /*
879  * Returns the memory offset in bytes for a specific agg column in the output
880  * memory buffer. Depending on the query type, there may be some extra portion
881  * of memory prepended at the beginning of the buffer. A brief description of
882  * the memory layout is as follows:
883  * 1. projections: index column (64bit) + all target columns
884  * 2. group by: all group columns (64-bit each) + all agg columns
885  * 2a. if keyless, there is no prepending group column stored at the beginning
886  */
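// Example (illustrative, row-wise, non-keyless): with two group columns and an 8-byte
// effective key width, the keys occupy 16 bytes, so getColOffInBytes(0) returns 16 while
// getColOnlyOffInBytes(0) returns 0.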
887 size_t QueryMemoryDescriptor::getColOffInBytes(const size_t col_idx) const {
888  const auto warp_count = getWarpCount();
889  if (output_columnar_) {
890  CHECK_EQ(size_t(1), warp_count);
891  size_t offset{0};
892  if (!keyless_hash_) {
893  offset += getPrependedGroupBufferSizeInBytes();
894  }
895  for (size_t index = 0; index < col_idx; ++index) {
896  offset += align_to_int64(getPaddedSlotWidthBytes(index) * entry_count_);
897  }
898  return offset;
899  }
900 
901  size_t offset{0};
902  if (keyless_hash_) {
903  // ignore, there's no group column in the output buffer
904  CHECK(query_desc_type_ == QueryDescriptionType::GroupByPerfectHash);
905  } else {
906  offset += group_col_widths_.size() * getEffectiveKeyWidth();
907  offset = align_to_int64(offset);
908  }
909  offset += getColOnlyOffInBytes(col_idx);
910  return offset;
911 }
912 
913 /*
914  * Returns the memory offset for a particular group column in the prepended group
915  * columns portion of the memory.
916  */
917 size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes(
918  const size_t group_idx) const {
919  CHECK(output_columnar_);
920  CHECK(group_idx < getGroupbyColCount());
921  size_t offset{0};
922  for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
923  // TODO(Saman): relax that int64_bit part immediately
924  offset += align_to_int64(
925  std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
926  getEntryCount());
927  }
928  return offset;
929 }
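// Example (illustrative): with columnar output, entry_count_ == 1000 and 8-byte group
// columns, each prepended group column occupies align_to_int64(8 * 1000) == 8000 bytes,
// so getPrependedGroupColOffInBytes(1) returns 8000.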
930 
931 /*
932  * Returns total amount of memory prepended at the beginning of the output memory
933  * buffer.
934  */
935 size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes() const {
936  CHECK(output_columnar_);
937  size_t buffer_size{0};
938  for (size_t group_idx = 0; group_idx < getGroupbyColCount(); group_idx++) {
939  buffer_size += align_to_int64(
940  std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *
941  getEntryCount());
942  }
943  return buffer_size;
944 }
945 
946 size_t QueryMemoryDescriptor::getColOffInBytesInNextBin(const size_t col_idx) const {
947  auto warp_count = getWarpCount();
948  if (output_columnar_) {
949  CHECK_EQ(size_t(1), group_col_widths_.size());
950  CHECK_EQ(size_t(1), warp_count);
951  return getPaddedSlotWidthBytes(col_idx);
952  }
953 
954  return warp_count * getRowSize();
955 }
956 
957 size_t QueryMemoryDescriptor::getNextColOffInBytes(const int8_t* col_ptr,
958  const size_t bin,
959  const size_t col_idx) const {
961  size_t offset{0};
962  auto warp_count = getWarpCount();
963  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
964  const auto total_slot_count = getSlotCount();
965  if (col_idx + 1 == total_slot_count) {
966  if (output_columnar_) {
967  return (entry_count_ - bin) * chosen_bytes;
968  } else {
969  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
970  }
971  }
972 
973  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
974  if (output_columnar_) {
975  CHECK_EQ(size_t(1), group_col_widths_.size());
976  CHECK_EQ(size_t(1), warp_count);
977 
978  offset = align_to_int64(entry_count_ * chosen_bytes);
979 
980  offset += bin * (next_chosen_bytes - chosen_bytes);
981  return offset;
982  }
983 
984  if (next_chosen_bytes == sizeof(int64_t)) {
985  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
986  } else {
987  return chosen_bytes;
988  }
989 }
990 
992  const size_t col_idx) const {
993  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
994  const auto total_slot_count = getSlotCount();
995  if (col_idx + 1 == total_slot_count) {
996  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
997  }
998 
999  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
1000 
1001  if (next_chosen_bytes == sizeof(int64_t)) {
1002  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
1003  } else {
1004  return chosen_bytes;
1005  }
1006 }
1007 
1008 size_t QueryMemoryDescriptor::getBufferSizeBytes(
1009  const RelAlgExecutionUnit& ra_exe_unit,
1010  const unsigned thread_count,
1011  const ExecutorDeviceType device_type) const {
1012  if (use_streaming_top_n_) {
1013  const size_t n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
1014  return streaming_top_n::get_heap_size(getRowSize(), n, thread_count);
1015  }
1016  return getBufferSizeBytes(device_type, entry_count_);
1017 }
1018 
1031 size_t QueryMemoryDescriptor::getBufferSizeBytes(const ExecutorDeviceType device_type,
1032  const size_t entry_count) const {
1033  if (keyless_hash_ && !output_columnar_) {
1034  CHECK_GE(group_col_widths_.size(), size_t(1));
1035  auto row_bytes = align_to_int64(getColsSize());
1036 
1037  return (interleavedBins(device_type) ? executor_->warpSize() : 1) * entry_count *
1038  row_bytes;
1039  }
1040 
1041  constexpr size_t row_index_width = sizeof(int64_t);
1042  size_t total_bytes{0};
1043  if (output_columnar_) {
1044  switch (query_desc_type_) {
1045  case QueryDescriptionType::Projection:
1046  total_bytes = row_index_width * entry_count + getTotalBytesOfColumnarBuffers();
1047  break;
1048  case QueryDescriptionType::TableFunction:
1049  total_bytes = getTotalBytesOfColumnarBuffers();
1050  break;
1051  default:
1052  total_bytes = sizeof(int64_t) * group_col_widths_.size() * entry_count +
1053  getTotalBytesOfColumnarBuffers();
1054  break;
1055  }
1056  } else {
1057  total_bytes = getRowSize() * entry_count;
1058  }
1059  return total_bytes;
1060 }
1061 
1062 size_t QueryMemoryDescriptor::getBufferSizeBytes(
1063  const ExecutorDeviceType device_type) const {
1064  return getBufferSizeBytes(device_type, entry_count_);
1065 }
1066 
1067 void QueryMemoryDescriptor::setOutputColumnar(const bool val) {
1068  output_columnar_ = val;
1069  if (isLogicalSizedColumnsAllowed()) {
1070  col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
1071  }
1072 }
1073 
1074 /*
1075  * Indicates the query types that are currently allowed to use the logical
1076  * sized columns instead of padded sized ones.
1077  */
1078 bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed() const {
1079  // In distributed mode, result sets are serialized using rowwise iterators, so we use
1080  // consistent slot widths for now
1081  return output_columnar_ && !g_cluster &&
1082  (query_desc_type_ == QueryDescriptionType::Projection ||
1083  query_desc_type_ == QueryDescriptionType::TableFunction);
1084 }
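// e.g. (illustrative): a non-distributed columnar projection of a SMALLINT column can use its
// 2-byte logical slot size rather than a padded 8-byte slot.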
1085 
1086 size_t QueryMemoryDescriptor::getBufferColSlotCount() const {
1087  size_t total_slot_count = col_slot_context_.getSlotCount();
1088 
1089  if (target_groupby_indices_.empty()) {
1090  return total_slot_count;
1091  }
1092  return total_slot_count - std::count_if(target_groupby_indices_.begin(),
1093  target_groupby_indices_.end(),
1094  [](const int64_t i) { return i >= 0; });
1095 }
1096 
1097 bool QueryMemoryDescriptor::usesGetGroupValueFast() const {
1098  return (query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
1099  getGroupbyColCount() == 1);
1100 }
1101 
1102 bool QueryMemoryDescriptor::threadsShareMemory() const {
1103  return query_desc_type_ != QueryDescriptionType::NonGroupedAggregate;
1104 }
1105 
1106 bool QueryMemoryDescriptor::blocksShareMemory() const {
1107  if (g_cluster) {
1108  return true;
1109  }
1110  if (!countDescriptorsLogicallyEmpty(count_distinct_descriptors_)) {
1111  return true;
1112  }
1113  if (executor_->isCPUOnly() || render_output_ ||
1118  getGroupbyColCount() > 1)) {
1119  return true;
1120  }
1123 }
1124 
1125 bool QueryMemoryDescriptor::lazyInitGroups(const ExecutorDeviceType device_type) const {
1126  return device_type == ExecutorDeviceType::GPU && !render_output_ &&
1127  countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
1128 }
1129 
1130 bool QueryMemoryDescriptor::interleavedBins(const ExecutorDeviceType device_type) const {
1131  return interleaved_bins_on_gpu_ && device_type == ExecutorDeviceType::GPU;
1132 }
1133 
1134 // TODO(Saman): an implementation detail, so move this out of QMD
1135 bool QueryMemoryDescriptor::isWarpSyncRequired(
1136  const ExecutorDeviceType device_type) const {
1137  if (device_type == ExecutorDeviceType::GPU) {
1138  return executor_->cudaMgr()->isArchVoltaOrGreaterForAll();
1139  }
1140  return false;
1141 }
1142 
1144  return col_slot_context_.getColCount();
1145 }
1146 
1147 size_t QueryMemoryDescriptor::getSlotCount() const {
1148  return col_slot_context_.getSlotCount();
1149 }
1150 
1151 const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes(const size_t slot_idx) const {
1152  return col_slot_context_.getSlotInfo(slot_idx).padded_size;
1153 }
1154 
1155 void QueryMemoryDescriptor::setPaddedSlotWidthBytes(const size_t slot_idx,
1156  const int8_t bytes) {
1157  col_slot_context_.setPaddedSlotWidthBytes(slot_idx, bytes);
1158 }
1159 
1160 const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes(
1161  const size_t slot_idx) const {
1162  return col_slot_context_.getSlotInfo(slot_idx).logical_size;
1163 }
1164 
1165 const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol(
1166  const size_t col_idx) const {
1167  const auto& col_slots = col_slot_context_.getSlotsForCol(col_idx);
1168  CHECK_EQ(col_slots.size(), size_t(1));
1169  return col_slots.front();
1170 }
1171 
1172 void QueryMemoryDescriptor::useConsistentSlotWidthSize(const int8_t slot_width_size) {
1173  col_slot_context_.setAllSlotsSize(slot_width_size);
1174 }
1175 
1176 size_t QueryMemoryDescriptor::getRowWidth() const {
1177  // Note: Actual row size may include padding (see ResultSetBufferAccessors.h)
1178  return col_slot_context_.getAllSlotsPaddedSize();
1179 }
1180 
1181 int8_t QueryMemoryDescriptor::updateActualMinByteWidth(
1182  const int8_t actual_min_byte_width) const {
1183  return col_slot_context_.getMinPaddedByteSize(actual_min_byte_width);
1184 }
1185 
1186 void QueryMemoryDescriptor::addColSlotInfo(
1187  const std::vector<std::tuple<int8_t, int8_t>>& slots_for_col) {
1188  col_slot_context_.addColumn(slots_for_col);
1189 }
1190 
1191 void QueryMemoryDescriptor::addColSlotInfoFlatBuffer(const int64_t flatbuffer_size) {
1192  col_slot_context_.addColumnFlatBuffer(flatbuffer_size);
1193 }
1194 
1197 }
1198 
1201 }
1202 
1207 }
1208 
1209 std::string QueryMemoryDescriptor::queryDescTypeToString() const {
1210  switch (query_desc_type_) {
1211  case QueryDescriptionType::GroupByPerfectHash:
1212  return "Perfect Hash";
1213  case QueryDescriptionType::GroupByBaselineHash:
1214  return "Baseline Hash";
1215  case QueryDescriptionType::Projection:
1216  return "Projection";
1217  case QueryDescriptionType::TableFunction:
1218  return "Table Function";
1219  case QueryDescriptionType::NonGroupedAggregate:
1220  return "Non-grouped Aggregate";
1221  case QueryDescriptionType::Estimator:
1222  return "Estimator";
1223  default:
1224  UNREACHABLE();
1225  }
1226  return "";
1227 }
1228 
1229 std::string QueryMemoryDescriptor::toString() const {
1230  auto str = reductionKey();
1231  str += "\tAllow Multifrag: " + ::toString(allow_multifrag_) + "\n";
1232  str += "\tInterleaved Bins on GPU: " + ::toString(interleaved_bins_on_gpu_) + "\n";
1233  str += "\tBlocks Share Memory: " + ::toString(blocksShareMemory()) + "\n";
1234  str += "\tThreads Share Memory: " + ::toString(threadsShareMemory()) + "\n";
1235  str += "\tUses Fast Group Values: " + ::toString(usesGetGroupValueFast()) + "\n";
1236  str +=
1237  "\tLazy Init Groups (GPU): " + ::toString(lazyInitGroups(ExecutorDeviceType::GPU)) +
1238  "\n";
1239  str += "\tEntry Count: " + std::to_string(entry_count_) + "\n";
1240  str += "\tMin Val (perfect hash only): " + std::to_string(min_val_) + "\n";
1241  str += "\tMax Val (perfect hash only): " + std::to_string(max_val_) + "\n";
1242  str += "\tBucket Val (perfect hash only): " + std::to_string(bucket_) + "\n";
1243  str += "\tSort on GPU: " + ::toString(sort_on_gpu_) + "\n";
1244  str += "\tUse Streaming Top N: " + ::toString(use_streaming_top_n_) + "\n";
1245  str += "\tOutput Columnar: " + ::toString(output_columnar_) + "\n";
1246  str += "\tRender Output: " + ::toString(render_output_) + "\n";
1247  str += "\tUse Baseline Sort: " + ::toString(must_use_baseline_sort_) + "\n";
1248  str += "\tIs Table Function: " + ::toString(is_table_function_) + "\n";
1249  return str;
1250 }
1251 
1252 std::string QueryMemoryDescriptor::reductionKey() const {
1253  std::string str;
1254  str += "Query Memory Descriptor State\n";
1255  str += "\tQuery Type: " + queryDescTypeToString() + "\n";
1256  str +=
1257  "\tKeyless Hash: " + ::toString(keyless_hash_) +
1258  (keyless_hash_ ? ", target index for key: " + std::to_string(getTargetIdxForKey())
1259  : "") +
1260  "\n";
1261  str += "\tEffective key width: " + std::to_string(getEffectiveKeyWidth()) + "\n";
1262  str += "\tNumber of group columns: " + std::to_string(getGroupbyColCount()) + "\n";
1263  const auto group_indices_size = targetGroupbyIndicesSize();
1264  if (group_indices_size) {
1265  std::vector<std::string> group_indices_strings;
1266  for (size_t target_idx = 0; target_idx < group_indices_size; ++target_idx) {
1267  group_indices_strings.push_back(std::to_string(getTargetGroupbyIndex(target_idx)));
1268  }
1269  str += "\tTarget group by indices: " +
1270  boost::algorithm::join(group_indices_strings, ",") + "\n";
1271  }
1272  str += "\t" + col_slot_context_.toString();
1273  return str;
1274 }
1275 
1276 std::vector<TargetInfo> target_exprs_to_infos(
1277  const std::vector<Analyzer::Expr*>& targets,
1278  const QueryMemoryDescriptor& query_mem_desc) {
1279  std::vector<TargetInfo> target_infos;
1280  for (const auto target_expr : targets) {
1281  auto target = get_target_info(target_expr, g_bigint_count);
1282  if (query_mem_desc.getQueryDescriptionType() ==
1283  QueryDescriptionType::NonGroupedAggregate) {
1284  set_notnull(target, false);
1285  target.sql_type.set_notnull(false);
1286  }
1287  target_infos.push_back(target);
1288  }
1289  return target_infos;
1290 }
1291 
1292 std::optional<size_t> QueryMemoryDescriptor::varlenOutputBufferElemSize() const {
1293  int64_t buffer_element_size{0};
1294  for (size_t i = 0; i < col_slot_context_.getSlotCount(); i++) {
1295  try {
1296  const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
1297  if (slot_element_size < 0) {
1298  return std::nullopt;
1299  }
1300  buffer_element_size += slot_element_size;
1301  } catch (...) {
1302  continue;
1303  }
1304  }
1305  return buffer_element_size;
1306 }
1307 
1308 size_t QueryMemoryDescriptor::varlenOutputRowSizeToSlot(const size_t slot_idx) const {
1309  int64_t buffer_element_size{0};
1311  for (size_t i = 0; i < slot_idx; i++) {
1312  try {
1313  const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
1314  if (slot_element_size < 0) {
1315  continue;
1316  }
1317  buffer_element_size += slot_element_size;
1318  } catch (...) {
1319  continue;
1320  }
1321  }
1322  return buffer_element_size;
1323 }