QueryMemoryDescriptor.cpp
/*
 * Copyright 2018 MapD Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "QueryMemoryDescriptor.h"

#include "../Execute.h"
#include "../ExpressionRewrite.h"
#include "../GroupByAndAggregate.h"
#include "../StreamingTopN.h"
#include "../UsedColumnsVisitor.h"
#include "ColSlotContext.h"

#include <boost/algorithm/cxx11/any_of.hpp>

bool g_enable_smem_group_by{true};
extern bool g_enable_columnar_output;

namespace {

bool is_int_and_no_bigger_than(const SQLTypeInfo& ti, const size_t byte_width) {
  if (!ti.is_integer()) {
    return false;
  }
  return get_bit_width(ti) <= (byte_width * 8);
}

std::vector<int64_t> target_expr_group_by_indices(
    const std::list<std::shared_ptr<Analyzer::Expr>>& groupby_exprs,
    const std::vector<Analyzer::Expr*>& target_exprs) {
  std::vector<int64_t> indices(target_exprs.size(), -1);
  for (size_t target_idx = 0; target_idx < target_exprs.size(); ++target_idx) {
    const auto target_expr = target_exprs[target_idx];
    if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
      continue;
    }
    const auto var_expr = dynamic_cast<const Analyzer::Var*>(target_expr);
    if (var_expr && var_expr->get_which_row() == Analyzer::Var::kGROUPBY) {
      indices[target_idx] = var_expr->get_varno() - 1;
      continue;
    }
  }
  return indices;
}
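
// Illustration (editorial note, not in the original source): for
//   SELECT dept, COUNT(*) FROM emp GROUP BY dept;
// the first target is a Var marked kGROUPBY with varno 1, while COUNT(*) is an
// AggExpr, so the function returns {0, -1}: the grouped column can be read
// straight from the group key, the aggregate cannot.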

std::vector<int64_t> target_expr_proj_indices(const RelAlgExecutionUnit& ra_exe_unit,
                                              const Catalog_Namespace::Catalog& cat) {
  if (ra_exe_unit.input_descs.size() > 1 ||
      !ra_exe_unit.sort_info.order_entries.empty()) {
    return {};
  }
  std::vector<int64_t> target_indices(ra_exe_unit.target_exprs.size(), -1);
  UsedColumnsVisitor columns_visitor;
  std::unordered_set<int> used_columns;
  for (const auto& simple_qual : ra_exe_unit.simple_quals) {
    const auto crt_used_columns = columns_visitor.visit(simple_qual.get());
    used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
  }
  for (const auto& qual : ra_exe_unit.quals) {
    const auto crt_used_columns = columns_visitor.visit(qual.get());
    used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
  }
  for (const auto& target : ra_exe_unit.target_exprs) {
    const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target);
    if (col_var) {
      const auto cd = get_column_descriptor_maybe(
          col_var->get_column_id(), col_var->get_table_id(), cat);
      if (!cd || !cd->isVirtualCol) {
        continue;
      }
    }
    const auto crt_used_columns = columns_visitor.visit(target);
    used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
  }
  for (size_t target_idx = 0; target_idx < ra_exe_unit.target_exprs.size();
       ++target_idx) {
    const auto target_expr = ra_exe_unit.target_exprs[target_idx];
    CHECK(target_expr);
    const auto& ti = target_expr->get_type_info();
    // TODO: add proper lazy fetch for varlen types in result set
    if (ti.is_varlen()) {
      continue;
    }
    const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
    if (!col_var) {
      continue;
    }
    if (!ti.is_varlen() &&
        used_columns.find(col_var->get_column_id()) == used_columns.end()) {
      // setting the target index to zero so that it can be decoded properly later (in
      // lazy fetch, the zeroth target index indicates the corresponding rowid column
      // for the projected entry)
      target_indices[target_idx] = 0;
    }
  }
  return target_indices;
}

int8_t pick_baseline_key_component_width(const ExpressionRange& range,
                                         const size_t group_col_width) {
  if (range.getType() == ExpressionRangeType::Invalid) {
    return sizeof(int64_t);
  }
  switch (range.getType()) {
    case ExpressionRangeType::Integer:
      if (group_col_width == sizeof(int64_t) && range.hasNulls()) {
        return sizeof(int64_t);
      }
      return range.getIntMax() < EMPTY_KEY_32 - 1 ? sizeof(int32_t) : sizeof(int64_t);
    case ExpressionRangeType::Float:
    case ExpressionRangeType::Double:
      return sizeof(int64_t);  // No compaction for floating point yet.
    default:
      UNREACHABLE();
  }
  return sizeof(int64_t);
}
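
// Hedged usage sketch (editorial illustration, not in the original source;
// ExpressionRange::makeIntRange is the factory from ExpressionRange.h, and the
// expected value follows from the logic above):
//   const auto range =
//       ExpressionRange::makeIntRange(0, 100000, /*bucket=*/0, /*has_nulls=*/false);
//   // 100000 < EMPTY_KEY_32 - 1, so a 4-byte group key component stays 4 bytes:
//   CHECK_EQ(static_cast<int8_t>(sizeof(int32_t)),
//            pick_baseline_key_component_width(range, /*group_col_width=*/4));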

// TODO(miyu): make sure following setting of compact width is correct in all cases.
int8_t pick_baseline_key_width(const RelAlgExecutionUnit& ra_exe_unit,
                               const std::vector<InputTableInfo>& query_infos,
                               const Executor* executor) {
  int8_t compact_width{4};
  for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
    const auto expr_range = getExpressionRange(groupby_expr.get(), query_infos, executor);
    compact_width = std::max(compact_width,
                             pick_baseline_key_component_width(
                                 expr_range, groupby_expr->get_type_info().get_size()));
  }
  return compact_width;
}

bool use_streaming_top_n(const RelAlgExecutionUnit& ra_exe_unit,
                         const bool output_columnar) {
  if (g_cluster) {
    return false;  // TODO(miyu)
  }

  for (const auto target_expr : ra_exe_unit.target_exprs) {
    if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
      return false;
    }
    if (dynamic_cast<const Analyzer::WindowFunction*>(target_expr)) {
      return false;
    }
  }

  // TODO: Allow streaming top n for columnar output
  if (!output_columnar && ra_exe_unit.sort_info.order_entries.size() == 1 &&
      ra_exe_unit.sort_info.limit &&
      ra_exe_unit.sort_info.algorithm == SortAlgorithm::StreamingTopN) {
    const auto only_order_entry = ra_exe_unit.sort_info.order_entries.front();
    CHECK_GT(only_order_entry.tle_no, int(0));
    CHECK_LE(static_cast<size_t>(only_order_entry.tle_no),
             ra_exe_unit.target_exprs.size());
    const auto order_entry_expr = ra_exe_unit.target_exprs[only_order_entry.tle_no - 1];
    const auto n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
    if ((order_entry_expr->get_type_info().is_number() ||
         order_entry_expr->get_type_info().is_time()) &&
        n <= 100000) {  // TODO(miyu): relax?
      return true;
    }
  }

  return false;
}
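
// Illustration (editorial note, not in the original source): a query shaped like
//   SELECT ts FROM events ORDER BY ts DESC LIMIT 100;
// qualifies: no aggregates or window functions, a single order entry on a
// time-typed target, offset + limit = 100 <= 100000, row-wise output, and the
// sort was planned as SortAlgorithm::StreamingTopN.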

}  // namespace

std::unique_ptr<QueryMemoryDescriptor> QueryMemoryDescriptor::init(
    const Executor* executor,
    const RelAlgExecutionUnit& ra_exe_unit,
    const std::vector<InputTableInfo>& query_infos,
    const ColRangeInfo& col_range_info,
    const KeylessInfo& keyless_info,
    const bool allow_multifrag,
    const ExecutorDeviceType device_type,
    const int8_t crt_min_byte_width,
    const bool sort_on_gpu_hint,
    const size_t shard_count,
    const size_t max_groups_buffer_entry_count,
    RenderInfo* render_info,
    const CountDistinctDescriptors count_distinct_descriptors,
    const bool must_use_baseline_sort,
    const bool output_columnar_hint,
    const bool streaming_top_n_hint) {
  auto group_col_widths = get_col_byte_widths(ra_exe_unit.groupby_exprs);
  const bool is_group_by{!group_col_widths.empty()};

  auto col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, {});

  const auto min_slot_size = QueryMemoryDescriptor::pick_target_compact_width(
      ra_exe_unit, query_infos, crt_min_byte_width);

  col_slot_context.setAllSlotsPaddedSize(min_slot_size);
  col_slot_context.validate();

  if (!is_group_by) {
    CHECK(!must_use_baseline_sort);

    return std::make_unique<QueryMemoryDescriptor>(
        executor,
        ra_exe_unit,
        query_infos,
        allow_multifrag,
        false,
        false,
        -1,
        ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
                                           : QueryDescriptionType::NonGroupedAggregate,
                     0,
                     0,
                     0,
                     false},
        col_slot_context,
        std::vector<int8_t>{},
        /*group_col_compact_width=*/0,
        std::vector<int64_t>{},
        /*entry_count=*/1,
        count_distinct_descriptors,
        false,
        output_columnar_hint,
        render_info && render_info->isPotentialInSituRender(),
        must_use_baseline_sort,
        /*use_streaming_top_n=*/false);
  }

  size_t entry_count = 1;
  auto actual_col_range_info = col_range_info;
  bool interleaved_bins_on_gpu = false;
  bool keyless_hash = false;
  bool streaming_top_n = false;
  int8_t group_col_compact_width = 0;
  int32_t idx_target_as_key = -1;
  auto output_columnar = output_columnar_hint;
  std::vector<int64_t> target_groupby_indices;

  switch (col_range_info.hash_type_) {
    case QueryDescriptionType::GroupByPerfectHash: {
      if (render_info) {
        render_info->setInSituDataIfUnset(false);
      }
      // keyless hash: whether or not group columns are stored at the beginning of the
      // output buffer
      keyless_hash =
          (!sort_on_gpu_hint ||
           !many_entries(
               col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
          !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;

      // if keyless, then this target index indicates whether an entry is empty or not
      // (acts as a key)
      idx_target_as_key = keyless_info.target_index;

      if (group_col_widths.size() > 1) {
        // col range info max contains the expected cardinality of the output
        entry_count = static_cast<size_t>(actual_col_range_info.max);
        actual_col_range_info.bucket = 0;
      } else {
        // single column perfect hash
        entry_count = std::max(
            GroupByAndAggregate::getBucketedCardinality(col_range_info), int64_t(1));
        const size_t interleaved_max_threshold{512};

        if (must_use_baseline_sort) {
          target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
                                                                ra_exe_unit.target_exprs);
          col_slot_context =
              ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
        }

        bool has_varlen_sample_agg = false;
        for (const auto& target_expr : ra_exe_unit.target_exprs) {
          if (target_expr->get_contains_agg()) {
            const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
            CHECK(agg_expr);
            if (agg_expr->get_aggtype() == kSAMPLE &&
                agg_expr->get_type_info().is_varlen()) {
              has_varlen_sample_agg = true;
              break;
            }
          }
        }

        interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
                                  (entry_count <= interleaved_max_threshold) &&
                                  (device_type == ExecutorDeviceType::GPU) &&
                                  countDescriptorsLogicallyEmpty(
                                      count_distinct_descriptors) &&
                                  !output_columnar;
      }
      break;
    }
    case QueryDescriptionType::GroupByBaselineHash: {
      if (render_info) {
        render_info->setInSituDataIfUnset(false);
      }
      entry_count = shard_count
                        ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
                        : max_groups_buffer_entry_count;
      target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
                                                            ra_exe_unit.target_exprs);
      col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);

      group_col_compact_width =
          output_columnar ? 8
                          : pick_baseline_key_width(ra_exe_unit, query_infos, executor);

      actual_col_range_info =
          ColRangeInfo{QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
      break;
    }
    case QueryDescriptionType::Projection: {
      CHECK(!must_use_baseline_sort);

      if (streaming_top_n_hint && use_streaming_top_n(ra_exe_unit, output_columnar)) {
        streaming_top_n = true;
        entry_count = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
      } else {
        if (ra_exe_unit.use_bump_allocator) {
          output_columnar = false;
          entry_count = 0;
        } else {
          entry_count = ra_exe_unit.scan_limit
                            ? static_cast<size_t>(ra_exe_unit.scan_limit)
                            : max_groups_buffer_entry_count;
        }
      }

      const auto catalog = executor->getCatalog();
      CHECK(catalog);
      target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
                                   ? target_expr_proj_indices(ra_exe_unit, *catalog)
                                   : std::vector<int64_t>{};

      col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
      break;
    }
    default:
      UNREACHABLE() << "Unknown query type";
  }

  return std::make_unique<QueryMemoryDescriptor>(
      executor,
      ra_exe_unit,
      query_infos,
      allow_multifrag,
      keyless_hash,
      interleaved_bins_on_gpu,
      idx_target_as_key,
      actual_col_range_info,
      col_slot_context,
      group_col_widths,
      group_col_compact_width,
      target_groupby_indices,
      entry_count,
      count_distinct_descriptors,
      sort_on_gpu_hint,
      output_columnar,
      render_info && render_info->isPotentialInSituRender(),
      must_use_baseline_sort,
      streaming_top_n);
}
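
// Recap (editorial note, not in the original source): entry_count is sized per
// query type above. A perfect-hash GROUP BY over an integer column with range
// [1, 1000] and bucket 1 gets ~1000 entries from getBucketedCardinality;
// baseline hash divides max_groups_buffer_entry_count across shards; and a
// projection uses scan_limit, the streaming top-n heap size (offset + limit),
// or zero entries under the bump allocator.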

namespace {
bool anyOf(std::vector<Analyzer::Expr*> const& target_exprs, SQLAgg const agg_kind) {
  return boost::algorithm::any_of(target_exprs, [agg_kind](Analyzer::Expr const* expr) {
    auto const* const agg = dynamic_cast<Analyzer::AggExpr const*>(expr);
    return agg && agg->get_aggtype() == agg_kind;
  });
}
}  // namespace

QueryMemoryDescriptor::QueryMemoryDescriptor(
    const Executor* executor,
    const RelAlgExecutionUnit& ra_exe_unit,
    const std::vector<InputTableInfo>& query_infos,
    const bool allow_multifrag,
    const bool keyless_hash,
    const bool interleaved_bins_on_gpu,
    const int32_t idx_target_as_key,
    const ColRangeInfo& col_range_info,
    const ColSlotContext& col_slot_context,
    const std::vector<int8_t>& group_col_widths,
    const int8_t group_col_compact_width,
    const std::vector<int64_t>& target_groupby_indices,
    const size_t entry_count,
    const CountDistinctDescriptors count_distinct_descriptors,
    const bool sort_on_gpu_hint,
    const bool output_columnar_hint,
    const bool render_output,
    const bool must_use_baseline_sort,
    const bool use_streaming_top_n)
    : executor_(executor)
    , allow_multifrag_(allow_multifrag)
    , query_desc_type_(col_range_info.hash_type_)
    , keyless_hash_(keyless_hash)
    , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
    , idx_target_as_key_(idx_target_as_key)
    , group_col_widths_(group_col_widths)
    , group_col_compact_width_(group_col_compact_width)
    , target_groupby_indices_(target_groupby_indices)
    , entry_count_(entry_count)
    , min_val_(col_range_info.min)
    , max_val_(col_range_info.max)
    , bucket_(col_range_info.bucket)
    , has_nulls_(col_range_info.has_nulls)
    , count_distinct_descriptors_(count_distinct_descriptors)
    , output_columnar_(false)
    , render_output_(render_output)
    , must_use_baseline_sort_(must_use_baseline_sort)
    , is_table_function_(false)
    , use_streaming_top_n_(use_streaming_top_n)
    , force_4byte_float_(false)
    , col_slot_context_(col_slot_context) {
  col_slot_context_.setAllUnsetSlotsPaddedSize(8);
  col_slot_context_.validate();

  sort_on_gpu_ = sort_on_gpu_hint && canOutputColumnar() && !keyless_hash_;

  if (sort_on_gpu_) {
    CHECK(!ra_exe_unit.use_bump_allocator);
    output_columnar_ = true;
  } else {
    switch (query_desc_type_) {
      case QueryDescriptionType::Projection:
        output_columnar_ = output_columnar_hint;
        break;
      case QueryDescriptionType::GroupByPerfectHash:
        output_columnar_ = output_columnar_hint &&
                           countDescriptorsLogicallyEmpty(count_distinct_descriptors_) &&
                           !anyOf(ra_exe_unit.target_exprs, kAPPROX_MEDIAN);
        break;
      case QueryDescriptionType::GroupByBaselineHash:
        output_columnar_ = output_columnar_hint;
        break;
      case QueryDescriptionType::NonGroupedAggregate:
        output_columnar_ = output_columnar_hint &&
                           countDescriptorsLogicallyEmpty(count_distinct_descriptors_) &&
                           !anyOf(ra_exe_unit.target_exprs, kAPPROX_MEDIAN);
        break;
      default:
        output_columnar_ = false;
        break;
    }
  }

  if (isLogicalSizedColumnsAllowed()) {
    // TODO(adb): Ensure fixed size buffer allocations are correct with all logical
    // column sizes
    CHECK(!ra_exe_unit.use_bump_allocator);
    col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
    col_slot_context_.validate();
  }

#ifdef HAVE_CUDA
  // Check Streaming Top N heap usage, bail if > max slab size, CUDA ONLY
  if (use_streaming_top_n_ && executor->catalog_->getDataMgr().gpusPresent()) {
    const auto thread_count = executor->blockSize() * executor->gridSize();
    const auto total_buff_size =
        streaming_top_n::get_heap_size(getRowSize(), getEntryCount(), thread_count);
    if (total_buff_size > executor_->maxGpuSlabSize()) {
      throw StreamingTopNOOM(total_buff_size);
    }
  }
#endif
}

QueryMemoryDescriptor::QueryMemoryDescriptor()
    : executor_(nullptr)
    , allow_multifrag_(false)
    , query_desc_type_(QueryDescriptionType::Projection)
    , keyless_hash_(false)
    , interleaved_bins_on_gpu_(false)
    , idx_target_as_key_(0)
    , group_col_compact_width_(0)
    , entry_count_(0)
    , min_val_(0)
    , max_val_(0)
    , bucket_(0)
    , has_nulls_(false)
    , sort_on_gpu_(false)
    , output_columnar_(false)
    , render_output_(false)
    , must_use_baseline_sort_(false)
    , is_table_function_(false)
    , use_streaming_top_n_(false)
    , force_4byte_float_(false) {}

QueryMemoryDescriptor::QueryMemoryDescriptor(const Executor* executor,
                                             const size_t entry_count,
                                             const QueryDescriptionType query_desc_type,
                                             const bool is_table_function)
    : executor_(executor)
    , allow_multifrag_(false)
    , query_desc_type_(query_desc_type)
    , keyless_hash_(false)
    , interleaved_bins_on_gpu_(false)
    , idx_target_as_key_(0)
    , group_col_compact_width_(0)
    , entry_count_(entry_count)
    , min_val_(0)
    , max_val_(0)
    , bucket_(0)
    , has_nulls_(false)
    , sort_on_gpu_(false)
    , output_columnar_(false)
    , render_output_(false)
    , must_use_baseline_sort_(false)
    , is_table_function_(is_table_function)
    , use_streaming_top_n_(false)
    , force_4byte_float_(false) {}

QueryMemoryDescriptor::QueryMemoryDescriptor(const QueryDescriptionType query_desc_type,
                                             const int64_t min_val,
                                             const int64_t max_val,
                                             const bool has_nulls,
                                             const std::vector<int8_t>& group_col_widths)
    : executor_(nullptr)
    , allow_multifrag_(false)
    , query_desc_type_(query_desc_type)
    , keyless_hash_(false)
    , interleaved_bins_on_gpu_(false)
    , idx_target_as_key_(0)
    , group_col_widths_(group_col_widths)
    , group_col_compact_width_(0)
    , entry_count_(0)
    , min_val_(min_val)
    , max_val_(max_val)
    , bucket_(0)
    , has_nulls_(false)
    , sort_on_gpu_(false)
    , output_columnar_(false)
    , render_output_(false)
    , must_use_baseline_sort_(false)
    , is_table_function_(false)
    , use_streaming_top_n_(false)
    , force_4byte_float_(false) {}

bool QueryMemoryDescriptor::operator==(const QueryMemoryDescriptor& other) const {
  // Note that this method does not check ptr reference members (e.g. executor_) or
  // entry_count_
  if (query_desc_type_ != other.query_desc_type_) {
    return false;
  }
  if (keyless_hash_ != other.keyless_hash_) {
    return false;
  }
  if (interleaved_bins_on_gpu_ != other.interleaved_bins_on_gpu_) {
    return false;
  }
  if (idx_target_as_key_ != other.idx_target_as_key_) {
    return false;
  }
  if (force_4byte_float_ != other.force_4byte_float_) {
    return false;
  }
  if (group_col_widths_ != other.group_col_widths_) {
    return false;
  }
  if (group_col_compact_width_ != other.group_col_compact_width_) {
    return false;
  }
  if (target_groupby_indices_ != other.target_groupby_indices_) {
    return false;
  }
  if (min_val_ != other.min_val_) {
    return false;
  }
  if (max_val_ != other.max_val_) {
    return false;
  }
  if (bucket_ != other.bucket_) {
    return false;
  }
  if (has_nulls_ != other.has_nulls_) {
    return false;
  }
  if (count_distinct_descriptors_.size() != other.count_distinct_descriptors_.size()) {
    return false;
  } else {
    // Count distinct descriptors can legitimately differ in device only.
    for (size_t i = 0; i < count_distinct_descriptors_.size(); ++i) {
      auto ref_count_distinct_desc = other.count_distinct_descriptors_[i];
      auto count_distinct_desc = count_distinct_descriptors_[i];
      count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
      if (ref_count_distinct_desc != count_distinct_desc) {
        return false;
      }
    }
  }
  if (sort_on_gpu_ != other.sort_on_gpu_) {
    return false;
  }
  if (output_columnar_ != other.output_columnar_) {
    return false;
  }
  if (col_slot_context_ != other.col_slot_context_) {
    return false;
  }
  return true;
}

std::unique_ptr<QueryExecutionContext> QueryMemoryDescriptor::getQueryExecutionContext(
    const RelAlgExecutionUnit& ra_exe_unit,
    const Executor* executor,
    const ExecutorDeviceType device_type,
    const ExecutorDispatchMode dispatch_mode,
    const int device_id,
    const int64_t num_rows,
    const std::vector<std::vector<const int8_t*>>& col_buffers,
    const std::vector<std::vector<uint64_t>>& frag_offsets,
    std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
    const bool output_columnar,
    const bool sort_on_gpu,
    const size_t thread_idx,
    RenderInfo* render_info) const {
  auto timer = DEBUG_TIMER(__func__);
  if (frag_offsets.empty()) {
    return nullptr;
  }
  return std::unique_ptr<QueryExecutionContext>(
      new QueryExecutionContext(ra_exe_unit,
                                *this,
                                executor,
                                device_type,
                                dispatch_mode,
                                device_id,
                                num_rows,
                                col_buffers,
                                frag_offsets,
                                row_set_mem_owner,
                                output_columnar,
                                sort_on_gpu,
                                thread_idx,
                                render_info));
}

int8_t QueryMemoryDescriptor::pick_target_compact_width(
    const RelAlgExecutionUnit& ra_exe_unit,
    const std::vector<InputTableInfo>& query_infos,
    const int8_t crt_min_byte_width) {
  if (g_bigint_count) {
    return sizeof(int64_t);
  }
  int8_t compact_width{0};
  auto col_it = ra_exe_unit.input_col_descs.begin();
  int unnest_array_col_id{std::numeric_limits<int>::min()};
  for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
    const auto uoper = dynamic_cast<Analyzer::UOper*>(groupby_expr.get());
    if (uoper && uoper->get_optype() == kUNNEST) {
      const auto& arg_ti = uoper->get_operand()->get_type_info();
      CHECK(arg_ti.is_array());
      const auto& elem_ti = arg_ti.get_elem_type();
      if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
        unnest_array_col_id = (*col_it)->getColId();
      } else {
        compact_width = crt_min_byte_width;
        break;
      }
    }
    ++col_it;
  }
  if (!compact_width &&
      (ra_exe_unit.groupby_exprs.size() != 1 || !ra_exe_unit.groupby_exprs.front())) {
    compact_width = crt_min_byte_width;
  }
  if (!compact_width) {
    col_it = ra_exe_unit.input_col_descs.begin();
    std::advance(col_it, ra_exe_unit.groupby_exprs.size());
    for (const auto target : ra_exe_unit.target_exprs) {
      const auto& ti = target->get_type_info();
      const auto agg = dynamic_cast<const Analyzer::AggExpr*>(target);
      if (agg && agg->get_arg()) {
        compact_width = crt_min_byte_width;
        break;
      }

      if (agg) {
        CHECK_EQ(kCOUNT, agg->get_aggtype());
        CHECK(!agg->get_is_distinct());
        ++col_it;
        continue;
      }

      if (is_int_and_no_bigger_than(ti, 4) ||
          (ti.is_string() && ti.get_compression() == kENCODING_DICT)) {
        ++col_it;
        continue;
      }

      const auto uoper = dynamic_cast<Analyzer::UOper*>(target);
      if (uoper && uoper->get_optype() == kUNNEST &&
          (*col_it)->getColId() == unnest_array_col_id) {
        const auto arg_ti = uoper->get_operand()->get_type_info();
        CHECK(arg_ti.is_array());
        const auto& elem_ti = arg_ti.get_elem_type();
        if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
          ++col_it;
          continue;
        }
      }

      compact_width = crt_min_byte_width;
      break;
    }
  }
  if (!compact_width) {
    size_t total_tuples{0};
    for (const auto& qi : query_infos) {
      total_tuples += qi.info.getNumTuples();
    }
    return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
                   unnest_array_col_id != std::numeric_limits<int>::min()
               ? 4
               : crt_min_byte_width;
  } else {
    // TODO(miyu): relax this condition to allow more cases just w/o padding
    for (auto wid : get_col_byte_widths(ra_exe_unit.target_exprs)) {
      compact_width = std::max(compact_width, wid);
    }
    return compact_width;
  }
}
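
// Illustration (editorial note, not in the original source): for
//   SELECT small_int_col, COUNT(*) FROM t GROUP BY small_int_col;
// compact_width stays 0 through the loops (COUNT has no argument and the
// target is a narrow integer), so the total tuple count decides: inputs with
// at most UINT32_MAX rows compact the targets to 4-byte slots, larger inputs
// fall back to crt_min_byte_width.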

size_t QueryMemoryDescriptor::getColsSize() const {
  return col_slot_context_.getAllSlotsAlignedPaddedSize();
}

size_t QueryMemoryDescriptor::getRowSize() const {
  CHECK(!output_columnar_);
  size_t total_bytes{0};
  if (keyless_hash_) {
    // ignore, there's no group column in the output buffer
    CHECK(query_desc_type_ == QueryDescriptionType::GroupByPerfectHash);
  } else {
    total_bytes += group_col_widths_.size() * getEffectiveKeyWidth();
    total_bytes = align_to_int64(total_bytes);
  }
  total_bytes += getColsSize();
  return align_to_int64(total_bytes);
}

size_t QueryMemoryDescriptor::getWarpCount() const {
  return (interleaved_bins_on_gpu_ ? executor_->warpSize() : 1);
}

size_t QueryMemoryDescriptor::getCompactByteWidth() const {
  return col_slot_context_.getCompactByteWidth();
}

/*
 * Returns the maximum total number of bytes (including required paddings) to store
 * all non-lazy columns' results for columnar cases.
 */
size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers() const {
  CHECK(output_columnar_);
  return col_slot_context_.getTotalBytesOfColumnarBuffers(entry_count_);
}

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers(
    const size_t num_entries_per_column) const {
  return col_slot_context_.getTotalBytesOfColumnarBuffers(num_entries_per_column);
}

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarProjections(
    const size_t projection_count) const {
  constexpr size_t row_index_width = sizeof(int64_t);
  return getTotalBytesOfColumnarBuffers(projection_count) +
         row_index_width * projection_count;
}

size_t QueryMemoryDescriptor::getColOnlyOffInBytes(const size_t col_idx) const {
  return col_slot_context_.getColOnlyOffInBytes(col_idx);
}

/*
 * Returns the memory offset in bytes for a specific agg column in the output
 * memory buffer. Depending on the query type, there may be some extra portion
 * of memory prepended at the beginning of the buffer. A brief description of
 * the memory layout is as follows:
 * 1. projections: index column (64bit) + all target columns
 * 2. group by: all group columns (64-bit each) + all agg columns
 * 2a. if keyless, there is no prepended group column stored at the beginning
 */
size_t QueryMemoryDescriptor::getColOffInBytes(const size_t col_idx) const {
  const auto warp_count = getWarpCount();
  if (output_columnar_) {
    CHECK_EQ(size_t(1), warp_count);
    size_t offset{0};
    if (!keyless_hash_) {
      offset += getPrependedGroupBufferSizeInBytes();
    }
    for (size_t index = 0; index < col_idx; ++index) {
      offset += align_to_int64(getPaddedSlotWidthBytes(index) * entry_count_);
    }
    return offset;
  }

  size_t offset{0};
  if (keyless_hash_) {
    // ignore, there's no group column in the output buffer
    CHECK(query_desc_type_ == QueryDescriptionType::GroupByPerfectHash);
  } else {
    offset += group_col_widths_.size() * getEffectiveKeyWidth();
    offset = align_to_int64(offset);
  }
  offset += getColOnlyOffInBytes(col_idx);
  return offset;
}
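
// Worked example (editorial note, not in the original source): row-wise,
// non-keyless layout with two group keys and getEffectiveKeyWidth() == 8:
// the key prefix takes 2 * 8 = 16 bytes (already 64-bit aligned), so an agg
// column with getColOnlyOffInBytes(col_idx) == 16 starts at byte 32 of each
// row-wise entry.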

/*
 * Returns the memory offset for a particular group column in the prepended group
 * columns portion of the memory.
 */
size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes(
    const size_t group_idx) const {
  CHECK(output_columnar_);
  CHECK(group_idx < getGroupbyColCount());
  size_t offset{0};
  for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
    // TODO(Saman): relax that int64_bit part immediately
    offset += align_to_int64(
        std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
        getEntryCount());
  }
  return offset;
}

/*
 * Returns the total amount of memory prepended at the beginning of the output memory
 * buffer.
 */
size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes() const {
  CHECK(output_columnar_);
  size_t buffer_size{0};
  for (size_t group_idx = 0; group_idx < getGroupbyColCount(); group_idx++) {
    buffer_size += align_to_int64(
        std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *
        getEntryCount());
  }
  return buffer_size;
}
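
// Worked example (editorial note, not in the original source): columnar
// layout, two group columns of widths 4 and 8 with 1000 entries -- each group
// column is stored with at least 8 bytes per entry, so the prepended buffer is
// align_to_int64(8 * 1000) + align_to_int64(8 * 1000) = 16000 bytes.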

size_t QueryMemoryDescriptor::getColOffInBytesInNextBin(const size_t col_idx) const {
  auto warp_count = getWarpCount();
  if (output_columnar_) {
    CHECK_EQ(size_t(1), group_col_widths_.size());
    CHECK_EQ(size_t(1), warp_count);
    return getPaddedSlotWidthBytes(col_idx);
  }

  return warp_count * getRowSize();
}

size_t QueryMemoryDescriptor::getNextColOffInBytes(const int8_t* col_ptr,
                                                   const size_t bin,
                                                   const size_t col_idx) const {
  CHECK(!output_columnar_ || bin < entry_count_);
  size_t offset{0};
  auto warp_count = getWarpCount();
  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
  const auto total_slot_count = getSlotCount();
  if (col_idx + 1 == total_slot_count) {
    if (output_columnar_) {
      return (entry_count_ - bin) * chosen_bytes;
    } else {
      return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
    }
  }

  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
  if (output_columnar_) {
    CHECK_EQ(size_t(1), group_col_widths_.size());
    CHECK_EQ(size_t(1), warp_count);

    offset = align_to_int64(entry_count_ * chosen_bytes);

    offset += bin * (next_chosen_bytes - chosen_bytes);
    return offset;
  }

  if (next_chosen_bytes == sizeof(int64_t)) {
    return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
  } else {
    return chosen_bytes;
  }
}

size_t QueryMemoryDescriptor::getNextColOffInBytesRowOnly(const int8_t* col_ptr,
                                                          const size_t col_idx) const {
  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
  const auto total_slot_count = getSlotCount();
  if (col_idx + 1 == total_slot_count) {
    return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
  }

  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);

  if (next_chosen_bytes == sizeof(int64_t)) {
    return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
  } else {
    return chosen_bytes;
  }
}

size_t QueryMemoryDescriptor::getBufferSizeBytes(
    const RelAlgExecutionUnit& ra_exe_unit,
    const unsigned thread_count,
    const ExecutorDeviceType device_type) const {
  if (use_streaming_top_n_) {
    const size_t n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
    return streaming_top_n::get_heap_size(getRowSize(), n, thread_count);
  }
  return getBufferSizeBytes(device_type, entry_count_);
}
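
// Illustration (editorial note, not in the original source): for a streaming
// top-n query with LIMIT 100 and no offset, the buffer is sized as
// streaming_top_n::get_heap_size(getRowSize(), 100, thread_count) -- one
// fixed-capacity heap of n rows per thread -- rather than entry_count_ full
// entries.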

/*
 * Returns total amount of output buffer memory for each device (CPU/GPU)
 *
 * Columnar:
 *  if projection: it returns index buffer + columnar buffer (all non-lazy columns)
 *  if group by: it returns the amount required for each group column (assumes 64-bit
 * per group) + columnar buffer (all involved agg columns)
 *
 * Row-wise:
 *  returns required memory per row multiplied by number of entries
 */
size_t QueryMemoryDescriptor::getBufferSizeBytes(const ExecutorDeviceType device_type,
                                                 const size_t entry_count) const {
  if (keyless_hash_ && !output_columnar_) {
    CHECK_GE(group_col_widths_.size(), size_t(1));
    auto row_bytes = align_to_int64(getColsSize());

    return (interleavedBins(device_type) ? executor_->warpSize() : 1) * entry_count *
           row_bytes;
  }

  constexpr size_t row_index_width = sizeof(int64_t);
  size_t total_bytes{0};
  if (output_columnar_) {
    total_bytes = (query_desc_type_ == QueryDescriptionType::Projection
                       ? row_index_width * entry_count
                       : sizeof(int64_t) * group_col_widths_.size() * entry_count) +
                  getTotalBytesOfColumnarBuffers(entry_count);
  } else {
    total_bytes = getRowSize() * entry_count;
  }

  return total_bytes;
}

size_t QueryMemoryDescriptor::getBufferSizeBytes(
    const ExecutorDeviceType device_type) const {
  return getBufferSizeBytes(device_type, entry_count_);
}

void QueryMemoryDescriptor::setOutputColumnar(const bool val) {
  output_columnar_ = val;
  if (isLogicalSizedColumnsAllowed()) {
    col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
  }
}

/*
 * Indicates the query types that are currently allowed to use the logical
 * sized columns instead of padded sized ones.
 */
bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed() const {
  // In distributed mode, result sets are serialized using rowwise iterators, so we use
  // consistent slot widths for now
  return output_columnar_ && !g_cluster &&
         (query_desc_type_ == QueryDescriptionType::Projection);
}

size_t QueryMemoryDescriptor::getBufferColSlotCount() const {
  size_t total_slot_count = col_slot_context_.getSlotCount();

  if (target_groupby_indices_.empty()) {
    return total_slot_count;
  }
  return total_slot_count - std::count_if(target_groupby_indices_.begin(),
                                          target_groupby_indices_.end(),
                                          [](const int64_t i) { return i >= 0; });
}

bool QueryMemoryDescriptor::usesGetGroupValueFast() const {
  return (query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
          getGroupbyColCount() == 1);
}

bool QueryMemoryDescriptor::threadsShareMemory() const {
  return query_desc_type_ != QueryDescriptionType::NonGroupedAggregate;
}

bool QueryMemoryDescriptor::blocksShareMemory() const {
  if (g_cluster || is_table_function_) {
    return true;
  }
  if (!countDescriptorsLogicallyEmpty(count_distinct_descriptors_)) {
    return true;
  }
  if (executor_->isCPUOnly() || render_output_ ||
      query_desc_type_ == QueryDescriptionType::GroupByBaselineHash ||
      query_desc_type_ == QueryDescriptionType::Projection ||
      (query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
       getGroupbyColCount() > 1)) {
    return true;
  }
  return query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
         many_entries(max_val_, min_val_, bucket_);
}

bool QueryMemoryDescriptor::lazyInitGroups(const ExecutorDeviceType device_type) const {
  return device_type == ExecutorDeviceType::GPU && !render_output_ &&
         countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
}

bool QueryMemoryDescriptor::interleavedBins(const ExecutorDeviceType device_type) const {
  return interleaved_bins_on_gpu_ && device_type == ExecutorDeviceType::GPU;
}

// TODO(Saman): an implementation detail, so move this out of QMD
bool QueryMemoryDescriptor::isWarpSyncRequired(
    const ExecutorDeviceType device_type) const {
  if (device_type != ExecutorDeviceType::GPU) {
    return false;
  } else {
    auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
    CHECK(cuda_mgr);
    return cuda_mgr->isArchVoltaOrGreaterForAll();
  }
}

size_t QueryMemoryDescriptor::getColCount() const {
  return col_slot_context_.getColCount();
}

size_t QueryMemoryDescriptor::getSlotCount() const {
  return col_slot_context_.getSlotCount();
}

const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes(const size_t slot_idx) const {
  return col_slot_context_.getSlotInfo(slot_idx).padded_size;
}

const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes(
    const size_t slot_idx) const {
  return col_slot_context_.getSlotInfo(slot_idx).logical_size;
}

const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol(
    const size_t col_idx) const {
  const auto& col_slots = col_slot_context_.getSlotsForCol(col_idx);
  CHECK_EQ(col_slots.size(), size_t(1));
  return col_slots.front();
}

void QueryMemoryDescriptor::useConsistentSlotWidthSize(const int8_t slot_width_size) {
  col_slot_context_.setAllSlotsSize(slot_width_size);
}

size_t QueryMemoryDescriptor::getRowWidth() const {
  // Note: Actual row size may include padding (see ResultSetBufferAccessors.h)
  return col_slot_context_.getAllSlotsPaddedSize();
}

int8_t QueryMemoryDescriptor::updateActualMinByteWidth(
    const int8_t actual_min_byte_width) const {
  return col_slot_context_.getMinPaddedByteSize(actual_min_byte_width);
}

void QueryMemoryDescriptor::addColSlotInfo(
    const std::vector<std::tuple<int8_t, int8_t>>& slots_for_col) {
  col_slot_context_.addColumn(slots_for_col);
}

void QueryMemoryDescriptor::clearSlotInfo() {
  col_slot_context_.clear();
}

void QueryMemoryDescriptor::alignPaddedSlots() {
  col_slot_context_.alignPaddedSlots(sortOnGpu());
}

bool QueryMemoryDescriptor::canOutputColumnar() const {
  return usesGetGroupValueFast() && threadsShareMemory() && blocksShareMemory() &&
         !interleavedBins(ExecutorDeviceType::GPU) &&
         countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
}

std::string QueryMemoryDescriptor::queryDescTypeToString() const {
  switch (query_desc_type_) {
    case QueryDescriptionType::GroupByPerfectHash:
      return "Perfect Hash";
    case QueryDescriptionType::GroupByBaselineHash:
      return "Baseline Hash";
    case QueryDescriptionType::Projection:
      return "Projection";
    case QueryDescriptionType::NonGroupedAggregate:
      return "Non-grouped Aggregate";
    case QueryDescriptionType::Estimator:
      return "Estimator";
    default:
      UNREACHABLE();
  }
  return "";
}

std::string QueryMemoryDescriptor::toString() const {
  auto str = reductionKey();
  str += "\tAllow Multifrag: " + ::toString(allow_multifrag_) + "\n";
  str += "\tInterleaved Bins on GPU: " + ::toString(interleaved_bins_on_gpu_) + "\n";
  str += "\tBlocks Share Memory: " + ::toString(blocksShareMemory()) + "\n";
  str += "\tThreads Share Memory: " + ::toString(threadsShareMemory()) + "\n";
  str += "\tUses Fast Group Values: " + ::toString(usesGetGroupValueFast()) + "\n";
  str +=
      "\tLazy Init Groups (GPU): " + ::toString(lazyInitGroups(ExecutorDeviceType::GPU)) +
      "\n";
  str += "\tEntry Count: " + std::to_string(entry_count_) + "\n";
  str += "\tMin Val (perfect hash only): " + std::to_string(min_val_) + "\n";
  str += "\tMax Val (perfect hash only): " + std::to_string(max_val_) + "\n";
  str += "\tBucket Val (perfect hash only): " + std::to_string(bucket_) + "\n";
  str += "\tSort on GPU: " + ::toString(sort_on_gpu_) + "\n";
  str += "\tUse Streaming Top N: " + ::toString(use_streaming_top_n_) + "\n";
  str += "\tOutput Columnar: " + ::toString(output_columnar_) + "\n";
  str += "\tRender Output: " + ::toString(render_output_) + "\n";
  str += "\tUse Baseline Sort: " + ::toString(must_use_baseline_sort_) + "\n";
  return str;
}

std::string QueryMemoryDescriptor::reductionKey() const {
  std::string str;
  str += "Query Memory Descriptor State\n";
  str += "\tQuery Type: " + queryDescTypeToString() + "\n";
  str +=
      "\tKeyless Hash: " + ::toString(keyless_hash_) +
      (keyless_hash_ ? ", target index for key: " + std::to_string(getTargetIdxForKey())
                     : "") +
      "\n";
  str += "\tEffective key width: " + std::to_string(getEffectiveKeyWidth()) + "\n";
  str += "\tNumber of group columns: " + std::to_string(getGroupbyColCount()) + "\n";
  const auto group_indices_size = targetGroupbyIndicesSize();
  if (group_indices_size) {
    std::vector<std::string> group_indices_strings;
    for (size_t target_idx = 0; target_idx < group_indices_size; ++target_idx) {
      group_indices_strings.push_back(std::to_string(getTargetGroupbyIndex(target_idx)));
    }
    str += "\tTarget group by indices: " +
           boost::algorithm::join(group_indices_strings, ",") + "\n";
  }
  str += "\t" + col_slot_context_.toString();
  return str;
}

std::vector<TargetInfo> target_exprs_to_infos(
    const std::vector<Analyzer::Expr*>& targets,
    const QueryMemoryDescriptor& query_mem_desc) {
  std::vector<TargetInfo> target_infos;
  for (const auto target_expr : targets) {
    auto target = get_target_info(target_expr, g_bigint_count);
    if (query_mem_desc.getQueryDescriptionType() ==
        QueryDescriptionType::NonGroupedAggregate) {
      set_notnull(target, false);
      target.sql_type.set_notnull(false);
    }
    target_infos.push_back(target);
  }
  return target_infos;
}