OmniSciDB  467d548b97
GroupByAndAggregate.cpp
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "GroupByAndAggregate.h"
18 #include "AggregateUtils.h"
19 
20 #include "CardinalityEstimator.h"
21 #include "CodeGenerator.h"
23 #include "ExpressionRange.h"
24 #include "ExpressionRewrite.h"
25 #include "GpuInitGroups.h"
26 #include "InPlaceSort.h"
28 #include "MaxwellCodegenPatch.h"
30 #include "TargetExprBuilder.h"
31 
32 #include "../CudaMgr/CudaMgr.h"
33 #include "../Shared/checked_alloc.h"
34 #include "../Utils/ChunkIter.h"
36 #include "Execute.h"
37 #include "QueryTemplateGenerator.h"
38 #include "RuntimeFunctions.h"
39 #include "StreamingTopN.h"
40 #include "TopKSort.h"
41 #include "WindowContext.h"
42 
43 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
44 
45 #include <numeric>
46 #include <thread>
47 
48 bool g_cluster{false};
49 bool g_bigint_count{false};
51 extern size_t g_leaf_count;
52 
53 namespace {
54 
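// Counts the number of physical output slots the target expressions need: AVG
// takes two slots (running sum and count), none-encoded strings and arrays take
// two (pointer and length), geometry takes two per physical coordinate column,
// and every other target takes one.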
55 int32_t get_agg_count(const std::vector<Analyzer::Expr*>& target_exprs) {
56  int32_t agg_count{0};
57  for (auto target_expr : target_exprs) {
58  CHECK(target_expr);
59  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
60  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
61  const auto& ti = target_expr->get_type_info();
62  // TODO(pavan): or if is_geometry()
63  if (ti.is_array() || (ti.is_string() && ti.get_compression() == kENCODING_NONE)) {
64  agg_count += 2;
65  } else if (ti.is_geometry()) {
66  agg_count += ti.get_physical_coord_cols() * 2;
67  } else {
68  ++agg_count;
69  }
70  continue;
71  }
72  if (agg_expr && agg_expr->get_aggtype() == kAVG) {
73  agg_count += 2;
74  } else {
75  ++agg_count;
76  }
77  }
78  return agg_count;
79 }
80 
81 bool expr_is_rowid(const Analyzer::Expr* expr, const Catalog_Namespace::Catalog& cat) {
82  const auto col = dynamic_cast<const Analyzer::ColumnVar*>(expr);
83  if (!col) {
84  return false;
85  }
86  const auto cd =
87  get_column_descriptor_maybe(col->get_column_id(), col->get_table_id(), cat);
88  if (!cd || !cd->isVirtualCol) {
89  return false;
90  }
91  CHECK_EQ("rowid", cd->columnName);
92  return true;
93 }
94 
95 bool has_count_distinct(const RelAlgExecutionUnit& ra_exe_unit) {
96  for (const auto& target_expr : ra_exe_unit.target_exprs) {
97  const auto agg_info = get_target_info(target_expr, g_bigint_count);
98  if (agg_info.is_agg && is_distinct_target(agg_info)) {
99  return true;
100  }
101  }
102  return false;
103 }
104 
105 bool is_column_range_too_big_for_perfect_hash(const ColRangeInfo& col_range_info,
106  const int64_t max_entry_count) {
107  try {
108  return static_cast<int64_t>(checked_int64_t(col_range_info.max) -
109  checked_int64_t(col_range_info.min)) >= max_entry_count;
110  } catch (...) {
111  return true;
112  }
113 }
114 
115 bool cardinality_estimate_less_than_column_range(const int64_t cardinality_estimate,
116  const ColRangeInfo& col_range_info) {
117  try {
118  // the cardinality estimate is the size of the baseline hash table. further penalize
119  // the baseline hash table by a factor of 2x due to overhead in computing baseline
120  // hash. This has the overall effect of penalizing baseline hash over perfect hash by
121  // 4x; i.e. if the cardinality of the filtered data is less than 25% of the entry
122  // count of the column, we use baseline hash on the filtered set
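 // Example: for a column range of 1,000,000 values, a filtered-cardinality
 // estimate of 200,000 gives 2 * 200,000 < 1,000,000, so baseline hash is
 // chosen; an estimate of 600,000 keeps perfect hash.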
123  return checked_int64_t(cardinality_estimate) * 2 <
124  static_cast<int64_t>(checked_int64_t(col_range_info.max) -
125  checked_int64_t(col_range_info.min));
126  } catch (...) {
127  return false;
128  }
129 }
130 
131 } // namespace
132 
133 ColRangeInfo GroupByAndAggregate::getColRangeInfo() {
134  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
135  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
136  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
137  // can expect this to be true anyway for grouped queries since the precise version
138  // uses significantly more memory.
139  const int64_t baseline_threshold =
140  has_count_distinct(ra_exe_unit_)
141  ? (device_type_ == ExecutorDeviceType::GPU ? Executor::baseline_threshold / 4
142  : Executor::baseline_threshold)
143  : Executor::baseline_threshold;
144  if (ra_exe_unit_.groupby_exprs.size() != 1) {
145  try {
146  checked_int64_t cardinality{1};
147  bool has_nulls{false};
148  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
149  auto col_range_info = getExprRangeInfo(groupby_expr.get());
150  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
151  // going through baseline hash if a non-integer type is encountered
152  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
153  }
154  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
155  CHECK_GE(crt_col_cardinality, 0);
156  cardinality *= crt_col_cardinality;
157  if (col_range_info.has_nulls) {
158  has_nulls = true;
159  }
160  }
161  // For zero or high cardinalities, use baseline layout.
162  if (!cardinality || cardinality > baseline_threshold) {
163  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
164  }
165  return {QueryDescriptionType::GroupByPerfectHash,
166  0,
167  int64_t(cardinality),
168  0,
169  has_nulls};
170  } catch (...) { // overflow when computing cardinality
171  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
172  }
173  }
174  // For single column groupby on high timestamps, force baseline hash due to wide ranges
175  // we are likely to encounter when applying quals to the expression range
176  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
177  // the range is small enough
178  if (ra_exe_unit_.groupby_exprs.front() &&
179  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
180  ra_exe_unit_.simple_quals.size() > 0) {
181  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
182  }
183  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
184  if (!ra_exe_unit_.groupby_exprs.front()) {
185  return col_range_info;
186  }
187  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
188  const int64_t col_count =
190  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
192  max_entry_count = std::min(max_entry_count, baseline_threshold);
193  }
194  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
195  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
196  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
197 
198  const bool has_filters =
199  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
200  if (has_filters &&
201  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
202  // if filters are present, we can use the filter to narrow the cardinality of the
203  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
204  // off attempting perfect hash (since we know the range will be made of
205  // monotonically increasing numbers from min to max for dictionary encoded strings)
206  // and failing later due to excessive memory use.
207  // Check the conditions where baseline hash can provide a performance increase and
208  // return baseline hash (potentially forcing an estimator query) as the range type.
209  // Otherwise, return col_range_info which will likely be perfect hash, though could
210  // be baseline from a previous call of this function prior to the estimator query.
211  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
212  // TODO(adb): allow some sorts to pass through this block by centralizing sort
213  // algorithm decision making
214  if (has_count_distinct(ra_exe_unit_) &&
215  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
216  // always use baseline hash for column range too big for perfect hash with count
217  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
218  // hash group by in this case.
219  return {QueryDescriptionType::GroupByBaselineHash,
220  col_range_info.min,
221  col_range_info.max,
222  0,
223  col_range_info.has_nulls};
224  } else {
225  // use original col range for sort
226  return col_range_info;
227  }
228  }
229  // if filters are present and the filtered range is less than the cardinality of
230  // the column, consider baseline hash
231  if (group_cardinality_estimation_ &&
232  cardinality_estimate_less_than_column_range(*group_cardinality_estimation_,
233  col_range_info)) {
234  return {QueryDescriptionType::GroupByBaselineHash,
235  col_range_info.min,
236  col_range_info.max,
237  0,
238  col_range_info.has_nulls};
239  }
240  }
241  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(),
242  *executor_->catalog_)) &&
243  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
244  !col_range_info.bucket) {
245  return {QueryDescriptionType::GroupByBaselineHash,
246  col_range_info.min,
247  col_range_info.max,
248  0,
249  col_range_info.has_nulls};
250  }
251  return col_range_info;
252 }
253 
254 ColRangeInfo GroupByAndAggregate::getExprRangeInfo(const Analyzer::Expr* expr) const {
255  if (!expr) {
256  return {QueryDescriptionType::Projection, 0, 0, 0, false};
257  }
258 
259  const auto expr_range = getExpressionRange(
260  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
261  switch (expr_range.getType()) {
262  case ExpressionRangeType::Integer: {
263  if (expr_range.getIntMin() > expr_range.getIntMax()) {
264  return {
265  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
266  }
267  return {QueryDescriptionType::GroupByPerfectHash,
268  expr_range.getIntMin(),
269  expr_range.getIntMax(),
270  expr_range.getBucket(),
271  expr_range.hasNulls()};
272  }
273  case ExpressionRangeType::Float:
274  case ExpressionRangeType::Double: {
275  if (expr_range.getFpMin() > expr_range.getFpMax()) {
276  return {
277  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
278  }
279  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
280  }
281  case ExpressionRangeType::Invalid:
282  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
283  default:
284  CHECK(false);
285  }
286  CHECK(false);
287  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
288 }
289 
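// Bucketed cardinality of a column range: (max - min) / bucket, plus one slot
// for the inclusive range and one more when the column has nulls.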
290 int64_t GroupByAndAggregate::getBucketedCardinality(const ColRangeInfo& col_range_info) {
291  checked_int64_t crt_col_cardinality =
292  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
293  if (col_range_info.bucket) {
294  crt_col_cardinality /= col_range_info.bucket;
295  }
296  return static_cast<int64_t>(crt_col_cardinality +
297  (1 + (col_range_info.has_nulls ? 1 : 0)));
298 }
299 
300 #define LL_CONTEXT executor_->cgen_state_->context_
301 #define LL_BUILDER executor_->cgen_state_->ir_builder_
302 #define LL_BOOL(v) executor_->cgen_state_->llBool(v)
303 #define LL_INT(v) executor_->cgen_state_->llInt(v)
304 #define LL_FP(v) executor_->cgen_state_->llFp(v)
305 #define ROW_FUNC executor_->cgen_state_->row_func_
306 #define CUR_FUNC executor_->cgen_state_->current_func_
307 
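// The macros above are shorthand for the executor's LLVM codegen state: the
// LLVM context, the IR builder, literal constructors, and the row/current
// function being generated.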
308 GroupByAndAggregate::GroupByAndAggregate(
309  Executor* executor,
310  const ExecutorDeviceType device_type,
311  const RelAlgExecutionUnit& ra_exe_unit,
312  const std::vector<InputTableInfo>& query_infos,
313  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
314  const std::optional<int64_t>& group_cardinality_estimation)
315  : executor_(executor)
316  , ra_exe_unit_(ra_exe_unit)
317  , query_infos_(query_infos)
318  , row_set_mem_owner_(row_set_mem_owner)
319  , device_type_(device_type)
320  , group_cardinality_estimation_(group_cardinality_estimation) {
321  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
322  if (!groupby_expr) {
323  continue;
324  }
325  const auto& groupby_ti = groupby_expr->get_type_info();
326  if (groupby_ti.is_string() && groupby_ti.get_compression() != kENCODING_DICT) {
327  throw std::runtime_error(
328  "Cannot group by string columns which are not dictionary encoded.");
329  }
330  if (groupby_ti.is_array()) {
331  throw std::runtime_error("Group by array not supported");
332  }
333  if (groupby_ti.is_geometry()) {
334  throw std::runtime_error("Group by geometry not supported");
335  }
336  }
337 }
338 
339 int64_t GroupByAndAggregate::getShardedTopBucket(const ColRangeInfo& col_range_info,
340  const size_t shard_count) const {
341  size_t device_count{0};
342  if (device_type_ == ExecutorDeviceType::GPU) {
343  auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
344  CHECK(cuda_mgr);
345  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
346  CHECK_GT(device_count, 0u);
347  }
348 
349  int64_t bucket{col_range_info.bucket};
350 
351  if (shard_count) {
352  CHECK(!col_range_info.bucket);
353  /*
354  when a node has fewer devices than shard count,
355  a) In a distributed setup, the minimum distance between two keys would be
356  device_count because shards are stored consecutively across the physical tables,
357  i.e. if a shard column has values 0 to 9 and there are 3 shards on each leaf, then
358  node 1 would have values 0,1,2,6,7,8 and node 2 would have values 3,4,5,9. If each
359  leaf node has only 1 device, in this case all of a node's keys are loaded on that
360  single device.
361 
362  b) In a single node setup, the distance would be the minimum of device_count and
363  the difference (shard_count - device_count). For example: for a single node server
364  running on 3 devices and a shard column with values 0 to 9 in a table with 4 shards,
365  the device to fragment keys mapping would be: device 1 - 4,8,3,7; device 2 - 1,5,9;
366  device 3 - 2,6. The bucket value would be 4 (shards) - 3 (devices) = 1, i.e. the
367  minimum of device_count and the difference.
368 
369  When a node has device count equal to or more than shard count then the
370  minimum distance is always at least shard_count * no of leaf nodes.
371  */
372  if (device_count < shard_count) {
373  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
374  : std::min(device_count, shard_count - device_count);
375  } else {
376  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
377  }
378  }
379 
380  return bucket;
381 }
382 
383 std::unique_ptr<QueryMemoryDescriptor> GroupByAndAggregate::initQueryMemoryDescriptor(
384  const bool allow_multifrag,
385  const size_t max_groups_buffer_entry_count,
386  const int8_t crt_min_byte_width,
387  RenderInfo* render_info,
388  const bool output_columnar_hint) {
389  const auto shard_count =
390  device_type_ == ExecutorDeviceType::GPU
391  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
392  : 0;
393  bool sort_on_gpu_hint =
394  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
395  !ra_exe_unit_.sort_info.order_entries.empty() &&
396  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries);
397  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
398  // but the total output buffer size would be too big or it's a sharded top query.
399  // For the sake of managing risk, use the new result set way very selectively for
400  // this case only (alongside the baseline layout we've enabled for a while now).
401  bool must_use_baseline_sort = shard_count;
402  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
403  while (true) {
404  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
405  max_groups_buffer_entry_count,
406  crt_min_byte_width,
407  sort_on_gpu_hint,
408  render_info,
409  must_use_baseline_sort,
410  output_columnar_hint);
411  CHECK(query_mem_desc);
412  if (query_mem_desc->sortOnGpu() &&
413  (query_mem_desc->getBufferSizeBytes(device_type_) +
414  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
415  2 * 1024 * 1024 * 1024L) {
416  must_use_baseline_sort = true;
417  sort_on_gpu_hint = false;
418  } else {
419  break;
420  }
421  }
422  return query_mem_desc;
423 }
424 
425 std::unique_ptr<QueryMemoryDescriptor> GroupByAndAggregate::initQueryMemoryDescriptorImpl(
426  const bool allow_multifrag,
427  const size_t max_groups_buffer_entry_count,
428  const int8_t crt_min_byte_width,
429  const bool sort_on_gpu_hint,
430  RenderInfo* render_info,
431  const bool must_use_baseline_sort,
432  const bool output_columnar_hint) {
434 
435  const auto count_distinct_descriptors = initCountDistinctDescriptors();
436 
437  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs, {});
438 
439  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
440 
441  auto col_range_info_nosharding = getColRangeInfo();
442 
443  const auto shard_count =
444  device_type_ == ExecutorDeviceType::GPU
445  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
446  : 0;
447 
448  const auto col_range_info =
449  ColRangeInfo{col_range_info_nosharding.hash_type_,
450  col_range_info_nosharding.min,
451  col_range_info_nosharding.max,
452  getShardedTopBucket(col_range_info_nosharding, shard_count),
453  col_range_info_nosharding.has_nulls};
454 
455  // Non-grouped aggregates do not support accessing aggregated ranges
456  // Keyless hash is currently only supported with single-column perfect hash
457  const auto keyless_info = !(is_group_by && col_range_info.hash_type_ ==
458  QueryDescriptionType::GroupByPerfectHash)
459  ? KeylessInfo{false, -1}
460  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
461 
462  if (g_enable_watchdog &&
463  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
464  max_groups_buffer_entry_count > 120000000) ||
465  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
466  ra_exe_unit_.groupby_exprs.size() == 1 &&
467  (col_range_info.max - col_range_info.min) /
468  std::max(col_range_info.bucket, int64_t(1)) >
469  130000000))) {
470  throw WatchdogException("Query would use too much memory");
471  }
472  try {
473  return QueryMemoryDescriptor::init(executor_,
474  ra_exe_unit_,
475  query_infos_,
476  col_range_info,
477  keyless_info,
478  allow_multifrag,
479  device_type_,
480  crt_min_byte_width,
481  sort_on_gpu_hint,
482  shard_count,
483  max_groups_buffer_entry_count,
484  render_info,
485  count_distinct_descriptors,
486  must_use_baseline_sort,
487  output_columnar_hint,
488  /*streaming_top_n_hint=*/true);
489  } catch (const StreamingTopNOOM& e) {
490  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
491  return QueryMemoryDescriptor::init(executor_,
492  ra_exe_unit_,
493  query_infos_,
494  col_range_info,
495  keyless_info,
496  allow_multifrag,
497  device_type_,
498  crt_min_byte_width,
499  sort_on_gpu_hint,
500  shard_count,
501  max_groups_buffer_entry_count,
502  render_info,
503  count_distinct_descriptors,
504  must_use_baseline_sort,
505  output_columnar_hint,
506  /*streaming_top_n_hint=*/false);
507  }
508 }
509 
510 void GroupByAndAggregate::addTransientStringLiterals() {
511  addTransientStringLiterals(ra_exe_unit_, executor_, row_set_mem_owner_);
512 }
513 
514 namespace {
515 
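// Walks an expression and registers any transient string literals it produces
// (a CAST of a literal to a dictionary-encoded string, or string literals in
// the domain of a CASE) with the executor's string dictionary proxy so they can
// be resolved to ids at execution time.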
516 void add_transient_string_literals_for_expression(
517  const Analyzer::Expr* expr,
518  Executor* executor,
519  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
520  if (!expr) {
521  return;
522  }
523 
524  const auto array_expr = dynamic_cast<const Analyzer::ArrayExpr*>(expr);
525  if (array_expr) {
526  for (size_t i = 0; i < array_expr->getElementCount(); i++) {
527  add_transient_string_literals_for_expression(
528  array_expr->getElement(i), executor, row_set_mem_owner);
529  }
530  return;
531  }
532 
533  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr);
534  const auto& expr_ti = expr->get_type_info();
535  if (cast_expr && cast_expr->get_optype() == kCAST && expr_ti.is_string()) {
536  CHECK_EQ(kENCODING_DICT, expr_ti.get_compression());
537  auto sdp = executor->getStringDictionaryProxy(
538  expr_ti.get_comp_param(), row_set_mem_owner, true);
539  CHECK(sdp);
540  const auto str_lit_expr =
541  dynamic_cast<const Analyzer::Constant*>(cast_expr->get_operand());
542  if (str_lit_expr && str_lit_expr->get_constval().stringval) {
543  sdp->getOrAddTransient(*str_lit_expr->get_constval().stringval);
544  }
545  return;
546  }
547  const auto case_expr = dynamic_cast<const Analyzer::CaseExpr*>(expr);
548  if (!case_expr) {
549  return;
550  }
551  Analyzer::DomainSet domain_set;
552  case_expr->get_domain(domain_set);
553  if (domain_set.empty()) {
554  return;
555  }
556  if (expr_ti.is_string()) {
557  CHECK_EQ(kENCODING_DICT, expr_ti.get_compression());
558  auto sdp = executor->getStringDictionaryProxy(
559  expr_ti.get_comp_param(), row_set_mem_owner, true);
560  CHECK(sdp);
561  for (const auto domain_expr : domain_set) {
562  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(domain_expr);
563  const auto str_lit_expr =
564  cast_expr && cast_expr->get_optype() == kCAST
565  ? dynamic_cast<const Analyzer::Constant*>(cast_expr->get_operand())
566  : dynamic_cast<const Analyzer::Constant*>(domain_expr);
567  if (str_lit_expr && str_lit_expr->get_constval().stringval) {
568  sdp->getOrAddTransient(*str_lit_expr->get_constval().stringval);
569  }
570  }
571  }
572 }
573 
574 } // namespace
575 
576 void GroupByAndAggregate::addTransientStringLiterals(
577  const RelAlgExecutionUnit& ra_exe_unit,
578  Executor* executor,
579  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
580  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
581  add_transient_string_literals_for_expression(
582  group_expr.get(), executor, row_set_mem_owner);
583  }
584  for (const auto target_expr : ra_exe_unit.target_exprs) {
585  const auto& target_type = target_expr->get_type_info();
586  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
587  continue;
588  }
589  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
590  if (agg_expr) {
591  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
592  agg_expr->get_aggtype() == kSAMPLE) {
593  add_transient_string_literals_for_expression(
594  agg_expr->get_arg(), executor, row_set_mem_owner);
595  }
596  } else {
597  add_transient_string_literals_for_expression(
598  target_expr, executor, row_set_mem_owner);
599  }
600  }
601  row_set_mem_owner->addLiteralStringDictProxy(executor->lit_str_dict_proxy_);
602 }
603 
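// Picks a COUNT(DISTINCT) / APPROX_COUNT_DISTINCT implementation per target:
// a bitmap (or HLL registers for the approximate variant) when the argument has
// a usable integer range, falling back to a std::set based implementation
// otherwise; the watchdog rejects that slow fallback.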
604 CountDistinctDescriptors GroupByAndAggregate::initCountDistinctDescriptors() {
605  CountDistinctDescriptors count_distinct_descriptors;
606  for (const auto target_expr : ra_exe_unit_.target_exprs) {
607  auto agg_info = get_target_info(target_expr, g_bigint_count);
608  if (is_distinct_target(agg_info)) {
609  CHECK(agg_info.is_agg);
610  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
611  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
612  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
613  if (arg_ti.is_string() && arg_ti.get_compression() != kENCODING_DICT) {
614  throw std::runtime_error(
615  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
616  }
617  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_array()) {
618  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
619  }
620  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
621  throw std::runtime_error(
622  "APPROX_COUNT_DISTINCT on geometry columns not supported");
623  }
624  if (agg_info.is_distinct && arg_ti.is_geometry()) {
625  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
626  }
627  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
628  auto arg_range_info =
629  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
630  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
631  int64_t bitmap_sz_bits{0};
632  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
633  const auto error_rate = agg_expr->get_error_rate();
634  if (error_rate) {
635  CHECK(error_rate->get_type_info().get_type() == kINT);
636  CHECK_GE(error_rate->get_constval().intval, 1);
637  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
638  } else {
639  bitmap_sz_bits = g_hll_precision_bits;
640  }
641  }
642  if (arg_range_info.isEmpty()) {
643  count_distinct_descriptors.emplace_back(
644  CountDistinctDescriptor{CountDistinctImplType::Bitmap,
645  0,
646  64,
647  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
648  device_type_,
649  1});
650  continue;
651  }
652  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
653  !(arg_ti.is_array() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
654  // implementation for arrays
655  count_distinct_impl_type = CountDistinctImplType::Bitmap;
656  if (agg_info.agg_kind == kCOUNT) {
657  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
658  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
659  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
660  count_distinct_impl_type = CountDistinctImplType::StdSet;
661  }
662  }
663  }
664  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
665  count_distinct_impl_type == CountDistinctImplType::StdSet &&
666  !(arg_ti.is_array() || arg_ti.is_geometry())) {
667  count_distinct_impl_type = CountDistinctImplType::Bitmap;
668  }
669 
670  if (g_enable_watchdog && !(arg_range_info.isEmpty()) &&
671  count_distinct_impl_type == CountDistinctImplType::StdSet) {
672  throw WatchdogException("Cannot use a fast path for COUNT distinct");
673  }
674  const auto sub_bitmap_count =
675  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit_, device_type_);
676  count_distinct_descriptors.emplace_back(
677  CountDistinctDescriptor{count_distinct_impl_type,
678  arg_range_info.min,
679  bitmap_sz_bits,
680  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
681  device_type_,
682  sub_bitmap_count});
683  } else {
684  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
685  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
686  }
687  }
688  return count_distinct_descriptors;
689 }
690 
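// Decides whether the group-by buffer can be "keyless": if the initial value of
// one of the aggregate slots can never be produced by an actual aggregate
// update (judged from the argument's expression range and nullability), that
// slot doubles as the empty-entry marker and no explicit group key needs to be
// stored. Returns the flag together with the index of that aggregate slot.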
700 KeylessInfo GroupByAndAggregate::getKeylessInfo(
701  const std::vector<Analyzer::Expr*>& target_expr_list,
702  const bool is_group_by) const {
703  bool keyless{true}, found{false};
704  int32_t num_agg_expr{0};
705  int32_t index{0};
706  for (const auto target_expr : target_expr_list) {
707  const auto agg_info = get_target_info(target_expr, g_bigint_count);
708  const auto chosen_type = get_compact_type(agg_info);
709  if (agg_info.is_agg) {
710  num_agg_expr++;
711  }
712  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
713  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
714  CHECK(agg_expr);
715  const auto arg_expr = agg_arg(target_expr);
716  const bool float_argument_input = takes_float_argument(agg_info);
717  switch (agg_info.agg_kind) {
718  case kAVG:
719  ++index;
720  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
721  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
722  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
723  expr_range_info.hasNulls()) {
724  break;
725  }
726  }
727  found = true;
728  break;
729  case kCOUNT:
730  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
731  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
732  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
733  expr_range_info.hasNulls()) {
734  break;
735  }
736  }
737  found = true;
738  break;
739  case kSUM: {
740  auto arg_ti = arg_expr->get_type_info();
741  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
742  arg_ti.set_notnull(true);
743  }
744  if (!arg_ti.get_notnull()) {
745  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
746  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
747  !expr_range_info.hasNulls()) {
748  found = true;
749  }
750  } else {
751  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
752  switch (expr_range_info.getType()) {
753  case ExpressionRangeType::Float:
754  case ExpressionRangeType::Double:
755  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
756  found = true;
757  }
758  break;
759  case ExpressionRangeType::Integer:
760  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
761  found = true;
762  }
763  break;
764  default:
765  break;
766  }
767  }
768  break;
769  }
770  case kMIN: {
771  CHECK(agg_expr && agg_expr->get_arg());
772  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
773  if (arg_ti.is_string() || arg_ti.is_array()) {
774  break;
775  }
776  auto expr_range_info =
777  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
778  auto init_max = get_agg_initial_val(agg_info.agg_kind,
779  chosen_type,
780  is_group_by || float_argument_input,
781  float_argument_input ? sizeof(float) : 8);
782  switch (expr_range_info.getType()) {
783  case ExpressionRangeType::Float:
784  case ExpressionRangeType::Double: {
785  auto double_max =
786  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
787  if (expr_range_info.getFpMax() < double_max) {
788  found = true;
789  }
790  break;
791  }
792  case ExpressionRangeType::Integer:
793  if (expr_range_info.getIntMax() < init_max) {
794  found = true;
795  }
796  break;
797  default:
798  break;
799  }
800  break;
801  }
802  case kMAX: {
803  CHECK(agg_expr && agg_expr->get_arg());
804  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
805  if (arg_ti.is_string() || arg_ti.is_array()) {
806  break;
807  }
808  auto expr_range_info =
809  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
810  // NULL sentinel and init value for kMAX are identical, which results in
811  // ambiguity in detecting empty keys in presence of nulls.
812  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
813  expr_range_info.hasNulls()) {
814  break;
815  }
816  auto init_min = get_agg_initial_val(agg_info.agg_kind,
817  chosen_type,
818  is_group_by || float_argument_input,
819  float_argument_input ? sizeof(float) : 8);
820  switch (expr_range_info.getType()) {
821  case ExpressionRangeType::Float:
822  case ExpressionRangeType::Double: {
823  auto double_min =
824  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
825  if (expr_range_info.getFpMin() > double_min) {
826  found = true;
827  }
828  break;
829  }
830  case ExpressionRangeType::Integer:
831  if (expr_range_info.getIntMin() > init_min) {
832  found = true;
833  }
834  break;
835  default:
836  break;
837  }
838  break;
839  }
840  default:
841  keyless = false;
842  break;
843  }
844  }
845  if (!keyless) {
846  break;
847  }
848  if (!found) {
849  ++index;
850  }
851  }
852 
853  // shouldn't use keyless for projection only
854  return {
855  keyless && found,
856  index,
857  };
858 }
859 
860 bool GroupByAndAggregate::gpuCanHandleOrderEntries(
861  const std::list<Analyzer::OrderEntry>& order_entries) {
862  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
863  return false;
864  }
865  for (const auto& order_entry : order_entries) {
866  CHECK_GE(order_entry.tle_no, 1);
867  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
868  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
869  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
870  return false;
871  }
872  // TODO(alex): relax the restrictions
873  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
874  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
875  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
876  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
877  return false;
878  }
879  if (agg_expr->get_arg()) {
880  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
881  if (arg_ti.is_fp()) {
882  return false;
883  }
884  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
885  // TODO(adb): QMD not actually initialized here?
886  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
887  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
888  expr_range_info.has_nulls) &&
889  order_entry.is_desc == order_entry.nulls_first) {
890  return false;
891  }
892  }
893  const auto& target_ti = target_expr->get_type_info();
894  CHECK(!target_ti.is_array());
895  if (!target_ti.is_integer()) {
896  return false;
897  }
898  }
899  return true;
900 }
901 
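// DiamondCodegen emits the classic if/else "diamond" control flow: the
// constructor creates the true/false basic blocks, branches on the condition,
// and positions the IR builder in the true block; the destructor wires the
// blocks back together, optionally chaining to the parent's false edge.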
902 GroupByAndAggregate::DiamondCodegen::DiamondCodegen(
903  llvm::Value* cond,
904  Executor* executor,
905  const bool chain_to_next,
906  const std::string& label_prefix,
907  DiamondCodegen* parent,
908  const bool share_false_edge_with_parent)
909  : executor_(executor), chain_to_next_(chain_to_next), parent_(parent) {
910  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
911  if (parent_) {
912  CHECK(!chain_to_next_);
913  }
914  cond_true_ = llvm::BasicBlock::Create(LL_CONTEXT, label_prefix + "_true", CUR_FUNC);
915  if (share_false_edge_with_parent) {
916  CHECK(parent);
917  orig_cond_false_ = cond_false_ = parent_->cond_false_;
918  } else {
919  cond_false_ = orig_cond_false_ =
920  llvm::BasicBlock::Create(LL_CONTEXT, label_prefix + "_false", CUR_FUNC);
921  }
922 
923  LL_BUILDER.CreateCondBr(cond, cond_true_, cond_false_);
924  LL_BUILDER.SetInsertPoint(cond_true_);
925 }
926 
927 void GroupByAndAggregate::DiamondCodegen::setChainToNext() {
928  CHECK(!parent_);
929  chain_to_next_ = true;
930 }
931 
932 void GroupByAndAggregate::DiamondCodegen::setFalseTarget(llvm::BasicBlock* cond_false) {
933  CHECK(!parent_ || orig_cond_false_ != parent_->cond_false_);
934  cond_false_ = cond_false;
935 }
936 
937 GroupByAndAggregate::DiamondCodegen::~DiamondCodegen() {
938  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
939  if (parent_ && orig_cond_false_ != parent_->cond_false_) {
940  LL_BUILDER.CreateBr(parent_->cond_false_);
941  } else if (chain_to_next_) {
942  LL_BUILDER.CreateBr(cond_false_);
943  }
944  if (!parent_ || (!chain_to_next_ && cond_false_ != parent_->cond_false_)) {
945  LL_BUILDER.SetInsertPoint(orig_cond_false_);
946  }
947 }
948 
949 bool GroupByAndAggregate::codegen(llvm::Value* filter_result,
950  llvm::BasicBlock* sc_false,
951  QueryMemoryDescriptor& query_mem_desc,
952  const CompilationOptions& co,
953  const GpuSharedMemoryContext& gpu_smem_context) {
954  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
955  CHECK(filter_result);
956 
957  bool can_return_error = false;
958  llvm::BasicBlock* filter_false{nullptr};
959 
960  {
961  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
962 
963  if (executor_->isArchMaxwell(co.device_type)) {
964  executor_->prependForceSync();
965  }
966  DiamondCodegen filter_cfg(filter_result,
967  executor_,
968  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
969  "filter", // filter_true and filter_false basic blocks
970  nullptr,
971  false);
972  filter_false = filter_cfg.cond_false_;
973 
974  if (is_group_by) {
975  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
976  !query_mem_desc.useStreamingTopN()) {
977  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
978  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
979  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
980  llvm::Value* old_total_matched_val{nullptr};
981  if (query_mem_desc.threadsShareMemory()) {
982  old_total_matched_val =
983  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
984  total_matched_ptr,
985  LL_INT(int32_t(1)),
986  llvm::AtomicOrdering::Monotonic);
987  } else {
988  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
989  LL_BUILDER.CreateStore(
990  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
991  total_matched_ptr);
992  }
993  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
994  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
995  }
996 
997  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
998  if (query_mem_desc.usesGetGroupValueFast() ||
999  query_mem_desc.getQueryDescriptionType() ==
1000  QueryDescriptionType::GroupByPerfectHash) {
1001  if (query_mem_desc.getGroupbyColCount() > 1) {
1002  filter_cfg.setChainToNext();
1003  }
1004  // Don't generate null checks if the group slot is guaranteed to be non-null,
1005  // as is the case for the get_group_value_fast* family.
1006  can_return_error = codegenAggCalls(
1007  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
1008  } else {
1009  {
1010  llvm::Value* nullcheck_cond{nullptr};
1011  if (query_mem_desc.didOutputColumnar()) {
1012  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
1013  LL_INT(int32_t(0)));
1014  } else {
1015  nullcheck_cond = LL_BUILDER.CreateICmpNE(
1016  std::get<0>(agg_out_ptr_w_idx),
1017  llvm::ConstantPointerNull::get(
1018  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
1019  }
1020  DiamondCodegen nullcheck_cfg(
1021  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
1022  codegenAggCalls(
1023  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
1024  }
1025  can_return_error = true;
1026  if (query_mem_desc.getQueryDescriptionType() ==
1027  QueryDescriptionType::Projection &&
1028  query_mem_desc.useStreamingTopN()) {
1029  // Ignore rejection on pushing current row to top-K heap.
1030  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
1031  } else {
1032  CodeGenerator code_generator(executor_);
1033  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
1034  // TODO(alex): remove the trunc once pos is converted to 32 bits
1035  code_generator.posArg(nullptr),
1036  get_int_type(32, LL_CONTEXT))));
1037  }
1038  }
1039  } else {
1040  if (ra_exe_unit_.estimator) {
1041  std::stack<llvm::BasicBlock*> array_loops;
1042  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
1043  } else {
1044  auto arg_it = ROW_FUNC->arg_begin();
1045  std::vector<llvm::Value*> agg_out_vec;
1046  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1047  agg_out_vec.push_back(&*arg_it++);
1048  }
1049  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1050  agg_out_vec,
1051  query_mem_desc,
1052  co,
1053  gpu_smem_context,
1054  filter_cfg);
1055  }
1056  }
1057  }
1058 
1059  if (ra_exe_unit_.join_quals.empty()) {
1060  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1061  } else if (sc_false) {
1062  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1063  LL_BUILDER.SetInsertPoint(sc_false);
1064  LL_BUILDER.CreateBr(filter_false);
1065  LL_BUILDER.SetInsertPoint(saved_insert_block);
1066  }
1067 
1068  return can_return_error;
1069 }
1070 
1071 llvm::Value* GroupByAndAggregate::codegenOutputSlot(
1072  llvm::Value* groups_buffer,
1073  const QueryMemoryDescriptor& query_mem_desc,
1074  const CompilationOptions& co,
1075  DiamondCodegen& diamond_codegen) {
1076  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1077  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1078  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1079  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1080  CHECK(!group_expr);
1081  if (!query_mem_desc.didOutputColumnar()) {
1082  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1083  }
1084  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1085  ? 0
1086  : query_mem_desc.getRowSize() / sizeof(int64_t);
1087  CodeGenerator code_generator(executor_);
1088  if (query_mem_desc.useStreamingTopN()) {
1089  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1090  CHECK_GE(only_order_entry.tle_no, int(1));
1091  const size_t target_idx = only_order_entry.tle_no - 1;
1092  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1093  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1094  const auto chosen_bytes =
1095  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1096  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1097  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1098  const uint32_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1099  std::string fname = "get_bin_from_k_heap";
1100  const auto& oe_ti = order_entry_expr->get_type_info();
1101  llvm::Value* null_key_lv = nullptr;
1102  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1103  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1104  switch (bit_width) {
1105  case 32:
1106  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1107  break;
1108  case 64:
1109  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1110  break;
1111  default:
1112  CHECK(false);
1113  }
1114  fname += "_int" + std::to_string(bit_width) + "_t";
1115  } else {
1116  CHECK(oe_ti.is_fp());
1117  if (order_entry_lv->getType()->isDoubleTy()) {
1118  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1119  } else {
1120  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1121  }
1122  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1123  }
1124  const auto key_slot_idx =
1125  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1126  return emitCall(
1127  fname,
1128  {groups_buffer,
1129  LL_INT(n),
1130  LL_INT(row_size_quad),
1131  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1132  LL_BOOL(only_order_entry.is_desc),
1133  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1134  LL_BOOL(only_order_entry.nulls_first),
1135  null_key_lv,
1136  order_entry_lv});
1137  } else {
1138  llvm::Value* output_buffer_entry_count_lv{nullptr};
1140  output_buffer_entry_count_lv =
1141  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1142  CHECK(output_buffer_entry_count_lv);
1143  }
1144  const auto group_expr_lv =
1145  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1146  std::vector<llvm::Value*> args{
1147  groups_buffer,
1148  output_buffer_entry_count_lv
1149  ? output_buffer_entry_count_lv
1150  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1151  group_expr_lv,
1152  code_generator.posArg(nullptr)};
1153  if (query_mem_desc.didOutputColumnar()) {
1154  const auto columnar_output_offset =
1155  emitCall("get_columnar_scan_output_offset", args);
1156  return columnar_output_offset;
1157  }
1158  args.push_back(LL_INT(row_size_quad));
1159  return emitCall("get_scan_output_slot", args);
1160  }
1161 }
1162 
1163 std::tuple<llvm::Value*, llvm::Value*> GroupByAndAggregate::codegenGroupBy(
1164  const QueryMemoryDescriptor& query_mem_desc,
1165  const CompilationOptions& co,
1166  DiamondCodegen& diamond_codegen) {
1167  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1168  auto arg_it = ROW_FUNC->arg_begin();
1169  auto groups_buffer = arg_it++;
1170 
1171  std::stack<llvm::BasicBlock*> array_loops;
1172 
1173  // TODO(Saman): move this logic outside of this function.
1174  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1175  if (query_mem_desc.didOutputColumnar()) {
1176  return std::make_tuple(
1177  &*groups_buffer,
1178  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1179  } else {
1180  return std::make_tuple(
1181  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1182  nullptr);
1183  }
1184  }
1185 
1186  CHECK(query_mem_desc.getQueryDescriptionType() ==
1187  QueryDescriptionType::GroupByBaselineHash ||
1188  query_mem_desc.getQueryDescriptionType() ==
1189  QueryDescriptionType::GroupByPerfectHash);
1190 
1191  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1192  ? 0
1193  : query_mem_desc.getRowSize() / sizeof(int64_t);
1194 
1195  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1196  ? sizeof(int64_t)
1197  : query_mem_desc.getEffectiveKeyWidth();
1198  // for multi-column group by
1199  llvm::Value* group_key = nullptr;
1200  llvm::Value* key_size_lv = nullptr;
1201 
1202  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1203  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1204  if (query_mem_desc.getQueryDescriptionType() ==
1205  QueryDescriptionType::GroupByPerfectHash) {
1206  group_key =
1207  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1208  } else if (query_mem_desc.getQueryDescriptionType() ==
1209  QueryDescriptionType::GroupByBaselineHash) {
1210  group_key =
1211  col_width_size == sizeof(int32_t)
1212  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1213  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1214  }
1215  CHECK(group_key);
1216  CHECK(key_size_lv);
1217  }
1218 
1219  int32_t subkey_idx = 0;
1220  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1221  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1222  const auto col_range_info = getExprRangeInfo(group_expr.get());
1223  const auto translated_null_value = static_cast<int64_t>(
1224  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1225  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1226  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1227  : checked_int64_t(col_range_info.max) +
1228  (col_range_info.bucket ? col_range_info.bucket : 1));
1229 
1230  const bool col_has_nulls =
1231  query_mem_desc.getQueryDescriptionType() ==
1232  QueryDescriptionType::GroupByPerfectHash
1233  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1234  ? query_mem_desc.hasNulls()
1235  : col_range_info.has_nulls)
1236  : false;
1237 
1238  const auto group_expr_lvs =
1239  executor_->groupByColumnCodegen(group_expr.get(),
1240  col_width_size,
1241  co,
1242  col_has_nulls,
1243  translated_null_value,
1244  diamond_codegen,
1245  array_loops,
1246  query_mem_desc.threadsShareMemory());
1247  const auto group_expr_lv = group_expr_lvs.translated_value;
1248  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1249  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1250  return codegenSingleColumnPerfectHash(query_mem_desc,
1251  co,
1252  &*groups_buffer,
1253  group_expr_lv,
1254  group_expr_lvs.original_value,
1255  row_size_quad);
1256  } else {
1257  // store the sub-key to the buffer
1258  LL_BUILDER.CreateStore(group_expr_lv,
1259  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1260  }
1261  }
1262  if (query_mem_desc.getQueryDescriptionType() ==
1263  QueryDescriptionType::GroupByPerfectHash) {
1264  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1265  return codegenMultiColumnPerfectHash(
1266  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1267  } else if (query_mem_desc.getQueryDescriptionType() ==
1268  QueryDescriptionType::GroupByBaselineHash) {
1269  return codegenMultiColumnBaselineHash(co,
1270  &*groups_buffer,
1271  group_key,
1272  key_size_lv,
1273  query_mem_desc,
1274  col_width_size,
1275  row_size_quad);
1276  }
1277  CHECK(false);
1278  return std::make_tuple(nullptr, nullptr);
1279 }
1280 
1281 std::tuple<llvm::Value*, llvm::Value*>
1282 GroupByAndAggregate::codegenSingleColumnPerfectHash(
1283  const QueryMemoryDescriptor& query_mem_desc,
1284  const CompilationOptions& co,
1285  llvm::Value* groups_buffer,
1286  llvm::Value* group_expr_lv_translated,
1287  llvm::Value* group_expr_lv_original,
1288  const int32_t row_size_quad) {
1289  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1290  CHECK(query_mem_desc.usesGetGroupValueFast());
1291  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1292  ? "get_columnar_group_bin_offset"
1293  : "get_group_value_fast"};
1294  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1295  get_group_fn_name += "_keyless";
1296  }
1297  if (query_mem_desc.interleavedBins(co.device_type)) {
1298  CHECK(!query_mem_desc.didOutputColumnar());
1299  CHECK(query_mem_desc.hasKeylessHash());
1300  get_group_fn_name += "_semiprivate";
1301  }
1302  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1303  &*group_expr_lv_translated};
1304  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1305  query_mem_desc.mustUseBaselineSort()) {
1306  get_group_fn_name += "_with_original_key";
1307  get_group_fn_args.push_back(group_expr_lv_original);
1308  }
1309  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1310  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1311  if (!query_mem_desc.hasKeylessHash()) {
1312  if (!query_mem_desc.didOutputColumnar()) {
1313  get_group_fn_args.push_back(LL_INT(row_size_quad));
1314  }
1315  } else {
1316  if (!query_mem_desc.didOutputColumnar()) {
1317  get_group_fn_args.push_back(LL_INT(row_size_quad));
1318  }
1319  if (query_mem_desc.interleavedBins(co.device_type)) {
1320  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1321  get_group_fn_args.push_back(warp_idx);
1322  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1323  }
1324  }
1325  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1326  return std::make_tuple(&*groups_buffer,
1327  emitCall(get_group_fn_name, get_group_fn_args));
1328  }
1329  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1330 }
1331 
1332 std::tuple<llvm::Value*, llvm::Value*> GroupByAndAggregate::codegenMultiColumnPerfectHash(
1333  llvm::Value* groups_buffer,
1334  llvm::Value* group_key,
1335  llvm::Value* key_size_lv,
1336  const QueryMemoryDescriptor& query_mem_desc,
1337  const int32_t row_size_quad) {
1338  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1339  CHECK(query_mem_desc.getQueryDescriptionType() ==
1340  QueryDescriptionType::GroupByPerfectHash);
1341  // compute the index (perfect hash)
1342  auto perfect_hash_func = codegenPerfectHashFunction();
1343  auto hash_lv =
1344  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1345 
1346  if (query_mem_desc.didOutputColumnar()) {
1347  if (!query_mem_desc.hasKeylessHash()) {
1348  const std::string set_matching_func_name{
1349  "set_matching_group_value_perfect_hash_columnar"};
1350  const std::vector<llvm::Value*> set_matching_func_arg{
1351  groups_buffer,
1352  hash_lv,
1353  group_key,
1354  key_size_lv,
1355  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1356  query_mem_desc.getEntryCount())};
1357  emitCall(set_matching_func_name, set_matching_func_arg);
1358  }
1359  return std::make_tuple(groups_buffer, hash_lv);
1360  } else {
1361  if (query_mem_desc.hasKeylessHash()) {
1362  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1363  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1364  nullptr);
1365  } else {
1366  return std::make_tuple(
1367  emitCall(
1368  "get_matching_group_value_perfect_hash",
1369  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1370  nullptr);
1371  }
1372  }
1373 }
1374 
1375 std::tuple<llvm::Value*, llvm::Value*>
1376 GroupByAndAggregate::codegenMultiColumnBaselineHash(
1377  const CompilationOptions& co,
1378  llvm::Value* groups_buffer,
1379  llvm::Value* group_key,
1380  llvm::Value* key_size_lv,
1381  const QueryMemoryDescriptor& query_mem_desc,
1382  const size_t key_width,
1383  const int32_t row_size_quad) {
1384  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1385  auto arg_it = ROW_FUNC->arg_begin(); // groups_buffer
1386  ++arg_it; // current match count
1387  ++arg_it; // total match count
1388  ++arg_it; // old match count
1389  ++arg_it; // output buffer slots count
1390  ++arg_it; // aggregate init values
1391  CHECK(arg_it->getName() == "agg_init_val");
1392  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1393  CHECK(key_width == sizeof(int32_t));
1394  group_key =
1395  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1396  }
1397  std::vector<llvm::Value*> func_args{
1398  groups_buffer,
1399  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1400  &*group_key,
1401  &*key_size_lv,
1402  LL_INT(static_cast<int32_t>(key_width))};
1403  std::string func_name{"get_group_value"};
1404  if (query_mem_desc.didOutputColumnar()) {
1405  func_name += "_columnar_slot";
1406  } else {
1407  func_args.push_back(LL_INT(row_size_quad));
1408  func_args.push_back(&*arg_it);
1409  }
1410  if (co.with_dynamic_watchdog) {
1411  func_name += "_with_watchdog";
1412  }
1413  if (query_mem_desc.didOutputColumnar()) {
1414  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1415  } else {
1416  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1417  }
1418 }
1419 
1420 llvm::Function* GroupByAndAggregate::codegenPerfectHashFunction() {
1421  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1422  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1423  auto ft = llvm::FunctionType::get(
1424  get_int_type(32, LL_CONTEXT),
1425  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1426  false);
1427  auto key_hash_func = llvm::Function::Create(ft,
1428  llvm::Function::ExternalLinkage,
1429  "perfect_key_hash",
1430  executor_->cgen_state_->module_);
1431  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1432  mark_function_always_inline(key_hash_func);
1433  auto& key_buff_arg = *key_hash_func->args().begin();
1434  llvm::Value* key_buff_lv = &key_buff_arg;
1435  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1436  llvm::IRBuilder<> key_hash_func_builder(bb);
1437  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1438  std::vector<int64_t> cardinalities;
1439  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1440  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1441  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1442  cardinalities.push_back(getBucketedCardinality(col_range_info));
1443  }
1444  size_t dim_idx = 0;
1445  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1446  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1447  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1448  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1449  auto crt_term_lv =
1450  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1451  if (col_range_info.bucket) {
1452  crt_term_lv =
1453  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1454  }
1455  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1456  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1457  LL_INT(cardinalities[prev_dim_idx]));
1458  }
1459  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1460  ++dim_idx;
1461  }
1462  key_hash_func_builder.CreateRet(
1463  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1464  return key_hash_func;
1465 }
1466 
1467 llvm::Value* GroupByAndAggregate::convertNullIfAny(const SQLTypeInfo& arg_type,
1468  const TargetInfo& agg_info,
1469  llvm::Value* target) {
1470  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1471  const auto& agg_type = agg_info.sql_type;
1472  const size_t chosen_bytes = agg_type.get_size();
1473 
1474  bool need_conversion{false};
1475  llvm::Value* arg_null{nullptr};
1476  llvm::Value* agg_null{nullptr};
1477  llvm::Value* target_to_cast{target};
1478  if (arg_type.is_fp()) {
1479  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1480  if (agg_type.is_fp()) {
1481  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1482  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1483  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1484  need_conversion = true;
1485  }
1486  } else {
1487  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1488  return target;
1489  }
1490  } else {
1491  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1492  if (agg_type.is_fp()) {
1493  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1494  need_conversion = true;
1495  target_to_cast = executor_->castToFP(target);
1496  } else {
1497  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1498  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1499  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1500  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1501  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1502  need_conversion = true;
1503  }
1504  }
1505  }
1506  if (need_conversion) {
1507  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1508  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1509  return LL_BUILDER.CreateSelect(
1510  cmp,
1511  agg_null,
1512  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1513  } else {
1514  return target;
1515  }
1516 }
1517 
1518 llvm::Value* GroupByAndAggregate::codegenWindowRowPointer(
1519  const Analyzer::WindowFunction* window_func,
1520  const QueryMemoryDescriptor& query_mem_desc,
1521  const CompilationOptions& co,
1522  DiamondCodegen& diamond_codegen) {
1523  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1524  const auto window_func_context =
1526  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1527  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1528  ? 0
1529  : query_mem_desc.getRowSize() / sizeof(int64_t);
1530  auto arg_it = ROW_FUNC->arg_begin();
1531  auto groups_buffer = arg_it++;
1532  CodeGenerator code_generator(executor_);
1533  if (!window_func_context->getRowNumber()) {
1534  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1535  window_func_context->setRowNumber(emitCall(
1536  "row_number_window_func",
1537  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1538  code_generator.posArg(nullptr)}));
1539  }
1540  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1541  get_int_type(32, LL_CONTEXT));
1542  llvm::Value* entry_count_lv =
1543  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1544  std::vector<llvm::Value*> args{
1545  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1546  if (query_mem_desc.didOutputColumnar()) {
1547  const auto columnar_output_offset =
1548  emitCall("get_columnar_scan_output_offset", args);
1549  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1550  }
1551  args.push_back(LL_INT(row_size_quad));
1552  return emitCall("get_scan_output_slot", args);
1553  }
1554  auto arg_it = ROW_FUNC->arg_begin();
1555  auto groups_buffer = arg_it++;
1556  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1557 }
1558 
1559 bool GroupByAndAggregate::codegenAggCalls(
1560  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
1561  const std::vector<llvm::Value*>& agg_out_vec,
1562  const QueryMemoryDescriptor& query_mem_desc,
1563  const CompilationOptions& co,
1564  const GpuSharedMemoryContext& gpu_smem_context,
1565  DiamondCodegen& diamond_codegen) {
1566  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1567  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1568  // TODO(alex): unify the two cases, the output for non-group by queries
1569  // should be a contiguous buffer
1570  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1571  bool can_return_error = false;
1572  if (is_group_by) {
1573  CHECK(agg_out_vec.empty());
1574  } else {
1575  CHECK(!agg_out_vec.empty());
1576  }
1577 
1578  // output buffer is cast into a byte stream to be able to handle data elements of
1579  // different sizes (only used when actual column width sizes are used)
1580  llvm::Value* output_buffer_byte_stream{nullptr};
1581  llvm::Value* out_row_idx{nullptr};
1582  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1583  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1584  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1585  std::get<0>(agg_out_ptr_w_idx),
1586  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1587  output_buffer_byte_stream->setName("out_buff_b_stream");
1588  CHECK(std::get<1>(agg_out_ptr_w_idx));
1589  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1590  llvm::Type::getInt64Ty(LL_CONTEXT));
1591  out_row_idx->setName("out_row_idx");
1592  }
1593 
1594  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1595  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1596  ++target_idx) {
1597  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1598  CHECK(target_expr);
1599 
1600  target_builder(target_expr, executor_, co);
1601  }
1602 
1603  target_builder.codegen(this,
1604  executor_,
1605  query_mem_desc,
1606  co,
1607  gpu_smem_context,
1608  agg_out_ptr_w_idx,
1609  agg_out_vec,
1610  output_buffer_byte_stream,
1611  out_row_idx,
1612  diamond_codegen);
1613 
1614  for (auto target_expr : ra_exe_unit_.target_exprs) {
1615  CHECK(target_expr);
1616  executor_->plan_state_->isLazyFetchColumn(target_expr);
1617  }
1618 
1619  return can_return_error;
1620 }
1621 
1622 /**
1623  * @brief: returns the pointer to where the aggregation should be stored.
1624  */
1625 llvm::Value* GroupByAndAggregate::codegenAggColumnPtr(
1626  llvm::Value* output_buffer_byte_stream,
1627  llvm::Value* out_row_idx,
1628  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
1629  const QueryMemoryDescriptor& query_mem_desc,
1630  const size_t chosen_bytes,
1631  const size_t agg_out_off,
1632  const size_t target_idx) {
1633  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1634  llvm::Value* agg_col_ptr{nullptr};
1635  if (query_mem_desc.didOutputColumnar()) {
1636  // TODO(Saman): remove the second columnar branch, and support all query description
1637  // types through the first branch. Then, input arguments should also be cleaned up
1638  if (!g_cluster &&
1639  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1640  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1641  chosen_bytes == 8);
1642  CHECK(output_buffer_byte_stream);
1643  CHECK(out_row_idx);
1644  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1645  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1646  auto out_per_col_byte_idx =
1647  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1648  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1649  LL_INT(static_cast<int64_t>(col_off)));
1650  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1651  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1652  agg_col_ptr = LL_BUILDER.CreateBitCast(
1653  output_ptr,
1654  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1655  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1656  } else {
1657  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1658  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1659  col_off /= chosen_bytes;
1660  CHECK(std::get<1>(agg_out_ptr_w_idx));
1661  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1662  agg_col_ptr = LL_BUILDER.CreateGEP(
1663  LL_BUILDER.CreateBitCast(
1664  std::get<0>(agg_out_ptr_w_idx),
1665  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1666  offset);
1667  }
1668  } else {
1669  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1670  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1671  col_off /= chosen_bytes;
1672  agg_col_ptr = LL_BUILDER.CreateGEP(
1673  LL_BUILDER.CreateBitCast(
1674  std::get<0>(agg_out_ptr_w_idx),
1675  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1676  LL_INT(col_off));
1677  }
1678  CHECK(agg_col_ptr);
1679  return agg_col_ptr;
1680 }
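// Standalone sketch of the columnar byte-offset arithmetic above: since chosen_bytes
// is a power of two (1, 2, 4 or 8), __builtin_ffs(chosen_bytes) - 1 == log2(chosen_bytes),
// so the shift is just out_row_idx * chosen_bytes, and the column's base offset is added
// on top. The helper name is illustrative.
#include <cstdint>
static inline uint64_t columnar_slot_byte_offset_sketch(const uint64_t out_row_idx,
                                                        const uint32_t col_off_in_bytes,
                                                        const uint32_t chosen_bytes) {
  return col_off_in_bytes + (out_row_idx << (__builtin_ffs(chosen_bytes) - 1));
}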
1681 
1682 void GroupByAndAggregate::codegenEstimator(
1683  std::stack<llvm::BasicBlock*>& array_loops,
1684  GroupByAndAggregate::DiamondCodegen& diamond_codegen,
1685  const QueryMemoryDescriptor& query_mem_desc,
1686  const CompilationOptions& co) {
1687  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1688  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1689  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1690  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1691  estimator_comp_count_lv);
1692  int32_t subkey_idx = 0;
1693  for (const auto& estimator_arg_comp : estimator_arg) {
1694  const auto estimator_arg_comp_lvs =
1695  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1696  query_mem_desc.getEffectiveKeyWidth(),
1697  co,
1698  false,
1699  0,
1700  diamond_codegen,
1701  array_loops,
1702  true);
1703  CHECK(!estimator_arg_comp_lvs.original_value);
1704  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1705  // store the sub-key to the buffer
1706  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1707  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1708  }
1709  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1710  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1711  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1712  const auto estimator_comp_bytes_lv =
1713  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1714  const auto bitmap_size_lv =
1715  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1716  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1717  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1718 }
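// Host-side sketch of the estimator key the IR above assembles: each group-by component
// becomes one int64_t sub-key stored contiguously, and the buffer is then handed to the
// estimator runtime function as raw bytes together with the bitmap. Function and
// variable names here are illustrative only.
#include <cstdint>
#include <cstring>
#include <vector>
static std::vector<int8_t> build_estimator_key_sketch(const std::vector<int64_t>& subkeys) {
  std::vector<int8_t> key_bytes(subkeys.size() * sizeof(int64_t));
  std::memcpy(key_bytes.data(), subkeys.data(), key_bytes.size());
  return key_bytes;
}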
1719 
1720 extern "C" void agg_count_distinct(int64_t* agg, const int64_t val) {
1721  reinterpret_cast<std::set<int64_t>*>(*agg)->insert(val);
1722 }
1723 
1724 extern "C" void agg_count_distinct_skip_val(int64_t* agg,
1725  const int64_t val,
1726  const int64_t skip_val) {
1727  if (val != skip_val) {
1728  agg_count_distinct(agg, val);
1729  }
1730 }
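// Minimal usage sketch for the CPU count-distinct fallbacks above: the 64-bit agg slot
// holds the address of a std::set<int64_t>, so inserting through it grows the distinct
// set, and values equal to skip_val (typically the column's null sentinel) are ignored.
// The stack-allocated set below is for illustration only; in the engine these sets are
// owned by the row set memory owner.
#include <cstddef>
#include <cstdint>
#include <set>
static size_t count_distinct_usage_sketch() {
  std::set<int64_t> distinct_vals;
  int64_t agg_slot = reinterpret_cast<int64_t>(&distinct_vals);
  agg_count_distinct(&agg_slot, 7);
  agg_count_distinct(&agg_slot, 7);                             // duplicate, set stays at size 1
  agg_count_distinct_skip_val(&agg_slot, -1, /*skip_val=*/-1);  // skipped
  return distinct_vals.size();                                  // 1
}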
1731 
1732 void GroupByAndAggregate::codegenCountDistinct(
1733  const size_t target_idx,
1734  const Analyzer::Expr* target_expr,
1735  std::vector<llvm::Value*>& agg_args,
1736  const QueryMemoryDescriptor& query_mem_desc,
1737  const ExecutorDeviceType device_type) {
1738  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1739  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1740  const auto& arg_ti =
1741  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1742  if (arg_ti.is_fp()) {
1743  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1744  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1745  }
1746  const auto& count_distinct_descriptor =
1747  query_mem_desc.getCountDistinctDescriptor(target_idx);
1748  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1749  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1750  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1751  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1752  if (device_type == ExecutorDeviceType::GPU) {
1753  const auto base_dev_addr = getAdditionalLiteral(-1);
1754  const auto base_host_addr = getAdditionalLiteral(-2);
1755  agg_args.push_back(base_dev_addr);
1756  agg_args.push_back(base_host_addr);
1757  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1758  } else {
1759  emitCall("agg_approximate_count_distinct", agg_args);
1760  }
1761  return;
1762  }
1763  std::string agg_fname{"agg_count_distinct"};
1764  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1765  agg_fname += "_bitmap";
1766  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1767  }
1768  if (agg_info.skip_null_val) {
1769  auto null_lv = executor_->cgen_state_->castToTypeIn(
1770  (arg_ti.is_fp()
1771  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1772  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1773  64);
1774  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1775  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1776  agg_fname += "_skip_val";
1777  agg_args.push_back(null_lv);
1778  }
1779  if (device_type == ExecutorDeviceType::GPU) {
1780  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1781  agg_fname += "_gpu";
1782  const auto base_dev_addr = getAdditionalLiteral(-1);
1783  const auto base_host_addr = getAdditionalLiteral(-2);
1784  agg_args.push_back(base_dev_addr);
1785  agg_args.push_back(base_host_addr);
1786  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1787  CHECK_EQ(size_t(0),
1788  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1789  count_distinct_descriptor.sub_bitmap_count);
1790  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1791  count_distinct_descriptor.sub_bitmap_count)));
1792  }
1793  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1794  emitCall(agg_fname, agg_args);
1795  } else {
1796  executor_->cgen_state_->emitExternalCall(
1797  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1798  }
1799 }
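// Sketch of how the runtime function name is assembled in codegenCountDistinct above;
// the suffix order mirrors the branches (bitmap, then skip_val, then gpu), and the GPU
// path additionally requires the bitmap implementation, as the CHECK above enforces.
// The helper name is illustrative only.
#include <string>
static std::string count_distinct_fname_sketch(const bool bitmap,
                                               const bool skip_null,
                                               const bool gpu) {
  std::string fname{"agg_count_distinct"};
  if (bitmap) {
    fname += "_bitmap";
  }
  if (skip_null) {
    fname += "_skip_val";
  }
  if (gpu) {
    fname += "_gpu";
  }
  return fname;
}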
1800 
1801 llvm::Value* GroupByAndAggregate::getAdditionalLiteral(const int32_t off) {
1802  CHECK_LT(off, 0);
1803  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1804  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1805  LL_BUILDER.CreateBitCast(lit_buff_lv,
1806  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1807  LL_INT(off)));
1808 }
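// Scalar sketch of getAdditionalLiteral above: the "literals" argument is reinterpreted
// as an array of int64_t and indexed with a negative offset, so off = -1 and off = -2
// read the two 64-bit slots stored immediately before the literal buffer pointer (used
// above for the device and host base addresses of the count-distinct bitmaps).
#include <cstdint>
static inline int64_t additional_literal_sketch(const int8_t* lit_buff, const int32_t off) {
  // off is expected to be negative, matching the CHECK_LT(off, 0) above.
  return reinterpret_cast<const int64_t*>(lit_buff)[off];
}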
1809 
1810 std::vector<llvm::Value*> GroupByAndAggregate::codegenAggArg(
1811  const Analyzer::Expr* target_expr,
1812  const CompilationOptions& co) {
1813  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1814  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1815  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1816  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1817 
1818  // TODO(alex): handle arrays uniformly?
1819  CodeGenerator code_generator(executor_);
1820  if (target_expr) {
1821  const auto& target_ti = target_expr->get_type_info();
1822  if (target_ti.is_array() && !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1823  const auto target_lvs =
1824  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1825  : code_generator.codegen(
1826  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1827  if (!func_expr && !arr_expr) {
1828  // This target goes through the chunk transport: the code was generated from a
1829  // source other than an ARRAY[] expression
1830  CHECK_EQ(size_t(1), target_lvs.size());
1831  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1832  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1833  const auto i8p_ty =
1834  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1835  const auto& elem_ti = target_ti.get_elem_type();
1836  return {
1837  executor_->cgen_state_->emitExternalCall(
1838  "array_buff",
1839  i8p_ty,
1840  {target_lvs.front(), code_generator.posArg(target_expr)}),
1841  executor_->cgen_state_->emitExternalCall(
1842  "array_size",
1843  i32_ty,
1844  {target_lvs.front(),
1845  code_generator.posArg(target_expr),
1846  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1847  } else {
1848  if (agg_expr) {
1849  throw std::runtime_error(
1850  "Using array[] operator as argument to an aggregate operator is not "
1851  "supported");
1852  }
1853  CHECK(func_expr || arr_expr);
1854  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1855  CHECK_EQ(size_t(1), target_lvs.size());
1856 
1857  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1858 
1859  // const auto target_lv_type = target_lvs[0]->getType();
1860  // CHECK(target_lv_type->isStructTy());
1861  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1862  const auto i8p_ty = llvm::PointerType::get(
1863  get_int_type(8, executor_->cgen_state_->context_), 0);
1864  const auto ptr = LL_BUILDER.CreatePointerCast(
1865  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1866  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1867  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1868 
1869  const auto nullcheck_ok_bb =
1870  llvm::BasicBlock::Create(LL_CONTEXT, "arr_nullcheck_ok_bb", CUR_FUNC);
1871  const auto nullcheck_fail_bb =
1872  llvm::BasicBlock::Create(LL_CONTEXT, "arr_nullcheck_fail_bb", CUR_FUNC);
1873 
1874  // TODO(adb): probably better to zext the bool
1875  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1876  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1877  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1878 
1879  const auto ret_bb =
1880  llvm::BasicBlock::Create(LL_CONTEXT, "arr_return", CUR_FUNC);
1881  LL_BUILDER.SetInsertPoint(ret_bb);
1882  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, "array_ptr_return");
1883  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1884 
1885  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1886  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1887  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1888 
1889  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1890  executor_->cgen_state_->emitExternalCall(
1891  "register_buffer_with_executor_rsm",
1892  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1893  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1894  LL_BUILDER.CreateBr(ret_bb);
1895 
1896  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1897  LL_BUILDER.CreateBr(ret_bb);
1898 
1899  LL_BUILDER.SetInsertPoint(ret_bb);
1900 
1901  return {result_phi, size};
1902  }
1903  CHECK_EQ(size_t(2), target_lvs.size());
1904  return {target_lvs[0], target_lvs[1]};
1905  }
1906  }
1907  if (target_ti.is_geometry() &&
1908  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1909  auto generate_coord_lvs =
1910  [&](auto* selected_target_expr,
1911  bool const fetch_columns) -> std::vector<llvm::Value*> {
1912  const auto target_lvs =
1913  code_generator.codegen(selected_target_expr, fetch_columns, co);
1914  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
1915  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
1916  if (geo_uoper || geo_binoper) {
1917  CHECK(target_expr->get_type_info().is_geometry());
1918  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1919  target_lvs.size());
1920  return target_lvs;
1921  }
1922  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1923  target_lvs.size());
1924 
1925  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1926  const auto i8p_ty =
1927  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1928  std::vector<llvm::Value*> coords;
1929  size_t ctr = 0;
1930  for (const auto& target_lv : target_lvs) {
1931  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1932  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1933  // for col 1 for pols / mpolys, etc). Hardcoding for now. The first array is the
1934  // coords array (TINYINT). Subsequent arrays are regular INT.
1935 
1936  const size_t elem_sz = ctr == 0 ? 1 : 4;
1937  ctr++;
1938  int32_t fixlen = -1;
1939  if (target_ti.get_type() == kPOINT) {
1940  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1941  if (col_var) {
1942  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1943  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1944  fixlen = coords_cd->columnType.get_size();
1945  }
1946  }
1947  }
1948  if (fixlen > 0) {
1949  coords.push_back(executor_->cgen_state_->emitExternalCall(
1950  "fast_fixlen_array_buff",
1951  i8p_ty,
1952  {target_lv, code_generator.posArg(selected_target_expr)}));
1953  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1954  continue;
1955  }
1956  coords.push_back(executor_->cgen_state_->emitExternalCall(
1957  "array_buff",
1958  i8p_ty,
1959  {target_lv, code_generator.posArg(selected_target_expr)}));
1960  coords.push_back(executor_->cgen_state_->emitExternalCall(
1961  "array_size",
1962  i32_ty,
1963  {target_lv,
1964  code_generator.posArg(selected_target_expr),
1965  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1966  }
1967  return coords;
1968  };
1969 
1970  if (agg_expr) {
1971  return generate_coord_lvs(agg_expr->get_arg(), true);
1972  } else {
1973  return generate_coord_lvs(target_expr,
1974  !executor_->plan_state_->allow_lazy_fetch_);
1975  }
1976  }
1977  }
1978  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1979  : code_generator.codegen(
1980  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1981 }
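// For array and geo targets, codegenAggArg above returns a buffer pointer / size pair
// per physical column (from the array_buff and array_size runtime calls) rather than a
// single scalar value; a sketch of that result shape (names illustrative only):
#include <cstdint>
struct VarlenTargetLvsSketch {
  int8_t* buff;  // element buffer, as produced by array_buff
  int32_t size;  // as produced by array_size
};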
1982 
1983 llvm::Value* GroupByAndAggregate::emitCall(const std::string& fname,
1984  const std::vector<llvm::Value*>& args) {
1985  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1986  return executor_->cgen_state_->emitCall(fname, args);
1987 }
1988 
1989 void GroupByAndAggregate::checkErrorCode(llvm::Value* retCode) {
1990  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1991  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1992  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1993  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1994 
1995  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1996 }
1997 
1998 #undef CUR_FUNC
1999 #undef ROW_FUNC
2000 #undef LL_FP
2001 #undef LL_INT
2002 #undef LL_BOOL
2003 #undef LL_BUILDER
2004 #undef LL_CONTEXT
2005 
2006 size_t GroupByAndAggregate::shard_count_for_top_groups(
2007  const RelAlgExecutionUnit& ra_exe_unit,
2008  const Catalog_Namespace::Catalog& catalog) {
2009  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
2010  return 0;
2011  }
2012  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2013  const auto grouped_col_expr =
2014  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
2015  if (!grouped_col_expr) {
2016  continue;
2017  }
2018  if (grouped_col_expr->get_table_id() <= 0) {
2019  return 0;
2020  }
2021  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
2022  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
2023  return td->nShards;
2024  }
2025  }
2026  return 0;
2027 }
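// Sketch of the decision shard_count_for_top_groups above makes (illustrative helper,
// not part of the original file): per-shard top-k execution is only considered for
// queries that order by exactly one column with a LIMIT and group by the table's shard
// column; in that case the table's shard count is returned, otherwise 0.
#include <cstddef>
static size_t sharded_top_groups_shard_count_sketch(const size_t num_order_entries,
                                                    const bool has_limit,
                                                    const bool groups_by_shard_column,
                                                    const size_t n_shards) {
  if (num_order_entries != 1 || !has_limit || !groups_by_shard_column) {
    return 0;
  }
  return n_shards;
}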