OmniSciDB 06b3bd477c
GroupByAndAggregate.cpp
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "GroupByAndAggregate.h"
18 #include "AggregateUtils.h"
19 
20 #include "CardinalityEstimator.h"
21 #include "CodeGenerator.h"
23 #include "ExpressionRange.h"
24 #include "ExpressionRewrite.h"
25 #include "GpuInitGroups.h"
26 #include "InPlaceSort.h"
28 #include "MaxwellCodegenPatch.h"
30 #include "TargetExprBuilder.h"
31 
32 #include "../CudaMgr/CudaMgr.h"
33 #include "../Shared/checked_alloc.h"
34 #include "../Utils/ChunkIter.h"
36 #include "Execute.h"
37 #include "QueryTemplateGenerator.h"
38 #include "RuntimeFunctions.h"
39 #include "StreamingTopN.h"
40 #include "TopKSort.h"
41 #include "WindowContext.h"
42 
43 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
44 
45 #include <numeric>
46 #include <thread>
47 
48 bool g_cluster{false};
49 bool g_bigint_count{false};
51 extern size_t g_leaf_count;
52 
53 namespace {
54 
55 int32_t get_agg_count(const std::vector<Analyzer::Expr*>& target_exprs) {
56  int32_t agg_count{0};
57  for (auto target_expr : target_exprs) {
58  CHECK(target_expr);
59  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
60  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
61  const auto& ti = target_expr->get_type_info();
62  // TODO(pavan): or if is_geometry()
63  if (ti.is_array() || (ti.is_string() && ti.get_compression() == kENCODING_NONE)) {
64  agg_count += 2;
65  } else if (ti.is_geometry()) {
66  agg_count += ti.get_physical_coord_cols() * 2;
67  } else {
68  ++agg_count;
69  }
70  continue;
71  }
72  if (agg_expr && agg_expr->get_aggtype() == kAVG) {
73  agg_count += 2;
74  } else {
75  ++agg_count;
76  }
77  }
78  return agg_count;
79 }
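// Editor's sketch (not part of the original source): how the slot count above plays
// out for a hypothetical target list. For
//   SELECT dept, AVG(salary), COUNT(*) FROM emp GROUP BY dept;
// the projected group column dept takes 1 slot, AVG(salary) takes 2 (sum and count),
// and COUNT(*) takes 1, so get_agg_count() returns 4. A none-encoded string or array
// target would take 2 slots (pointer and length), and a geometry target takes 2 per
// physical coordinate column.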
80 
81 bool expr_is_rowid(const Analyzer::Expr* expr, const Catalog_Namespace::Catalog& cat) {
82  const auto col = dynamic_cast<const Analyzer::ColumnVar*>(expr);
83  if (!col) {
84  return false;
85  }
86  const auto cd =
87  get_column_descriptor_maybe(col->get_column_id(), col->get_table_id(), cat);
88  if (!cd || !cd->isVirtualCol) {
89  return false;
90  }
91  CHECK_EQ("rowid", cd->columnName);
92  return true;
93 }
94 
95 bool has_count_distinct(const RelAlgExecutionUnit& ra_exe_unit) {
96  for (const auto& target_expr : ra_exe_unit.target_exprs) {
97  const auto agg_info = get_target_info(target_expr, g_bigint_count);
98  if (agg_info.is_agg && is_distinct_target(agg_info)) {
99  return true;
100  }
101  }
102  return false;
103 }
104 
105 bool is_column_range_too_big_for_perfect_hash(const ColRangeInfo& col_range_info,
106  const int64_t max_entry_count) {
107  try {
108  return static_cast<int64_t>(checked_int64_t(col_range_info.max) -
109  checked_int64_t(col_range_info.min)) >= max_entry_count;
110  } catch (...) {
111  return true;
112  }
113 }
114 
115 } // namespace
116 
117 ColRangeInfo GroupByAndAggregate::getColRangeInfo() {
118  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
119  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
120  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
121  // can expect this to be true anyway for grouped queries since the precise version
122  // uses significantly more memory.
123  const int64_t baseline_threshold =
128  if (ra_exe_unit_.groupby_exprs.size() != 1) {
129  try {
130  checked_int64_t cardinality{1};
131  bool has_nulls{false};
132  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
133  auto col_range_info = getExprRangeInfo(groupby_expr.get());
134  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
135  // going through baseline hash if a non-integer type is encountered
136  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
137  }
138  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
139  CHECK_GE(crt_col_cardinality, 0);
140  cardinality *= crt_col_cardinality;
141  if (col_range_info.has_nulls) {
142  has_nulls = true;
143  }
144  }
145  // For zero or high cardinalities, use baseline layout.
146  if (!cardinality || cardinality > baseline_threshold) {
147  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
148  }
149  return {QueryDescriptionType::GroupByPerfectHash,
150  0,
151  int64_t(cardinality),
152  0,
153  has_nulls};
154  } catch (...) { // overflow when computing cardinality
155  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
156  }
157  }
158  // For a single-column GROUP BY on high-precision timestamps, force baseline hash due to
159  // the wide ranges we are likely to encounter when applying quals to the expression range.
160  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
161  // the range is small enough
162  if (ra_exe_unit_.groupby_exprs.front() &&
163  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
164  ra_exe_unit_.simple_quals.size() > 0) {
165  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
166  }
167  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
168  if (!ra_exe_unit_.groupby_exprs.front()) {
169  return col_range_info;
170  }
171  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
172  const int64_t col_count =
173  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
174  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
175  if (has_count_distinct(ra_exe_unit_)) {
176  max_entry_count = std::min(max_entry_count, baseline_threshold);
177  }
178  if ((!ra_exe_unit_.groupby_exprs.front()->get_type_info().is_string() &&
179  !expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(), *executor_->catalog_)) &&
180  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
181  !col_range_info.bucket) {
182  return {QueryDescriptionType::GroupByBaselineHash,
183  col_range_info.min,
184  col_range_info.max,
185  0,
186  col_range_info.has_nulls};
187  }
188  return col_range_info;
189 }
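// Editor's sketch of the layout decision above (hypothetical ranges): two integer
// group-by columns with bucketed cardinalities 1,000 and 500 give a combined
// cardinality of 500,000, so the perfect-hash layout is kept with that entry count;
// adding a third column with cardinality 10,000 pushes the product past the baseline
// threshold (or overflows), and the query falls back to GroupByBaselineHash.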
190 
191 ColRangeInfo GroupByAndAggregate::getExprRangeInfo(const Analyzer::Expr* expr) const {
192  if (!expr) {
193  return {QueryDescriptionType::Projection, 0, 0, 0, false};
194  }
195 
196  const auto expr_range = getExpressionRange(
197  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
198  switch (expr_range.getType()) {
199  case ExpressionRangeType::Integer: {
200  if (expr_range.getIntMin() > expr_range.getIntMax()) {
201  return {
202  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
203  }
204  return {QueryDescriptionType::GroupByPerfectHash,
205  expr_range.getIntMin(),
206  expr_range.getIntMax(),
207  expr_range.getBucket(),
208  expr_range.hasNulls()};
209  }
210  case ExpressionRangeType::Float:
211  case ExpressionRangeType::Double: {
212  if (expr_range.getFpMin() > expr_range.getFpMax()) {
213  return {
214  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
215  }
216  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
217  }
218  case ExpressionRangeType::Invalid:
219  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
220  default:
221  CHECK(false);
222  }
223  CHECK(false);
224  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
225 }
226 
227 int64_t GroupByAndAggregate::getBucketedCardinality(const ColRangeInfo& col_range_info) {
228  checked_int64_t crt_col_cardinality =
229  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
230  if (col_range_info.bucket) {
231  crt_col_cardinality /= col_range_info.bucket;
232  }
233  return static_cast<int64_t>(crt_col_cardinality +
234  (1 + (col_range_info.has_nulls ? 1 : 0)));
235 }
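// Editor's worked example: for a column range [10, 1000] with bucket size 10 and
// nulls present, the computation above yields (1000 - 10) / 10 + 1 + 1 = 101 -- one
// entry per distinct bucketed value plus a slot reserved for the translated NULL key.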
236 
237 #define LL_CONTEXT executor_->cgen_state_->context_
238 #define LL_BUILDER executor_->cgen_state_->ir_builder_
239 #define LL_BOOL(v) executor_->cgen_state_->llBool(v)
240 #define LL_INT(v) executor_->cgen_state_->llInt(v)
241 #define LL_FP(v) executor_->cgen_state_->llFp(v)
242 #define ROW_FUNC executor_->cgen_state_->row_func_
243 
244 GroupByAndAggregate::GroupByAndAggregate(
245  Executor* executor,
246  const ExecutorDeviceType device_type,
247  const RelAlgExecutionUnit& ra_exe_unit,
248  const std::vector<InputTableInfo>& query_infos,
249  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
250  : executor_(executor)
251  , ra_exe_unit_(ra_exe_unit)
252  , query_infos_(query_infos)
253  , row_set_mem_owner_(row_set_mem_owner)
254  , device_type_(device_type) {
255  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
256  if (!groupby_expr) {
257  continue;
258  }
259  const auto& groupby_ti = groupby_expr->get_type_info();
260  if (groupby_ti.is_string() && groupby_ti.get_compression() != kENCODING_DICT) {
261  throw std::runtime_error(
262  "Cannot group by string columns which are not dictionary encoded.");
263  }
264  if (groupby_ti.is_array()) {
265  throw std::runtime_error("Group by array not supported");
266  }
267  if (groupby_ti.is_geometry()) {
268  throw std::runtime_error("Group by geometry not supported");
269  }
270  }
271 }
272 
273 int64_t GroupByAndAggregate::getShardedTopBucket(const ColRangeInfo& col_range_info,
274  const size_t shard_count) const {
275  size_t device_count{0};
276  if (device_type_ == ExecutorDeviceType::GPU) {
277  auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
278  CHECK(cuda_mgr);
279  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
280  CHECK_GT(device_count, 0u);
281  }
282 
283  int64_t bucket{col_range_info.bucket};
284 
285  if (shard_count) {
286  CHECK(!col_range_info.bucket);
287  /*
288  when a node has fewer devices than shard count,
289  a) In a distributed setup, the minimum distance between two keys would be
290  device_count because shards are stored consecutively across the physical tables, i.e
291  if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1 would
292  have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf node
293  has only 1 device, then all of that node's keys are loaded on that single
294  device.
295 
296  b) In a single node setup, the distance would be the minimum of device_count and
297  the difference shard_count - device_count. For example: if a single node server
298  running on 3 devices has a table with 4 shards and a shard column with values 0 to 9,
299  the device to fragment keys mapping would be: device 1 - 4,8,3,7; device 2 - 1,5,9;
300  device 3 - 2,6. The bucket value would be 4 (shards) - 3 (devices) = 1, i.e. the
301  minimum of device_count and the difference.
302 
303  When a node has device count equal to or more than shard count then the
304  minimum distance is always at least shard_count * no of leaf nodes.
305  */
306  if (device_count < shard_count) {
307  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
308  : std::min(device_count, shard_count - device_count);
309  } else {
310  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
311  }
312  }
313 
314  return bucket;
315 }
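// Editor's sketch of the bucket choice above, with illustrative numbers:
//   single node (g_leaf_count == 0), 3 devices, 4 shards:
//     devices < shards  ->  bucket = min(3, 4 - 3) = 1
//   distributed, 1 device per leaf, 3 shards:
//     devices < shards  ->  bucket = max(1, 1) = 1
//   single node, 4 devices, 3 shards, no leaves:
//     devices >= shards ->  bucket = 3 * max(0, 1) = 3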
316 
317 std::unique_ptr<QueryMemoryDescriptor> GroupByAndAggregate::initQueryMemoryDescriptor(
318  const bool allow_multifrag,
319  const size_t max_groups_buffer_entry_count,
320  const int8_t crt_min_byte_width,
321  RenderInfo* render_info,
322  const bool output_columnar_hint) {
323  const auto shard_count =
324  device_type_ == ExecutorDeviceType::GPU
325  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
326  : 0;
327  bool sort_on_gpu_hint =
328  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
329  !ra_exe_unit_.sort_info.order_entries.empty() &&
330  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries);
331  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
332  // but the total output buffer size would be too big or it's a sharded top query.
333  // For the sake of managing risk, use the new result set way very selectively for
334  // this case only (alongside the baseline layout we've enabled for a while now).
335  bool must_use_baseline_sort = shard_count;
336  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
337  while (true) {
338  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
339  max_groups_buffer_entry_count,
340  crt_min_byte_width,
341  sort_on_gpu_hint,
342  render_info,
343  must_use_baseline_sort,
344  output_columnar_hint);
345  CHECK(query_mem_desc);
346  if (query_mem_desc->sortOnGpu() &&
347  (query_mem_desc->getBufferSizeBytes(device_type_) +
348  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
349  2 * 1024 * 1024 * 1024L) {
350  must_use_baseline_sort = true;
351  sort_on_gpu_hint = false;
352  } else {
353  break;
354  }
355  }
356  return query_mem_desc;
357 }
358 
359 std::unique_ptr<QueryMemoryDescriptor> GroupByAndAggregate::initQueryMemoryDescriptorImpl(
360  const bool allow_multifrag,
361  const size_t max_groups_buffer_entry_count,
362  const int8_t crt_min_byte_width,
363  const bool sort_on_gpu_hint,
364  RenderInfo* render_info,
365  const bool must_use_baseline_sort,
366  const bool output_columnar_hint) {
368 
369  const auto count_distinct_descriptors = initCountDistinctDescriptors();
370 
371  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs, {});
372 
373  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
374 
375  auto col_range_info_nosharding = getColRangeInfo();
376 
377  const auto shard_count =
378  device_type_ == ExecutorDeviceType::GPU
379  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
380  : 0;
381 
382  const auto col_range_info =
383  ColRangeInfo{col_range_info_nosharding.hash_type_,
384  col_range_info_nosharding.min,
385  col_range_info_nosharding.max,
386  getShardedTopBucket(col_range_info_nosharding, shard_count),
387  col_range_info_nosharding.has_nulls};
388 
389  // Non-grouped aggregates do not support accessing aggregated ranges
390  // Keyless hash is currently only supported with single-column perfect hash
391  const auto keyless_info = !(is_group_by && col_range_info.hash_type_ ==
392  QueryDescriptionType::GroupByPerfectHash)
393  ? KeylessInfo{false, -1}
394  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
395 
396  if (g_enable_watchdog &&
397  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
398  max_groups_buffer_entry_count > 120000000) ||
399  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
400  ra_exe_unit_.groupby_exprs.size() == 1 &&
401  (col_range_info.max - col_range_info.min) /
402  std::max(col_range_info.bucket, int64_t(1)) >
403  130000000))) {
404  throw WatchdogException("Query would use too much memory");
405  }
406  try {
407  return QueryMemoryDescriptor::init(executor_,
408  ra_exe_unit_,
409  query_infos_,
410  col_range_info,
411  keyless_info,
412  allow_multifrag,
413  device_type_,
414  crt_min_byte_width,
415  sort_on_gpu_hint,
416  shard_count,
417  max_groups_buffer_entry_count,
418  render_info,
419  count_distinct_descriptors,
420  must_use_baseline_sort,
421  output_columnar_hint,
422  /*streaming_top_n_hint=*/true);
423  } catch (const StreamingTopNOOM& e) {
424  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
425  return QueryMemoryDescriptor::init(executor_,
426  ra_exe_unit_,
427  query_infos_,
428  col_range_info,
429  keyless_info,
430  allow_multifrag,
431  device_type_,
432  crt_min_byte_width,
433  sort_on_gpu_hint,
434  shard_count,
435  max_groups_buffer_entry_count,
436  render_info,
437  count_distinct_descriptors,
438  must_use_baseline_sort,
439  output_columnar_hint,
440  /*streaming_top_n_hint=*/false);
441  }
442 }
443 
446 }
447 
448 namespace {
449 
451  const Analyzer::Expr* expr,
452  Executor* executor,
453  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
454  if (!expr) {
455  return;
456  }
457 
458  const auto array_expr = dynamic_cast<const Analyzer::ArrayExpr*>(expr);
459  if (array_expr) {
460  for (size_t i = 0; i < array_expr->getElementCount(); i++) {
462  array_expr->getElement(i), executor, row_set_mem_owner);
463  }
464  return;
465  }
466 
467  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr);
468  const auto& expr_ti = expr->get_type_info();
469  if (cast_expr && cast_expr->get_optype() == kCAST && expr_ti.is_string()) {
470  CHECK_EQ(kENCODING_DICT, expr_ti.get_compression());
471  auto sdp = executor->getStringDictionaryProxy(
472  expr_ti.get_comp_param(), row_set_mem_owner, true);
473  CHECK(sdp);
474  const auto str_lit_expr =
475  dynamic_cast<const Analyzer::Constant*>(cast_expr->get_operand());
476  if (str_lit_expr && str_lit_expr->get_constval().stringval) {
477  sdp->getOrAddTransient(*str_lit_expr->get_constval().stringval);
478  }
479  return;
480  }
481  const auto case_expr = dynamic_cast<const Analyzer::CaseExpr*>(expr);
482  if (!case_expr) {
483  return;
484  }
485  Analyzer::DomainSet domain_set;
486  case_expr->get_domain(domain_set);
487  if (domain_set.empty()) {
488  return;
489  }
490  if (expr_ti.is_string()) {
491  CHECK_EQ(kENCODING_DICT, expr_ti.get_compression());
492  auto sdp = executor->getStringDictionaryProxy(
493  expr_ti.get_comp_param(), row_set_mem_owner, true);
494  CHECK(sdp);
495  for (const auto domain_expr : domain_set) {
496  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(domain_expr);
497  const auto str_lit_expr =
498  cast_expr && cast_expr->get_optype() == kCAST
499  ? dynamic_cast<const Analyzer::Constant*>(cast_expr->get_operand())
500  : dynamic_cast<const Analyzer::Constant*>(domain_expr);
501  if (str_lit_expr && str_lit_expr->get_constval().stringval) {
502  sdp->getOrAddTransient(*str_lit_expr->get_constval().stringval);
503  }
504  }
505  }
506 }
507 
508 } // namespace
509 
510 void GroupByAndAggregate::addTransientStringLiterals(
511  const RelAlgExecutionUnit& ra_exe_unit,
512  Executor* executor,
513  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
514  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
516  group_expr.get(), executor, row_set_mem_owner);
517  }
518  for (const auto target_expr : ra_exe_unit.target_exprs) {
519  const auto& target_type = target_expr->get_type_info();
520  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
521  continue;
522  }
523  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
524  if (agg_expr) {
525  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
526  agg_expr->get_aggtype() == kSAMPLE) {
528  agg_expr->get_arg(), executor, row_set_mem_owner);
529  }
530  } else {
532  target_expr, executor, row_set_mem_owner);
533  }
534  }
535  row_set_mem_owner->addLiteralStringDictProxy(executor->lit_str_dict_proxy_);
536 }
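// Editor's note: the traversal above exists so that string literals produced at query
// time are visible to the kernel. For a (hypothetical) projection such as
//   CASE WHEN code = 'x' THEN 'hit' ELSE 'miss' END
// on a dictionary-encoded column, 'hit' and 'miss' are registered as transient entries
// in the column's StringDictionaryProxy before codegen, so the generated code can work
// with their dictionary ids instead of raw strings.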
537 
538 CountDistinctDescriptors GroupByAndAggregate::initCountDistinctDescriptors() {
539  CountDistinctDescriptors count_distinct_descriptors;
540  for (const auto target_expr : ra_exe_unit_.target_exprs) {
541  auto agg_info = get_target_info(target_expr, g_bigint_count);
542  if (is_distinct_target(agg_info)) {
543  CHECK(agg_info.is_agg);
544  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
545  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
546  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
547  if (arg_ti.is_string() && arg_ti.get_compression() != kENCODING_DICT) {
548  throw std::runtime_error(
549  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
550  }
551  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_array()) {
552  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
553  }
554  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
555  throw std::runtime_error(
556  "APPROX_COUNT_DISTINCT on geometry columns not supported");
557  }
558  if (agg_info.is_distinct && arg_ti.is_geometry()) {
559  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
560  }
561  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
562  auto arg_range_info =
563  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
564  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
565  int64_t bitmap_sz_bits{0};
566  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
567  const auto error_rate = agg_expr->get_error_rate();
568  if (error_rate) {
569  CHECK(error_rate->get_type_info().get_type() == kINT);
570  CHECK_GE(error_rate->get_constval().intval, 1);
571  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
572  } else {
573  bitmap_sz_bits = g_hll_precision_bits;
574  }
575  }
576  if (arg_range_info.isEmpty()) {
577  count_distinct_descriptors.emplace_back(
578  CountDistinctDescriptor{CountDistinctImplType::Bitmap,
579  0,
580  64,
581  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
582  device_type_,
583  1});
584  continue;
585  }
586  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
587  !(arg_ti.is_array() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
588  // implementation for arrays
589  count_distinct_impl_type = CountDistinctImplType::Bitmap;
590  if (agg_info.agg_kind == kCOUNT) {
591  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
592  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
593  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
594  count_distinct_impl_type = CountDistinctImplType::StdSet;
595  }
596  }
597  }
598  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
599  count_distinct_impl_type == CountDistinctImplType::StdSet &&
600  !(arg_ti.is_array() || arg_ti.is_geometry())) {
601  count_distinct_impl_type = CountDistinctImplType::Bitmap;
602  }
603 
604  if (g_enable_watchdog && !(arg_range_info.isEmpty()) &&
605  count_distinct_impl_type == CountDistinctImplType::StdSet) {
606  throw WatchdogException("Cannot use a fast path for COUNT distinct");
607  }
608  const auto sub_bitmap_count =
609  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit_, device_type_);
610  count_distinct_descriptors.emplace_back(
611  CountDistinctDescriptor{count_distinct_impl_type,
612  arg_range_info.min,
613  bitmap_sz_bits,
614  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
615  device_type_,
616  sub_bitmap_count});
617  } else {
618  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
619  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
620  }
621  }
622  return count_distinct_descriptors;
623 }
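// Editor's sketch of how the descriptor choice above typically falls out (column
// names hypothetical):
//   COUNT(DISTINCT int_col), range [0, 999]          -> Bitmap, 1000 bits
//   COUNT(DISTINCT huge_range_col), > 8 billion bits -> StdSet (watchdog error if enabled)
//   APPROX_COUNT_DISTINCT(col)                       -> Bitmap-backed HLL registers,
//                                                       g_hll_precision_bits by default
//   non-distinct targets                             -> Invalid placeholder descriptor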
624 
634 KeylessInfo GroupByAndAggregate::getKeylessInfo(
635  const std::vector<Analyzer::Expr*>& target_expr_list,
636  const bool is_group_by) const {
637  bool keyless{true}, found{false};
638  int32_t num_agg_expr{0};
639  int32_t index{0};
640  for (const auto target_expr : target_expr_list) {
641  const auto agg_info = get_target_info(target_expr, g_bigint_count);
642  const auto chosen_type = get_compact_type(agg_info);
643  if (agg_info.is_agg) {
644  num_agg_expr++;
645  }
646  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
647  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
648  CHECK(agg_expr);
649  const auto arg_expr = agg_arg(target_expr);
650  const bool float_argument_input = takes_float_argument(agg_info);
651  switch (agg_info.agg_kind) {
652  case kAVG:
653  ++index;
654  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
655  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
656  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
657  expr_range_info.hasNulls()) {
658  break;
659  }
660  }
661  found = true;
662  break;
663  case kCOUNT:
664  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
665  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
666  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
667  expr_range_info.hasNulls()) {
668  break;
669  }
670  }
671  found = true;
672  break;
673  case kSUM: {
674  auto arg_ti = arg_expr->get_type_info();
675  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
676  arg_ti.set_notnull(true);
677  }
678  if (!arg_ti.get_notnull()) {
679  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
680  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
681  !expr_range_info.hasNulls()) {
682  found = true;
683  }
684  } else {
685  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
686  switch (expr_range_info.getType()) {
687  case ExpressionRangeType::Float:
688  case ExpressionRangeType::Double:
689  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
690  found = true;
691  }
692  break;
693  case ExpressionRangeType::Integer:
694  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
695  found = true;
696  }
697  break;
698  default:
699  break;
700  }
701  }
702  break;
703  }
704  case kMIN: {
705  CHECK(agg_expr && agg_expr->get_arg());
706  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
707  if (arg_ti.is_string() || arg_ti.is_array()) {
708  break;
709  }
710  auto expr_range_info =
711  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
712  auto init_max = get_agg_initial_val(agg_info.agg_kind,
713  chosen_type,
714  is_group_by || float_argument_input,
715  float_argument_input ? sizeof(float) : 8);
716  switch (expr_range_info.getType()) {
717  case ExpressionRangeType::Float:
718  case ExpressionRangeType::Double: {
719  auto double_max =
720  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
721  if (expr_range_info.getFpMax() < double_max) {
722  found = true;
723  }
724  break;
725  }
726  case ExpressionRangeType::Integer:
727  if (expr_range_info.getIntMax() < init_max) {
728  found = true;
729  }
730  break;
731  default:
732  break;
733  }
734  break;
735  }
736  case kMAX: {
737  CHECK(agg_expr && agg_expr->get_arg());
738  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
739  if (arg_ti.is_string() || arg_ti.is_array()) {
740  break;
741  }
742  auto expr_range_info =
743  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
744  // NULL sentinel and init value for kMAX are identical, which results in
745  // ambiguity in detecting empty keys in presence of nulls.
746  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
747  expr_range_info.hasNulls()) {
748  break;
749  }
750  auto init_min = get_agg_initial_val(agg_info.agg_kind,
751  chosen_type,
752  is_group_by || float_argument_input,
753  float_argument_input ? sizeof(float) : 8);
754  switch (expr_range_info.getType()) {
755  case ExpressionRangeType::Float:
756  case ExpressionRangeType::Double: {
757  auto double_min =
758  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
759  if (expr_range_info.getFpMin() > double_min) {
760  found = true;
761  }
762  break;
763  }
764  case ExpressionRangeType::Integer:
765  if (expr_range_info.getIntMin() > init_min) {
766  found = true;
767  }
768  break;
769  default:
770  break;
771  }
772  break;
773  }
774  default:
775  keyless = false;
776  break;
777  }
778  }
779  if (!keyless) {
780  break;
781  }
782  if (!found) {
783  ++index;
784  }
785  }
786 
787  // shouldn't use keyless for projection only
788  return {
789  keyless && found,
790  index,
791  };
792 }
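// Editor's sketch (hypothetical query): for
//   SELECT dept, COUNT(*), SUM(salary) FROM emp GROUP BY dept;
// the projected dept column is not an aggregate, so index advances to 1; COUNT(*) has
// no nullable argument and qualifies, so the function returns {keyless = true, 1}:
// slot 1 doubles as the "entry is initialized" marker and no separate group key needs
// to be materialized for the single-column perfect-hash layout.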
793 
794 bool GroupByAndAggregate::gpuCanHandleOrderEntries(
795  const std::list<Analyzer::OrderEntry>& order_entries) {
796  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
797  return false;
798  }
799  for (const auto& order_entry : order_entries) {
800  CHECK_GE(order_entry.tle_no, 1);
801  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
802  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
803  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
804  return false;
805  }
806  // TODO(alex): relax the restrictions
807  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
808  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
809  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
810  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
811  return false;
812  }
813  if (agg_expr->get_arg()) {
814  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
815  if (arg_ti.is_fp()) {
816  return false;
817  }
818  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
819  // TODO(adb): QMD not actually initialized here?
820  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
821  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
822  expr_range_info.has_nulls) &&
823  order_entry.is_desc == order_entry.nulls_first) {
824  return false;
825  }
826  }
827  const auto& target_ti = target_expr->get_type_info();
828  CHECK(!target_ti.is_array());
829  if (!target_ti.is_integer()) {
830  return false;
831  }
832  }
833  return true;
834 }
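// Editor's sketch: under the checks above, ORDER BY COUNT(*) DESC LIMIT 10 on an
// integer count qualifies for the GPU top-k path, while ordering on more than one
// entry, on AVG/MIN/MAX/APPROX_COUNT_DISTINCT, on a float argument, or on a
// non-integer target falls back to the CPU sort.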
835 
836 GroupByAndAggregate::DiamondCodegen::DiamondCodegen(
837  llvm::Value* cond,
838  Executor* executor,
839  const bool chain_to_next,
840  const std::string& label_prefix,
841  DiamondCodegen* parent,
842  const bool share_false_edge_with_parent)
843  : executor_(executor), chain_to_next_(chain_to_next), parent_(parent) {
844  if (parent_) {
846  }
847  cond_true_ = llvm::BasicBlock::Create(LL_CONTEXT, label_prefix + "_true", ROW_FUNC);
848  if (share_false_edge_with_parent) {
849  CHECK(parent);
850  orig_cond_false_ = cond_false_ = parent_->cond_false_;
851  } else {
852  cond_false_ = orig_cond_false_ =
853  llvm::BasicBlock::Create(LL_CONTEXT, label_prefix + "_false", ROW_FUNC);
854  }
855 
856  LL_BUILDER.CreateCondBr(cond, cond_true_, cond_false_);
857  LL_BUILDER.SetInsertPoint(cond_true_);
858 }
859 
860 void GroupByAndAggregate::DiamondCodegen::setChainToNext() {
861  CHECK(!parent_);
862  chain_to_next_ = true;
863 }
864 
865 void GroupByAndAggregate::DiamondCodegen::setFalseTarget(llvm::BasicBlock* cond_false) {
866  CHECK(!parent_ || orig_cond_false_ != parent_->cond_false_);
867  cond_false_ = cond_false;
868 }
869 
870 GroupByAndAggregate::DiamondCodegen::~DiamondCodegen() {
871  if (parent_ && orig_cond_false_ != parent_->cond_false_) {
872  LL_BUILDER.CreateBr(parent_->cond_false_);
873  } else if (chain_to_next_) {
874  LL_BUILDER.CreateBr(cond_false_);
875  }
876  if (!parent_ || (!chain_to_next_ && cond_false_ != parent_->cond_false_)) {
877  LL_BUILDER.SetInsertPoint(orig_cond_false_);
878  }
879 }
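// Editor's sketch of the intended RAII usage (names illustrative): the constructor
// emits the conditional branch and positions the builder in the "_true" block, and
// the destructor wires up the false edge, so a guarded region is simply a scope:
//
//   {
//     DiamondCodegen guard(cond_lv, executor_, false, "bounds_check", nullptr, false);
//     // ... IR emitted here executes only when cond_lv is true ...
//   }  // destructor branches to / repositions the builder at the false block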
880 
881 bool GroupByAndAggregate::codegen(llvm::Value* filter_result,
882  llvm::BasicBlock* sc_false,
883  const QueryMemoryDescriptor& query_mem_desc,
884  const CompilationOptions& co,
885  const GpuSharedMemoryContext& gpu_smem_context) {
886  CHECK(filter_result);
887 
888  bool can_return_error = false;
889  llvm::BasicBlock* filter_false{nullptr};
890 
891  {
892  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
893 
894  if (executor_->isArchMaxwell(co.device_type)) {
896  }
897  DiamondCodegen filter_cfg(filter_result,
898  executor_,
899  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
900  "filter",
901  nullptr,
902  false);
903  filter_false = filter_cfg.cond_false_;
904 
905  if (is_group_by) {
906  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
907  !query_mem_desc.useStreamingTopN()) {
908  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
909  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
910  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
911  llvm::Value* old_total_matched_val{nullptr};
912  if (query_mem_desc.threadsShareMemory()) {
913  old_total_matched_val =
914  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
915  total_matched_ptr,
916  LL_INT(int32_t(1)),
917  llvm::AtomicOrdering::Monotonic);
918  } else {
919  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
920  LL_BUILDER.CreateStore(
921  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
922  total_matched_ptr);
923  }
924  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
925  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
926  }
927 
928  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
929  if (query_mem_desc.usesGetGroupValueFast() ||
930  query_mem_desc.getQueryDescriptionType() ==
931  QueryDescriptionType::GroupByPerfectHash) {
932  if (query_mem_desc.getGroupbyColCount() > 1) {
933  filter_cfg.setChainToNext();
934  }
935  // Don't generate null checks if the group slot is guaranteed to be non-null,
936  // as is the case for the get_group_value_fast* family.
937  can_return_error = codegenAggCalls(
938  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
939  } else {
940  {
941  llvm::Value* nullcheck_cond{nullptr};
942  if (query_mem_desc.didOutputColumnar()) {
943  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
944  LL_INT(int32_t(0)));
945  } else {
946  nullcheck_cond = LL_BUILDER.CreateICmpNE(
947  std::get<0>(agg_out_ptr_w_idx),
948  llvm::ConstantPointerNull::get(
949  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
950  }
951  DiamondCodegen nullcheck_cfg(
952  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
953  codegenAggCalls(
954  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
955  }
956  can_return_error = true;
957  if (query_mem_desc.getQueryDescriptionType() ==
958  QueryDescriptionType::Projection &&
959  query_mem_desc.useStreamingTopN()) {
960  // Ignore rejection on pushing current row to top-K heap.
961  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
962  } else {
963  CodeGenerator code_generator(executor_);
964  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
965  // TODO(alex): remove the trunc once pos is converted to 32 bits
966  code_generator.posArg(nullptr),
967  get_int_type(32, LL_CONTEXT))));
968  }
969  }
970  } else {
971  if (ra_exe_unit_.estimator) {
972  std::stack<llvm::BasicBlock*> array_loops;
973  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
974  } else {
975  auto arg_it = ROW_FUNC->arg_begin();
976  std::vector<llvm::Value*> agg_out_vec;
977  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
978  agg_out_vec.push_back(&*arg_it++);
979  }
980  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
981  agg_out_vec,
982  query_mem_desc,
983  co,
984  gpu_smem_context,
985  filter_cfg);
986  }
987  }
988  }
989 
990  if (ra_exe_unit_.join_quals.empty()) {
991  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
992  } else if (sc_false) {
993  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
994  LL_BUILDER.SetInsertPoint(sc_false);
995  LL_BUILDER.CreateBr(filter_false);
996  LL_BUILDER.SetInsertPoint(saved_insert_block);
997  }
998 
999  return can_return_error;
1000 }
1001 
1002 llvm::Value* GroupByAndAggregate::codegenOutputSlot(
1003  llvm::Value* groups_buffer,
1004  const QueryMemoryDescriptor& query_mem_desc,
1005  const CompilationOptions& co,
1006  DiamondCodegen& diamond_codegen) {
1008  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1009  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1010  CHECK(!group_expr);
1011  if (!query_mem_desc.didOutputColumnar()) {
1012  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1013  }
1014  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1015  ? 0
1016  : query_mem_desc.getRowSize() / sizeof(int64_t);
1017  CodeGenerator code_generator(executor_);
1018  if (query_mem_desc.useStreamingTopN()) {
1019  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1020  CHECK_GE(only_order_entry.tle_no, int(1));
1021  const size_t target_idx = only_order_entry.tle_no - 1;
1022  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1023  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1024  const auto chosen_bytes =
1025  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1026  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1027  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1028  const auto n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1029  std::string fname = "get_bin_from_k_heap";
1030  const auto& oe_ti = order_entry_expr->get_type_info();
1031  llvm::Value* null_key_lv = nullptr;
1032  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1033  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1034  switch (bit_width) {
1035  case 32:
1036  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1037  break;
1038  case 64:
1039  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1040  break;
1041  default:
1042  CHECK(false);
1043  }
1044  fname += "_int" + std::to_string(bit_width) + "_t";
1045  } else {
1046  CHECK(oe_ti.is_fp());
1047  if (order_entry_lv->getType()->isDoubleTy()) {
1048  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1049  } else {
1050  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1051  }
1052  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1053  }
1054  const auto key_slot_idx =
1056  return emitCall(
1057  fname,
1058  {groups_buffer,
1059  LL_INT(n),
1060  LL_INT(row_size_quad),
1061  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1062  LL_BOOL(only_order_entry.is_desc),
1063  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1064  LL_BOOL(only_order_entry.nulls_first),
1065  null_key_lv,
1066  order_entry_lv});
1067  } else {
1068  llvm::Value* output_buffer_entry_count_lv{nullptr};
1070  output_buffer_entry_count_lv =
1071  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1072  CHECK(output_buffer_entry_count_lv);
1073  }
1074  const auto group_expr_lv =
1075  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1076  std::vector<llvm::Value*> args{
1077  groups_buffer,
1078  output_buffer_entry_count_lv
1079  ? output_buffer_entry_count_lv
1080  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1081  group_expr_lv,
1082  code_generator.posArg(nullptr)};
1083  if (query_mem_desc.didOutputColumnar()) {
1084  const auto columnar_output_offset =
1085  emitCall("get_columnar_scan_output_offset", args);
1086  return columnar_output_offset;
1087  }
1088  args.push_back(LL_INT(row_size_quad));
1089  return emitCall("get_scan_output_slot", args);
1090  }
1091 }
1092 
1093 std::tuple<llvm::Value*, llvm::Value*> GroupByAndAggregate::codegenGroupBy(
1094  const QueryMemoryDescriptor& query_mem_desc,
1095  const CompilationOptions& co,
1096  DiamondCodegen& diamond_codegen) {
1097  auto arg_it = ROW_FUNC->arg_begin();
1098  auto groups_buffer = arg_it++;
1099 
1100  std::stack<llvm::BasicBlock*> array_loops;
1101 
1102  // TODO(Saman): move this logic outside of this function.
1103  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1104  if (query_mem_desc.didOutputColumnar()) {
1105  return std::make_tuple(
1106  &*groups_buffer,
1107  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1108  } else {
1109  return std::make_tuple(
1110  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1111  nullptr);
1112  }
1113  }
1114 
1115  CHECK(query_mem_desc.getQueryDescriptionType() ==
1116  QueryDescriptionType::GroupByBaselineHash ||
1117  query_mem_desc.getQueryDescriptionType() ==
1118  QueryDescriptionType::GroupByPerfectHash);
1119 
1120  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1121  ? 0
1122  : query_mem_desc.getRowSize() / sizeof(int64_t);
1123 
1124  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1125  ? sizeof(int64_t)
1126  : query_mem_desc.getEffectiveKeyWidth();
1127  // for multi-column group by
1128  llvm::Value* group_key = nullptr;
1129  llvm::Value* key_size_lv = nullptr;
1130 
1131  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1132  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1133  if (query_mem_desc.getQueryDescriptionType() ==
1134  QueryDescriptionType::GroupByPerfectHash) {
1135  group_key =
1136  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1137  } else if (query_mem_desc.getQueryDescriptionType() ==
1138  QueryDescriptionType::GroupByBaselineHash) {
1139  group_key =
1140  col_width_size == sizeof(int32_t)
1141  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1142  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1143  }
1144  CHECK(group_key);
1145  CHECK(key_size_lv);
1146  }
1147 
1148  int32_t subkey_idx = 0;
1149  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1150  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1151  const auto col_range_info = getExprRangeInfo(group_expr.get());
1152  const auto translated_null_value = static_cast<int64_t>(
1153  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1154  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1155  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1156  : checked_int64_t(col_range_info.max) +
1157  (col_range_info.bucket ? col_range_info.bucket : 1));
1158 
1159  const bool col_has_nulls =
1160  query_mem_desc.getQueryDescriptionType() ==
1161  QueryDescriptionType::GroupByPerfectHash
1162  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1163  ? query_mem_desc.hasNulls()
1164  : col_range_info.has_nulls)
1165  : false;
1166 
1167  const auto group_expr_lvs =
1168  executor_->groupByColumnCodegen(group_expr.get(),
1169  col_width_size,
1170  co,
1171  col_has_nulls,
1172  translated_null_value,
1173  diamond_codegen,
1174  array_loops,
1175  query_mem_desc.threadsShareMemory());
1176  const auto group_expr_lv = group_expr_lvs.translated_value;
1177  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1178  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1179  return codegenSingleColumnPerfectHash(query_mem_desc,
1180  co,
1181  &*groups_buffer,
1182  group_expr_lv,
1183  group_expr_lvs.original_value,
1184  row_size_quad);
1185  } else {
1186  // store the sub-key to the buffer
1187  LL_BUILDER.CreateStore(group_expr_lv,
1188  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1189  }
1190  }
1191  if (query_mem_desc.getQueryDescriptionType() ==
1192  QueryDescriptionType::GroupByPerfectHash) {
1193  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1194  return codegenMultiColumnPerfectHash(
1195  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1196  } else if (query_mem_desc.getQueryDescriptionType() ==
1197  QueryDescriptionType::GroupByBaselineHash) {
1198  return codegenMultiColumnBaselineHash(co,
1199  &*groups_buffer,
1200  group_key,
1201  key_size_lv,
1202  query_mem_desc,
1203  col_width_size,
1204  row_size_quad);
1205  }
1206  CHECK(false);
1207  return std::make_tuple(nullptr, nullptr);
1208 }
1209 
1210 std::tuple<llvm::Value*, llvm::Value*>
1211 GroupByAndAggregate::codegenSingleColumnPerfectHash(
1212  const QueryMemoryDescriptor& query_mem_desc,
1213  const CompilationOptions& co,
1214  llvm::Value* groups_buffer,
1215  llvm::Value* group_expr_lv_translated,
1216  llvm::Value* group_expr_lv_original,
1217  const int32_t row_size_quad) {
1218  CHECK(query_mem_desc.usesGetGroupValueFast());
1219  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1220  ? "get_columnar_group_bin_offset"
1221  : "get_group_value_fast"};
1222  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1223  get_group_fn_name += "_keyless";
1224  }
1225  if (query_mem_desc.interleavedBins(co.device_type)) {
1226  CHECK(!query_mem_desc.didOutputColumnar());
1227  CHECK(query_mem_desc.hasKeylessHash());
1228  get_group_fn_name += "_semiprivate";
1229  }
1230  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1231  &*group_expr_lv_translated};
1232  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1233  query_mem_desc.mustUseBaselineSort()) {
1234  get_group_fn_name += "_with_original_key";
1235  get_group_fn_args.push_back(group_expr_lv_original);
1236  }
1237  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1238  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1239  if (!query_mem_desc.hasKeylessHash()) {
1240  if (!query_mem_desc.didOutputColumnar()) {
1241  get_group_fn_args.push_back(LL_INT(row_size_quad));
1242  }
1243  } else {
1244  if (!query_mem_desc.didOutputColumnar()) {
1245  get_group_fn_args.push_back(LL_INT(row_size_quad));
1246  }
1247  if (query_mem_desc.interleavedBins(co.device_type)) {
1248  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1249  get_group_fn_args.push_back(warp_idx);
1250  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1251  }
1252  }
1253  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1254  return std::make_tuple(&*groups_buffer,
1255  emitCall(get_group_fn_name, get_group_fn_args));
1256  }
1257  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1258 }
1259 
1260 std::tuple<llvm::Value*, llvm::Value*> GroupByAndAggregate::codegenMultiColumnPerfectHash(
1261  llvm::Value* groups_buffer,
1262  llvm::Value* group_key,
1263  llvm::Value* key_size_lv,
1264  const QueryMemoryDescriptor& query_mem_desc,
1265  const int32_t row_size_quad) {
1266  CHECK(query_mem_desc.getQueryDescriptionType() ==
1267  QueryDescriptionType::GroupByPerfectHash);
1268  // compute the index (perfect hash)
1269  auto perfect_hash_func = codegenPerfectHashFunction();
1270  auto hash_lv =
1271  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1272 
1273  if (query_mem_desc.didOutputColumnar()) {
1274  if (!query_mem_desc.hasKeylessHash()) {
1275  const std::string set_matching_func_name{
1276  "set_matching_group_value_perfect_hash_columnar"};
1277  const std::vector<llvm::Value*> set_matching_func_arg{
1278  groups_buffer,
1279  hash_lv,
1280  group_key,
1281  key_size_lv,
1282  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1283  query_mem_desc.getEntryCount())};
1284  emitCall(set_matching_func_name, set_matching_func_arg);
1285  }
1286  return std::make_tuple(groups_buffer, hash_lv);
1287  } else {
1288  if (query_mem_desc.hasKeylessHash()) {
1289  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1290  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1291  nullptr);
1292  } else {
1293  return std::make_tuple(
1294  emitCall(
1295  "get_matching_group_value_perfect_hash",
1296  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1297  nullptr);
1298  }
1299  }
1300 }
1301 
1302 std::tuple<llvm::Value*, llvm::Value*>
1303 GroupByAndAggregate::codegenMultiColumnBaselineHash(
1304  const CompilationOptions& co,
1305  llvm::Value* groups_buffer,
1306  llvm::Value* group_key,
1307  llvm::Value* key_size_lv,
1308  const QueryMemoryDescriptor& query_mem_desc,
1309  const size_t key_width,
1310  const int32_t row_size_quad) {
1311  auto arg_it = ROW_FUNC->arg_begin(); // groups_buffer
1312  ++arg_it; // current match count
1313  ++arg_it; // total match count
1314  ++arg_it; // old match count
1315  ++arg_it; // output buffer slots count
1316  ++arg_it; // aggregate init values
1317  CHECK(arg_it->getName() == "agg_init_val");
1318  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1319  CHECK(key_width == sizeof(int32_t));
1320  group_key =
1321  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1322  }
1323  std::vector<llvm::Value*> func_args{
1324  groups_buffer,
1325  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1326  &*group_key,
1327  &*key_size_lv,
1328  LL_INT(static_cast<int32_t>(key_width))};
1329  std::string func_name{"get_group_value"};
1330  if (query_mem_desc.didOutputColumnar()) {
1331  func_name += "_columnar_slot";
1332  } else {
1333  func_args.push_back(LL_INT(row_size_quad));
1334  func_args.push_back(&*arg_it);
1335  }
1336  if (co.with_dynamic_watchdog) {
1337  func_name += "_with_watchdog";
1338  }
1339  if (query_mem_desc.didOutputColumnar()) {
1340  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1341  } else {
1342  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1343  }
1344 }
1345 
1346 llvm::Function* GroupByAndAggregate::codegenPerfectHashFunction() {
1347  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1348  auto ft = llvm::FunctionType::get(
1349  get_int_type(32, LL_CONTEXT),
1350  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1351  false);
1352  auto key_hash_func = llvm::Function::Create(ft,
1353  llvm::Function::ExternalLinkage,
1354  "perfect_key_hash",
1355  executor_->cgen_state_->module_);
1356  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1357  mark_function_always_inline(key_hash_func);
1358  auto& key_buff_arg = *key_hash_func->args().begin();
1359  llvm::Value* key_buff_lv = &key_buff_arg;
1360  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1361  llvm::IRBuilder<> key_hash_func_builder(bb);
1362  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1363  std::vector<int64_t> cardinalities;
1364  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1365  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1366  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1367  cardinalities.push_back(getBucketedCardinality(col_range_info));
1368  }
1369  size_t dim_idx = 0;
1370  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1371  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1372  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1373  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1374  auto crt_term_lv =
1375  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1376  if (col_range_info.bucket) {
1377  crt_term_lv =
1378  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1379  }
1380  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1381  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1382  LL_INT(cardinalities[prev_dim_idx]));
1383  }
1384  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1385  ++dim_idx;
1386  }
1387  key_hash_func_builder.CreateRet(
1388  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1389  return key_hash_func;
1390 }
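// Editor's worked example of the mixed-radix hash emitted above: with two group-by
// columns whose bucketed cardinalities are c0 = 10 (range [0, 9]) and c1 = 5
// (range [0, 4]), a key (k0, k1) maps to
//   hash = (k0 - 0) + (k1 - 0) * c0
// so the key (3, 2) lands at 3 + 2 * 10 = 23 in a dense table of 10 * 5 = 50 entries.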
1391 
1392 llvm::Value* GroupByAndAggregate::convertNullIfAny(const SQLTypeInfo& arg_type,
1393  const TargetInfo& agg_info,
1394  llvm::Value* target) {
1395  const auto& agg_type = agg_info.sql_type;
1396  const size_t chosen_bytes = agg_type.get_size();
1397 
1398  bool need_conversion{false};
1399  llvm::Value* arg_null{nullptr};
1400  llvm::Value* agg_null{nullptr};
1401  llvm::Value* target_to_cast{target};
1402  if (arg_type.is_fp()) {
1403  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1404  if (agg_type.is_fp()) {
1405  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1406  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1407  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1408  need_conversion = true;
1409  }
1410  } else {
1411  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1412  return target;
1413  }
1414  } else {
1415  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1416  if (agg_type.is_fp()) {
1417  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1418  need_conversion = true;
1419  target_to_cast = executor_->castToFP(target);
1420  } else {
1421  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1422  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1423  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1424  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1425  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1426  need_conversion = true;
1427  }
1428  }
1429  }
1430  if (need_conversion) {
1431  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1432  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1433  return LL_BUILDER.CreateSelect(
1434  cmp,
1435  agg_null,
1436  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1437  } else {
1438  return target;
1439  }
1440 }
1441 
1442 llvm::Value* GroupByAndAggregate::codegenWindowRowPointer(
1443  const Analyzer::WindowFunction* window_func,
1444  const QueryMemoryDescriptor& query_mem_desc,
1445  const CompilationOptions& co,
1446  DiamondCodegen& diamond_codegen) {
1447  const auto window_func_context =
1449  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1450  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1451  ? 0
1452  : query_mem_desc.getRowSize() / sizeof(int64_t);
1453  auto arg_it = ROW_FUNC->arg_begin();
1454  auto groups_buffer = arg_it++;
1455  CodeGenerator code_generator(executor_);
1456  if (!window_func_context->getRowNumber()) {
1457  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1458  window_func_context->setRowNumber(emitCall(
1459  "row_number_window_func",
1460  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1461  code_generator.posArg(nullptr)}));
1462  }
1463  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1464  get_int_type(32, LL_CONTEXT));
1465  llvm::Value* entry_count_lv =
1466  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1467  std::vector<llvm::Value*> args{
1468  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1469  if (query_mem_desc.didOutputColumnar()) {
1470  const auto columnar_output_offset =
1471  emitCall("get_columnar_scan_output_offset", args);
1472  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1473  }
1474  args.push_back(LL_INT(row_size_quad));
1475  return emitCall("get_scan_output_slot", args);
1476  }
1477  auto arg_it = ROW_FUNC->arg_begin();
1478  auto groups_buffer = arg_it++;
1479  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1480 }
1481 
1482 bool GroupByAndAggregate::codegenAggCalls(
1483  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
1484  const std::vector<llvm::Value*>& agg_out_vec,
1485  const QueryMemoryDescriptor& query_mem_desc,
1486  const CompilationOptions& co,
1487  const GpuSharedMemoryContext& gpu_smem_context,
1488  DiamondCodegen& diamond_codegen) {
1489  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1490  // TODO(alex): unify the two cases, the output for non-group by queries
1491  // should be a contiguous buffer
1492  const bool is_group_by{std::get<0>(agg_out_ptr_w_idx)};
1493  bool can_return_error = false;
1494  if (is_group_by) {
1495  CHECK(agg_out_vec.empty());
1496  } else {
1497  CHECK(!agg_out_vec.empty());
1498  }
1499 
1500  // output buffer is cast into a byte stream to be able to handle data elements of
1501  // different sizes (only used when actual column width sizes are used)
1502  llvm::Value* output_buffer_byte_stream{nullptr};
1503  llvm::Value* out_row_idx{nullptr};
1504  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1505  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1506  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1507  std::get<0>(agg_out_ptr_w_idx),
1508  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1509  output_buffer_byte_stream->setName("out_buff_b_stream");
1510  CHECK(std::get<1>(agg_out_ptr_w_idx));
1511  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1512  llvm::Type::getInt64Ty(LL_CONTEXT));
1513  out_row_idx->setName("out_row_idx");
1514  }
1515 
1516  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1517  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1518  ++target_idx) {
1519  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1520  CHECK(target_expr);
1521 
1522  target_builder(target_expr, executor_, co);
1523  }
1524 
1525  target_builder.codegen(this,
1526  executor_,
1527  query_mem_desc,
1528  co,
1529  gpu_smem_context,
1530  agg_out_ptr_w_idx,
1531  agg_out_vec,
1532  output_buffer_byte_stream,
1533  out_row_idx,
1534  diamond_codegen);
1535 
1536  for (auto target_expr : ra_exe_unit_.target_exprs) {
1537  CHECK(target_expr);
1538  executor_->plan_state_->isLazyFetchColumn(target_expr);
1539  }
1540 
1541  return can_return_error;
1542 }
1543 
1547 llvm::Value* GroupByAndAggregate::codegenAggColumnPtr(
1548  llvm::Value* output_buffer_byte_stream,
1549  llvm::Value* out_row_idx,
1550  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
1551  const QueryMemoryDescriptor& query_mem_desc,
1552  const size_t chosen_bytes,
1553  const size_t agg_out_off,
1554  const size_t target_idx) {
1555  llvm::Value* agg_col_ptr{nullptr};
1556  if (query_mem_desc.didOutputColumnar()) {
1557  // TODO(Saman): remove the second columnar branch, and support all query description
1558  // types through the first branch. Then, input arguments should also be cleaned up
1559  if (!g_cluster &&
1560  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1561  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1562  chosen_bytes == 8);
1563  CHECK(output_buffer_byte_stream);
1564  CHECK(out_row_idx);
1565  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1566  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1567  auto out_per_col_byte_idx =
1568  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1569  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1570  LL_INT(static_cast<int64_t>(col_off)));
1571  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1572  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1573  agg_col_ptr = LL_BUILDER.CreateBitCast(
1574  output_ptr,
1575  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1576  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1577  } else {
1578  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1579  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1580  col_off /= chosen_bytes;
1581  CHECK(std::get<1>(agg_out_ptr_w_idx));
1582  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1583  agg_col_ptr = LL_BUILDER.CreateGEP(
1584  LL_BUILDER.CreateBitCast(
1585  std::get<0>(agg_out_ptr_w_idx),
1586  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1587  offset);
1588  }
1589  } else {
1590  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1591  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1592  col_off /= chosen_bytes;
1593  agg_col_ptr = LL_BUILDER.CreateGEP(
1594  LL_BUILDER.CreateBitCast(
1595  std::get<0>(agg_out_ptr_w_idx),
1596  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1597  LL_INT(col_off));
1598  }
1599  CHECK(agg_col_ptr);
1600  return agg_col_ptr;
1601 }
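// Editor's worked example for the columnar projection branch above: with 4-byte
// slots, a column starting at col_off = 4096 and out_row_idx = 10, the emitted IR
// computes (10 << 2) + 4096 = 4136 as the byte offset into the output byte stream
// and bitcasts the resulting i8* to an i32* pointing at that target's slot.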
1602 
1603 void GroupByAndAggregate::codegenEstimator(
1604  std::stack<llvm::BasicBlock*>& array_loops,
1605  GroupByAndAggregate::DiamondCodegen& diamond_codegen,
1606  const QueryMemoryDescriptor& query_mem_desc,
1607  const CompilationOptions& co) {
1608  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1609  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1610  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1611  estimator_comp_count_lv);
1612  int32_t subkey_idx = 0;
1613  for (const auto& estimator_arg_comp : estimator_arg) {
1614  const auto estimator_arg_comp_lvs =
1615  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1616  query_mem_desc.getEffectiveKeyWidth(),
1617  co,
1618  false,
1619  0,
1620  diamond_codegen,
1621  array_loops,
1622  true);
1623  CHECK(!estimator_arg_comp_lvs.original_value);
1624  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1625  // store the sub-key to the buffer
1626  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1627  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1628  }
1629  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1630  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1631  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1632  const auto estimator_comp_bytes_lv =
1633  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1634  const auto bitmap_size_lv =
1635  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1636  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1637  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1638 }
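// Illustrative sketch (not part of the original file): a host-side analogue of what the
// generated code above does -- pack the estimator key components into a contiguous
// int64 buffer and hand the raw bytes plus the buffer to the estimator. The real entry
// point comes from ra_exe_unit_.estimator->getRuntimeFunctionName(); the simple
// linear-counting update below is only a hypothetical stand-in for it.
#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

static void estimator_update(std::vector<uint8_t>& bitmap,
                             const uint8_t* key_bytes,
                             size_t key_size) {
  // Hash the packed key and set one bit in the estimator buffer.
  const size_t h = std::hash<std::string>{}(
      std::string(reinterpret_cast<const char*>(key_bytes), key_size));
  const size_t bit = h % (bitmap.size() * 8);
  bitmap[bit / 8] |= static_cast<uint8_t>(1u << (bit % 8));
}

int main() {
  std::vector<uint8_t> bitmap(1024, 0);        // the estimator buffer
  const std::vector<int64_t> sub_keys{7, 42};  // one value per group-by component
  std::vector<uint8_t> key_bytes(sub_keys.size() * sizeof(int64_t));
  std::memcpy(key_bytes.data(), sub_keys.data(), key_bytes.size());
  estimator_update(bitmap, key_bytes.data(), key_bytes.size());
  std::cout << "updated " << bitmap.size() << "-byte estimator buffer\n";
  return 0;
}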
1639 
1640 extern "C" void agg_count_distinct(int64_t* agg, const int64_t val) {
1641  reinterpret_cast<std::set<int64_t>*>(*agg)->insert(val);
1642 }
1643 
1644 extern "C" void agg_count_distinct_skip_val(int64_t* agg,
1645  const int64_t val,
1646  const int64_t skip_val) {
1647  if (val != skip_val) {
1648  agg_count_distinct(agg, val);
1649  }
1650 }
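// Illustrative sketch (not part of the original file), reusing the two helpers defined
// directly above: the int64_t aggregate slot stores the address of a std::set<int64_t>,
// agg_count_distinct() inserts into that set, and the distinct count is the set's size.
#include <cstdint>
#include <iostream>
#include <set>

int main() {
  std::set<int64_t> distinct_vals;
  int64_t agg_slot = reinterpret_cast<int64_t>(&distinct_vals);

  const int64_t skip_val = -1;  // e.g. a null sentinel
  const int64_t vals[] = {3, 5, 3, -1, 8};
  for (const int64_t v : vals) {
    agg_count_distinct_skip_val(&agg_slot, v, skip_val);
  }
  std::cout << distinct_vals.size() << "\n";  // 3: {3, 5, 8}
  return 0;
}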
1651 
1652 void GroupByAndAggregate::codegenCountDistinct(
1653  const size_t target_idx,
1654  const Analyzer::Expr* target_expr,
1655  std::vector<llvm::Value*>& agg_args,
1656  const QueryMemoryDescriptor& query_mem_desc,
1657  const ExecutorDeviceType device_type) {
1658  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1659  const auto& arg_ti =
1660  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1661  if (arg_ti.is_fp()) {
1662  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1663  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1664  }
1665  const auto& count_distinct_descriptor =
1666  query_mem_desc.getCountDistinctDescriptor(target_idx);
1667  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1668  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1669  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1670  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1671  if (device_type == ExecutorDeviceType::GPU) {
1672  const auto base_dev_addr = getAdditionalLiteral(-1);
1673  const auto base_host_addr = getAdditionalLiteral(-2);
1674  agg_args.push_back(base_dev_addr);
1675  agg_args.push_back(base_host_addr);
1676  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1677  } else {
1678  emitCall("agg_approximate_count_distinct", agg_args);
1679  }
1680  return;
1681  }
1682  std::string agg_fname{"agg_count_distinct"};
1683  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1684  agg_fname += "_bitmap";
1685  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1686  }
1687  if (agg_info.skip_null_val) {
1688  auto null_lv = executor_->cgen_state_->castToTypeIn(
1689  (arg_ti.is_fp()
1690  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1691  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1692  64);
1693  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1694  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1695  agg_fname += "_skip_val";
1696  agg_args.push_back(null_lv);
1697  }
1698  if (device_type == ExecutorDeviceType::GPU) {
1699  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1700  agg_fname += "_gpu";
1701  const auto base_dev_addr = getAdditionalLiteral(-1);
1702  const auto base_host_addr = getAdditionalLiteral(-2);
1703  agg_args.push_back(base_dev_addr);
1704  agg_args.push_back(base_host_addr);
1705  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1706  CHECK_EQ(size_t(0),
1707  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1708  count_distinct_descriptor.sub_bitmap_count);
1709  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1710  count_distinct_descriptor.sub_bitmap_count)));
1711  }
1712  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1713  emitCall(agg_fname, agg_args);
1714  } else {
1715  executor_->cgen_state_->emitExternalCall(
1716  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1717  }
1718 }
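// Illustrative sketch (not part of the original file): what the bitmap flavor of count
// distinct does at runtime. The generated call passes min_val so each value maps to bit
// (val - min_val); the final distinct count is the number of set bits. The real runtime
// functions (agg_count_distinct_bitmap and its _skip_val/_gpu variants) are not shown
// here and also handle null skipping and GPU sub-bitmaps; this is a simplified analogue.
#include <cstdint>
#include <iostream>
#include <vector>

static void bitmap_count_distinct_insert(std::vector<uint8_t>& bitmap,
                                         int64_t val,
                                         int64_t min_val) {
  const uint64_t bit = static_cast<uint64_t>(val - min_val);
  bitmap[bit / 8] |= static_cast<uint8_t>(1u << (bit % 8));
}

static size_t bitmap_count_distinct_result(const std::vector<uint8_t>& bitmap) {
  size_t count = 0;
  for (const uint8_t byte : bitmap) {
    count += __builtin_popcount(byte);
  }
  return count;
}

int main() {
  const int64_t min_val = 100;
  std::vector<uint8_t> bitmap((1000 + 7) / 8, 0);  // covers the value range [100, 1100)
  for (const int64_t v : {100, 101, 101, 250, 999}) {
    bitmap_count_distinct_insert(bitmap, v, min_val);
  }
  std::cout << bitmap_count_distinct_result(bitmap) << "\n";  // 4
  return 0;
}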
1719 
1720 llvm::Value* GroupByAndAggregate::getAdditionalLiteral(const int32_t off) {
1721  CHECK_LT(off, 0);
1722  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1723  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1724  LL_BUILDER.CreateBitCast(lit_buff_lv,
1725  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1726  LL_INT(off)));
1727 }
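// Illustrative sketch (not part of the original file): the generated IR above bitcasts
// the "literals" argument to an i64 pointer and loads at a negative index, i.e. the
// additional literals (such as the count distinct bitmap device and host base addresses
// read at offsets -1 and -2 earlier in this file) sit immediately before the position
// the pointer refers to. A direct C++ analogue:
#include <cstdint>
#include <iostream>

static int64_t get_additional_literal(const int8_t* literals, int32_t off) {
  // off is expected to be negative, mirroring CHECK_LT(off, 0) above.
  return reinterpret_cast<const int64_t*>(literals)[off];
}

int main() {
  // Two 8-byte "additional literals" followed by the regular literal area.
  int64_t buffer[4] = {111 /* off -2 */, 222 /* off -1 */, 0, 0};
  const int8_t* literals = reinterpret_cast<const int8_t*>(&buffer[2]);
  std::cout << get_additional_literal(literals, -1) << " "    // 222
            << get_additional_literal(literals, -2) << "\n";  // 111
  return 0;
}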
1728 
1729 std::vector<llvm::Value*> GroupByAndAggregate::codegenAggArg(
1730  const Analyzer::Expr* target_expr,
1731  const CompilationOptions& co) {
1732  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1733  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1734  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1735 
1736  // TODO(alex): handle arrays uniformly?
1737  CodeGenerator code_generator(executor_);
1738  if (target_expr) {
1739  const auto& target_ti = target_expr->get_type_info();
1740  if (target_ti.is_array() && !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1741  const auto target_lvs =
1742  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1743  : code_generator.codegen(
1744  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1745  if (!func_expr && !arr_expr) {
1746  // If we reach here, the chunk transport code was generated from a source
1747  // other than an ARRAY[] expression.
1748  CHECK_EQ(size_t(1), target_lvs.size());
1749  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1750  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1751  const auto i8p_ty =
1752  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1753  const auto& elem_ti = target_ti.get_elem_type();
1754  return {
1755  executor_->cgen_state_->emitExternalCall(
1756  "array_buff",
1757  i8p_ty,
1758  {target_lvs.front(), code_generator.posArg(target_expr)}),
1759  executor_->cgen_state_->emitExternalCall(
1760  "array_size",
1761  i32_ty,
1762  {target_lvs.front(),
1763  code_generator.posArg(target_expr),
1764  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1765  } else {
1766  if (agg_expr) {
1767  throw std::runtime_error(
1768  "Using array[] operator as argument to an aggregate operator is not "
1769  "supported");
1770  }
1771  CHECK(func_expr || arr_expr);
1772  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1773  CHECK_EQ(size_t(1), target_lvs.size());
1774 
1775  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1776 
1777  // const auto target_lv_type = target_lvs[0]->getType();
1778  // CHECK(target_lv_type->isStructTy());
1779  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1780  const auto i8p_ty = llvm::PointerType::get(
1781  get_int_type(8, executor_->cgen_state_->context_), 0);
1782  const auto ptr = LL_BUILDER.CreatePointerCast(
1783  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1784  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1785  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1786 
1787  const auto nullcheck_ok_bb = llvm::BasicBlock::Create(
1788  LL_CONTEXT, "arr_nullcheck_ok_bb", executor_->cgen_state_->row_func_);
1789  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
1790  LL_CONTEXT, "arr_nullcheck_fail_bb", executor_->cgen_state_->row_func_);
1791 
1792  // TODO(adb): probably better to zext the bool
1793  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1794  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1795  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1796 
1797  const auto ret_bb = llvm::BasicBlock::Create(
1798  LL_CONTEXT, "arr_return", executor_->cgen_state_->row_func_);
1799  LL_BUILDER.SetInsertPoint(ret_bb);
1800  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, "array_ptr_return");
1801  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1802 
1803  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1804  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1805  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1806 
1807  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1808  executor_->cgen_state_->emitExternalCall(
1809  "register_buffer_with_executor_rsm",
1810  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1811  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1812  LL_BUILDER.CreateBr(ret_bb);
1813 
1814  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1815  LL_BUILDER.CreateBr(ret_bb);
1816 
1817  LL_BUILDER.SetInsertPoint(ret_bb);
1818 
1819  return {result_phi, size};
1820  }
1821  CHECK_EQ(size_t(2), target_lvs.size());
1822  return {target_lvs[0], target_lvs[1]};
1823  }
1824  }
1825  if (target_ti.is_geometry() &&
1826  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1827  auto generate_coord_lvs =
1828  [&](auto* selected_target_expr,
1829  bool const fetch_columns) -> std::vector<llvm::Value*> {
1830  const auto target_lvs =
1831  code_generator.codegen(selected_target_expr, fetch_columns, co);
1832  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
1833  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
1834  if (geo_uoper || geo_binoper) {
1835  CHECK(target_expr->get_type_info().is_geometry());
1836  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1837  target_lvs.size());
1838  return target_lvs;
1839  }
1840  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1841  target_lvs.size());
1842 
1843  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1844  const auto i8p_ty =
1845  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1846  std::vector<llvm::Value*> coords;
1847  size_t ctr = 0;
1848  for (const auto& target_lv : target_lvs) {
1849  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1850  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1851  // for col 1 for polys / mpolys, etc). Hardcoding for now: the first array is
1852  // the coords array (TINYINT); subsequent arrays are regular INT.
1853 
1854  const size_t elem_sz = ctr == 0 ? 1 : 4;
1855  ctr++;
1856  int32_t fixlen = -1;
1857  if (target_ti.get_type() == kPOINT) {
1858  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1859  if (col_var) {
1860  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1861  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1862  fixlen = coords_cd->columnType.get_size();
1863  }
1864  }
1865  }
1866  if (fixlen > 0) {
1867  coords.push_back(executor_->cgen_state_->emitExternalCall(
1868  "fast_fixlen_array_buff",
1869  i8p_ty,
1870  {target_lv, code_generator.posArg(selected_target_expr)}));
1871  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1872  continue;
1873  }
1874  coords.push_back(executor_->cgen_state_->emitExternalCall(
1875  "array_buff",
1876  i8p_ty,
1877  {target_lv, code_generator.posArg(selected_target_expr)}));
1878  coords.push_back(executor_->cgen_state_->emitExternalCall(
1879  "array_size",
1880  i32_ty,
1881  {target_lv,
1882  code_generator.posArg(selected_target_expr),
1883  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1884  }
1885  return coords;
1886  };
1887 
1888  if (agg_expr) {
1889  return generate_coord_lvs(agg_expr->get_arg(), true);
1890  } else {
1891  return generate_coord_lvs(target_expr,
1892  !executor_->plan_state_->allow_lazy_fetch_);
1893  }
1894  }
1895  }
1896  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1897  : code_generator.codegen(
1898  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1899 }
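// Illustrative sketch (not part of the original file): codegenAggArg() returns
// variable-length targets as value pairs -- a buffer pointer plus an element count or
// byte size -- one pair per physical array, and one pair per physical coords column for
// geometry. A host-side analogue of that convention (names are hypothetical):
#include <cstdint>
#include <iostream>
#include <vector>

struct VarlenTarget {
  const int8_t* buf;  // analogous to the array_buff / fast_fixlen_array_buff result
  int32_t size;       // analogous to the array_size result (or fixlen for points)
};

static VarlenTarget make_varlen_target(const std::vector<int8_t>& arr) {
  return {arr.data(), static_cast<int32_t>(arr.size())};
}

int main() {
  const std::vector<int8_t> coords{1, 2, 3, 4};
  const VarlenTarget target = make_varlen_target(coords);
  std::cout << "varlen target of " << target.size << " bytes\n";
  return 0;
}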
1900 
1901 llvm::Value* GroupByAndAggregate::emitCall(const std::string& fname,
1902  const std::vector<llvm::Value*>& args) {
1903  return executor_->cgen_state_->emitCall(fname, args);
1904 }
1905 
1906 void GroupByAndAggregate::checkErrorCode(llvm::Value* retCode) {
1907  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1908  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1909  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1910 
1911  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1912 }
1913 
1914 #undef ROW_FUNC
1915 #undef LL_FP
1916 #undef LL_INT
1917 #undef LL_BOOL
1918 #undef LL_BUILDER
1919 #undef LL_CONTEXT
1920 
1921 size_t GroupByAndAggregate::shard_count_for_top_groups(
1922  const RelAlgExecutionUnit& ra_exe_unit,
1923  const Catalog_Namespace::Catalog& catalog) {
1924  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1925  return 0;
1926  }
1927  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1928  const auto grouped_col_expr =
1929  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1930  if (!grouped_col_expr) {
1931  continue;
1932  }
1933  if (grouped_col_expr->get_table_id() <= 0) {
1934  return 0;
1935  }
1936  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1937  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1938  return td->nShards;
1939  }
1940  }
1941  return 0;
1942 }
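// Illustrative sketch (not part of the original file): why the shard count matters for
// top groups. When the query groups on the sharded column and asks for a sorted,
// limited result, each shard can produce its own top-N candidates independently and
// the candidates are merged afterwards; the helper above reports how many such shards
// exist. A conceptual host-side version of that merge:
#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

static std::vector<int64_t> topk_across_shards(
    const std::vector<std::vector<int64_t>>& per_shard_vals, size_t limit) {
  std::vector<int64_t> merged;
  for (const auto& shard_vals : per_shard_vals) {
    // Each shard contributes only its own top-`limit` candidates.
    std::vector<int64_t> local(shard_vals);
    std::sort(local.begin(), local.end(), std::greater<int64_t>());
    local.resize(std::min(local.size(), limit));
    merged.insert(merged.end(), local.begin(), local.end());
  }
  std::sort(merged.begin(), merged.end(), std::greater<int64_t>());
  merged.resize(std::min(merged.size(), limit));
  return merged;
}

int main() {
  const std::vector<std::vector<int64_t>> shards{{5, 1, 9}, {7, 2}, {4, 8}};
  for (const int64_t v : topk_across_shards(shards, 2)) {
    std::cout << v << " ";  // 9 8
  }
  std::cout << "\n";
  return 0;
}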