OmniSciDB 17c254d2f8
GroupByAndAggregate.cpp
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "GroupByAndAggregate.h"
18 #include "AggregateUtils.h"
20 
21 #include "CardinalityEstimator.h"
22 #include "CodeGenerator.h"
24 #include "ExpressionRange.h"
25 #include "ExpressionRewrite.h"
26 #include "GpuInitGroups.h"
27 #include "InPlaceSort.h"
29 #include "MaxwellCodegenPatch.h"
31 #include "TargetExprBuilder.h"
32 
33 #include "../CudaMgr/CudaMgr.h"
34 #include "../Shared/checked_alloc.h"
35 #include "../Utils/ChunkIter.h"
37 #include "Execute.h"
38 #include "QueryTemplateGenerator.h"
39 #include "RuntimeFunctions.h"
40 #include "StreamingTopN.h"
41 #include "TopKSort.h"
42 #include "WindowContext.h"
43 
44 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
45 
46 #include <numeric>
47 #include <thread>
48 
49 bool g_cluster{false};
50 bool g_bigint_count{false};
52 extern size_t g_leaf_count;
53 
54 namespace {
55 
56 int32_t get_agg_count(const std::vector<Analyzer::Expr*>& target_exprs) {
57  int32_t agg_count{0};
58  for (auto target_expr : target_exprs) {
59  CHECK(target_expr);
60  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
61  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
62  const auto& ti = target_expr->get_type_info();
63  // TODO(pavan): or if is_geometry()
64  if (ti.is_array() || (ti.is_string() && ti.get_compression() == kENCODING_NONE)) {
65  agg_count += 2;
66  } else if (ti.is_geometry()) {
67  agg_count += ti.get_physical_coord_cols() * 2;
68  } else {
69  ++agg_count;
70  }
71  continue;
72  }
73  if (agg_expr && agg_expr->get_aggtype() == kAVG) {
74  agg_count += 2;
75  } else {
76  ++agg_count;
77  }
78  }
79  return agg_count;
80 }
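// Illustrative example of the slot accounting above (not part of the original
// source; the column names are hypothetical): for a target list
//   {COUNT(*), AVG(x), arr_col, poly_col}
// where arr_col is an array column and poly_col is a geometry type with two
// physical coordinate columns, get_agg_count returns
//   1 (COUNT) + 2 (AVG: sum and count) + 2 (array) + 2 * 2 (geometry) = 9.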
81 
82 bool expr_is_rowid(const Analyzer::Expr* expr, const Catalog_Namespace::Catalog& cat) {
83  const auto col = dynamic_cast<const Analyzer::ColumnVar*>(expr);
84  if (!col) {
85  return false;
86  }
87  const auto cd =
88  get_column_descriptor_maybe(col->get_column_id(), col->get_table_id(), cat);
89  if (!cd || !cd->isVirtualCol) {
90  return false;
91  }
92  CHECK_EQ("rowid", cd->columnName);
93  return true;
94 }
95 
96 bool has_count_distinct(const RelAlgExecutionUnit& ra_exe_unit) {
97  for (const auto& target_expr : ra_exe_unit.target_exprs) {
98  const auto agg_info = get_target_info(target_expr, g_bigint_count);
99  if (agg_info.is_agg && is_distinct_target(agg_info)) {
100  return true;
101  }
102  }
103  return false;
104 }
105 
106 bool is_column_range_too_big_for_perfect_hash(const ColRangeInfo& col_range_info,
107  const int64_t max_entry_count) {
108  try {
109  return static_cast<int64_t>(checked_int64_t(col_range_info.max) -
110  checked_int64_t(col_range_info.min)) >= max_entry_count;
111  } catch (...) {
112  return true;
113  }
114 }
115 
116 } // namespace
117 
118 ColRangeInfo GroupByAndAggregate::getColRangeInfo() {
119  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
120  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
121  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
122  // can expect this to be true anyway for grouped queries since the precise version
123  // uses significantly more memory.
124  const int64_t baseline_threshold =
129  if (ra_exe_unit_.groupby_exprs.size() != 1) {
130  try {
131  checked_int64_t cardinality{1};
132  bool has_nulls{false};
133  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
134  auto col_range_info = getExprRangeInfo(groupby_expr.get());
135  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
136  // fall back to baseline hash if a non-integer type is encountered
137  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
138  }
139  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
140  CHECK_GE(crt_col_cardinality, 0);
141  cardinality *= crt_col_cardinality;
142  if (col_range_info.has_nulls) {
143  has_nulls = true;
144  }
145  }
146  // For zero or high cardinalities, use baseline layout.
147  if (!cardinality || cardinality > baseline_threshold) {
148  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
149  }
150  return {QueryDescriptionType::GroupByPerfectHash,
151  0,
152  int64_t(cardinality),
153  0,
154  has_nulls};
155  } catch (...) { // overflow when computing cardinality
156  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
157  }
158  }
159  // For single column group by on high precision timestamps, force baseline hash due to
160  // the wide ranges we are likely to encounter when applying quals to the expression range
161  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
162  // the range is small enough
163  if (ra_exe_unit_.groupby_exprs.front() &&
164  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
165  ra_exe_unit_.simple_quals.size() > 0) {
166  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
167  }
168  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
169  if (!ra_exe_unit_.groupby_exprs.front()) {
170  return col_range_info;
171  }
172  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
173  const int64_t col_count =
175  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
177  max_entry_count = std::min(max_entry_count, baseline_threshold);
178  }
179  if ((!ra_exe_unit_.groupby_exprs.front()->get_type_info().is_string() &&
180  !expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(), *executor_->catalog_)) &&
181  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
182  !col_range_info.bucket) {
183  return {QueryDescriptionType::GroupByBaselineHash,
184  col_range_info.min,
185  col_range_info.max,
186  0,
187  col_range_info.has_nulls};
188  }
189  return col_range_info;
190 }
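// Worked example of the layout decision above (illustrative, not part of the
// original source): a GROUP BY over two integer columns with ranges [0, 99]
// and [0, 9], no bucketing and no nulls, has per-column bucketed cardinalities
// of 100 and 10, so the combined cardinality is 100 * 10 = 1000; assuming that
// is below the baseline threshold, the multi-column branch returns a
// GroupByPerfectHash descriptor with max = 1000. A non-integer group key, a
// zero cardinality, or an overflowing product all fall back to
// GroupByBaselineHash.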
191 
192 ColRangeInfo GroupByAndAggregate::getExprRangeInfo(const Analyzer::Expr* expr) const {
193  if (!expr) {
194  return {QueryDescriptionType::Projection, 0, 0, 0, false};
195  }
196 
197  const auto expr_range = getExpressionRange(
198  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
199  switch (expr_range.getType()) {
200  case ExpressionRangeType::Integer: {
201  if (expr_range.getIntMin() > expr_range.getIntMax()) {
202  return {
203  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
204  }
205  return {QueryDescriptionType::GroupByPerfectHash,
206  expr_range.getIntMin(),
207  expr_range.getIntMax(),
208  expr_range.getBucket(),
209  expr_range.hasNulls()};
210  }
211  case ExpressionRangeType::Float:
212  case ExpressionRangeType::Double: {
213  if (expr_range.getFpMin() > expr_range.getFpMax()) {
214  return {
215  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
216  }
217  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
218  }
219  case ExpressionRangeType::Invalid:
220  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
221  default:
222  CHECK(false);
223  }
224  CHECK(false);
225  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
226 }
227 
228 int64_t GroupByAndAggregate::getBucketedCardinality(const ColRangeInfo& col_range_info) {
229  checked_int64_t crt_col_cardinality =
230  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
231  if (col_range_info.bucket) {
232  crt_col_cardinality /= col_range_info.bucket;
233  }
234  return static_cast<int64_t>(crt_col_cardinality +
235  (1 + (col_range_info.has_nulls ? 1 : 0)));
236 }
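// Worked example for getBucketedCardinality above (illustrative, not part of
// the original source): a column range [10, 1000] with bucket size 10 and
// nulls present yields (1000 - 10) / 10 + 1 + 1 = 101 entries, where the
// final +1 reserves a slot for the null key.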
237 
238 #define LL_CONTEXT executor_->cgen_state_->context_
239 #define LL_BUILDER executor_->cgen_state_->ir_builder_
240 #define LL_BOOL(v) executor_->cgen_state_->llBool(v)
241 #define LL_INT(v) executor_->cgen_state_->llInt(v)
242 #define LL_FP(v) executor_->cgen_state_->llFp(v)
243 #define ROW_FUNC executor_->cgen_state_->row_func_
244 
246  Executor* executor,
247  const ExecutorDeviceType device_type,
248  const RelAlgExecutionUnit& ra_exe_unit,
249  const std::vector<InputTableInfo>& query_infos,
250  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
251  : executor_(executor)
252  , ra_exe_unit_(ra_exe_unit)
253  , query_infos_(query_infos)
254  , row_set_mem_owner_(row_set_mem_owner)
255  , device_type_(device_type) {
256  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
257  if (!groupby_expr) {
258  continue;
259  }
260  const auto& groupby_ti = groupby_expr->get_type_info();
261  if (groupby_ti.is_string() && groupby_ti.get_compression() != kENCODING_DICT) {
262  throw std::runtime_error(
263  "Cannot group by string columns which are not dictionary encoded.");
264  }
265  if (groupby_ti.is_array()) {
266  throw std::runtime_error("Group by array not supported");
267  }
268  if (groupby_ti.is_geometry()) {
269  throw std::runtime_error("Group by geometry not supported");
270  }
271  }
272 }
273 
274 int64_t GroupByAndAggregate::getShardedTopBucket(const ColRangeInfo& col_range_info,
275  const size_t shard_count) const {
276  size_t device_count{0};
277  if (device_type_ == ExecutorDeviceType::GPU) {
278  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
279  CHECK_GT(device_count, 0u);
280  }
281 
282  int64_t bucket{col_range_info.bucket};
283 
284  if (shard_count) {
285  CHECK(!col_range_info.bucket);
286  /*
287  when a node has fewer devices than shard count,
288  a) In a distributed setup, the minimum distance between two keys would be
289  device_count, because shards are stored consecutively across the physical tables,
290  i.e. if a shard column has values 0 to 9 and 3 shards on each leaf, then node 1
291  would have values 0,1,2,6,7,8 and node 2 would have values 3,4,5,9. If each leaf
292  node has only 1 device, all of that node's keys end up loaded on that single
293  device.
294
295  b) In a single node setup, the distance would be the minimum of device_count and
296  the difference shard_count - device_count. For example: if a single node server
297  running on 3 devices has a shard column with values 0 to 9 in a table with 4
298  shards, the device to fragment keys mapping would be: device 1 - 4,8,3,7;
299  device 2 - 1,5,9; device 3 - 2,6. The bucket value would be 4 (shards) -
300  3 (devices) = 1, i.e. the minimum of device_count and the difference.
301
302  When a node has a device count equal to or greater than the shard count, the
303  minimum distance is always at least shard_count * number of leaf nodes.
304  */
305  if (device_count < shard_count) {
306  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
307  : std::min(device_count, shard_count - device_count);
308  } else {
309  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
310  }
311  }
312 
313  return bucket;
314 }
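// Worked examples for the bucket selection above (illustrative, not part of
// the original source): on a single node (g_leaf_count == 0) with 3 devices
// and 4 shards, bucket = min(3, 4 - 3) = 1; with 8 devices and 4 shards,
// bucket = 4 * max(0, 1) = 4. In a distributed setup with 1 device per leaf
// and 4 shards, bucket = max(1, 1) = 1.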
315 
316 std::unique_ptr<QueryMemoryDescriptor> GroupByAndAggregate::initQueryMemoryDescriptor(
317  const bool allow_multifrag,
318  const size_t max_groups_buffer_entry_count,
319  const int8_t crt_min_byte_width,
320  RenderInfo* render_info,
321  const bool output_columnar_hint) {
322  const auto shard_count =
325  : 0;
326  bool sort_on_gpu_hint =
327  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
330  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
331  // but the total output buffer size would be too big or it's a sharded top query.
332  // For the sake of managing risk, use the new result set path very selectively for
333  // this case only (alongside the baseline layout we've enabled for a while now).
334  bool must_use_baseline_sort = shard_count;
335  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
336  while (true) {
337  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
338  max_groups_buffer_entry_count,
339  crt_min_byte_width,
340  sort_on_gpu_hint,
341  render_info,
342  must_use_baseline_sort,
343  output_columnar_hint);
344  CHECK(query_mem_desc);
345  if (query_mem_desc->sortOnGpu() &&
346  (query_mem_desc->getBufferSizeBytes(device_type_) +
347  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
348  2 * 1024 * 1024 * 1024L) {
349  must_use_baseline_sort = true;
350  sort_on_gpu_hint = false;
351  } else {
352  break;
353  }
354  }
355  return query_mem_desc;
356 }
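// Illustrative sizing of the retry loop above (not part of the original
// source): with a 16-byte row and 120M entries, the output buffer is about
// 1.92 GB and align_to_int64(entry_count * sizeof(int32_t)) adds roughly
// another 0.48 GB, exceeding the 2 GB cap, so the descriptor is rebuilt with
// must_use_baseline_sort = true and sort_on_gpu_hint = false.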
357 
358 std::unique_ptr<QueryMemoryDescriptor> GroupByAndAggregate::initQueryMemoryDescriptorImpl(
359  const bool allow_multifrag,
360  const size_t max_groups_buffer_entry_count,
361  const int8_t crt_min_byte_width,
362  const bool sort_on_gpu_hint,
363  RenderInfo* render_info,
364  const bool must_use_baseline_sort,
365  const bool output_columnar_hint) {
367 
368  const auto count_distinct_descriptors = initCountDistinctDescriptors();
369 
370  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs, {});
371 
372  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
373 
374  auto col_range_info_nosharding = getColRangeInfo();
375 
376  const auto shard_count =
377  device_type_ == ExecutorDeviceType::GPU
378  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
379  : 0;
380 
381  const auto col_range_info =
382  ColRangeInfo{col_range_info_nosharding.hash_type_,
383  col_range_info_nosharding.min,
384  col_range_info_nosharding.max,
385  getShardedTopBucket(col_range_info_nosharding, shard_count),
386  col_range_info_nosharding.has_nulls};
387 
388  // Non-grouped aggregates do not support accessing aggregated ranges
389  // Keyless hash is currently only supported with single-column perfect hash
390  const auto keyless_info = !(is_group_by && col_range_info.hash_type_ ==
391  QueryDescriptionType::GroupByPerfectHash)
392  ? KeylessInfo{false, -1}
393  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
394 
395  if (g_enable_watchdog &&
396  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
397  max_groups_buffer_entry_count > 120000000) ||
398  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
399  ra_exe_unit_.groupby_exprs.size() == 1 &&
400  (col_range_info.max - col_range_info.min) /
401  std::max(col_range_info.bucket, int64_t(1)) >
402  130000000))) {
403  throw WatchdogException("Query would use too much memory");
404  }
405  try {
406  return QueryMemoryDescriptor::init(executor_,
407  ra_exe_unit_,
408  query_infos_,
409  col_range_info,
410  keyless_info,
411  allow_multifrag,
412  device_type_,
413  crt_min_byte_width,
414  sort_on_gpu_hint,
415  shard_count,
416  max_groups_buffer_entry_count,
417  render_info,
418  count_distinct_descriptors,
419  must_use_baseline_sort,
420  output_columnar_hint,
421  /*streaming_top_n_hint=*/true);
422  } catch (const StreamingTopNOOM& e) {
423  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
424  return QueryMemoryDescriptor::init(executor_,
425  ra_exe_unit_,
426  query_infos_,
427  col_range_info,
428  keyless_info,
429  allow_multifrag,
430  device_type_,
431  crt_min_byte_width,
432  sort_on_gpu_hint,
433  shard_count,
434  max_groups_buffer_entry_count,
435  render_info,
436  count_distinct_descriptors,
437  must_use_baseline_sort,
438  output_columnar_hint,
439  /*streaming_top_n_hint=*/false);
440  }
441 }
442 
445 }
446 
447 namespace {
448 
450  const Analyzer::Expr* expr,
451  Executor* executor,
452  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
453  if (!expr) {
454  return;
455  }
456 
457  const auto array_expr = dynamic_cast<const Analyzer::ArrayExpr*>(expr);
458  if (array_expr) {
459  for (size_t i = 0; i < array_expr->getElementCount(); i++) {
461  array_expr->getElement(i), executor, row_set_mem_owner);
462  }
463  return;
464  }
465 
466  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr);
467  const auto& expr_ti = expr->get_type_info();
468  if (cast_expr && cast_expr->get_optype() == kCAST && expr_ti.is_string()) {
469  CHECK_EQ(kENCODING_DICT, expr_ti.get_compression());
470  auto sdp = executor->getStringDictionaryProxy(
471  expr_ti.get_comp_param(), row_set_mem_owner, true);
472  CHECK(sdp);
473  const auto str_lit_expr =
474  dynamic_cast<const Analyzer::Constant*>(cast_expr->get_operand());
475  if (str_lit_expr && str_lit_expr->get_constval().stringval) {
476  sdp->getOrAddTransient(*str_lit_expr->get_constval().stringval);
477  }
478  return;
479  }
480  const auto case_expr = dynamic_cast<const Analyzer::CaseExpr*>(expr);
481  if (!case_expr) {
482  return;
483  }
484  Analyzer::DomainSet domain_set;
485  case_expr->get_domain(domain_set);
486  if (domain_set.empty()) {
487  return;
488  }
489  if (expr_ti.is_string()) {
490  CHECK_EQ(kENCODING_DICT, expr_ti.get_compression());
491  auto sdp = executor->getStringDictionaryProxy(
492  expr_ti.get_comp_param(), row_set_mem_owner, true);
493  CHECK(sdp);
494  for (const auto domain_expr : domain_set) {
495  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(domain_expr);
496  const auto str_lit_expr =
497  cast_expr && cast_expr->get_optype() == kCAST
498  ? dynamic_cast<const Analyzer::Constant*>(cast_expr->get_operand())
499  : dynamic_cast<const Analyzer::Constant*>(domain_expr);
500  if (str_lit_expr && str_lit_expr->get_constval().stringval) {
501  sdp->getOrAddTransient(*str_lit_expr->get_constval().stringval);
502  }
503  }
504  }
505 }
506 
507 } // namespace
508 
510  const RelAlgExecutionUnit& ra_exe_unit,
511  Executor* executor,
512  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
513  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
515  group_expr.get(), executor, row_set_mem_owner);
516  }
517  for (const auto target_expr : ra_exe_unit.target_exprs) {
518  const auto& target_type = target_expr->get_type_info();
519  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
520  continue;
521  }
522  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
523  if (agg_expr) {
524  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
525  agg_expr->get_aggtype() == kSAMPLE) {
527  agg_expr->get_arg(), executor, row_set_mem_owner);
528  }
529  } else {
531  target_expr, executor, row_set_mem_owner);
532  }
533  }
534  row_set_mem_owner->addLiteralStringDictProxy(executor->lit_str_dict_proxy_);
535 }
536 
538  CountDistinctDescriptors count_distinct_descriptors;
539  for (const auto target_expr : ra_exe_unit_.target_exprs) {
540  auto agg_info = get_target_info(target_expr, g_bigint_count);
541  if (is_distinct_target(agg_info)) {
542  CHECK(agg_info.is_agg);
543  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
544  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
545  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
546  if (arg_ti.is_string() && arg_ti.get_compression() != kENCODING_DICT) {
547  throw std::runtime_error(
548  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
549  }
550  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_array()) {
551  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
552  }
553  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
554  throw std::runtime_error(
555  "APPROX_COUNT_DISTINCT on geometry columns not supported");
556  }
557  if (agg_info.is_distinct && arg_ti.is_geometry()) {
558  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
559  }
560  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
561  auto arg_range_info =
562  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
563  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
564  int64_t bitmap_sz_bits{0};
565  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
566  const auto error_rate = agg_expr->get_error_rate();
567  if (error_rate) {
568  CHECK(error_rate->get_type_info().get_type() == kINT);
569  CHECK_GE(error_rate->get_constval().intval, 1);
570  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
571  } else {
572  bitmap_sz_bits = g_hll_precision_bits;
573  }
574  }
575  if (arg_range_info.isEmpty()) {
576  count_distinct_descriptors.emplace_back(
578  0,
579  64,
580  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
581  device_type_,
582  1});
583  continue;
584  }
585  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
586  !(arg_ti.is_array() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
587  // implementation for arrays
588  count_distinct_impl_type = CountDistinctImplType::Bitmap;
589  if (agg_info.agg_kind == kCOUNT) {
590  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
591  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
592  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
593  count_distinct_impl_type = CountDistinctImplType::StdSet;
594  }
595  }
596  }
597  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
598  count_distinct_impl_type == CountDistinctImplType::StdSet &&
599  !(arg_ti.is_array() || arg_ti.is_geometry())) {
600  count_distinct_impl_type = CountDistinctImplType::Bitmap;
601  }
602 
603  if (g_enable_watchdog && !(arg_range_info.isEmpty()) &&
604  count_distinct_impl_type == CountDistinctImplType::StdSet) {
605  throw WatchdogException("Cannot use a fast path for COUNT distinct");
606  }
607  const auto sub_bitmap_count =
609  count_distinct_descriptors.emplace_back(
610  CountDistinctDescriptor{count_distinct_impl_type,
611  arg_range_info.min,
612  bitmap_sz_bits,
613  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
614  device_type_,
615  sub_bitmap_count});
616  } else {
617  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
618  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
619  }
620  }
621  return count_distinct_descriptors;
622 }
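// Worked example for the descriptor selection above (illustrative, not part
// of the original source): COUNT(DISTINCT i) over an integer column with
// range [0, 999999] and no bucketing picks the Bitmap implementation with
// bitmap_sz_bits = 999999 - 0 + 1 = 1000000, i.e. one bit per possible value.
// For APPROX_COUNT_DISTINCT the bitmap size is instead the HyperLogLog
// precision: hll_size_for_rate(error_rate) bits, or g_hll_precision_bits when
// no error rate is given.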
623 
634  const std::vector<Analyzer::Expr*>& target_expr_list,
635  const bool is_group_by) const {
636  bool keyless{true}, found{false};
637  int32_t num_agg_expr{0};
638  int32_t index{0};
639  for (const auto target_expr : target_expr_list) {
640  const auto agg_info = get_target_info(target_expr, g_bigint_count);
641  const auto chosen_type = get_compact_type(agg_info);
642  if (agg_info.is_agg) {
643  num_agg_expr++;
644  }
645  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
646  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
647  CHECK(agg_expr);
648  const auto arg_expr = agg_arg(target_expr);
649  const bool float_argument_input = takes_float_argument(agg_info);
650  switch (agg_info.agg_kind) {
651  case kAVG:
652  ++index;
653  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
654  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
655  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
656  expr_range_info.hasNulls()) {
657  break;
658  }
659  }
660  found = true;
661  break;
662  case kCOUNT:
663  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
664  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
665  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
666  expr_range_info.hasNulls()) {
667  break;
668  }
669  }
670  found = true;
671  break;
672  case kSUM: {
673  auto arg_ti = arg_expr->get_type_info();
674  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
675  arg_ti.set_notnull(true);
676  }
677  if (!arg_ti.get_notnull()) {
678  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
679  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
680  !expr_range_info.hasNulls()) {
681  found = true;
682  }
683  } else {
684  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
685  switch (expr_range_info.getType()) {
688  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
689  found = true;
690  }
691  break;
693  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
694  found = true;
695  }
696  break;
697  default:
698  break;
699  }
700  }
701  break;
702  }
703  case kMIN: {
704  CHECK(agg_expr && agg_expr->get_arg());
705  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
706  if (arg_ti.is_string() || arg_ti.is_array()) {
707  break;
708  }
709  auto expr_range_info =
710  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
711  auto init_max = get_agg_initial_val(agg_info.agg_kind,
712  chosen_type,
713  is_group_by || float_argument_input,
714  float_argument_input ? sizeof(float) : 8);
715  switch (expr_range_info.getType()) {
718  auto double_max =
719  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
720  if (expr_range_info.getFpMax() < double_max) {
721  found = true;
722  }
723  break;
724  }
726  if (expr_range_info.getIntMax() < init_max) {
727  found = true;
728  }
729  break;
730  default:
731  break;
732  }
733  break;
734  }
735  case kMAX: {
736  CHECK(agg_expr && agg_expr->get_arg());
737  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
738  if (arg_ti.is_string() || arg_ti.is_array()) {
739  break;
740  }
741  auto expr_range_info =
742  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
743  // NULL sentinel and init value for kMAX are identical, which results in
744  // ambiguity in detecting empty keys in the presence of nulls.
745  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
746  expr_range_info.hasNulls()) {
747  break;
748  }
749  auto init_min = get_agg_initial_val(agg_info.agg_kind,
750  chosen_type,
751  is_group_by || float_argument_input,
752  float_argument_input ? sizeof(float) : 8);
753  switch (expr_range_info.getType()) {
756  auto double_min =
757  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
758  if (expr_range_info.getFpMin() > double_min) {
759  found = true;
760  }
761  break;
762  }
764  if (expr_range_info.getIntMin() > init_min) {
765  found = true;
766  }
767  break;
768  default:
769  break;
770  }
771  break;
772  }
773  default:
774  keyless = false;
775  break;
776  }
777  }
778  if (!keyless) {
779  break;
780  }
781  if (!found) {
782  ++index;
783  }
784  }
785 
786  // shouldn't use keyless for projection only
787  return {
788  keyless && found,
789  index,
790  };
791 }
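// Illustrative example for the keyless check above (not part of the original
// source; table and column names are hypothetical): for
//   SELECT x, MIN(y) FROM t GROUP BY x
// where y is non-null and bounded by [0, 100], the initial value stored in
// the MIN slot can never be produced by a real row, so that slot also tells
// whether the group has been touched; getKeylessInfo returns
// {keyless = true, index = 1}, index 1 being the MIN target. COUNT(DISTINCT)
// targets are skipped by the check and cannot serve as that marker slot.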
792 
794  const std::list<Analyzer::OrderEntry>& order_entries) {
795  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
796  return false;
797  }
798  for (const auto& order_entry : order_entries) {
799  CHECK_GE(order_entry.tle_no, 1);
800  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
801  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
802  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
803  return false;
804  }
805  // TODO(alex): relax the restrictions
806  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
807  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
808  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
809  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
810  return false;
811  }
812  if (agg_expr->get_arg()) {
813  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
814  if (arg_ti.is_fp()) {
815  return false;
816  }
817  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
818  // TODO(adb): QMD not actually initialized here?
819  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
820  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
821  expr_range_info.has_nulls) &&
822  order_entry.is_desc == order_entry.nulls_first) {
823  return false;
824  }
825  }
826  const auto& target_ti = target_expr->get_type_info();
827  CHECK(!target_ti.is_array());
828  if (!target_ti.is_integer()) {
829  return false;
830  }
831  }
832  return true;
833 }
834 
836  llvm::Value* cond,
837  Executor* executor,
838  const bool chain_to_next,
839  const std::string& label_prefix,
840  DiamondCodegen* parent,
841  const bool share_false_edge_with_parent)
842  : executor_(executor), chain_to_next_(chain_to_next), parent_(parent) {
843  if (parent_) {
845  }
846  cond_true_ = llvm::BasicBlock::Create(LL_CONTEXT, label_prefix + "_true", ROW_FUNC);
847  if (share_false_edge_with_parent) {
848  CHECK(parent);
850  } else {
852  llvm::BasicBlock::Create(LL_CONTEXT, label_prefix + "_false", ROW_FUNC);
853  }
854 
855  LL_BUILDER.CreateCondBr(cond, cond_true_, cond_false_);
856  LL_BUILDER.SetInsertPoint(cond_true_);
857 }
858 
860  CHECK(!parent_);
861  chain_to_next_ = true;
862 }
863 
864 void GroupByAndAggregate::DiamondCodegen::setFalseTarget(llvm::BasicBlock* cond_false) {
865  CHECK(!parent_ || orig_cond_false_ != parent_->cond_false_);
866  cond_false_ = cond_false;
867 }
868 
870  if (parent_ && orig_cond_false_ != parent_->cond_false_) {
871  LL_BUILDER.CreateBr(parent_->cond_false_);
872  } else if (chain_to_next_) {
873  LL_BUILDER.CreateBr(cond_false_);
874  }
875  if (!parent_ || (!chain_to_next_ && cond_false_ != parent_->cond_false_)) {
876  LL_BUILDER.SetInsertPoint(orig_cond_false_);
877  }
878 }
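// Illustrative shape of the IR emitted by a DiamondCodegen (not part of the
// original source; label names depend on label_prefix):
//   br i1 %cond, label %filter_true, label %filter_false
// filter_true:
//   ; code generated while the DiamondCodegen instance is alive
//   br label %filter_false        ; emitted by the destructor when chaining
// filter_false:
//   ; the destructor repositions the builder here
// A nested diamond constructed with a parent instead terminates its true
// block with a branch to the parent's false block.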
879 
880 bool GroupByAndAggregate::codegen(llvm::Value* filter_result,
881  llvm::BasicBlock* sc_false,
883  const CompilationOptions& co,
884  const GpuSharedMemoryContext& gpu_smem_context) {
885  CHECK(filter_result);
886 
887  bool can_return_error = false;
888  llvm::BasicBlock* filter_false{nullptr};
889 
890  {
891  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
892 
893  if (executor_->isArchMaxwell(co.device_type)) {
895  }
896  DiamondCodegen filter_cfg(filter_result,
897  executor_,
898  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
899  "filter",
900  nullptr,
901  false);
902  filter_false = filter_cfg.cond_false_;
903 
904  if (is_group_by) {
906  !query_mem_desc.useStreamingTopN()) {
907  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
908  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
909  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
910  llvm::Value* old_total_matched_val{nullptr};
912  old_total_matched_val =
913  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
914  total_matched_ptr,
915  LL_INT(int32_t(1)),
916  llvm::AtomicOrdering::Monotonic);
917  } else {
918  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
919  LL_BUILDER.CreateStore(
920  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
921  total_matched_ptr);
922  }
923  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
924  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
925  }
926 
927  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
928  if (query_mem_desc.usesGetGroupValueFast() ||
929  query_mem_desc.getQueryDescriptionType() ==
931  if (query_mem_desc.getGroupbyColCount() > 1) {
932  filter_cfg.setChainToNext();
933  }
934  // Don't generate null checks if the group slot is guaranteed to be non-null,
935  // as is the case for the get_group_value_fast* family.
936  can_return_error = codegenAggCalls(
937  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
938  } else {
939  {
940  llvm::Value* nullcheck_cond{nullptr};
941  if (query_mem_desc.didOutputColumnar()) {
942  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
943  LL_INT(int32_t(0)));
944  } else {
945  nullcheck_cond = LL_BUILDER.CreateICmpNE(
946  std::get<0>(agg_out_ptr_w_idx),
947  llvm::ConstantPointerNull::get(
948  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
949  }
950  DiamondCodegen nullcheck_cfg(
951  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
953  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
954  }
955  can_return_error = true;
956  if (query_mem_desc.getQueryDescriptionType() ==
958  query_mem_desc.useStreamingTopN()) {
959  // Ignore rejection on pushing current row to top-K heap.
960  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
961  } else {
962  CodeGenerator code_generator(executor_);
963  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
964  // TODO(alex): remove the trunc once pos is converted to 32 bits
965  code_generator.posArg(nullptr),
966  get_int_type(32, LL_CONTEXT))));
967  }
968  }
969  } else {
970  if (ra_exe_unit_.estimator) {
971  std::stack<llvm::BasicBlock*> array_loops;
972  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
973  } else {
974  auto arg_it = ROW_FUNC->arg_begin();
975  std::vector<llvm::Value*> agg_out_vec;
976  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
977  agg_out_vec.push_back(&*arg_it++);
978  }
979  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
980  agg_out_vec,
981  query_mem_desc,
982  co,
983  gpu_smem_context,
984  filter_cfg);
985  }
986  }
987  }
988 
989  if (ra_exe_unit_.join_quals.empty()) {
990  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
991  } else if (sc_false) {
992  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
993  LL_BUILDER.SetInsertPoint(sc_false);
994  LL_BUILDER.CreateBr(filter_false);
995  LL_BUILDER.SetInsertPoint(saved_insert_block);
996  }
997 
998  return can_return_error;
999 }
1000 
1002  llvm::Value* groups_buffer,
1004  const CompilationOptions& co,
1005  DiamondCodegen& diamond_codegen) {
1007  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1008  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1009  CHECK(!group_expr);
1010  if (!query_mem_desc.didOutputColumnar()) {
1011  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1012  }
1013  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1014  ? 0
1015  : query_mem_desc.getRowSize() / sizeof(int64_t);
1016  CodeGenerator code_generator(executor_);
1017  if (query_mem_desc.useStreamingTopN()) {
1018  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1019  CHECK_GE(only_order_entry.tle_no, int(1));
1020  const size_t target_idx = only_order_entry.tle_no - 1;
1021  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1022  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1023  const auto chosen_bytes =
1024  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1025  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1026  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1028  std::string fname = "get_bin_from_k_heap";
1029  const auto& oe_ti = order_entry_expr->get_type_info();
1030  llvm::Value* null_key_lv = nullptr;
1031  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1032  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1033  switch (bit_width) {
1034  case 32:
1035  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1036  break;
1037  case 64:
1038  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1039  break;
1040  default:
1041  CHECK(false);
1042  }
1043  fname += "_int" + std::to_string(bit_width) + "_t";
1044  } else {
1045  CHECK(oe_ti.is_fp());
1046  if (order_entry_lv->getType()->isDoubleTy()) {
1047  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1048  } else {
1049  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1050  }
1051  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1052  }
1053  const auto key_slot_idx =
1055  return emitCall(
1056  fname,
1057  {groups_buffer,
1058  LL_INT(n),
1059  LL_INT(row_size_quad),
1060  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1061  LL_BOOL(only_order_entry.is_desc),
1062  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1063  LL_BOOL(only_order_entry.nulls_first),
1064  null_key_lv,
1065  order_entry_lv});
1066  } else {
1067  llvm::Value* output_buffer_entry_count_lv{nullptr};
1069  output_buffer_entry_count_lv =
1070  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1071  CHECK(output_buffer_entry_count_lv);
1072  }
1073  const auto group_expr_lv =
1074  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1075  std::vector<llvm::Value*> args{
1076  groups_buffer,
1077  output_buffer_entry_count_lv
1078  ? output_buffer_entry_count_lv
1079  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1080  group_expr_lv,
1081  code_generator.posArg(nullptr)};
1082  if (query_mem_desc.didOutputColumnar()) {
1083  const auto columnar_output_offset =
1084  emitCall("get_columnar_scan_output_offset", args);
1085  return columnar_output_offset;
1086  }
1087  args.push_back(LL_INT(row_size_quad));
1088  return emitCall("get_scan_output_slot", args);
1089  }
1090 }
1091 
1092 std::tuple<llvm::Value*, llvm::Value*> GroupByAndAggregate::codegenGroupBy(
1094  const CompilationOptions& co,
1095  DiamondCodegen& diamond_codegen) {
1096  auto arg_it = ROW_FUNC->arg_begin();
1097  auto groups_buffer = arg_it++;
1098 
1099  std::stack<llvm::BasicBlock*> array_loops;
1100 
1101  // TODO(Saman): move this logic outside of this function.
1103  if (query_mem_desc.didOutputColumnar()) {
1104  return std::make_tuple(
1105  &*groups_buffer,
1106  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1107  } else {
1108  return std::make_tuple(
1109  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1110  nullptr);
1111  }
1112  }
1113 
1114  CHECK(query_mem_desc.getQueryDescriptionType() ==
1116  query_mem_desc.getQueryDescriptionType() ==
1118 
1119  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1120  ? 0
1121  : query_mem_desc.getRowSize() / sizeof(int64_t);
1122 
1123  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1124  ? sizeof(int64_t)
1125  : query_mem_desc.getEffectiveKeyWidth();
1126  // for multi-column group by
1127  llvm::Value* group_key = nullptr;
1128  llvm::Value* key_size_lv = nullptr;
1129 
1130  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1131  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1132  if (query_mem_desc.getQueryDescriptionType() ==
1134  group_key =
1135  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1136  } else if (query_mem_desc.getQueryDescriptionType() ==
1138  group_key =
1139  col_width_size == sizeof(int32_t)
1140  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1141  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1142  }
1143  CHECK(group_key);
1144  CHECK(key_size_lv);
1145  }
1146 
1147  int32_t subkey_idx = 0;
1148  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1149  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1150  const auto col_range_info = getExprRangeInfo(group_expr.get());
1151  const auto translated_null_value = static_cast<int64_t>(
1152  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1153  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1154  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1155  : checked_int64_t(col_range_info.max) +
1156  (col_range_info.bucket ? col_range_info.bucket : 1));
1157 
1158  const bool col_has_nulls =
1159  query_mem_desc.getQueryDescriptionType() ==
1161  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1162  ? query_mem_desc.hasNulls()
1163  : col_range_info.has_nulls)
1164  : false;
1165 
1166  const auto group_expr_lvs =
1167  executor_->groupByColumnCodegen(group_expr.get(),
1168  col_width_size,
1169  co,
1170  col_has_nulls,
1171  translated_null_value,
1172  diamond_codegen,
1173  array_loops,
1174  query_mem_desc.threadsShareMemory());
1175  const auto group_expr_lv = group_expr_lvs.translated_value;
1176  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1177  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1178  return codegenSingleColumnPerfectHash(query_mem_desc,
1179  co,
1180  &*groups_buffer,
1181  group_expr_lv,
1182  group_expr_lvs.original_value,
1183  row_size_quad);
1184  } else {
1185  // store the sub-key to the buffer
1186  LL_BUILDER.CreateStore(group_expr_lv,
1187  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1188  }
1189  }
1190  if (query_mem_desc.getQueryDescriptionType() ==
1192  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1194  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1195  } else if (query_mem_desc.getQueryDescriptionType() ==
1198  &*groups_buffer,
1199  group_key,
1200  key_size_lv,
1201  query_mem_desc,
1202  col_width_size,
1203  row_size_quad);
1204  }
1205  CHECK(false);
1206  return std::make_tuple(nullptr, nullptr);
1207 }
1208 
1209 std::tuple<llvm::Value*, llvm::Value*>
1212  const CompilationOptions& co,
1213  llvm::Value* groups_buffer,
1214  llvm::Value* group_expr_lv_translated,
1215  llvm::Value* group_expr_lv_original,
1216  const int32_t row_size_quad) {
1217  CHECK(query_mem_desc.usesGetGroupValueFast());
1218  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1219  ? "get_columnar_group_bin_offset"
1220  : "get_group_value_fast"};
1221  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1222  get_group_fn_name += "_keyless";
1223  }
1224  if (query_mem_desc.interleavedBins(co.device_type)) {
1225  CHECK(!query_mem_desc.didOutputColumnar());
1226  CHECK(query_mem_desc.hasKeylessHash());
1227  get_group_fn_name += "_semiprivate";
1228  }
1229  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1230  &*group_expr_lv_translated};
1231  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1232  query_mem_desc.mustUseBaselineSort()) {
1233  get_group_fn_name += "_with_original_key";
1234  get_group_fn_args.push_back(group_expr_lv_original);
1235  }
1236  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1237  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1238  if (!query_mem_desc.hasKeylessHash()) {
1239  if (!query_mem_desc.didOutputColumnar()) {
1240  get_group_fn_args.push_back(LL_INT(row_size_quad));
1241  }
1242  } else {
1243  if (!query_mem_desc.didOutputColumnar()) {
1244  get_group_fn_args.push_back(LL_INT(row_size_quad));
1245  }
1246  if (query_mem_desc.interleavedBins(co.device_type)) {
1247  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1248  get_group_fn_args.push_back(warp_idx);
1249  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1250  }
1251  }
1252  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1253  return std::make_tuple(&*groups_buffer,
1254  emitCall(get_group_fn_name, get_group_fn_args));
1255  }
1256  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1257 }
1258 
1259 std::tuple<llvm::Value*, llvm::Value*> GroupByAndAggregate::codegenMultiColumnPerfectHash(
1260  llvm::Value* groups_buffer,
1261  llvm::Value* group_key,
1262  llvm::Value* key_size_lv,
1263  const QueryMemoryDescriptor& query_mem_desc,
1264  const int32_t row_size_quad) {
1265  CHECK(query_mem_desc.getQueryDescriptionType() ==
1267  // compute the index (perfect hash)
1268  auto perfect_hash_func = codegenPerfectHashFunction();
1269  auto hash_lv =
1270  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1271 
1272  if (query_mem_desc.didOutputColumnar()) {
1273  if (!query_mem_desc.hasKeylessHash()) {
1274  const std::string set_matching_func_name{
1275  "set_matching_group_value_perfect_hash_columnar"};
1276  const std::vector<llvm::Value*> set_matching_func_arg{
1277  groups_buffer,
1278  hash_lv,
1279  group_key,
1280  key_size_lv,
1281  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1282  query_mem_desc.getEntryCount())};
1283  emitCall(set_matching_func_name, set_matching_func_arg);
1284  }
1285  return std::make_tuple(groups_buffer, hash_lv);
1286  } else {
1287  if (query_mem_desc.hasKeylessHash()) {
1288  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1289  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1290  nullptr);
1291  } else {
1292  return std::make_tuple(
1293  emitCall(
1294  "get_matching_group_value_perfect_hash",
1295  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1296  nullptr);
1297  }
1298  }
1299 }
1300 
1301 std::tuple<llvm::Value*, llvm::Value*>
1303  const CompilationOptions& co,
1304  llvm::Value* groups_buffer,
1305  llvm::Value* group_key,
1306  llvm::Value* key_size_lv,
1307  const QueryMemoryDescriptor& query_mem_desc,
1308  const size_t key_width,
1309  const int32_t row_size_quad) {
1310  auto arg_it = ROW_FUNC->arg_begin(); // groups_buffer
1311  ++arg_it; // current match count
1312  ++arg_it; // total match count
1313  ++arg_it; // old match count
1314  ++arg_it; // output buffer slots count
1315  ++arg_it; // aggregate init values
1316  CHECK(arg_it->getName() == "agg_init_val");
1317  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1318  CHECK(key_width == sizeof(int32_t));
1319  group_key =
1320  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1321  }
1322  std::vector<llvm::Value*> func_args{
1323  groups_buffer,
1324  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1325  &*group_key,
1326  &*key_size_lv,
1327  LL_INT(static_cast<int32_t>(key_width))};
1328  std::string func_name{"get_group_value"};
1329  if (query_mem_desc.didOutputColumnar()) {
1330  func_name += "_columnar_slot";
1331  } else {
1332  func_args.push_back(LL_INT(row_size_quad));
1333  func_args.push_back(&*arg_it);
1334  }
1335  if (co.with_dynamic_watchdog) {
1336  func_name += "_with_watchdog";
1337  }
1338  if (query_mem_desc.didOutputColumnar()) {
1339  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1340  } else {
1341  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1342  }
1343 }
1344 
1346  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1347  auto ft = llvm::FunctionType::get(
1348  get_int_type(32, LL_CONTEXT),
1349  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1350  false);
1351  auto key_hash_func = llvm::Function::Create(ft,
1352  llvm::Function::ExternalLinkage,
1353  "perfect_key_hash",
1354  executor_->cgen_state_->module_);
1355  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1356  mark_function_always_inline(key_hash_func);
1357  auto& key_buff_arg = *key_hash_func->args().begin();
1358  llvm::Value* key_buff_lv = &key_buff_arg;
1359  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1360  llvm::IRBuilder<> key_hash_func_builder(bb);
1361  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1362  std::vector<int64_t> cardinalities;
1363  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1364  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1365  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1366  cardinalities.push_back(getBucketedCardinality(col_range_info));
1367  }
1368  size_t dim_idx = 0;
1369  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1370  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1371  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1372  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1373  auto crt_term_lv =
1374  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1375  if (col_range_info.bucket) {
1376  crt_term_lv =
1377  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1378  }
1379  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1380  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1381  LL_INT(cardinalities[prev_dim_idx]));
1382  }
1383  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1384  ++dim_idx;
1385  }
1386  key_hash_func_builder.CreateRet(
1387  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1388  return key_hash_func;
1389 }
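// Worked example of the perfect hash emitted above (illustrative, not part of
// the original source): for GROUP BY a, b with a in [0, 9] (no nulls, bucket
// 1, bucketed cardinality 10) and b in [5, 7], the generated function
// computes
//   hash = (a - 0) + (b - 5) * 10
// so the key {3, 6} hashes to 3 + 1 * 10 = 13.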
1390 
1392  const TargetInfo& agg_info,
1393  llvm::Value* target) {
1394  const auto& agg_type = agg_info.sql_type;
1395  const size_t chosen_bytes = agg_type.get_size();
1396 
1397  bool need_conversion{false};
1398  llvm::Value* arg_null{nullptr};
1399  llvm::Value* agg_null{nullptr};
1400  llvm::Value* target_to_cast{target};
1401  if (arg_type.is_fp()) {
1402  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1403  if (agg_type.is_fp()) {
1404  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1405  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1406  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1407  need_conversion = true;
1408  }
1409  } else {
1410  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1411  return target;
1412  }
1413  } else {
1414  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1415  if (agg_type.is_fp()) {
1416  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1417  need_conversion = true;
1418  target_to_cast = executor_->castToFP(target);
1419  } else {
1420  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1421  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1422  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1423  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1424  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1425  need_conversion = true;
1426  }
1427  }
1428  }
1429  if (need_conversion) {
1430  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1431  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1432  return LL_BUILDER.CreateSelect(
1433  cmp,
1434  agg_null,
1435  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1436  } else {
1437  return target;
1438  }
1439 }
1440 
1442  const Analyzer::WindowFunction* window_func,
1443  const QueryMemoryDescriptor& query_mem_desc,
1444  const CompilationOptions& co,
1445  DiamondCodegen& diamond_codegen) {
1446  const auto window_func_context =
1448  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1449  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1450  ? 0
1451  : query_mem_desc.getRowSize() / sizeof(int64_t);
1452  auto arg_it = ROW_FUNC->arg_begin();
1453  auto groups_buffer = arg_it++;
1454  CodeGenerator code_generator(executor_);
1455  if (!window_func_context->getRowNumber()) {
1456  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1457  window_func_context->setRowNumber(emitCall(
1458  "row_number_window_func",
1459  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1460  code_generator.posArg(nullptr)}));
1461  }
1462  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1463  get_int_type(32, LL_CONTEXT));
1464  llvm::Value* entry_count_lv =
1465  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1466  std::vector<llvm::Value*> args{
1467  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1468  if (query_mem_desc.didOutputColumnar()) {
1469  const auto columnar_output_offset =
1470  emitCall("get_columnar_scan_output_offset", args);
1471  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1472  }
1473  args.push_back(LL_INT(row_size_quad));
1474  return emitCall("get_scan_output_slot", args);
1475  }
1476  auto arg_it = ROW_FUNC->arg_begin();
1477  auto groups_buffer = arg_it++;
1478  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1479 }
1480 
1482  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
1483  const std::vector<llvm::Value*>& agg_out_vec,
1484  const QueryMemoryDescriptor& query_mem_desc,
1485  const CompilationOptions& co,
1486  const GpuSharedMemoryContext& gpu_smem_context,
1487  DiamondCodegen& diamond_codegen) {
1488  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1489  // TODO(alex): unify the two cases, the output for non-group by queries
1490  // should be a contiguous buffer
1491  const bool is_group_by{std::get<0>(agg_out_ptr_w_idx)};
1492  bool can_return_error = false;
1493  if (is_group_by) {
1494  CHECK(agg_out_vec.empty());
1495  } else {
1496  CHECK(!agg_out_vec.empty());
1497  }
1498 
1499  // output buffer is casted into a byte stream to be able to handle data elements of
1500  // different sizes (only used when actual column width sizes are used)
1501  llvm::Value* output_buffer_byte_stream{nullptr};
1502  llvm::Value* out_row_idx{nullptr};
1503  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1505  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1506  std::get<0>(agg_out_ptr_w_idx),
1507  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1508  output_buffer_byte_stream->setName("out_buff_b_stream");
1509  CHECK(std::get<1>(agg_out_ptr_w_idx));
1510  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1511  llvm::Type::getInt64Ty(LL_CONTEXT));
1512  out_row_idx->setName("out_row_idx");
1513  }
1514 
1515  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1516  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1517  ++target_idx) {
1518  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1519  CHECK(target_expr);
1520 
1521  target_builder(target_expr, executor_, co);
1522  }
1523 
1524  target_builder.codegen(this,
1525  executor_,
1526  query_mem_desc,
1527  co,
1528  gpu_smem_context,
1529  agg_out_ptr_w_idx,
1530  agg_out_vec,
1531  output_buffer_byte_stream,
1532  out_row_idx,
1533  diamond_codegen);
1534 
1535  for (auto target_expr : ra_exe_unit_.target_exprs) {
1536  CHECK(target_expr);
1537  executor_->plan_state_->isLazyFetchColumn(target_expr);
1538  }
1539 
1540  return can_return_error;
1541 }
1542 
1547  llvm::Value* output_buffer_byte_stream,
1548  llvm::Value* out_row_idx,
1549  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
1550  const QueryMemoryDescriptor& query_mem_desc,
1551  const size_t chosen_bytes,
1552  const size_t agg_out_off,
1553  const size_t target_idx) {
1554  llvm::Value* agg_col_ptr{nullptr};
1555  if (query_mem_desc.didOutputColumnar()) {
1556  // TODO(Saman): remove the second columnar branch, and support all query description
1557  // types through the first branch. Then, input arguments should also be cleaned up
1558  if (!g_cluster &&
1560  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1561  chosen_bytes == 8);
1562  CHECK(output_buffer_byte_stream);
1563  CHECK(out_row_idx);
1564  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1565  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1566  auto out_per_col_byte_idx =
1567  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1568  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1569  LL_INT(static_cast<int64_t>(col_off)));
1570  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1571  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1572  agg_col_ptr = LL_BUILDER.CreateBitCast(
1573  output_ptr,
1574  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1575  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1576  } else {
1577  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1578  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1579  col_off /= chosen_bytes;
1580  CHECK(std::get<1>(agg_out_ptr_w_idx));
1581  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1582  agg_col_ptr = LL_BUILDER.CreateGEP(
1583  LL_BUILDER.CreateBitCast(
1584  std::get<0>(agg_out_ptr_w_idx),
1585  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1586  offset);
1587  }
1588  } else {
1589  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1590  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1591  col_off /= chosen_bytes;
1592  agg_col_ptr = LL_BUILDER.CreateGEP(
1593  LL_BUILDER.CreateBitCast(
1594  std::get<0>(agg_out_ptr_w_idx),
1595  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1596  LL_INT(col_off));
1597  }
1598  CHECK(agg_col_ptr);
1599  return agg_col_ptr;
1600 }
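// Worked example for the columnar branch above (illustrative, not part of the
// original source): for a 4-byte aggregate slot whose column starts at
// col_off = 1024 bytes, output row 7 is addressed at
//   byte_offset = (7 << log2(4)) + 1024 = 28 + 1024 = 1052
// and the resulting i8 pointer is cast to a 32-bit integer pointer. The
// row-wise branch instead computes the offset in units of chosen_bytes.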
1601 
1603  std::stack<llvm::BasicBlock*>& array_loops,
1604  GroupByAndAggregate::DiamondCodegen& diamond_codegen,
1605  const QueryMemoryDescriptor& query_mem_desc,
1606  const CompilationOptions& co) {
1607  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1608  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1609  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1610  estimator_comp_count_lv);
1611  int32_t subkey_idx = 0;
1612  for (const auto& estimator_arg_comp : estimator_arg) {
1613  const auto estimator_arg_comp_lvs =
1614  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1615  query_mem_desc.getEffectiveKeyWidth(),
1616  co,
1617  false,
1618  0,
1619  diamond_codegen,
1620  array_loops,
1621  true);
1622  CHECK(!estimator_arg_comp_lvs.original_value);
1623  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1624  // store the sub-key to the buffer
1625  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1626  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1627  }
1628  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1629  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1630  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1631  const auto estimator_comp_bytes_lv =
1632  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1633  const auto bitmap_size_lv =
1634  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1635  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1636  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1637 }
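// Editorial sketch (not part of the original file): the code above packs each
// group-by sub-key into a contiguous int64_t buffer and passes it, together with
// the estimator bitmap, to the estimator's runtime function. A simplified,
// hypothetical linear-counting style update over the packed key bytes could look
// like the following; the actual runtime implementation lives elsewhere.
namespace {
[[maybe_unused]] void illustrative_estimator_update(int8_t* bitmap,
                                                    const uint32_t bitmap_bytes,
                                                    const int8_t* key_bytes,
                                                    const uint32_t key_byte_count) {
  uint64_t hash = 14695981039346656037ULL;  // FNV-1a offset basis
  for (uint32_t i = 0; i < key_byte_count; ++i) {
    hash ^= static_cast<uint8_t>(key_bytes[i]);
    hash *= 1099511628211ULL;  // FNV-1a prime
  }
  const uint64_t bit = hash % (static_cast<uint64_t>(bitmap_bytes) * 8);
  bitmap[bit >> 3] |= static_cast<int8_t>(1 << (bit & 7));  // mark this key as seen
}
}  // namespace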
1638 
1639 extern "C" void agg_count_distinct(int64_t* agg, const int64_t val) {
1640  reinterpret_cast<std::set<int64_t>*>(*agg)->insert(val);
1641 }
1642 
1643 extern "C" void agg_count_distinct_skip_val(int64_t* agg,
1644  const int64_t val,
1645  const int64_t skip_val) {
1646  if (val != skip_val) {
1647  agg_count_distinct(agg, val);
1648  }
1649 }
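// Editorial sketch (not part of the original file): the two extern "C" helpers
// above implement the std::set-based exact COUNT(DISTINCT) path. The 64-bit
// aggregate slot holds a pointer to a std::set<int64_t>; each call inserts one
// value and the final distinct count is the set's size. A hypothetical use:
namespace {
[[maybe_unused]] size_t illustrative_set_based_count_distinct() {
  std::set<int64_t> distinct_vals;
  auto agg_slot = reinterpret_cast<int64_t>(&distinct_vals);
  agg_count_distinct(&agg_slot, 7);
  agg_count_distinct(&agg_slot, 7);                // duplicate insert, no effect
  agg_count_distinct_skip_val(&agg_slot, -1, -1);  // equals skip_val, ignored
  return distinct_vals.size();                     // == 1
}
}  // namespace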
1650 
1651 void GroupByAndAggregate::codegenCountDistinct(
1652  const size_t target_idx,
1653  const Analyzer::Expr* target_expr,
1654  std::vector<llvm::Value*>& agg_args,
1655  const QueryMemoryDescriptor& query_mem_desc,
1656  const ExecutorDeviceType device_type) {
1657  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1658  const auto& arg_ti =
1659  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1660  if (arg_ti.is_fp()) {
1661  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1662  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1663  }
1664  const auto& count_distinct_descriptor =
1665  query_mem_desc.getCountDistinctDescriptor(target_idx);
1666  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1667  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1668  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1669  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1670  if (device_type == ExecutorDeviceType::GPU) {
1671  const auto base_dev_addr = getAdditionalLiteral(-1);
1672  const auto base_host_addr = getAdditionalLiteral(-2);
1673  agg_args.push_back(base_dev_addr);
1674  agg_args.push_back(base_host_addr);
1675  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1676  } else {
1677  emitCall("agg_approximate_count_distinct", agg_args);
1678  }
1679  return;
1680  }
1681  std::string agg_fname{"agg_count_distinct"};
1682  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1683  agg_fname += "_bitmap";
1684  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1685  }
1686  if (agg_info.skip_null_val) {
1687  auto null_lv = executor_->cgen_state_->castToTypeIn(
1688  (arg_ti.is_fp()
1689  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1690  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1691  64);
1692  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1693  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1694  agg_fname += "_skip_val";
1695  agg_args.push_back(null_lv);
1696  }
1697  if (device_type == ExecutorDeviceType::GPU) {
1698  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1699  agg_fname += "_gpu";
1700  const auto base_dev_addr = getAdditionalLiteral(-1);
1701  const auto base_host_addr = getAdditionalLiteral(-2);
1702  agg_args.push_back(base_dev_addr);
1703  agg_args.push_back(base_host_addr);
1704  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1705  CHECK_EQ(size_t(0),
1706  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1707  count_distinct_descriptor.sub_bitmap_count);
1708  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1709  count_distinct_descriptor.sub_bitmap_count)));
1710  }
1711  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1712  emitCall(agg_fname, agg_args);
1713  } else {
1714  executor_->cgen_state_->emitExternalCall(
1715  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1716  }
1717 }
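// Editorial sketch (not part of the original file): for the bitmap-based exact
// COUNT(DISTINCT) variants dispatched above, the runtime conceptually sets bit
// (val - min_val) in a bitmap sized for the column's value range. A simplified,
// hypothetical single-threaded version:
namespace {
[[maybe_unused]] void illustrative_count_distinct_bitmap(int8_t* bitmap,
                                                         const int64_t val,
                                                         const int64_t min_val) {
  const uint64_t bit = static_cast<uint64_t>(val - min_val);
  bitmap[bit >> 3] |= static_cast<int8_t>(1 << (bit & 7));
}
}  // namespace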
1718 
1719 llvm::Value* GroupByAndAggregate::getAdditionalLiteral(const int32_t off) {
1720  CHECK_LT(off, 0);
1721  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1722  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1723  LL_BUILDER.CreateBitCast(lit_buff_lv,
1724  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1725  LL_INT(off)));
1726 }
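// Editorial note: getAdditionalLiteral reads the 64-bit value stored at a
// negative index relative to the "literals" argument, roughly
// reinterpret_cast<const int64_t*>(literals)[off] with off < 0; offsets -1 and
// -2 are used above for the device and host bitmap base addresses of the GPU
// count-distinct paths.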
1727 
1728 std::vector<llvm::Value*> GroupByAndAggregate::codegenAggArg(
1729  const Analyzer::Expr* target_expr,
1730  const CompilationOptions& co) {
1731  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1732  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1733  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1734 
1735  // TODO(alex): handle arrays uniformly?
1736  CodeGenerator code_generator(executor_);
1737  if (target_expr) {
1738  const auto& target_ti = target_expr->get_type_info();
1739  if (target_ti.is_array() && !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1740  const auto target_lvs =
1741  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1742  : code_generator.codegen(
1743  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1744  if (!func_expr && !arr_expr) {
1745  // The target is an array fetched through the chunk transport, i.e. it was
1746  // generated from a source other than an ARRAY[] expression
1747  CHECK_EQ(size_t(1), target_lvs.size());
1748  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1749  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1750  const auto i8p_ty =
1751  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1752  const auto& elem_ti = target_ti.get_elem_type();
1753  return {
1754  executor_->cgen_state_->emitExternalCall(
1755  "array_buff",
1756  i8p_ty,
1757  {target_lvs.front(), code_generator.posArg(target_expr)}),
1758  executor_->cgen_state_->emitExternalCall(
1759  "array_size",
1760  i32_ty,
1761  {target_lvs.front(),
1762  code_generator.posArg(target_expr),
1763  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1764  } else {
1765  if (agg_expr) {
1766  throw std::runtime_error(
1767  "Using array[] operator as argument to an aggregate operator is not "
1768  "supported");
1769  }
1770  CHECK(func_expr || arr_expr);
1771  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1772  CHECK_EQ(size_t(1), target_lvs.size());
1773 
1774  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1775 
1776  // const auto target_lv_type = target_lvs[0]->getType();
1777  // CHECK(target_lv_type->isStructTy());
1778  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1779  const auto i8p_ty = llvm::PointerType::get(
1780  get_int_type(8, executor_->cgen_state_->context_), 0);
1781  const auto ptr = LL_BUILDER.CreatePointerCast(
1782  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1783  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1784  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1785 
1786  const auto nullcheck_ok_bb = llvm::BasicBlock::Create(
1787  LL_CONTEXT, "arr_nullcheck_ok_bb", executor_->cgen_state_->row_func_);
1788  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
1789  LL_CONTEXT, "arr_nullcheck_fail_bb", executor_->cgen_state_->row_func_);
1790 
1791  // TODO(adb): probably better to zext the bool
1792  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1793  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
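 // A null_flag equal to 1 marks a null array: the branch below routes it to
 // nullcheck_fail_bb, which feeds the null sentinel pointer into the result
 // phi, while non-null arrays register their buffer and pass the pointer
 // through.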
1794  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1795 
1796  const auto ret_bb = llvm::BasicBlock::Create(
1797  LL_CONTEXT, "arr_return", executor_->cgen_state_->row_func_);
1798  LL_BUILDER.SetInsertPoint(ret_bb);
1799  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, "array_ptr_return");
1800  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1801 
1802  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1803  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1804  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1805 
1806  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1807  executor_->cgen_state_->emitExternalCall(
1808  "register_buffer_with_executor_rsm",
1809  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1810  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1811  LL_BUILDER.CreateBr(ret_bb);
1812 
1813  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1814  LL_BUILDER.CreateBr(ret_bb);
1815 
1816  LL_BUILDER.SetInsertPoint(ret_bb);
1817 
1818  return {result_phi, size};
1819  }
1820  CHECK_EQ(size_t(2), target_lvs.size());
1821  return {target_lvs[0], target_lvs[1]};
1822  }
1823  }
1824  if (target_ti.is_geometry() &&
1825  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1826  auto generate_coord_lvs =
1827  [&](auto* selected_target_expr,
1828  bool const fetch_columns) -> std::vector<llvm::Value*> {
1829  const auto target_lvs =
1830  code_generator.codegen(selected_target_expr, fetch_columns, co);
1831  const auto geo_expr = dynamic_cast<const Analyzer::GeoExpr*>(target_expr);
1832  if (geo_expr) {
1833  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1834  target_lvs.size());
1835  return target_lvs;
1836  }
1837  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1838  target_lvs.size());
1839 
1840  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1841  const auto i8p_ty =
1842  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1843  std::vector<llvm::Value*> coords;
1844  size_t ctr = 0;
1845  for (const auto& target_lv : target_lvs) {
1846  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1847  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1848  // for col 1 for pols / mpolys, etc). Hardcoding for now: the first array is
1849  // the coords array (TINYINT); subsequent arrays are regular INT.
1850 
1851  const size_t elem_sz = ctr == 0 ? 1 : 4;
1852  ctr++;
1853  int32_t fixlen = -1;
1854  if (target_ti.get_type() == kPOINT) {
1855  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1856  if (col_var) {
1857  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1858  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1859  fixlen = coords_cd->columnType.get_size();
1860  }
1861  }
1862  }
1863  if (fixlen > 0) {
1864  coords.push_back(executor_->cgen_state_->emitExternalCall(
1865  "fast_fixlen_array_buff",
1866  i8p_ty,
1867  {target_lv, code_generator.posArg(selected_target_expr)}));
1868  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1869  continue;
1870  }
1871  coords.push_back(executor_->cgen_state_->emitExternalCall(
1872  "array_buff",
1873  i8p_ty,
1874  {target_lv, code_generator.posArg(selected_target_expr)}));
1875  coords.push_back(executor_->cgen_state_->emitExternalCall(
1876  "array_size",
1877  i32_ty,
1878  {target_lv,
1879  code_generator.posArg(selected_target_expr),
1880  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1881  }
1882  return coords;
1883  };
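 // Editorial note: for each physical coordinate column the lambda above emits
 // a (buffer pointer, length) pair, using the fast fixed-length path when a
 // POINT column's coords array has a known fixed size and the generic
 // array_buff/array_size calls otherwise.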
1884 
1885  if (agg_expr) {
1886  return generate_coord_lvs(agg_expr->get_arg(), true);
1887  } else {
1888  return generate_coord_lvs(target_expr,
1889  !executor_->plan_state_->allow_lazy_fetch_);
1890  }
1891  }
1892  }
1893  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1894  : code_generator.codegen(
1895  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1896 }
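// Editorial note: codegenAggArg therefore yields one value for scalar targets,
// a (buffer pointer, size) pair for array targets, and one such pair per
// physical coordinate column for geometry targets, with ARRAY[]-generated
// arrays additionally null-checked through the phi above.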
1897 
1898 llvm::Value* GroupByAndAggregate::emitCall(const std::string& fname,
1899  const std::vector<llvm::Value*>& args) {
1900  return executor_->cgen_state_->emitCall(fname, args);
1901 }
1902 
1903 void GroupByAndAggregate::checkErrorCode(llvm::Value* retCode) {
1904  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1905  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1906  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1907 
1908  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1909 }
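// Editorial note: the emitted check is conceptually
//   if (ret_code != 0) return ret_code;
// i.e. any non-zero return code from the callee aborts the row function with
// that error code.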
1910 
1911 #undef ROW_FUNC
1912 #undef LL_FP
1913 #undef LL_INT
1914 #undef LL_BOOL
1915 #undef LL_BUILDER
1916 #undef LL_CONTEXT
1917 
1918 size_t GroupByAndAggregate::shard_count_for_top_groups(
1919  const RelAlgExecutionUnit& ra_exe_unit,
1920  const Catalog_Namespace::Catalog& catalog) {
1921  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1922  return 0;
1923  }
1924  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1925  const auto grouped_col_expr =
1926  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1927  if (!grouped_col_expr) {
1928  continue;
1929  }
1930  if (grouped_col_expr->get_table_id() <= 0) {
1931  return 0;
1932  }
1933  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1934  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1935  return td->nShards;
1936  }
1937  }
1938  return 0;
1939 }
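// Editorial note: shard_count_for_top_groups returns the table's shard count
// only for a single-column ORDER BY ... LIMIT query that groups on the table's
// shard key, which lets the top groups be computed independently per shard; in
// every other case it returns 0.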