OmniSciDB  8a228a1076
TargetExprBuilder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "TargetExprBuilder.h"

#include "CodeGenerator.h"
#include "Execute.h"
#include "GroupByAndAggregate.h"
#include "Logger/Logger.h"
#include "MaxwellCodegenPatch.h"
#include "WindowContext.h"
// Shorthands into the current Executor's LLVM code generation state.
// Each macro expects a variable named `executor` (an Executor*) to be in
// scope at the expansion site — every function in this file takes one.
32 #define LL_CONTEXT executor->cgen_state_->context_
33 #define LL_BUILDER executor->cgen_state_->ir_builder_
34 #define LL_BOOL(v) executor->ll_bool(v)
35 #define LL_INT(v) executor->cgen_state_->llInt(v)
36 #define LL_FP(v) executor->cgen_state_->llFp(v)
37 #define ROW_FUNC executor->cgen_state_->row_func_
38 
39 namespace {
40 
41 std::vector<std::string> agg_fn_base_names(const TargetInfo& target_info) {
42  const auto& chosen_type = get_compact_type(target_info);
43  if (!target_info.is_agg || target_info.agg_kind == kSAMPLE) {
44  if (chosen_type.is_geometry()) {
45  return std::vector<std::string>(2 * chosen_type.get_physical_coord_cols(),
46  "agg_id");
47  }
48  if (chosen_type.is_varlen()) {
49  return {"agg_id", "agg_id"};
50  }
51  return {"agg_id"};
52  }
53  switch (target_info.agg_kind) {
54  case kAVG:
55  return {"agg_sum", "agg_count"};
56  case kCOUNT:
57  return {target_info.is_distinct ? "agg_count_distinct" : "agg_count"};
58  case kMAX:
59  return {"agg_max"};
60  case kMIN:
61  return {"agg_min"};
62  case kSUM:
63  return {"agg_sum"};
65  return {"agg_approximate_count_distinct"};
66  case kSINGLE_VALUE:
67  return {"checked_single_agg_id"};
68  case kSAMPLE:
69  return {"agg_id"};
70  default:
71  UNREACHABLE() << "Unrecognized agg kind: " << std::to_string(target_info.agg_kind);
72  }
73  return {};
74 }
75 
76 inline bool is_columnar_projection(const QueryMemoryDescriptor& query_mem_desc) {
77  return query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
78  query_mem_desc.didOutputColumnar();
79 }
80 
81 bool is_simple_count(const TargetInfo& target_info) {
82  return target_info.is_agg && target_info.agg_kind == kCOUNT && !target_info.is_distinct;
83 }
84 
85 bool target_has_geo(const TargetInfo& target_info) {
86  return target_info.is_agg ? target_info.agg_arg_type.is_geometry()
87  : target_info.sql_type.is_geometry();
88 }
89 
90 } // namespace
91 
93  GroupByAndAggregate* group_by_and_agg,
94  Executor* executor,
95  const QueryMemoryDescriptor& query_mem_desc,
96  const CompilationOptions& co,
97  const GpuSharedMemoryContext& gpu_smem_context,
98  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
99  const std::vector<llvm::Value*>& agg_out_vec,
100  llvm::Value* output_buffer_byte_stream,
101  llvm::Value* out_row_idx,
102  GroupByAndAggregate::DiamondCodegen& diamond_codegen,
103  GroupByAndAggregate::DiamondCodegen* sample_cfg) const {
104  CHECK(group_by_and_agg);
105  CHECK(executor);
106  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
107 
108  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
109  const auto arg_expr = agg_arg(target_expr);
110 
111  const auto agg_fn_names = agg_fn_base_names(target_info);
112  const auto window_func = dynamic_cast<const Analyzer::WindowFunction*>(target_expr);
114  auto target_lvs =
115  window_func
116  ? std::vector<llvm::Value*>{executor->codegenWindowFunction(target_idx, co)}
117  : group_by_and_agg->codegenAggArg(target_expr, co);
118  const auto window_row_ptr = window_func
119  ? group_by_and_agg->codegenWindowRowPointer(
120  window_func, query_mem_desc, co, diamond_codegen)
121  : nullptr;
122  if (window_row_ptr) {
123  agg_out_ptr_w_idx =
124  std::make_tuple(window_row_ptr, std::get<1>(agg_out_ptr_w_idx_in));
125  if (window_function_is_aggregate(window_func->getKind())) {
126  out_row_idx = window_row_ptr;
127  }
128  }
129 
130  llvm::Value* str_target_lv{nullptr};
131  if (target_lvs.size() == 3 && !target_has_geo(target_info)) {
132  // none encoding string, pop the packed pointer + length since
133  // it's only useful for IS NULL checks and assumed to be only
134  // two components (pointer and length) for the purpose of projection
135  str_target_lv = target_lvs.front();
136  target_lvs.erase(target_lvs.begin());
137  }
139  // Geo cols are expanded to the physical coord cols. Each physical coord col is an
140  // array. Ensure that the target values generated match the number of agg
141  // functions before continuing
142  if (target_lvs.size() < agg_fn_names.size()) {
143  CHECK_EQ(target_lvs.size(), agg_fn_names.size() / 2);
144  std::vector<llvm::Value*> new_target_lvs;
145  new_target_lvs.reserve(agg_fn_names.size());
146  for (const auto& target_lv : target_lvs) {
147  new_target_lvs.push_back(target_lv);
148  new_target_lvs.push_back(target_lv);
149  }
150  target_lvs = new_target_lvs;
151  }
152  }
153  if (target_lvs.size() < agg_fn_names.size()) {
154  CHECK_EQ(size_t(1), target_lvs.size());
155  CHECK_EQ(size_t(2), agg_fn_names.size());
156  for (size_t i = 1; i < agg_fn_names.size(); ++i) {
157  target_lvs.push_back(target_lvs.front());
158  }
159  } else {
161  if (!target_info.is_agg) {
162  CHECK_EQ(static_cast<size_t>(2 * target_info.sql_type.get_physical_coord_cols()),
163  target_lvs.size());
164  CHECK_EQ(agg_fn_names.size(), target_lvs.size());
165  }
166  } else {
167  CHECK(str_target_lv || (agg_fn_names.size() == target_lvs.size()));
168  CHECK(target_lvs.size() == 1 || target_lvs.size() == 2);
169  }
170  }
171 
172  int32_t slot_index = base_slot_index;
173  CHECK_GE(slot_index, 0);
174  CHECK(is_group_by || static_cast<size_t>(slot_index) < agg_out_vec.size());
175 
176  uint32_t col_off{0};
177  if (co.device_type == ExecutorDeviceType::GPU && query_mem_desc.threadsShareMemory() &&
179  (!arg_expr || arg_expr->get_type_info().get_notnull())) {
180  CHECK_EQ(size_t(1), agg_fn_names.size());
181  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(slot_index);
182  llvm::Value* agg_col_ptr{nullptr};
183  if (is_group_by) {
184  if (query_mem_desc.didOutputColumnar()) {
185  col_off = query_mem_desc.getColOffInBytes(slot_index);
186  CHECK_EQ(size_t(0), col_off % chosen_bytes);
187  col_off /= chosen_bytes;
188  CHECK(std::get<1>(agg_out_ptr_w_idx));
189  auto offset =
190  LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
191  agg_col_ptr = LL_BUILDER.CreateGEP(
192  LL_BUILDER.CreateBitCast(
193  std::get<0>(agg_out_ptr_w_idx),
194  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
195  offset);
196  } else {
197  col_off = query_mem_desc.getColOnlyOffInBytes(slot_index);
198  CHECK_EQ(size_t(0), col_off % chosen_bytes);
199  col_off /= chosen_bytes;
200  agg_col_ptr = LL_BUILDER.CreateGEP(
201  LL_BUILDER.CreateBitCast(
202  std::get<0>(agg_out_ptr_w_idx),
203  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
204  LL_INT(col_off));
205  }
206  }
207 
208  if (chosen_bytes != sizeof(int32_t)) {
209  CHECK_EQ(8, chosen_bytes);
210  if (g_bigint_count) {
211  const auto acc_i64 = LL_BUILDER.CreateBitCast(
212  is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
213  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0));
214  if (gpu_smem_context.isSharedMemoryUsed()) {
215  group_by_and_agg->emitCall(
216  "agg_count_shared", std::vector<llvm::Value*>{acc_i64, LL_INT(int64_t(1))});
217  } else {
218  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
219  acc_i64,
220  LL_INT(int64_t(1)),
221  llvm::AtomicOrdering::Monotonic);
222  }
223  } else {
224  auto acc_i32 = LL_BUILDER.CreateBitCast(
225  is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
226  llvm::PointerType::get(get_int_type(32, LL_CONTEXT), 0));
227  if (gpu_smem_context.isSharedMemoryUsed()) {
228  acc_i32 = LL_BUILDER.CreatePointerCast(
229  acc_i32, llvm::Type::getInt32PtrTy(LL_CONTEXT, 3));
230  }
231  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
232  acc_i32,
233  LL_INT(1),
234  llvm::AtomicOrdering::Monotonic);
235  }
236  } else {
237  const auto acc_i32 = (is_group_by ? agg_col_ptr : agg_out_vec[slot_index]);
238  if (gpu_smem_context.isSharedMemoryUsed()) {
239  // Atomic operation on address space level 3 (Shared):
240  const auto shared_acc_i32 = LL_BUILDER.CreatePointerCast(
241  acc_i32, llvm::Type::getInt32PtrTy(LL_CONTEXT, 3));
242  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
243  shared_acc_i32,
244  LL_INT(1),
245  llvm::AtomicOrdering::Monotonic);
246  } else {
247  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
248  acc_i32,
249  LL_INT(1),
250  llvm::AtomicOrdering::Monotonic);
251  }
252  }
253  return;
254  }
255 
256  codegenAggregate(group_by_and_agg,
257  executor,
258  query_mem_desc,
259  co,
260  target_lvs,
261  agg_out_ptr_w_idx,
262  agg_out_vec,
263  output_buffer_byte_stream,
264  out_row_idx,
265  slot_index);
266 }
267 
269  GroupByAndAggregate* group_by_and_agg,
270  Executor* executor,
271  const QueryMemoryDescriptor& query_mem_desc,
272  const CompilationOptions& co,
273  const std::vector<llvm::Value*>& target_lvs,
274  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
275  const std::vector<llvm::Value*>& agg_out_vec,
276  llvm::Value* output_buffer_byte_stream,
277  llvm::Value* out_row_idx,
278  int32_t slot_index) const {
279  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
280  size_t target_lv_idx = 0;
281  const bool lazy_fetched{executor->plan_state_->isLazyFetchColumn(target_expr)};
282 
283  CodeGenerator code_generator(executor);
284 
285  const auto agg_fn_names = agg_fn_base_names(target_info);
286  auto arg_expr = agg_arg(target_expr);
287 
288  for (const auto& agg_base_name : agg_fn_names) {
289  if (target_info.is_distinct && arg_expr->get_type_info().is_array()) {
290  CHECK_EQ(static_cast<size_t>(query_mem_desc.getLogicalSlotWidthBytes(slot_index)),
291  sizeof(int64_t));
292  // TODO(miyu): check if buffer may be columnar here
293  CHECK(!query_mem_desc.didOutputColumnar());
294  const auto& elem_ti = arg_expr->get_type_info().get_elem_type();
295  uint32_t col_off{0};
296  if (is_group_by) {
297  const auto col_off_in_bytes = query_mem_desc.getColOnlyOffInBytes(slot_index);
298  CHECK_EQ(size_t(0), col_off_in_bytes % sizeof(int64_t));
299  col_off /= sizeof(int64_t);
300  }
301  executor->cgen_state_->emitExternalCall(
302  "agg_count_distinct_array_" + numeric_type_name(elem_ti),
303  llvm::Type::getVoidTy(LL_CONTEXT),
304  {is_group_by
305  ? LL_BUILDER.CreateGEP(std::get<0>(agg_out_ptr_w_idx), LL_INT(col_off))
306  : agg_out_vec[slot_index],
307  target_lvs[target_lv_idx],
308  code_generator.posArg(arg_expr),
309  elem_ti.is_fp()
310  ? static_cast<llvm::Value*>(executor->cgen_state_->inlineFpNull(elem_ti))
311  : static_cast<llvm::Value*>(
312  executor->cgen_state_->inlineIntNull(elem_ti))});
313  ++slot_index;
314  ++target_lv_idx;
315  continue;
316  }
317 
318  llvm::Value* agg_col_ptr{nullptr};
319  const auto chosen_bytes =
320  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(slot_index));
321  const auto& chosen_type = get_compact_type(target_info);
322  const auto& arg_type =
323  ((arg_expr && arg_expr->get_type_info().get_type() != kNULLT) &&
327  const bool is_fp_arg =
328  !lazy_fetched && arg_type.get_type() != kNULLT && arg_type.is_fp();
329  if (is_group_by) {
330  agg_col_ptr = group_by_and_agg->codegenAggColumnPtr(output_buffer_byte_stream,
331  out_row_idx,
332  agg_out_ptr_w_idx,
333  query_mem_desc,
334  chosen_bytes,
335  slot_index,
336  target_idx);
337  CHECK(agg_col_ptr);
338  agg_col_ptr->setName("agg_col_ptr");
339  }
340 
341  const bool float_argument_input = takes_float_argument(target_info);
342  const bool is_count_in_avg = target_info.agg_kind == kAVG && target_lv_idx == 1;
343  // The count component of an average should never be compacted.
344  const auto agg_chosen_bytes =
345  float_argument_input && !is_count_in_avg ? sizeof(float) : chosen_bytes;
346  if (float_argument_input) {
347  CHECK_GE(chosen_bytes, sizeof(float));
348  }
349 
350  auto target_lv = target_lvs[target_lv_idx];
351  const auto needs_unnest_double_patch = group_by_and_agg->needsUnnestDoublePatch(
352  target_lv, agg_base_name, query_mem_desc.threadsShareMemory(), co);
353  const auto need_skip_null = !needs_unnest_double_patch && target_info.skip_null_val;
354  if (!needs_unnest_double_patch) {
355  if (need_skip_null && !is_agg_domain_range_equivalent(target_info.agg_kind)) {
356  target_lv = group_by_and_agg->convertNullIfAny(arg_type, target_info, target_lv);
357  } else if (is_fp_arg) {
358  target_lv = executor->castToFP(target_lv);
359  }
360  if (!dynamic_cast<const Analyzer::AggExpr*>(target_expr) || arg_expr) {
361  target_lv =
362  executor->cgen_state_->castToTypeIn(target_lv, (agg_chosen_bytes << 3));
363  }
364  }
365 
366  const bool is_simple_count_target = is_simple_count(target_info);
367  llvm::Value* str_target_lv{nullptr};
368  if (target_lvs.size() == 3 && !target_has_geo(target_info)) {
369  // none encoding string
370  str_target_lv = target_lvs.front();
371  }
372  std::vector<llvm::Value*> agg_args{
373  executor->castToIntPtrTyIn((is_group_by ? agg_col_ptr : agg_out_vec[slot_index]),
374  (agg_chosen_bytes << 3)),
375  (is_simple_count_target && !arg_expr)
376  ? (agg_chosen_bytes == sizeof(int32_t) ? LL_INT(int32_t(0))
377  : LL_INT(int64_t(0)))
378  : (is_simple_count_target && arg_expr && str_target_lv ? str_target_lv
379  : target_lv)};
380  if (query_mem_desc.isLogicalSizedColumnsAllowed()) {
381  if (is_simple_count_target && arg_expr && str_target_lv) {
382  agg_args[1] =
383  agg_chosen_bytes == sizeof(int32_t) ? LL_INT(int32_t(0)) : LL_INT(int64_t(0));
384  }
385  }
386  std::string agg_fname{agg_base_name};
387  if (is_fp_arg) {
388  if (!lazy_fetched) {
389  if (agg_chosen_bytes == sizeof(float)) {
390  CHECK_EQ(arg_type.get_type(), kFLOAT);
391  agg_fname += "_float";
392  } else {
393  CHECK_EQ(agg_chosen_bytes, sizeof(double));
394  agg_fname += "_double";
395  }
396  }
397  } else if (agg_chosen_bytes == sizeof(int32_t)) {
398  agg_fname += "_int32";
399  } else if (agg_chosen_bytes == sizeof(int16_t) &&
400  query_mem_desc.didOutputColumnar()) {
401  agg_fname += "_int16";
402  } else if (agg_chosen_bytes == sizeof(int8_t) && query_mem_desc.didOutputColumnar()) {
403  agg_fname += "_int8";
404  }
405 
407  CHECK_EQ(agg_chosen_bytes, sizeof(int64_t));
408  CHECK(!chosen_type.is_fp());
409  group_by_and_agg->codegenCountDistinct(
410  target_idx, target_expr, agg_args, query_mem_desc, co.device_type);
411  } else {
412  const auto& arg_ti = target_info.agg_arg_type;
413  if (need_skip_null && !arg_ti.is_geometry()) {
414  agg_fname += "_skip_val";
415  }
416 
418  (need_skip_null && !arg_ti.is_geometry())) {
419  llvm::Value* null_in_lv{nullptr};
420  if (arg_ti.is_fp()) {
421  null_in_lv =
422  static_cast<llvm::Value*>(executor->cgen_state_->inlineFpNull(arg_ti));
423  } else {
424  null_in_lv = static_cast<llvm::Value*>(executor->cgen_state_->inlineIntNull(
426  ? arg_ti
427  : target_info.sql_type));
428  }
429  CHECK(null_in_lv);
430  auto null_lv =
431  executor->cgen_state_->castToTypeIn(null_in_lv, (agg_chosen_bytes << 3));
432  agg_args.push_back(null_lv);
433  }
434  if (!target_info.is_distinct) {
436  query_mem_desc.threadsShareMemory()) {
437  agg_fname += "_shared";
438  if (needs_unnest_double_patch) {
439  agg_fname = patch_agg_fname(agg_fname);
440  }
441  }
442  auto agg_fname_call_ret_lv = group_by_and_agg->emitCall(agg_fname, agg_args);
443 
444  if (agg_fname.find("checked") != std::string::npos) {
445  group_by_and_agg->checkErrorCode(agg_fname_call_ret_lv);
446  }
447  }
448  }
449  const auto window_func = dynamic_cast<const Analyzer::WindowFunction*>(target_expr);
450  if (window_func && window_function_requires_peer_handling(window_func)) {
451  const auto window_func_context =
453  const auto pending_outputs =
454  LL_INT(window_func_context->aggregateStatePendingOutputs());
455  executor->cgen_state_->emitExternalCall("add_window_pending_output",
456  llvm::Type::getVoidTy(LL_CONTEXT),
457  {agg_args.front(), pending_outputs});
458  const auto& window_func_ti = window_func->get_type_info();
459  std::string apply_window_pending_outputs_name = "apply_window_pending_outputs";
460  switch (window_func_ti.get_type()) {
461  case kFLOAT: {
462  apply_window_pending_outputs_name += "_float";
463  if (query_mem_desc.didOutputColumnar()) {
464  apply_window_pending_outputs_name += "_columnar";
465  }
466  break;
467  }
468  case kDOUBLE: {
469  apply_window_pending_outputs_name += "_double";
470  break;
471  }
472  default: {
473  apply_window_pending_outputs_name += "_int";
474  if (query_mem_desc.didOutputColumnar()) {
475  apply_window_pending_outputs_name +=
476  std::to_string(window_func_ti.get_size() * 8);
477  } else {
478  apply_window_pending_outputs_name += "64";
479  }
480  break;
481  }
482  }
483  const auto partition_end =
484  LL_INT(reinterpret_cast<int64_t>(window_func_context->partitionEnd()));
485  executor->cgen_state_->emitExternalCall(apply_window_pending_outputs_name,
486  llvm::Type::getVoidTy(LL_CONTEXT),
487  {pending_outputs,
488  target_lvs.front(),
489  partition_end,
490  code_generator.posArg(nullptr)});
491  }
492 
493  ++slot_index;
494  ++target_lv_idx;
495  }
496 }
497 
499  const Executor* executor,
500  const CompilationOptions& co) {
501  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
502  if (query_mem_desc.getPaddedSlotWidthBytes(slot_index_counter) == 0) {
503  CHECK(!dynamic_cast<const Analyzer::AggExpr*>(target_expr));
504  ++slot_index_counter;
505  ++target_index_counter;
506  return;
507  }
508  if (dynamic_cast<const Analyzer::UOper*>(target_expr) &&
509  static_cast<const Analyzer::UOper*>(target_expr)->get_optype() == kUNNEST) {
510  throw std::runtime_error("UNNEST not supported in the projection list yet.");
511  }
512  if ((executor->plan_state_->isLazyFetchColumn(target_expr) || !is_group_by) &&
513  (static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(slot_index_counter)) <
514  sizeof(int64_t)) &&
515  !is_columnar_projection(query_mem_desc)) {
516  // TODO(miyu): enable different byte width in the layout w/o padding
518  }
519 
520  auto target_info = get_target_info(target_expr, g_bigint_count);
521  auto arg_expr = agg_arg(target_expr);
522  if (arg_expr) {
524  target_info.skip_null_val = false;
525  } else if (query_mem_desc.getQueryDescriptionType() ==
527  !arg_expr->get_type_info().is_varlen()) {
528  // TODO: COUNT is currently not null-aware for varlen types. Need to add proper code
529  // generation for handling varlen nulls.
530  target_info.skip_null_val = true;
531  } else if (constrained_not_null(arg_expr, ra_exe_unit.quals)) {
532  target_info.skip_null_val = false;
533  }
534  }
535 
536  if (!(query_mem_desc.getQueryDescriptionType() ==
540  sample_exprs_to_codegen.emplace_back(target_expr,
541  target_info,
542  slot_index_counter,
543  target_index_counter++,
544  is_group_by);
545  } else {
546  target_exprs_to_codegen.emplace_back(target_expr,
547  target_info,
548  slot_index_counter,
549  target_index_counter++,
550  is_group_by);
551  }
552 
553  const auto agg_fn_names = agg_fn_base_names(target_info);
554  slot_index_counter += agg_fn_names.size();
555 }
556 
557 namespace {
558 
560  const QueryMemoryDescriptor& query_mem_desc) {
561  const bool is_group_by{query_mem_desc.isGroupBy()};
562  if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_string() &&
563  target_info.sql_type.get_compression() != kENCODING_NONE) {
564  return get_agg_initial_val(target_info.agg_kind,
565  target_info.sql_type,
566  is_group_by,
567  query_mem_desc.getCompactByteWidth());
568  }
569  return 0;
570 }
571 
572 } // namespace
573 
575  GroupByAndAggregate* group_by_and_agg,
576  Executor* executor,
577  const QueryMemoryDescriptor& query_mem_desc,
578  const CompilationOptions& co,
579  const GpuSharedMemoryContext& gpu_smem_context,
580  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
581  const std::vector<llvm::Value*>& agg_out_vec,
582  llvm::Value* output_buffer_byte_stream,
583  llvm::Value* out_row_idx,
584  GroupByAndAggregate::DiamondCodegen& diamond_codegen) const {
585  CHECK(group_by_and_agg);
586  CHECK(executor);
587  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
588 
589  for (const auto& target_expr_codegen : target_exprs_to_codegen) {
590  target_expr_codegen.codegen(group_by_and_agg,
591  executor,
592  query_mem_desc,
593  co,
594  gpu_smem_context,
595  agg_out_ptr_w_idx,
596  agg_out_vec,
597  output_buffer_byte_stream,
598  out_row_idx,
599  diamond_codegen);
600  }
601  if (!sample_exprs_to_codegen.empty()) {
602  codegenSampleExpressions(group_by_and_agg,
603  executor,
604  query_mem_desc,
605  co,
606  agg_out_ptr_w_idx,
607  agg_out_vec,
608  output_buffer_byte_stream,
609  out_row_idx,
610  diamond_codegen);
611  }
612 }
613 
615  GroupByAndAggregate* group_by_and_agg,
616  Executor* executor,
617  const QueryMemoryDescriptor& query_mem_desc,
618  const CompilationOptions& co,
619  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
620  const std::vector<llvm::Value*>& agg_out_vec,
621  llvm::Value* output_buffer_byte_stream,
622  llvm::Value* out_row_idx,
623  GroupByAndAggregate::DiamondCodegen& diamond_codegen) const {
624  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
625  CHECK(!sample_exprs_to_codegen.empty());
627  if (sample_exprs_to_codegen.size() == 1 &&
628  !sample_exprs_to_codegen.front().target_info.sql_type.is_varlen()) {
629  codegenSingleSlotSampleExpression(group_by_and_agg,
630  executor,
631  query_mem_desc,
632  co,
633  agg_out_ptr_w_idx,
634  agg_out_vec,
635  output_buffer_byte_stream,
636  out_row_idx,
637  diamond_codegen);
638  } else {
639  codegenMultiSlotSampleExpressions(group_by_and_agg,
640  executor,
641  query_mem_desc,
642  co,
643  agg_out_ptr_w_idx,
644  agg_out_vec,
645  output_buffer_byte_stream,
646  out_row_idx,
647  diamond_codegen);
648  }
649 }
650 
652  GroupByAndAggregate* group_by_and_agg,
653  Executor* executor,
654  const QueryMemoryDescriptor& query_mem_desc,
655  const CompilationOptions& co,
656  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
657  const std::vector<llvm::Value*>& agg_out_vec,
658  llvm::Value* output_buffer_byte_stream,
659  llvm::Value* out_row_idx,
660  GroupByAndAggregate::DiamondCodegen& diamond_codegen) const {
661  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
662  CHECK_EQ(size_t(1), sample_exprs_to_codegen.size());
663  CHECK(!sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
665  // no need for the atomic if we only have one SAMPLE target
666  sample_exprs_to_codegen.front().codegen(group_by_and_agg,
667  executor,
668  query_mem_desc,
669  co,
670  {},
671  agg_out_ptr_w_idx,
672  agg_out_vec,
673  output_buffer_byte_stream,
674  out_row_idx,
675  diamond_codegen);
676 }
677 
679  GroupByAndAggregate* group_by_and_agg,
680  Executor* executor,
681  const QueryMemoryDescriptor& query_mem_desc,
682  const CompilationOptions& co,
683  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
684  const std::vector<llvm::Value*>& agg_out_vec,
685  llvm::Value* output_buffer_byte_stream,
686  llvm::Value* out_row_idx,
687  GroupByAndAggregate::DiamondCodegen& diamond_codegen) const {
688  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
689  CHECK(sample_exprs_to_codegen.size() > 1 ||
690  sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
692  const auto& first_sample_expr = sample_exprs_to_codegen.front();
693  auto target_lvs = group_by_and_agg->codegenAggArg(first_sample_expr.target_expr, co);
694  CHECK_GE(target_lvs.size(), size_t(1));
695 
696  const auto init_val =
697  get_initial_agg_val(first_sample_expr.target_info, query_mem_desc);
698 
699  llvm::Value* agg_col_ptr{nullptr};
700  if (is_group_by) {
701  const auto agg_column_size_bytes =
702  query_mem_desc.isLogicalSizedColumnsAllowed() &&
703  !first_sample_expr.target_info.sql_type.is_varlen()
704  ? first_sample_expr.target_info.sql_type.get_size()
705  : sizeof(int64_t);
706  agg_col_ptr = group_by_and_agg->codegenAggColumnPtr(output_buffer_byte_stream,
707  out_row_idx,
708  agg_out_ptr_w_idx,
709  query_mem_desc,
710  agg_column_size_bytes,
711  first_sample_expr.base_slot_index,
712  first_sample_expr.target_idx);
713  } else {
714  CHECK_LT(static_cast<size_t>(first_sample_expr.base_slot_index), agg_out_vec.size());
715  agg_col_ptr =
716  executor->castToIntPtrTyIn(agg_out_vec[first_sample_expr.base_slot_index], 64);
717  }
718 
719  auto sample_cas_lv = codegenSlotEmptyKey(agg_col_ptr, target_lvs, executor, init_val);
720 
722  sample_cas_lv, executor, false, "sample_valcheck", &diamond_codegen, false);
723 
724  for (const auto& target_expr_codegen : sample_exprs_to_codegen) {
725  target_expr_codegen.codegen(group_by_and_agg,
726  executor,
727  query_mem_desc,
728  co,
729  {},
730  agg_out_ptr_w_idx,
731  agg_out_vec,
732  output_buffer_byte_stream,
733  out_row_idx,
734  diamond_codegen,
735  &sample_cfg);
736  }
737 }
738 
740  llvm::Value* agg_col_ptr,
741  std::vector<llvm::Value*>& target_lvs,
742  Executor* executor,
743  const int64_t init_val) const {
744  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
745  const auto& first_sample_expr = sample_exprs_to_codegen.front();
746  const auto first_sample_slot_bytes =
747  first_sample_expr.target_info.sql_type.is_varlen()
748  ? sizeof(int64_t)
749  : first_sample_expr.target_info.sql_type.get_size();
750  llvm::Value* target_lv_casted{nullptr};
751  // deciding whether proper casting is required for the first sample's slot:
752  if (first_sample_expr.target_info.sql_type.is_varlen()) {
753  target_lv_casted =
754  LL_BUILDER.CreatePtrToInt(target_lvs.front(), llvm::Type::getInt64Ty(LL_CONTEXT));
755  } else if (first_sample_expr.target_info.sql_type.is_fp()) {
756  // Initialization value for SAMPLE on a float column should be 0
757  CHECK_EQ(init_val, 0);
758  if (query_mem_desc.isLogicalSizedColumnsAllowed()) {
759  target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
760  target_lvs.front(),
761  first_sample_slot_bytes == sizeof(float) ? llvm::Type::getInt32Ty(LL_CONTEXT)
762  : llvm::Type::getInt64Ty(LL_CONTEXT));
763  } else {
764  target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
765  target_lvs.front(), llvm::Type::getInt64Ty(LL_CONTEXT));
766  }
767  } else if (first_sample_slot_bytes != sizeof(int64_t) &&
768  !query_mem_desc.isLogicalSizedColumnsAllowed()) {
769  target_lv_casted =
770  executor->cgen_state_->ir_builder_.CreateCast(llvm::Instruction::CastOps::SExt,
771  target_lvs.front(),
772  llvm::Type::getInt64Ty(LL_CONTEXT));
773  } else {
774  target_lv_casted = target_lvs.front();
775  }
776 
777  std::string slot_empty_cas_func_name("slotEmptyKeyCAS");
778  llvm::Value* init_val_lv{LL_INT(init_val)};
779  if (query_mem_desc.isLogicalSizedColumnsAllowed() &&
780  !first_sample_expr.target_info.sql_type.is_varlen()) {
781  // add proper suffix to the function name:
782  switch (first_sample_slot_bytes) {
783  case 1:
784  slot_empty_cas_func_name += "_int8";
785  break;
786  case 2:
787  slot_empty_cas_func_name += "_int16";
788  break;
789  case 4:
790  slot_empty_cas_func_name += "_int32";
791  break;
792  case 8:
793  break;
794  default:
795  UNREACHABLE() << "Invalid slot size for slotEmptyKeyCAS function.";
796  break;
797  }
798  if (first_sample_slot_bytes != sizeof(int64_t)) {
799  init_val_lv = llvm::ConstantInt::get(
800  get_int_type(first_sample_slot_bytes * 8, LL_CONTEXT), init_val);
801  }
802  }
803 
804  auto sample_cas_lv = executor->cgen_state_->emitExternalCall(
805  slot_empty_cas_func_name,
806  llvm::Type::getInt1Ty(executor->cgen_state_->context_),
807  {agg_col_ptr, target_lv_casted, init_val_lv});
808  return sample_cas_lv;
809 }
#define LL_BUILDER
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const std::vector< llvm::Value *> &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen) const
bool target_has_geo(const TargetInfo &target_info)
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
std::vector< std::string > agg_fn_base_names(const TargetInfo &target_info)
bool is_string() const
Definition: sqltypes.h:416
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:78
#define UNREACHABLE()
Definition: Logger.h:241
#define CHECK_GE(x, y)
Definition: Logger.h:210
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
void checkErrorCode(llvm::Value *retCode)
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:133
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
bool skip_null_val
Definition: TargetInfo.h:44
void codegenAggregate(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::vector< llvm::Value *> &target_lvs, const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const std::vector< llvm::Value *> &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, int32_t slot_index) const
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string to_string(char const *&&v)
SQLTypeInfo agg_arg_type
Definition: TargetInfo.h:43
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
std::string patch_agg_fname(const std::string &agg_name)
Helpers for codegen of target expressions.
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
llvm::Value * codegenAggColumnPtr(llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
: returns the pointer to where the aggregation should be stored.
bool is_agg
Definition: TargetInfo.h:40
#define LL_INT(v)
llvm::Value * convertNullIfAny(const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
llvm::Value * codegenSlotEmptyKey(llvm::Value *agg_col_ptr, std::vector< llvm::Value *> &target_lvs, Executor *executor, const int64_t init_val) const
bool g_bigint_count
Definition: sqldefs.h:75
#define LL_CONTEXT
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:129
void codegenCountDistinct(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value *> &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLAgg agg_kind
Definition: TargetInfo.h:41
void codegenSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const std::vector< llvm::Value *> &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen) const
ExecutorDeviceType device_type
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:42
#define CHECK_LT(x, y)
Definition: Logger.h:207
bool is_geometry() const
Definition: sqltypes.h:428
size_t getColOnlyOffInBytes(const size_t col_idx) const
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value *> &args)
const Analyzer::Expr * target_expr
bool needsUnnestDoublePatch(llvm::Value *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
Definition: sqldefs.h:76
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const std::vector< llvm::Value *> &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen, GroupByAndAggregate::DiamondCodegen *sample_cfg=nullptr) const
std::vector< llvm::Value * > codegenAggArg(const Analyzer::Expr *target_expr, const CompilationOptions &co)
llvm::Value * codegenWindowRowPointer(const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
bool window_function_requires_peer_handling(const Analyzer::WindowFunction *window_func)
bool is_simple_count(const TargetInfo &target_info)
void codegenSingleSlotSampleExpression(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const std::vector< llvm::Value *> &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen) const
int get_physical_coord_cols() const
Definition: sqltypes.h:295
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:518
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
static void resetWindowFunctionContext(Executor *executor)
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
int64_t get_initial_agg_val(const TargetInfo &target_info, const QueryMemoryDescriptor &query_mem_desc)
size_t getColOffInBytes(const size_t col_idx) const
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:138
bool is_distinct
Definition: TargetInfo.h:45
void operator()(const Analyzer::Expr *target_expr, const Executor *executor, const CompilationOptions &co)
QueryDescriptionType getQueryDescriptionType() const
Definition: sqldefs.h:74
void codegenMultiSlotSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const std::vector< llvm::Value *> &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen) const
Definition: sqldefs.h:72
bool is_columnar_projection(const QueryMemoryDescriptor &query_mem_desc)
bool is_agg_domain_range_equivalent(const SQLAgg &agg_kind)
Definition: TargetInfo.h:64