OmniSciDB  21ac014ffc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TargetExprBuilder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "TargetExprBuilder.h"
24 
25 #include "CodeGenerator.h"
26 #include "Execute.h"
27 #include "GroupByAndAggregate.h"
28 #include "Logger/Logger.h"
29 #include "MaxwellCodegenPatch.h"
31 
32 #define LL_CONTEXT executor->cgen_state_->context_
33 #define LL_BUILDER executor->cgen_state_->ir_builder_
34 #define LL_BOOL(v) executor->ll_bool(v)
35 #define LL_INT(v) executor->cgen_state_->llInt(v)
36 #define LL_FP(v) executor->cgen_state_->llFp(v)
37 #define ROW_FUNC executor->cgen_state_->row_func_
38 
39 namespace {
40 
41 inline bool is_varlen_projection(const Analyzer::Expr* target_expr,
42  const SQLTypeInfo& ti) {
43  return dynamic_cast<const Analyzer::GeoExpr*>(target_expr) && ti.get_type() == kPOINT;
44 }
45 
// Maps a target/aggregate descriptor to the base runtime-function name(s) used
// to update its output slot(s). Multi-slot targets (AVG, varlen, geo) return
// several names; callers append suffixes (e.g. "_shared", "_double") later.
std::vector<std::string> agg_fn_base_names(const TargetInfo& target_info,
                                           const bool is_varlen_projection) {
  const auto& chosen_type = get_compact_type(target_info);
  if (is_varlen_projection) {
    // TODO: support other types here
    CHECK(chosen_type.is_geometry());
    return {"agg_id_varlen"};
  }
  if (!target_info.is_agg || target_info.agg_kind == kSAMPLE) {
    if (chosen_type.is_geometry()) {
      // one (pointer, size) slot pair per physical coordinate column
      return std::vector<std::string>(2 * chosen_type.get_physical_coord_cols(),
                                      "agg_id");
    }
    if (chosen_type.is_varlen()) {
      // not a varlen projection (not creating new varlen outputs). Just store the pointer
      // and offset into the input buffer in the output slots.
      return {"agg_id", "agg_id"};
    }
    return {"agg_id"};
  }
  switch (target_info.agg_kind) {
    case kAVG:
      // AVG is materialized as a (sum, count) pair and divided at reduction
      return {"agg_sum", "agg_count"};
    case kCOUNT:
      return {target_info.is_distinct ? "agg_count_distinct" : "agg_count"};
    case kMAX:
      return {"agg_max"};
    case kMIN:
      return {"agg_min"};
    case kSUM:
      return {"agg_sum"};
    // NOTE(review): the extracted listing dropped a `case` label here (doxygen
    // line 77); as written the next return is unreachable. Presumably
    // `case kAPPROX_COUNT_DISTINCT:` -- confirm against the original source.
      return {"agg_approximate_count_distinct"};
    case kAPPROX_QUANTILE:
      return {"agg_approx_quantile"};
    case kSINGLE_VALUE:
      return {"checked_single_agg_id"};
    case kSAMPLE:
      return {"agg_id"};
    default:
      UNREACHABLE() << "Unrecognized agg kind: " << std::to_string(target_info.agg_kind);
  }
  return {};
}
90 
// NOTE(review): the extracted listing dropped this function's signature
// (doxygen line 91); presumably
// `bool is_columnar_projection(const QueryMemoryDescriptor& query_mem_desc) {`
// -- confirm against the original source.
// True when the query is a projection with columnar output layout.
  return query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
         query_mem_desc.didOutputColumnar();
}
95 
96 bool is_simple_count(const TargetInfo& target_info) {
97  return target_info.is_agg && target_info.agg_kind == kCOUNT && !target_info.is_distinct;
98 }
99 
100 bool target_has_geo(const TargetInfo& target_info) {
101  return target_info.is_agg ? target_info.agg_arg_type.is_geometry()
102  : target_info.sql_type.is_geometry();
103 }
104 
105 } // namespace
106 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 107, presumably `void TargetExprCodegen::codegen(`) and the
// `const QueryMemoryDescriptor& query_mem_desc,` parameter (doxygen line 110)
// -- confirm against the original source.
//
// Generates IR that materializes this target expression into its output
// slot(s). A GPU shared-memory fast path for a not-null simple COUNT emits an
// atomic add inline; all other targets are delegated to codegenAggregate().
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const CompilationOptions& co,
    const GpuSharedMemoryContext& gpu_smem_context,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    llvm::Value* varlen_output_buffer,
    DiamondCodegen& diamond_codegen,
    DiamondCodegen* sample_cfg) const {
  CHECK(group_by_and_agg);
  CHECK(executor);
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());

  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
  const auto arg_expr = agg_arg(target_expr);

  const bool varlen_projection = is_varlen_projection(target_expr, target_info.sql_type);
  const auto agg_fn_names = agg_fn_base_names(target_info, varlen_projection);
  const auto window_func = dynamic_cast<const Analyzer::WindowFunction*>(target_expr);
  // Window functions go through a dedicated codegen path; everything else uses
  // the generic aggregate-argument codegen.
  auto target_lvs =
      window_func
          ? std::vector<llvm::Value*>{executor->codegenWindowFunction(target_idx, co)}
          : group_by_and_agg->codegenAggArg(target_expr, co);
  const auto window_row_ptr = window_func
                                  ? group_by_and_agg->codegenWindowRowPointer(
                                        window_func, query_mem_desc, co, diamond_codegen)
                                  : nullptr;
  if (window_row_ptr) {
    // Redirect output to the window function's row buffer.
    agg_out_ptr_w_idx =
        std::make_tuple(window_row_ptr, std::get<1>(agg_out_ptr_w_idx_in));
    if (window_function_is_aggregate(window_func->getKind())) {
      out_row_idx = window_row_ptr;
    }
  }

  llvm::Value* str_target_lv{nullptr};
  if (target_lvs.size() == 3 && !target_has_geo(target_info)) {
    // none encoding string, pop the packed pointer + length since
    // it's only useful for IS NULL checks and assumed to be only
    // two components (pointer and length) for the purpose of projection
    str_target_lv = target_lvs.front();
    target_lvs.erase(target_lvs.begin());
  }
  if (target_info.sql_type.is_geometry() && !varlen_projection) {
    // Geo cols are expanded to the physical coord cols. Each physical coord col is an
    // array. Ensure that the target values generated match the number of agg
    // functions before continuing
    if (target_lvs.size() < agg_fn_names.size()) {
      CHECK_EQ(target_lvs.size(), agg_fn_names.size() / 2);
      std::vector<llvm::Value*> new_target_lvs;
      new_target_lvs.reserve(agg_fn_names.size());
      for (const auto& target_lv : target_lvs) {
        // duplicate each coord value into a (pointer, size) slot pair
        new_target_lvs.push_back(target_lv);
        new_target_lvs.push_back(target_lv);
      }
      target_lvs = new_target_lvs;
    }
  }
  if (target_lvs.size() < agg_fn_names.size()) {
    CHECK_EQ(size_t(1), target_lvs.size());
    CHECK_EQ(size_t(2), agg_fn_names.size());
    for (size_t i = 1; i < agg_fn_names.size(); ++i) {
      target_lvs.push_back(target_lvs.front());
    }
  } else {
    // NOTE(review): the extracted listing dropped a line here (doxygen 177);
    // the dangling `} else {` further down implies it opened a nested
    // conditional, presumably
    // `if (target_info.sql_type.is_geometry() && !varlen_projection) {` --
    // confirm against the original source.
    if (!target_info.is_agg && !varlen_projection) {
      CHECK_EQ(static_cast<size_t>(2 * target_info.sql_type.get_physical_coord_cols()),
               target_lvs.size());
      CHECK_EQ(agg_fn_names.size(), target_lvs.size());
    }
    } else {
      CHECK(str_target_lv || (agg_fn_names.size() == target_lvs.size()));
      CHECK(target_lvs.size() == 1 || target_lvs.size() == 2);
    }
  }

  int32_t slot_index = base_slot_index;
  CHECK_GE(slot_index, 0);
  CHECK(is_group_by || static_cast<size_t>(slot_index) < agg_out_vec.size());

  uint32_t col_off{0};
  // GPU shared-memory fast path: a simple COUNT over a not-null argument can
  // be a single atomic add, bypassing the generic aggregate runtime.
  // NOTE(review): the extracted listing dropped a conjunct of this condition
  // (doxygen line 195); presumably `is_simple_count(target_info) &&` --
  // confirm against the original source.
  if (co.device_type == ExecutorDeviceType::GPU && query_mem_desc.threadsShareMemory() &&
      (!arg_expr || arg_expr->get_type_info().get_notnull())) {
    CHECK_EQ(size_t(1), agg_fn_names.size());
    const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(slot_index);
    llvm::Value* agg_col_ptr{nullptr};
    if (is_group_by) {
      if (query_mem_desc.didOutputColumnar()) {
        // columnar layout: column base offset plus the per-row index
        col_off = query_mem_desc.getColOffInBytes(slot_index);
        CHECK_EQ(size_t(0), col_off % chosen_bytes);
        col_off /= chosen_bytes;
        CHECK(std::get<1>(agg_out_ptr_w_idx));
        auto offset =
            LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
        agg_col_ptr = LL_BUILDER.CreateGEP(
            LL_BUILDER.CreateBitCast(
                std::get<0>(agg_out_ptr_w_idx),
                llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
            offset);
      } else {
        // row-wise layout: fixed offset within the row
        col_off = query_mem_desc.getColOnlyOffInBytes(slot_index);
        CHECK_EQ(size_t(0), col_off % chosen_bytes);
        col_off /= chosen_bytes;
        agg_col_ptr = LL_BUILDER.CreateGEP(
            LL_BUILDER.CreateBitCast(
                std::get<0>(agg_out_ptr_w_idx),
                llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
            LL_INT(col_off));
      }
    }

    if (chosen_bytes != sizeof(int32_t)) {
      CHECK_EQ(8, chosen_bytes);
      if (g_bigint_count) {
        // 64-bit count: atomic add on the full slot
        const auto acc_i64 = LL_BUILDER.CreateBitCast(
            is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
            llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0));
        if (gpu_smem_context.isSharedMemoryUsed()) {
          group_by_and_agg->emitCall(
              "agg_count_shared", std::vector<llvm::Value*>{acc_i64, LL_INT(int64_t(1))});
        } else {
          LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
                                     acc_i64,
                                     LL_INT(int64_t(1)),
                                     llvm::AtomicOrdering::Monotonic);
        }
      } else {
        // 64-bit slot but 32-bit count semantics: bump the low 32 bits
        auto acc_i32 = LL_BUILDER.CreateBitCast(
            is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
            llvm::PointerType::get(get_int_type(32, LL_CONTEXT), 0));
        if (gpu_smem_context.isSharedMemoryUsed()) {
          // address space 3 is GPU shared memory
          acc_i32 = LL_BUILDER.CreatePointerCast(
              acc_i32, llvm::Type::getInt32PtrTy(LL_CONTEXT, 3));
        }
        LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
                                   acc_i32,
                                   LL_INT(1),
                                   llvm::AtomicOrdering::Monotonic);
      }
    } else {
      const auto acc_i32 = (is_group_by ? agg_col_ptr : agg_out_vec[slot_index]);
      if (gpu_smem_context.isSharedMemoryUsed()) {
        // Atomic operation on address space level 3 (Shared):
        const auto shared_acc_i32 = LL_BUILDER.CreatePointerCast(
            acc_i32, llvm::Type::getInt32PtrTy(LL_CONTEXT, 3));
        LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
                                   shared_acc_i32,
                                   LL_INT(1),
                                   llvm::AtomicOrdering::Monotonic);
      } else {
        LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
                                   acc_i32,
                                   LL_INT(1),
                                   llvm::AtomicOrdering::Monotonic);
      }
    }
    return;
  }

  // Generic path: delegate to the per-slot aggregate codegen.
  codegenAggregate(group_by_and_agg,
                   executor,
                   query_mem_desc,
                   co,
                   target_lvs,
                   agg_out_ptr_w_idx,
                   agg_out_vec,
                   output_buffer_byte_stream,
                   out_row_idx,
                   varlen_output_buffer,
                   slot_index);
}
285 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 286, presumably `void TargetExprCodegen::codegenAggregate(`) and the
// `const QueryMemoryDescriptor& query_mem_desc,` parameter (doxygen line 289)
// -- confirm against the original source. Several other lines inside the body
// were also dropped; each gap is flagged below.
//
// Emits the per-slot aggregate update calls for one target expression:
// resolves the output slot pointer, applies null-skipping / FP casting, picks
// the width- and type-suffixed runtime function, and emits the call. Special
// cases: COUNT DISTINCT over arrays, varlen projections, APPROX_QUANTILE and
// window-function peer handling.
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const CompilationOptions& co,
    const std::vector<llvm::Value*>& target_lvs,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    llvm::Value* varlen_output_buffer,
    int32_t slot_index) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  size_t target_lv_idx = 0;
  const bool lazy_fetched{executor->plan_state_->isLazyFetchColumn(target_expr)};

  CodeGenerator code_generator(executor);

  // NOTE(review): the argument list of this call was dropped (doxygen line
  // 305); presumably `target_info, is_varlen_projection(target_expr,
  // target_info.sql_type));` -- confirm against the original source.
  const auto agg_fn_names = agg_fn_base_names(
  auto arg_expr = agg_arg(target_expr);

  for (const auto& agg_base_name : agg_fn_names) {
    if (target_info.is_distinct && arg_expr->get_type_info().is_array()) {
      // COUNT(DISTINCT array_col): handled by a dedicated external call.
      CHECK_EQ(static_cast<size_t>(query_mem_desc.getLogicalSlotWidthBytes(slot_index)),
               sizeof(int64_t));
      // TODO(miyu): check if buffer may be columnar here
      CHECK(!query_mem_desc.didOutputColumnar());
      const auto& elem_ti = arg_expr->get_type_info().get_elem_type();
      uint32_t col_off{0};
      if (is_group_by) {
        // NOTE(review): `col_off` is divided below but never assigned from
        // `col_off_in_bytes` in this extraction -- looks like a dropped/garbled
        // assignment; confirm against the original source.
        const auto col_off_in_bytes = query_mem_desc.getColOnlyOffInBytes(slot_index);
        CHECK_EQ(size_t(0), col_off_in_bytes % sizeof(int64_t));
        col_off /= sizeof(int64_t);
      }
      executor->cgen_state_->emitExternalCall(
          "agg_count_distinct_array_" + numeric_type_name(elem_ti),
          llvm::Type::getVoidTy(LL_CONTEXT),
          {is_group_by
               ? LL_BUILDER.CreateGEP(std::get<0>(agg_out_ptr_w_idx), LL_INT(col_off))
               : agg_out_vec[slot_index],
           target_lvs[target_lv_idx],
           code_generator.posArg(arg_expr),
           elem_ti.is_fp()
               ? static_cast<llvm::Value*>(executor->cgen_state_->inlineFpNull(elem_ti))
               : static_cast<llvm::Value*>(
                     executor->cgen_state_->inlineIntNull(elem_ti))});
      ++slot_index;
      ++target_lv_idx;
      continue;
    }

    llvm::Value* agg_col_ptr{nullptr};
    const auto chosen_bytes =
        static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(slot_index));
    const auto& chosen_type = get_compact_type(target_info);
    // NOTE(review): the tail of this ternary was dropped (doxygen lines
    // 344-346); presumably it selects between `arg_expr->get_type_info()` and
    // the target's own type -- confirm against the original source.
    const auto& arg_type =
        ((arg_expr && arg_expr->get_type_info().get_type() != kNULLT) &&
    const bool is_fp_arg =
        !lazy_fetched && arg_type.get_type() != kNULLT && arg_type.is_fp();
    if (is_group_by) {
      agg_col_ptr = group_by_and_agg->codegenAggColumnPtr(output_buffer_byte_stream,
                                                          out_row_idx,
                                                          agg_out_ptr_w_idx,
                                                          query_mem_desc,
                                                          chosen_bytes,
                                                          slot_index,
                                                          target_idx);
      CHECK(agg_col_ptr);
      agg_col_ptr->setName("agg_col_ptr");
    }

    // NOTE(review): the guard opening this varlen-projection branch was
    // dropped (doxygen line 361, likely `if (is_varlen_projection) {` or
    // similar), as were doxygen lines 364 and 370 (the latter presumably
    // `if (co.device_type == ExecutorDeviceType::GPU &&`). Confirm against the
    // original source.
    CHECK(!query_mem_desc.didOutputColumnar());

    CHECK_LT(target_lv_idx, target_lvs.size());
    CHECK(varlen_output_buffer);
    auto target_lv = target_lvs[target_lv_idx];

    std::string agg_fname_suffix = "";
        query_mem_desc.threadsShareMemory()) {
      agg_fname_suffix += "_shared";
    }

    // first write the varlen data into the varlen buffer and get the pointer location
    // into the varlen buffer
    auto& builder = executor->cgen_state_->ir_builder_;
    auto orig_bb = builder.GetInsertBlock();
    auto target_ptr_type = llvm::dyn_cast<llvm::PointerType>(target_lv->getType());
    CHECK(target_ptr_type) << "Varlen projections expect a pointer input.";
    auto is_nullptr =
        builder.CreateICmp(llvm::CmpInst::ICMP_EQ,
                           target_lv,
                           llvm::ConstantPointerNull::get(llvm::PointerType::get(
                               target_ptr_type->getPointerElementType(), 0)));
    llvm::BasicBlock* true_bb{nullptr};
    {
      DiamondCodegen nullcheck_diamond(
          is_nullptr, executor, false, "varlen_null_check", nullptr, false);
      // maintain a reference to the true bb, overriding the diamond codegen destructor
      true_bb = nullcheck_diamond.cond_true_;
      // if not null, process the pointer and insert it into the varlen buffer
      builder.SetInsertPoint(nullcheck_diamond.cond_false_);
      auto arr_ptr_lv = executor->cgen_state_->ir_builder_.CreateBitCast(
          target_lv,
          llvm::PointerType::get(get_int_type(8, executor->cgen_state_->context_), 0));
      // NOTE(review): the initializer of `chosen_bytes` was dropped (doxygen
      // line 398); confirm against the original source.
      const int64_t chosen_bytes =
      const auto output_buffer_slot = LL_BUILDER.CreateZExt(
          LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched")),
          llvm::Type::getInt64Ty(LL_CONTEXT));
      const auto varlen_buffer_row_sz = query_mem_desc.varlenOutputBufferElemSize();
      CHECK(varlen_buffer_row_sz);
      const auto output_buffer_slot_bytes = LL_BUILDER.CreateAdd(
          LL_BUILDER.CreateMul(output_buffer_slot,
                               executor->cgen_state_->llInt(
                                   static_cast<int64_t>(*varlen_buffer_row_sz))),
          executor->cgen_state_->llInt(static_cast<int64_t>(
              query_mem_desc.varlenOutputRowSizeToSlot(slot_index))));

      std::vector<llvm::Value*> varlen_agg_args{
          executor->castToIntPtrTyIn(varlen_output_buffer, 8),
          output_buffer_slot_bytes,
          arr_ptr_lv,
          executor->cgen_state_->llInt(chosen_bytes)};
      auto varlen_offset_ptr =
          group_by_and_agg->emitCall(agg_base_name + agg_fname_suffix, varlen_agg_args);

      // then write that pointer location into the 64 bit slot in the output buffer
      auto varlen_offset_int = LL_BUILDER.CreatePtrToInt(
          varlen_offset_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
      builder.CreateBr(nullcheck_diamond.cond_true_);

      // use the true block to do the output buffer insertion regardless of nullness
      builder.SetInsertPoint(nullcheck_diamond.cond_true_);
      auto output_phi =
          builder.CreatePHI(llvm::Type::getInt64Ty(executor->cgen_state_->context_), 2);
      output_phi->addIncoming(varlen_offset_int, nullcheck_diamond.cond_false_);
      output_phi->addIncoming(executor->cgen_state_->llInt(static_cast<int64_t>(0)),
                              orig_bb);

      std::vector<llvm::Value*> agg_args{agg_col_ptr, output_phi};
      group_by_and_agg->emitCall("agg_id" + agg_fname_suffix, agg_args);
    }
    CHECK(true_bb);
    builder.SetInsertPoint(true_bb);

    ++slot_index;
    ++target_lv_idx;
    continue;
    }

    const bool float_argument_input = takes_float_argument(target_info);
    const bool is_count_in_avg = target_info.agg_kind == kAVG && target_lv_idx == 1;
    // The count component of an average should never be compacted.
    const auto agg_chosen_bytes =
        float_argument_input && !is_count_in_avg ? sizeof(float) : chosen_bytes;
    if (float_argument_input) {
      CHECK_GE(chosen_bytes, sizeof(float));
    }

    auto target_lv = target_lvs[target_lv_idx];
    const auto needs_unnest_double_patch = group_by_and_agg->needsUnnestDoublePatch(
        target_lv, agg_base_name, query_mem_desc.threadsShareMemory(), co);
    const auto need_skip_null = !needs_unnest_double_patch && target_info.skip_null_val;
    if (!needs_unnest_double_patch) {
      if (need_skip_null && !is_agg_domain_range_equivalent(target_info.agg_kind)) {
        // convert the null sentinel so the skip_val variant can recognize it
        target_lv = group_by_and_agg->convertNullIfAny(arg_type, target_info, target_lv);
      } else if (is_fp_arg) {
        target_lv = executor->castToFP(target_lv, arg_type, target_info.sql_type);
      }
      if (!dynamic_cast<const Analyzer::AggExpr*>(target_expr) || arg_expr) {
        target_lv =
            executor->cgen_state_->castToTypeIn(target_lv, (agg_chosen_bytes << 3));
      }
    }

    const bool is_simple_count_target = is_simple_count(target_info);
    llvm::Value* str_target_lv{nullptr};
    if (target_lvs.size() == 3 && !target_has_geo(target_info)) {
      // none encoding string
      str_target_lv = target_lvs.front();
    }
    // agg_args[0] = destination slot pointer, agg_args[1] = value to fold in
    std::vector<llvm::Value*> agg_args{
        executor->castToIntPtrTyIn((is_group_by ? agg_col_ptr : agg_out_vec[slot_index]),
                                   (agg_chosen_bytes << 3)),
        (is_simple_count_target && !arg_expr)
            ? (agg_chosen_bytes == sizeof(int32_t) ? LL_INT(int32_t(0))
                                                   : LL_INT(int64_t(0)))
            : (is_simple_count_target && arg_expr && str_target_lv ? str_target_lv
                                                                   : target_lv)};
    if (query_mem_desc.isLogicalSizedColumnsAllowed()) {
      if (is_simple_count_target && arg_expr && str_target_lv) {
        agg_args[1] =
            agg_chosen_bytes == sizeof(int32_t) ? LL_INT(int32_t(0)) : LL_INT(int64_t(0));
      }
    }
    // pick the width/type-suffixed runtime function variant
    std::string agg_fname{agg_base_name};
    if (is_fp_arg) {
      if (!lazy_fetched) {
        if (agg_chosen_bytes == sizeof(float)) {
          CHECK_EQ(arg_type.get_type(), kFLOAT);
          agg_fname += "_float";
        } else {
          CHECK_EQ(agg_chosen_bytes, sizeof(double));
          agg_fname += "_double";
        }
      }
    } else if (agg_chosen_bytes == sizeof(int32_t)) {
      agg_fname += "_int32";
    } else if (agg_chosen_bytes == sizeof(int16_t) &&
               query_mem_desc.didOutputColumnar()) {
      agg_fname += "_int16";
    } else if (agg_chosen_bytes == sizeof(int8_t) && query_mem_desc.didOutputColumnar()) {
      agg_fname += "_int8";
    }

    // NOTE(review): the condition opening this branch was dropped (doxygen
    // line 508); presumably `if (is_distinct_target(target_info)) {` --
    // confirm against the original source.
      CHECK_EQ(agg_chosen_bytes, sizeof(int64_t));
      CHECK(!chosen_type.is_fp());
      group_by_and_agg->codegenCountDistinct(
          target_idx, target_expr, agg_args, query_mem_desc, co.device_type);
    } else if (target_info.agg_kind == kAPPROX_QUANTILE) {
      CHECK_EQ(agg_chosen_bytes, sizeof(int64_t));
      group_by_and_agg->codegenApproxQuantile(
          target_idx, target_expr, agg_args, query_mem_desc, co.device_type);
    } else {
      const auto& arg_ti = target_info.agg_arg_type;
      if (need_skip_null && !arg_ti.is_geometry()) {
        agg_fname += "_skip_val";
      }

      // NOTE(review): the first conjunct of this condition was dropped
      // (doxygen line 523); confirm against the original source.
          (need_skip_null && !arg_ti.is_geometry())) {
        llvm::Value* null_in_lv{nullptr};
        if (arg_ti.is_fp()) {
          null_in_lv =
              static_cast<llvm::Value*>(executor->cgen_state_->inlineFpNull(arg_ti));
        } else {
          // NOTE(review): the ternary condition was dropped (doxygen line 531);
          // confirm against the original source.
          null_in_lv = static_cast<llvm::Value*>(executor->cgen_state_->inlineIntNull(
                  ? arg_ti
                  : target_info.sql_type));
        }
        CHECK(null_in_lv);
        auto null_lv =
            executor->cgen_state_->castToTypeIn(null_in_lv, (agg_chosen_bytes << 3));
        agg_args.push_back(null_lv);
      }
      if (!target_info.is_distinct) {
        // NOTE(review): the guard opening this shared-memory branch was
        // dropped (doxygen line 541, presumably a GPU device-type check);
        // confirm against the original source.
            query_mem_desc.threadsShareMemory()) {
          agg_fname += "_shared";
          if (needs_unnest_double_patch) {
            agg_fname = patch_agg_fname(agg_fname);
          }
        }
        auto agg_fname_call_ret_lv = group_by_and_agg->emitCall(agg_fname, agg_args);

        if (agg_fname.find("checked") != std::string::npos) {
          // "checked_" variants report overflow/error via their return value
          group_by_and_agg->checkErrorCode(agg_fname_call_ret_lv);
        }
      }
    }
    const auto window_func = dynamic_cast<const Analyzer::WindowFunction*>(target_expr);
    if (window_func && window_function_requires_peer_handling(window_func)) {
      // NOTE(review): the initializer of `window_func_context` was dropped
      // (doxygen line 558); confirm against the original source.
      const auto window_func_context =
      const auto pending_outputs =
          LL_INT(window_func_context->aggregateStatePendingOutputs());
      executor->cgen_state_->emitExternalCall("add_window_pending_output",
                                              llvm::Type::getVoidTy(LL_CONTEXT),
                                              {agg_args.front(), pending_outputs});
      const auto& window_func_ti = window_func->get_type_info();
      std::string apply_window_pending_outputs_name = "apply_window_pending_outputs";
      switch (window_func_ti.get_type()) {
        case kFLOAT: {
          apply_window_pending_outputs_name += "_float";
          if (query_mem_desc.didOutputColumnar()) {
            apply_window_pending_outputs_name += "_columnar";
          }
          break;
        }
        case kDOUBLE: {
          apply_window_pending_outputs_name += "_double";
          break;
        }
        default: {
          apply_window_pending_outputs_name += "_int";
          if (query_mem_desc.didOutputColumnar()) {
            apply_window_pending_outputs_name +=
                std::to_string(window_func_ti.get_size() * 8);
          } else {
            apply_window_pending_outputs_name += "64";
          }
          break;
        }
      }
      const auto partition_end =
          LL_INT(reinterpret_cast<int64_t>(window_func_context->partitionEnd()));
      executor->cgen_state_->emitExternalCall(apply_window_pending_outputs_name,
                                              llvm::Type::getVoidTy(LL_CONTEXT),
                                              {pending_outputs,
                                               target_lvs.front(),
                                               partition_end,
                                               code_generator.posArg(nullptr)});
    }

    ++slot_index;
    ++target_lv_idx;
  }
}
603 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 604); presumably `void TargetExprCodegenBuilder::operator()(const
// Analyzer::Expr* target_expr,` -- confirm against the original source.
//
// Registers one target expression for codegen: computes its TargetInfo,
// adjusts null-skipping, and appends it to either the SAMPLE list or the
// regular list while advancing the slot/target counters.
    const Executor* executor,
    const CompilationOptions& co) {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  if (query_mem_desc.getPaddedSlotWidthBytes(slot_index_counter) == 0) {
    // zero-width slot: nothing materialized, but the counters still advance
    CHECK(!dynamic_cast<const Analyzer::AggExpr*>(target_expr));
    ++slot_index_counter;
    ++target_index_counter;
    return;
  }
  if (dynamic_cast<const Analyzer::UOper*>(target_expr) &&
      static_cast<const Analyzer::UOper*>(target_expr)->get_optype() == kUNNEST) {
    throw std::runtime_error("UNNEST not supported in the projection list yet.");
  }
  if ((executor->plan_state_->isLazyFetchColumn(target_expr) || !is_group_by) &&
      (static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(slot_index_counter)) <
       sizeof(int64_t)) &&
      !is_columnar_projection(query_mem_desc)) {
    // TODO(miyu): enable different byte width in the layout w/o padding
    // NOTE(review): the statement that was here (doxygen line 623, presumably
    // a `throw` forcing a retry without compaction) was dropped -- confirm
    // against the original source.
  }

  auto target_info = get_target_info(target_expr, g_bigint_count);
  auto arg_expr = agg_arg(target_expr);
  if (arg_expr) {
    // NOTE(review): the condition that was here (doxygen lines 629-630) was
    // dropped; as extracted, this assignment is unconditional. Confirm
    // against the original source.
    target_info.skip_null_val = false;
  } else if (query_mem_desc.getQueryDescriptionType() ==
      // NOTE(review): the right-hand side of this comparison (doxygen line
      // 633, presumably a QueryDescriptionType enumerator plus `&&`) was
      // dropped -- confirm against the original source.
             !arg_expr->get_type_info().is_varlen()) {
    // TODO: COUNT is currently not null-aware for varlen types. Need to add proper code
    // generation for handling varlen nulls.
    target_info.skip_null_val = true;
  } else if (constrained_not_null(arg_expr, ra_exe_unit.quals)) {
    target_info.skip_null_val = false;
  }
  }

  // NOTE(review): the rest of this condition (doxygen lines 644-646) was
  // dropped; it decides whether the target goes to the SAMPLE list -- confirm
  // against the original source.
  if (!(query_mem_desc.getQueryDescriptionType() ==
    sample_exprs_to_codegen.emplace_back(target_expr,
                                         target_info,
                                         slot_index_counter,
                                         target_index_counter++,
                                         is_group_by);
  } else {
    target_exprs_to_codegen.emplace_back(target_expr,
                                         target_info,
                                         slot_index_counter,
                                         target_index_counter++,
                                         is_group_by);
  }

  // NOTE(review): the argument list of this call was dropped (doxygen line
  // 661); presumably `target_info, is_varlen_projection(target_expr,
  // target_info.sql_type));` -- confirm against the original source.
  const auto agg_fn_names = agg_fn_base_names(
  slot_index_counter += agg_fn_names.size();
}
664 
665 namespace {
666 
// NOTE(review): the extracted listing dropped this function's signature head
// (doxygen line 667); presumably
// `int64_t get_initial_agg_val(const TargetInfo& target_info,` -- confirm
// against the original source.
//
// Initial slot value used by the SAMPLE empty-key CAS. Only dictionary-encoded
// string SAMPLE targets need a type-specific sentinel; everything else uses 0.
    const QueryMemoryDescriptor& query_mem_desc) {
  const bool is_group_by{query_mem_desc.isGroupBy()};
  if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_string() &&
      target_info.sql_type.get_compression() != kENCODING_NONE) {
    return get_agg_initial_val(target_info.agg_kind,
                               target_info.sql_type,
                               is_group_by,
                               query_mem_desc.getCompactByteWidth());
  }
  return 0;
}
679 
680 } // namespace
681 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 682); presumably `void TargetExprCodegenBuilder::codegen(` -- confirm
// against the original source.
//
// Drives codegen for all registered targets: first the regular targets, then
// (if any) the SAMPLE expressions via codegenSampleExpressions().
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const QueryMemoryDescriptor& query_mem_desc,
    const CompilationOptions& co,
    const GpuSharedMemoryContext& gpu_smem_context,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    llvm::Value* varlen_output_buffer,
    DiamondCodegen& diamond_codegen) const {
  CHECK(group_by_and_agg);
  CHECK(executor);
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());

  for (const auto& target_expr_codegen : target_exprs_to_codegen) {
    target_expr_codegen.codegen(group_by_and_agg,
                                executor,
                                query_mem_desc,
                                co,
                                gpu_smem_context,
                                agg_out_ptr_w_idx,
                                agg_out_vec,
                                output_buffer_byte_stream,
                                out_row_idx,
                                varlen_output_buffer,
                                diamond_codegen);
  }
  if (!sample_exprs_to_codegen.empty()) {
    codegenSampleExpressions(group_by_and_agg,
                             executor,
                             query_mem_desc,
                             co,
                             agg_out_ptr_w_idx,
                             agg_out_vec,
                             output_buffer_byte_stream,
                             out_row_idx,
                             diamond_codegen);
  }
}
723 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 724); presumably `void TargetExprCodegenBuilder::codegenSampleExpressions(`
// -- confirm against the original source. Doxygen line 736 (between the CHECK
// and the `if`) was also dropped.
//
// Dispatches SAMPLE codegen: a single fixed-width SAMPLE uses the simple
// single-slot path; multiple SAMPLEs or varlen SAMPLEs need the multi-slot
// (CAS-guarded) path.
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const QueryMemoryDescriptor& query_mem_desc,
    const CompilationOptions& co,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    DiamondCodegen& diamond_codegen) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  CHECK(!sample_exprs_to_codegen.empty());
  if (sample_exprs_to_codegen.size() == 1 &&
      !sample_exprs_to_codegen.front().target_info.sql_type.is_varlen()) {
    codegenSingleSlotSampleExpression(group_by_and_agg,
                                      executor,
                                      query_mem_desc,
                                      co,
                                      agg_out_ptr_w_idx,
                                      agg_out_vec,
                                      output_buffer_byte_stream,
                                      out_row_idx,
                                      diamond_codegen);
  } else {
    codegenMultiSlotSampleExpressions(group_by_and_agg,
                                      executor,
                                      query_mem_desc,
                                      co,
                                      agg_out_ptr_w_idx,
                                      agg_out_vec,
                                      output_buffer_byte_stream,
                                      out_row_idx,
                                      diamond_codegen);
  }
}
760 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 761); presumably
// `void TargetExprCodegenBuilder::codegenSingleSlotSampleExpression(` --
// confirm against the original source. Doxygen line 774 was also dropped.
//
// Codegen for exactly one non-varlen SAMPLE target: delegates straight to the
// generic codegen path since no cross-slot atomicity is required.
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const QueryMemoryDescriptor& query_mem_desc,
    const CompilationOptions& co,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    DiamondCodegen& diamond_codegen) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  CHECK_EQ(size_t(1), sample_exprs_to_codegen.size());
  CHECK(!sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
  // no need for the atomic if we only have one SAMPLE target
  sample_exprs_to_codegen.front().codegen(group_by_and_agg,
                                          executor,
                                          query_mem_desc,
                                          co,
                                          {},
                                          agg_out_ptr_w_idx,
                                          agg_out_vec,
                                          output_buffer_byte_stream,
                                          out_row_idx,
                                          /*varlen_output_buffer=*/nullptr,
                                          diamond_codegen);
}
788 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 789); presumably
// `void TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions(` --
// confirm against the original source. Doxygen line 802 was also dropped.
//
// Codegen for multiple (or varlen) SAMPLE targets: claims the first sample's
// slot via an empty-key CAS, then generates all sample writes under the
// resulting conditional so the set of slots is updated by a single winner.
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const QueryMemoryDescriptor& query_mem_desc,
    const CompilationOptions& co,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    DiamondCodegen& diamond_codegen) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  CHECK(sample_exprs_to_codegen.size() > 1 ||
        sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
  const auto& first_sample_expr = sample_exprs_to_codegen.front();
  auto target_lvs = group_by_and_agg->codegenAggArg(first_sample_expr.target_expr, co);
  CHECK_GE(target_lvs.size(), size_t(1));

  const auto init_val =
      get_initial_agg_val(first_sample_expr.target_info, query_mem_desc);

  llvm::Value* agg_col_ptr{nullptr};
  if (is_group_by) {
    // varlen samples always use a full 64-bit slot (they store a pointer)
    const auto agg_column_size_bytes =
        query_mem_desc.isLogicalSizedColumnsAllowed() &&
                !first_sample_expr.target_info.sql_type.is_varlen()
            ? first_sample_expr.target_info.sql_type.get_size()
            : sizeof(int64_t);
    agg_col_ptr = group_by_and_agg->codegenAggColumnPtr(output_buffer_byte_stream,
                                                        out_row_idx,
                                                        agg_out_ptr_w_idx,
                                                        query_mem_desc,
                                                        agg_column_size_bytes,
                                                        first_sample_expr.base_slot_index,
                                                        first_sample_expr.target_idx);
  } else {
    CHECK_LT(static_cast<size_t>(first_sample_expr.base_slot_index), agg_out_vec.size());
    agg_col_ptr =
        executor->castToIntPtrTyIn(agg_out_vec[first_sample_expr.base_slot_index], 64);
  }

  auto sample_cas_lv = codegenSlotEmptyKey(agg_col_ptr, target_lvs, executor, init_val);

  // only the thread that won the CAS executes the sample writes
  DiamondCodegen sample_cfg(
      sample_cas_lv, executor, false, "sample_valcheck", &diamond_codegen, false);

  for (const auto& target_expr_codegen : sample_exprs_to_codegen) {
    target_expr_codegen.codegen(group_by_and_agg,
                                executor,
                                query_mem_desc,
                                co,
                                {},
                                agg_out_ptr_w_idx,
                                agg_out_vec,
                                output_buffer_byte_stream,
                                out_row_idx,
                                /*varlen_output_buffer=*/nullptr,
                                diamond_codegen,
                                &sample_cfg);
  }
}
850 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 851); presumably
// `llvm::Value* TargetExprCodegenBuilder::codegenSlotEmptyKey(` -- confirm
// against the original source.
//
// Emits the compare-and-swap that claims the first SAMPLE slot: casts the
// sample value to the slot's integer width and calls the width-appropriate
// slotEmptyKeyCAS runtime function. Returns the i1 success flag.
    llvm::Value* agg_col_ptr,
    std::vector<llvm::Value*>& target_lvs,
    Executor* executor,
    const int64_t init_val) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  const auto& first_sample_expr = sample_exprs_to_codegen.front();
  const auto first_sample_slot_bytes =
      first_sample_expr.target_info.sql_type.is_varlen()
          ? sizeof(int64_t)
          : first_sample_expr.target_info.sql_type.get_size();
  llvm::Value* target_lv_casted{nullptr};
  // deciding whether proper casting is required for the first sample's slot:
  if (first_sample_expr.target_info.sql_type.is_varlen()) {
    // varlen: the slot stores the pointer value itself
    target_lv_casted =
        LL_BUILDER.CreatePtrToInt(target_lvs.front(), llvm::Type::getInt64Ty(LL_CONTEXT));
  } else if (first_sample_expr.target_info.sql_type.is_fp()) {
    // Initialization value for SAMPLE on a float column should be 0
    CHECK_EQ(init_val, 0);
    if (query_mem_desc.isLogicalSizedColumnsAllowed()) {
      target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
          target_lvs.front(),
          first_sample_slot_bytes == sizeof(float) ? llvm::Type::getInt32Ty(LL_CONTEXT)
                                                   : llvm::Type::getInt64Ty(LL_CONTEXT));
    } else {
      target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
          target_lvs.front(), llvm::Type::getInt64Ty(LL_CONTEXT));
    }
  } else if (first_sample_slot_bytes != sizeof(int64_t) &&
             !query_mem_desc.isLogicalSizedColumnsAllowed()) {
    // narrow integer written into a 64-bit slot: sign-extend
    target_lv_casted =
        executor->cgen_state_->ir_builder_.CreateCast(llvm::Instruction::CastOps::SExt,
                                                      target_lvs.front(),
                                                      llvm::Type::getInt64Ty(LL_CONTEXT));
  } else {
    target_lv_casted = target_lvs.front();
  }

  std::string slot_empty_cas_func_name("slotEmptyKeyCAS");
  llvm::Value* init_val_lv{LL_INT(init_val)};
  if (query_mem_desc.isLogicalSizedColumnsAllowed() &&
      !first_sample_expr.target_info.sql_type.is_varlen()) {
    // add proper suffix to the function name:
    switch (first_sample_slot_bytes) {
      case 1:
        slot_empty_cas_func_name += "_int8";
        break;
      case 2:
        slot_empty_cas_func_name += "_int16";
        break;
      case 4:
        slot_empty_cas_func_name += "_int32";
        break;
      case 8:
        break;
      default:
        UNREACHABLE() << "Invalid slot size for slotEmptyKeyCAS function.";
        break;
    }
    if (first_sample_slot_bytes != sizeof(int64_t)) {
      // shrink the init constant to match the narrow slot width
      init_val_lv = llvm::ConstantInt::get(
          get_int_type(first_sample_slot_bytes * 8, LL_CONTEXT), init_val);
    }
  }

  auto sample_cas_lv = executor->cgen_state_->emitExternalCall(
      slot_empty_cas_func_name,
      llvm::Type::getInt1Ty(executor->cgen_state_->context_),
      {agg_col_ptr, target_lv_casted, init_val_lv});
  return sample_cas_lv;
}
size_t varlenOutputRowSizeToSlot(const size_t slot_idx) const
#define LL_BUILDER
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:214
bool target_has_geo(const TargetInfo &target_info)
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
llvm::BasicBlock * cond_false_
llvm::Value * codegenAggColumnPtr(llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
: returns the pointer to where the aggregation should be stored.
std::vector< std::string > agg_fn_base_names(const TargetInfo &target_info, const bool is_varlen_projection)
bool isLogicalSizedColumnsAllowed() const
SQLTypeInfo sql_type
Definition: TargetInfo.h:51
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:92
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, llvm::Value *varlen_output_buffer, DiamondCodegen &diamond_codegen, DiamondCodegen *sample_cfg=nullptr) const
void codegenMultiSlotSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:512
#define UNREACHABLE()
Definition: Logger.h:250
#define CHECK_GE(x, y)
Definition: Logger.h:219
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
void codegenApproxQuantile(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
void checkErrorCode(llvm::Value *retCode)
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:157
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:323
bool needsUnnestDoublePatch(llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
bool skip_null_val
Definition: TargetInfo.h:53
llvm::BasicBlock * cond_true_
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string to_string(char const *&&v)
SQLTypeInfo agg_arg_type
Definition: TargetInfo.h:52
llvm::Value * codegenSlotEmptyKey(llvm::Value *agg_col_ptr, std::vector< llvm::Value * > &target_lvs, Executor *executor, const int64_t init_val) const
std::string patch_agg_fname(const std::string &agg_name)
Helpers for codegen of target expressions.
size_t getColOnlyOffInBytes(const size_t col_idx) const
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
bool is_varlen_projection(const Analyzer::Expr *target_expr, const SQLTypeInfo &ti)
bool is_agg
Definition: TargetInfo.h:49
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
#define LL_INT(v)
llvm::Value * convertNullIfAny(const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
void codegenSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
bool g_bigint_count
Definition: sqldefs.h:75
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, llvm::Value *varlen_output_buffer, DiamondCodegen &diamond_codegen) const
#define LL_CONTEXT
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:153
void codegenCountDistinct(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLAgg agg_kind
Definition: TargetInfo.h:50
QueryDescriptionType getQueryDescriptionType() const
ExecutorDeviceType device_type
std::optional< size_t > varlenOutputBufferElemSize() const
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:42
#define CHECK_LT(x, y)
Definition: Logger.h:216
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
const Analyzer::Expr * target_expr
Definition: sqldefs.h:76
std::vector< llvm::Value * > codegenAggArg(const Analyzer::Expr *target_expr, const CompilationOptions &co)
llvm::Value * codegenWindowRowPointer(const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
bool window_function_requires_peer_handling(const Analyzer::WindowFunction *window_func)
bool is_simple_count(const TargetInfo &target_info)
#define CHECK(condition)
Definition: Logger.h:206
bool is_geometry() const
Definition: sqltypes.h:510
static void resetWindowFunctionContext(Executor *executor)
int64_t get_initial_agg_val(const TargetInfo &target_info, const QueryMemoryDescriptor &query_mem_desc)
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:209
bool is_string() const
Definition: sqltypes.h:498
bool is_distinct
Definition: TargetInfo.h:54
void operator()(const Analyzer::Expr *target_expr, const Executor *executor, const CompilationOptions &co)
Definition: sqldefs.h:74
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
int get_physical_coord_cols() const
Definition: sqltypes.h:359
Definition: sqldefs.h:72
size_t getColOffInBytes(const size_t col_idx) const
void codegenAggregate(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::vector< llvm::Value * > &target_lvs, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, llvm::Value *varlen_output_buffer, int32_t slot_index) const
bool is_columnar_projection(const QueryMemoryDescriptor &query_mem_desc)
#define ROW_FUNC
void codegenSingleSlotSampleExpression(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
bool is_agg_domain_range_equivalent(const SQLAgg &agg_kind)
Definition: TargetInfo.h:78