OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TargetExprBuilder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "TargetExprBuilder.h"
24 
25 #include "CodeGenerator.h"
26 #include "Execute.h"
27 #include "GroupByAndAggregate.h"
28 #include "Logger/Logger.h"
29 #include "MaxwellCodegenPatch.h"
31 
32 #define LL_CONTEXT executor->cgen_state_->context_
33 #define LL_BUILDER executor->cgen_state_->ir_builder_
34 #define LL_BOOL(v) executor->ll_bool(v)
35 #define LL_INT(v) executor->cgen_state_->llInt(v)
36 #define LL_FP(v) executor->cgen_state_->llFp(v)
37 #define ROW_FUNC executor->cgen_state_->row_func_
38 
39 namespace {
40 
41 std::vector<std::string> agg_fn_base_names(const TargetInfo& target_info) {
42  const auto& chosen_type = get_compact_type(target_info);
43  if (!target_info.is_agg || target_info.agg_kind == kSAMPLE) {
44  if (chosen_type.is_geometry()) {
45  return std::vector<std::string>(2 * chosen_type.get_physical_coord_cols(),
46  "agg_id");
47  }
48  if (chosen_type.is_varlen()) {
49  return {"agg_id", "agg_id"};
50  }
51  return {"agg_id"};
52  }
53  switch (target_info.agg_kind) {
54  case kAVG:
55  return {"agg_sum", "agg_count"};
56  case kCOUNT:
57  return {target_info.is_distinct ? "agg_count_distinct" : "agg_count"};
58  case kMAX:
59  return {"agg_max"};
60  case kMIN:
61  return {"agg_min"};
62  case kSUM:
63  return {"agg_sum"};
65  return {"agg_approximate_count_distinct"};
66  case kAPPROX_MEDIAN:
67  return {"agg_approx_median"};
68  case kSINGLE_VALUE:
69  return {"checked_single_agg_id"};
70  case kSAMPLE:
71  return {"agg_id"};
72  default:
73  UNREACHABLE() << "Unrecognized agg kind: " << std::to_string(target_info.agg_kind);
74  }
75  return {};
76 }
77 
79  return query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
80  query_mem_desc.didOutputColumnar();
81 }
82 
83 bool is_simple_count(const TargetInfo& target_info) {
84  return target_info.is_agg && target_info.agg_kind == kCOUNT && !target_info.is_distinct;
85 }
86 
87 bool target_has_geo(const TargetInfo& target_info) {
88  return target_info.is_agg ? target_info.agg_arg_type.is_geometry()
89  : target_info.sql_type.is_geometry();
90 }
91 
92 } // namespace
93 
95  GroupByAndAggregate* group_by_and_agg,
96  Executor* executor,
98  const CompilationOptions& co,
99  const GpuSharedMemoryContext& gpu_smem_context,
100  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
101  const std::vector<llvm::Value*>& agg_out_vec,
102  llvm::Value* output_buffer_byte_stream,
103  llvm::Value* out_row_idx,
104  DiamondCodegen& diamond_codegen,
105  DiamondCodegen* sample_cfg) const {
106  CHECK(group_by_and_agg);
107  CHECK(executor);
108  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
109 
110  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
111  const auto arg_expr = agg_arg(target_expr);
112 
113  const auto agg_fn_names = agg_fn_base_names(target_info);
114  const auto window_func = dynamic_cast<const Analyzer::WindowFunction*>(target_expr);
116  auto target_lvs =
117  window_func
118  ? std::vector<llvm::Value*>{executor->codegenWindowFunction(target_idx, co)}
119  : group_by_and_agg->codegenAggArg(target_expr, co);
120  const auto window_row_ptr = window_func
121  ? group_by_and_agg->codegenWindowRowPointer(
122  window_func, query_mem_desc, co, diamond_codegen)
123  : nullptr;
124  if (window_row_ptr) {
125  agg_out_ptr_w_idx =
126  std::make_tuple(window_row_ptr, std::get<1>(agg_out_ptr_w_idx_in));
127  if (window_function_is_aggregate(window_func->getKind())) {
128  out_row_idx = window_row_ptr;
129  }
130  }
131 
132  llvm::Value* str_target_lv{nullptr};
133  if (target_lvs.size() == 3 && !target_has_geo(target_info)) {
134  // none encoding string, pop the packed pointer + length since
135  // it's only useful for IS NULL checks and assumed to be only
136  // two components (pointer and length) for the purpose of projection
137  str_target_lv = target_lvs.front();
138  target_lvs.erase(target_lvs.begin());
139  }
141  // Geo cols are expanded to the physical coord cols. Each physical coord col is an
142  // array. Ensure that the target values generated match the number of agg
143  // functions before continuing
144  if (target_lvs.size() < agg_fn_names.size()) {
145  CHECK_EQ(target_lvs.size(), agg_fn_names.size() / 2);
146  std::vector<llvm::Value*> new_target_lvs;
147  new_target_lvs.reserve(agg_fn_names.size());
148  for (const auto& target_lv : target_lvs) {
149  new_target_lvs.push_back(target_lv);
150  new_target_lvs.push_back(target_lv);
151  }
152  target_lvs = new_target_lvs;
153  }
154  }
155  if (target_lvs.size() < agg_fn_names.size()) {
156  CHECK_EQ(size_t(1), target_lvs.size());
157  CHECK_EQ(size_t(2), agg_fn_names.size());
158  for (size_t i = 1; i < agg_fn_names.size(); ++i) {
159  target_lvs.push_back(target_lvs.front());
160  }
161  } else {
163  if (!target_info.is_agg) {
164  CHECK_EQ(static_cast<size_t>(2 * target_info.sql_type.get_physical_coord_cols()),
165  target_lvs.size());
166  CHECK_EQ(agg_fn_names.size(), target_lvs.size());
167  }
168  } else {
169  CHECK(str_target_lv || (agg_fn_names.size() == target_lvs.size()));
170  CHECK(target_lvs.size() == 1 || target_lvs.size() == 2);
171  }
172  }
173 
174  int32_t slot_index = base_slot_index;
175  CHECK_GE(slot_index, 0);
176  CHECK(is_group_by || static_cast<size_t>(slot_index) < agg_out_vec.size());
177 
178  uint32_t col_off{0};
179  if (co.device_type == ExecutorDeviceType::GPU && query_mem_desc.threadsShareMemory() &&
181  (!arg_expr || arg_expr->get_type_info().get_notnull())) {
182  CHECK_EQ(size_t(1), agg_fn_names.size());
183  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(slot_index);
184  llvm::Value* agg_col_ptr{nullptr};
185  if (is_group_by) {
186  if (query_mem_desc.didOutputColumnar()) {
187  col_off = query_mem_desc.getColOffInBytes(slot_index);
188  CHECK_EQ(size_t(0), col_off % chosen_bytes);
189  col_off /= chosen_bytes;
190  CHECK(std::get<1>(agg_out_ptr_w_idx));
191  auto offset =
192  LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
193  agg_col_ptr = LL_BUILDER.CreateGEP(
194  LL_BUILDER.CreateBitCast(
195  std::get<0>(agg_out_ptr_w_idx),
196  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
197  offset);
198  } else {
199  col_off = query_mem_desc.getColOnlyOffInBytes(slot_index);
200  CHECK_EQ(size_t(0), col_off % chosen_bytes);
201  col_off /= chosen_bytes;
202  agg_col_ptr = LL_BUILDER.CreateGEP(
203  LL_BUILDER.CreateBitCast(
204  std::get<0>(agg_out_ptr_w_idx),
205  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
206  LL_INT(col_off));
207  }
208  }
209 
210  if (chosen_bytes != sizeof(int32_t)) {
211  CHECK_EQ(8, chosen_bytes);
212  if (g_bigint_count) {
213  const auto acc_i64 = LL_BUILDER.CreateBitCast(
214  is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
215  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0));
216  if (gpu_smem_context.isSharedMemoryUsed()) {
217  group_by_and_agg->emitCall(
218  "agg_count_shared", std::vector<llvm::Value*>{acc_i64, LL_INT(int64_t(1))});
219  } else {
220  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
221  acc_i64,
222  LL_INT(int64_t(1)),
223  llvm::AtomicOrdering::Monotonic);
224  }
225  } else {
226  auto acc_i32 = LL_BUILDER.CreateBitCast(
227  is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
228  llvm::PointerType::get(get_int_type(32, LL_CONTEXT), 0));
229  if (gpu_smem_context.isSharedMemoryUsed()) {
230  acc_i32 = LL_BUILDER.CreatePointerCast(
231  acc_i32, llvm::Type::getInt32PtrTy(LL_CONTEXT, 3));
232  }
233  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
234  acc_i32,
235  LL_INT(1),
236  llvm::AtomicOrdering::Monotonic);
237  }
238  } else {
239  const auto acc_i32 = (is_group_by ? agg_col_ptr : agg_out_vec[slot_index]);
240  if (gpu_smem_context.isSharedMemoryUsed()) {
241  // Atomic operation on address space level 3 (Shared):
242  const auto shared_acc_i32 = LL_BUILDER.CreatePointerCast(
243  acc_i32, llvm::Type::getInt32PtrTy(LL_CONTEXT, 3));
244  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
245  shared_acc_i32,
246  LL_INT(1),
247  llvm::AtomicOrdering::Monotonic);
248  } else {
249  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
250  acc_i32,
251  LL_INT(1),
252  llvm::AtomicOrdering::Monotonic);
253  }
254  }
255  return;
256  }
257 
258  codegenAggregate(group_by_and_agg,
259  executor,
260  query_mem_desc,
261  co,
262  target_lvs,
263  agg_out_ptr_w_idx,
264  agg_out_vec,
265  output_buffer_byte_stream,
266  out_row_idx,
267  slot_index);
268 }
269 
271  GroupByAndAggregate* group_by_and_agg,
272  Executor* executor,
274  const CompilationOptions& co,
275  const std::vector<llvm::Value*>& target_lvs,
276  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
277  const std::vector<llvm::Value*>& agg_out_vec,
278  llvm::Value* output_buffer_byte_stream,
279  llvm::Value* out_row_idx,
280  int32_t slot_index) const {
281  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
282  size_t target_lv_idx = 0;
283  const bool lazy_fetched{executor->plan_state_->isLazyFetchColumn(target_expr)};
284 
285  CodeGenerator code_generator(executor);
286 
287  const auto agg_fn_names = agg_fn_base_names(target_info);
288  auto arg_expr = agg_arg(target_expr);
289 
290  for (const auto& agg_base_name : agg_fn_names) {
291  if (target_info.is_distinct && arg_expr->get_type_info().is_array()) {
292  CHECK_EQ(static_cast<size_t>(query_mem_desc.getLogicalSlotWidthBytes(slot_index)),
293  sizeof(int64_t));
294  // TODO(miyu): check if buffer may be columnar here
295  CHECK(!query_mem_desc.didOutputColumnar());
296  const auto& elem_ti = arg_expr->get_type_info().get_elem_type();
297  uint32_t col_off{0};
298  if (is_group_by) {
299  const auto col_off_in_bytes = query_mem_desc.getColOnlyOffInBytes(slot_index);
300  CHECK_EQ(size_t(0), col_off_in_bytes % sizeof(int64_t));
301  col_off /= sizeof(int64_t);
302  }
303  executor->cgen_state_->emitExternalCall(
304  "agg_count_distinct_array_" + numeric_type_name(elem_ti),
305  llvm::Type::getVoidTy(LL_CONTEXT),
306  {is_group_by
307  ? LL_BUILDER.CreateGEP(std::get<0>(agg_out_ptr_w_idx), LL_INT(col_off))
308  : agg_out_vec[slot_index],
309  target_lvs[target_lv_idx],
310  code_generator.posArg(arg_expr),
311  elem_ti.is_fp()
312  ? static_cast<llvm::Value*>(executor->cgen_state_->inlineFpNull(elem_ti))
313  : static_cast<llvm::Value*>(
314  executor->cgen_state_->inlineIntNull(elem_ti))});
315  ++slot_index;
316  ++target_lv_idx;
317  continue;
318  }
319 
320  llvm::Value* agg_col_ptr{nullptr};
321  const auto chosen_bytes =
322  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(slot_index));
323  const auto& chosen_type = get_compact_type(target_info);
324  const auto& arg_type =
325  ((arg_expr && arg_expr->get_type_info().get_type() != kNULLT) &&
329  const bool is_fp_arg =
330  !lazy_fetched && arg_type.get_type() != kNULLT && arg_type.is_fp();
331  if (is_group_by) {
332  agg_col_ptr = group_by_and_agg->codegenAggColumnPtr(output_buffer_byte_stream,
333  out_row_idx,
334  agg_out_ptr_w_idx,
335  query_mem_desc,
336  chosen_bytes,
337  slot_index,
338  target_idx);
339  CHECK(agg_col_ptr);
340  agg_col_ptr->setName("agg_col_ptr");
341  }
342 
343  const bool float_argument_input = takes_float_argument(target_info);
344  const bool is_count_in_avg = target_info.agg_kind == kAVG && target_lv_idx == 1;
345  // The count component of an average should never be compacted.
346  const auto agg_chosen_bytes =
347  float_argument_input && !is_count_in_avg ? sizeof(float) : chosen_bytes;
348  if (float_argument_input) {
349  CHECK_GE(chosen_bytes, sizeof(float));
350  }
351 
352  auto target_lv = target_lvs[target_lv_idx];
353  const auto needs_unnest_double_patch = group_by_and_agg->needsUnnestDoublePatch(
354  target_lv, agg_base_name, query_mem_desc.threadsShareMemory(), co);
355  const auto need_skip_null = !needs_unnest_double_patch && target_info.skip_null_val;
356  if (!needs_unnest_double_patch) {
357  if (need_skip_null && !is_agg_domain_range_equivalent(target_info.agg_kind)) {
358  target_lv = group_by_and_agg->convertNullIfAny(arg_type, target_info, target_lv);
359  } else if (is_fp_arg) {
360  target_lv = executor->castToFP(target_lv, arg_type, target_info.sql_type);
361  }
362  if (!dynamic_cast<const Analyzer::AggExpr*>(target_expr) || arg_expr) {
363  target_lv =
364  executor->cgen_state_->castToTypeIn(target_lv, (agg_chosen_bytes << 3));
365  }
366  }
367 
368  const bool is_simple_count_target = is_simple_count(target_info);
369  llvm::Value* str_target_lv{nullptr};
370  if (target_lvs.size() == 3 && !target_has_geo(target_info)) {
371  // none encoding string
372  str_target_lv = target_lvs.front();
373  }
374  std::vector<llvm::Value*> agg_args{
375  executor->castToIntPtrTyIn((is_group_by ? agg_col_ptr : agg_out_vec[slot_index]),
376  (agg_chosen_bytes << 3)),
377  (is_simple_count_target && !arg_expr)
378  ? (agg_chosen_bytes == sizeof(int32_t) ? LL_INT(int32_t(0))
379  : LL_INT(int64_t(0)))
380  : (is_simple_count_target && arg_expr && str_target_lv ? str_target_lv
381  : target_lv)};
382  if (query_mem_desc.isLogicalSizedColumnsAllowed()) {
383  if (is_simple_count_target && arg_expr && str_target_lv) {
384  agg_args[1] =
385  agg_chosen_bytes == sizeof(int32_t) ? LL_INT(int32_t(0)) : LL_INT(int64_t(0));
386  }
387  }
388  std::string agg_fname{agg_base_name};
389  if (is_fp_arg) {
390  if (!lazy_fetched) {
391  if (agg_chosen_bytes == sizeof(float)) {
392  CHECK_EQ(arg_type.get_type(), kFLOAT);
393  agg_fname += "_float";
394  } else {
395  CHECK_EQ(agg_chosen_bytes, sizeof(double));
396  agg_fname += "_double";
397  }
398  }
399  } else if (agg_chosen_bytes == sizeof(int32_t)) {
400  agg_fname += "_int32";
401  } else if (agg_chosen_bytes == sizeof(int16_t) &&
402  query_mem_desc.didOutputColumnar()) {
403  agg_fname += "_int16";
404  } else if (agg_chosen_bytes == sizeof(int8_t) && query_mem_desc.didOutputColumnar()) {
405  agg_fname += "_int8";
406  }
407 
409  CHECK_EQ(agg_chosen_bytes, sizeof(int64_t));
410  CHECK(!chosen_type.is_fp());
411  group_by_and_agg->codegenCountDistinct(
412  target_idx, target_expr, agg_args, query_mem_desc, co.device_type);
413  } else if (target_info.agg_kind == kAPPROX_MEDIAN) {
414  CHECK_EQ(agg_chosen_bytes, sizeof(int64_t));
415  group_by_and_agg->codegenApproxMedian(
416  target_idx, target_expr, agg_args, query_mem_desc, co.device_type);
417  } else {
418  const auto& arg_ti = target_info.agg_arg_type;
419  if (need_skip_null && !arg_ti.is_geometry()) {
420  agg_fname += "_skip_val";
421  }
422 
424  (need_skip_null && !arg_ti.is_geometry())) {
425  llvm::Value* null_in_lv{nullptr};
426  if (arg_ti.is_fp()) {
427  null_in_lv =
428  static_cast<llvm::Value*>(executor->cgen_state_->inlineFpNull(arg_ti));
429  } else {
430  null_in_lv = static_cast<llvm::Value*>(executor->cgen_state_->inlineIntNull(
432  ? arg_ti
433  : target_info.sql_type));
434  }
435  CHECK(null_in_lv);
436  auto null_lv =
437  executor->cgen_state_->castToTypeIn(null_in_lv, (agg_chosen_bytes << 3));
438  agg_args.push_back(null_lv);
439  }
440  if (!target_info.is_distinct) {
442  query_mem_desc.threadsShareMemory()) {
443  agg_fname += "_shared";
444  if (needs_unnest_double_patch) {
445  agg_fname = patch_agg_fname(agg_fname);
446  }
447  }
448  auto agg_fname_call_ret_lv = group_by_and_agg->emitCall(agg_fname, agg_args);
449 
450  if (agg_fname.find("checked") != std::string::npos) {
451  group_by_and_agg->checkErrorCode(agg_fname_call_ret_lv);
452  }
453  }
454  }
455  const auto window_func = dynamic_cast<const Analyzer::WindowFunction*>(target_expr);
456  if (window_func && window_function_requires_peer_handling(window_func)) {
457  const auto window_func_context =
459  const auto pending_outputs =
460  LL_INT(window_func_context->aggregateStatePendingOutputs());
461  executor->cgen_state_->emitExternalCall("add_window_pending_output",
462  llvm::Type::getVoidTy(LL_CONTEXT),
463  {agg_args.front(), pending_outputs});
464  const auto& window_func_ti = window_func->get_type_info();
465  std::string apply_window_pending_outputs_name = "apply_window_pending_outputs";
466  switch (window_func_ti.get_type()) {
467  case kFLOAT: {
468  apply_window_pending_outputs_name += "_float";
469  if (query_mem_desc.didOutputColumnar()) {
470  apply_window_pending_outputs_name += "_columnar";
471  }
472  break;
473  }
474  case kDOUBLE: {
475  apply_window_pending_outputs_name += "_double";
476  break;
477  }
478  default: {
479  apply_window_pending_outputs_name += "_int";
480  if (query_mem_desc.didOutputColumnar()) {
481  apply_window_pending_outputs_name +=
482  std::to_string(window_func_ti.get_size() * 8);
483  } else {
484  apply_window_pending_outputs_name += "64";
485  }
486  break;
487  }
488  }
489  const auto partition_end =
490  LL_INT(reinterpret_cast<int64_t>(window_func_context->partitionEnd()));
491  executor->cgen_state_->emitExternalCall(apply_window_pending_outputs_name,
492  llvm::Type::getVoidTy(LL_CONTEXT),
493  {pending_outputs,
494  target_lvs.front(),
495  partition_end,
496  code_generator.posArg(nullptr)});
497  }
498 
499  ++slot_index;
500  ++target_lv_idx;
501  }
502 }
503 
// NOTE(review): this is a Doxygen scrape — the leading integer on each line is
// an artifact line number, and several source lines were lost in extraction:
// the signature (line 504, presumably
// `void TargetExprCodegenBuilder::operator()(const Analyzer::Expr* target_expr,`),
// plus lines 523, 529-530, 533 and 544-546 noted below. The function
// classifies one target expression and queues it either for regular codegen
// or for the special SAMPLE codegen path.
505  const Executor* executor,
506  const CompilationOptions& co) {
507  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
// Zero-width slot: the target occupies no output storage; just advance the
// slot/target counters and return.
508  if (query_mem_desc.getPaddedSlotWidthBytes(slot_index_counter) == 0) {
509  CHECK(!dynamic_cast<const Analyzer::AggExpr*>(target_expr));
510  ++slot_index_counter;
511  ++target_index_counter;
512  return;
513  }
// UNNEST in the projection list is rejected up front.
514  if (dynamic_cast<const Analyzer::UOper*>(target_expr) &&
515  static_cast<const Analyzer::UOper*>(target_expr)->get_optype() == kUNNEST) {
516  throw std::runtime_error("UNNEST not supported in the projection list yet.");
517  }
// Narrow slots for lazily fetched (or non-group-by) targets are not supported
// with a padded row-wise layout.
518  if ((executor->plan_state_->isLazyFetchColumn(target_expr) || !is_group_by) &&
519  (static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(slot_index_counter)) <
520  sizeof(int64_t)) &&
521  !is_columnar_projection(query_mem_desc)) {
522  // TODO(miyu): enable different byte width in the layout w/o padding
// NOTE(review): one source line (523) was lost in extraction here — most
// likely a throw of a compilation-retry exception (e.g.
// `throw CompilationRetryNoCompaction();`); confirm against upstream.
524  }
525 
526  auto target_info = get_target_info(target_expr, g_bigint_count);
527  auto arg_expr = agg_arg(target_expr);
// Adjust null-skipping for aggregates that take an argument.
528  if (arg_expr) {
// NOTE(review): two source lines (529-530) were lost in extraction here —
// presumably a condition on target_info.agg_kind (e.g. kSINGLE_VALUE /
// kSAMPLE / kAPPROX_MEDIAN) selecting the `skip_null_val = false` branch
// below; confirm against upstream.
531  target_info.skip_null_val = false;
532  } else if (query_mem_desc.getQueryDescriptionType() ==
// NOTE(review): one source line (533) was lost in extraction here — likely
// `QueryDescriptionType::NonGroupedAggregate &&`; confirm against upstream.
534  !arg_expr->get_type_info().is_varlen()) {
535  // TODO: COUNT is currently not null-aware for varlen types. Need to add proper code
536  // generation for handling varlen nulls.
537  target_info.skip_null_val = true;
538  } else if (constrained_not_null(arg_expr, ra_exe_unit.quals)) {
539  target_info.skip_null_val = false;
540  }
541  }
542 
// Queue the target: SAMPLE targets needing the CAS-guarded path go into
// sample_exprs_to_codegen, everything else into target_exprs_to_codegen.
543  if (!(query_mem_desc.getQueryDescriptionType() ==
// NOTE(review): three source lines (544-546) were lost in extraction here —
// the rest of this condition could not be recovered unambiguously (the
// visible `!(` combined with the sample branch appearing first is surprising
// relative to upstream); confirm the full condition against the repository.
547  sample_exprs_to_codegen.emplace_back(target_expr,
548  target_info,
549  slot_index_counter,
550  target_index_counter++,
551  is_group_by);
552  } else {
553  target_exprs_to_codegen.emplace_back(target_expr,
554  target_info,
555  slot_index_counter,
556  target_index_counter++,
557  is_group_by);
558  }
559 
// Advance the slot counter by the number of physical slots this target uses.
560  const auto agg_fn_names = agg_fn_base_names(target_info);
561  slot_index_counter += agg_fn_names.size();
562 }
563 
564 namespace {
565 
567  const QueryMemoryDescriptor& query_mem_desc) {
568  const bool is_group_by{query_mem_desc.isGroupBy()};
569  if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_string() &&
570  target_info.sql_type.get_compression() != kENCODING_NONE) {
571  return get_agg_initial_val(target_info.agg_kind,
572  target_info.sql_type,
573  is_group_by,
574  query_mem_desc.getCompactByteWidth());
575  }
576  return 0;
577 }
578 
579 } // namespace
580 
582  GroupByAndAggregate* group_by_and_agg,
583  Executor* executor,
584  const QueryMemoryDescriptor& query_mem_desc,
585  const CompilationOptions& co,
586  const GpuSharedMemoryContext& gpu_smem_context,
587  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
588  const std::vector<llvm::Value*>& agg_out_vec,
589  llvm::Value* output_buffer_byte_stream,
590  llvm::Value* out_row_idx,
591  DiamondCodegen& diamond_codegen) const {
592  CHECK(group_by_and_agg);
593  CHECK(executor);
594  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
595 
596  for (const auto& target_expr_codegen : target_exprs_to_codegen) {
597  target_expr_codegen.codegen(group_by_and_agg,
598  executor,
599  query_mem_desc,
600  co,
601  gpu_smem_context,
602  agg_out_ptr_w_idx,
603  agg_out_vec,
604  output_buffer_byte_stream,
605  out_row_idx,
606  diamond_codegen);
607  }
608  if (!sample_exprs_to_codegen.empty()) {
609  codegenSampleExpressions(group_by_and_agg,
610  executor,
611  query_mem_desc,
612  co,
613  agg_out_ptr_w_idx,
614  agg_out_vec,
615  output_buffer_byte_stream,
616  out_row_idx,
617  diamond_codegen);
618  }
619 }
620 
622  GroupByAndAggregate* group_by_and_agg,
623  Executor* executor,
624  const QueryMemoryDescriptor& query_mem_desc,
625  const CompilationOptions& co,
626  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
627  const std::vector<llvm::Value*>& agg_out_vec,
628  llvm::Value* output_buffer_byte_stream,
629  llvm::Value* out_row_idx,
630  DiamondCodegen& diamond_codegen) const {
631  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
632  CHECK(!sample_exprs_to_codegen.empty());
634  if (sample_exprs_to_codegen.size() == 1 &&
635  !sample_exprs_to_codegen.front().target_info.sql_type.is_varlen()) {
636  codegenSingleSlotSampleExpression(group_by_and_agg,
637  executor,
638  query_mem_desc,
639  co,
640  agg_out_ptr_w_idx,
641  agg_out_vec,
642  output_buffer_byte_stream,
643  out_row_idx,
644  diamond_codegen);
645  } else {
646  codegenMultiSlotSampleExpressions(group_by_and_agg,
647  executor,
648  query_mem_desc,
649  co,
650  agg_out_ptr_w_idx,
651  agg_out_vec,
652  output_buffer_byte_stream,
653  out_row_idx,
654  diamond_codegen);
655  }
656 }
657 
659  GroupByAndAggregate* group_by_and_agg,
660  Executor* executor,
661  const QueryMemoryDescriptor& query_mem_desc,
662  const CompilationOptions& co,
663  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
664  const std::vector<llvm::Value*>& agg_out_vec,
665  llvm::Value* output_buffer_byte_stream,
666  llvm::Value* out_row_idx,
667  DiamondCodegen& diamond_codegen) const {
668  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
669  CHECK_EQ(size_t(1), sample_exprs_to_codegen.size());
670  CHECK(!sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
672  // no need for the atomic if we only have one SAMPLE target
673  sample_exprs_to_codegen.front().codegen(group_by_and_agg,
674  executor,
675  query_mem_desc,
676  co,
677  {},
678  agg_out_ptr_w_idx,
679  agg_out_vec,
680  output_buffer_byte_stream,
681  out_row_idx,
682  diamond_codegen);
683 }
684 
686  GroupByAndAggregate* group_by_and_agg,
687  Executor* executor,
688  const QueryMemoryDescriptor& query_mem_desc,
689  const CompilationOptions& co,
690  const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
691  const std::vector<llvm::Value*>& agg_out_vec,
692  llvm::Value* output_buffer_byte_stream,
693  llvm::Value* out_row_idx,
694  DiamondCodegen& diamond_codegen) const {
695  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
696  CHECK(sample_exprs_to_codegen.size() > 1 ||
697  sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
699  const auto& first_sample_expr = sample_exprs_to_codegen.front();
700  auto target_lvs = group_by_and_agg->codegenAggArg(first_sample_expr.target_expr, co);
701  CHECK_GE(target_lvs.size(), size_t(1));
702 
703  const auto init_val =
704  get_initial_agg_val(first_sample_expr.target_info, query_mem_desc);
705 
706  llvm::Value* agg_col_ptr{nullptr};
707  if (is_group_by) {
708  const auto agg_column_size_bytes =
709  query_mem_desc.isLogicalSizedColumnsAllowed() &&
710  !first_sample_expr.target_info.sql_type.is_varlen()
711  ? first_sample_expr.target_info.sql_type.get_size()
712  : sizeof(int64_t);
713  agg_col_ptr = group_by_and_agg->codegenAggColumnPtr(output_buffer_byte_stream,
714  out_row_idx,
715  agg_out_ptr_w_idx,
716  query_mem_desc,
717  agg_column_size_bytes,
718  first_sample_expr.base_slot_index,
719  first_sample_expr.target_idx);
720  } else {
721  CHECK_LT(static_cast<size_t>(first_sample_expr.base_slot_index), agg_out_vec.size());
722  agg_col_ptr =
723  executor->castToIntPtrTyIn(agg_out_vec[first_sample_expr.base_slot_index], 64);
724  }
725 
726  auto sample_cas_lv = codegenSlotEmptyKey(agg_col_ptr, target_lvs, executor, init_val);
727 
728  DiamondCodegen sample_cfg(
729  sample_cas_lv, executor, false, "sample_valcheck", &diamond_codegen, false);
730 
731  for (const auto& target_expr_codegen : sample_exprs_to_codegen) {
732  target_expr_codegen.codegen(group_by_and_agg,
733  executor,
734  query_mem_desc,
735  co,
736  {},
737  agg_out_ptr_w_idx,
738  agg_out_vec,
739  output_buffer_byte_stream,
740  out_row_idx,
741  diamond_codegen,
742  &sample_cfg);
743  }
744 }
745 
747  llvm::Value* agg_col_ptr,
748  std::vector<llvm::Value*>& target_lvs,
749  Executor* executor,
750  const int64_t init_val) const {
751  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
752  const auto& first_sample_expr = sample_exprs_to_codegen.front();
753  const auto first_sample_slot_bytes =
754  first_sample_expr.target_info.sql_type.is_varlen()
755  ? sizeof(int64_t)
756  : first_sample_expr.target_info.sql_type.get_size();
757  llvm::Value* target_lv_casted{nullptr};
758  // deciding whether proper casting is required for the first sample's slot:
759  if (first_sample_expr.target_info.sql_type.is_varlen()) {
760  target_lv_casted =
761  LL_BUILDER.CreatePtrToInt(target_lvs.front(), llvm::Type::getInt64Ty(LL_CONTEXT));
762  } else if (first_sample_expr.target_info.sql_type.is_fp()) {
763  // Initialization value for SAMPLE on a float column should be 0
764  CHECK_EQ(init_val, 0);
765  if (query_mem_desc.isLogicalSizedColumnsAllowed()) {
766  target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
767  target_lvs.front(),
768  first_sample_slot_bytes == sizeof(float) ? llvm::Type::getInt32Ty(LL_CONTEXT)
769  : llvm::Type::getInt64Ty(LL_CONTEXT));
770  } else {
771  target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
772  target_lvs.front(), llvm::Type::getInt64Ty(LL_CONTEXT));
773  }
774  } else if (first_sample_slot_bytes != sizeof(int64_t) &&
775  !query_mem_desc.isLogicalSizedColumnsAllowed()) {
776  target_lv_casted =
777  executor->cgen_state_->ir_builder_.CreateCast(llvm::Instruction::CastOps::SExt,
778  target_lvs.front(),
779  llvm::Type::getInt64Ty(LL_CONTEXT));
780  } else {
781  target_lv_casted = target_lvs.front();
782  }
783 
784  std::string slot_empty_cas_func_name("slotEmptyKeyCAS");
785  llvm::Value* init_val_lv{LL_INT(init_val)};
786  if (query_mem_desc.isLogicalSizedColumnsAllowed() &&
787  !first_sample_expr.target_info.sql_type.is_varlen()) {
788  // add proper suffix to the function name:
789  switch (first_sample_slot_bytes) {
790  case 1:
791  slot_empty_cas_func_name += "_int8";
792  break;
793  case 2:
794  slot_empty_cas_func_name += "_int16";
795  break;
796  case 4:
797  slot_empty_cas_func_name += "_int32";
798  break;
799  case 8:
800  break;
801  default:
802  UNREACHABLE() << "Invalid slot size for slotEmptyKeyCAS function.";
803  break;
804  }
805  if (first_sample_slot_bytes != sizeof(int64_t)) {
806  init_val_lv = llvm::ConstantInt::get(
807  get_int_type(first_sample_slot_bytes * 8, LL_CONTEXT), init_val);
808  }
809  }
810 
811  auto sample_cas_lv = executor->cgen_state_->emitExternalCall(
812  slot_empty_cas_func_name,
813  llvm::Type::getInt1Ty(executor->cgen_state_->context_),
814  {agg_col_ptr, target_lv_casted, init_val_lv});
815  return sample_cas_lv;
816 }
#define LL_BUILDER
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:211
bool target_has_geo(const TargetInfo &target_info)
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
void codegenAggregate(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::vector< llvm::Value * > &target_lvs, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, int32_t slot_index) const
std::vector< std::string > agg_fn_base_names(const TargetInfo &target_info)
llvm::Value * codegenAggColumnPtr(llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
: returns the pointer to where the aggregation should be stored.
bool isLogicalSizedColumnsAllowed() const
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
void codegenApproxMedian(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:79
void codegenMultiSlotSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:512
#define UNREACHABLE()
Definition: Logger.h:247
#define CHECK_GE(x, y)
Definition: Logger.h:216
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
void checkErrorCode(llvm::Value *retCode)
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:134
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
bool needsUnnestDoublePatch(llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
bool skip_null_val
Definition: TargetInfo.h:44
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string to_string(char const *&&v)
SQLTypeInfo agg_arg_type
Definition: TargetInfo.h:43
llvm::Value * codegenSlotEmptyKey(llvm::Value *agg_col_ptr, std::vector< llvm::Value * > &target_lvs, Executor *executor, const int64_t init_val) const
std::string patch_agg_fname(const std::string &agg_name)
Helpers for codegen of target expressions.
size_t getColOnlyOffInBytes(const size_t col_idx) const
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
bool is_agg
Definition: TargetInfo.h:40
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
#define LL_INT(v)
llvm::Value * convertNullIfAny(const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
void codegenSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
bool g_bigint_count
Definition: sqldefs.h:75
#define LL_CONTEXT
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:130
void codegenCountDistinct(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLAgg agg_kind
Definition: TargetInfo.h:41
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen, DiamondCodegen *sample_cfg=nullptr) const
QueryDescriptionType getQueryDescriptionType() const
ExecutorDeviceType device_type
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:42
#define CHECK_LT(x, y)
Definition: Logger.h:213
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
const Analyzer::Expr * target_expr
Definition: sqldefs.h:76
std::vector< llvm::Value * > codegenAggArg(const Analyzer::Expr *target_expr, const CompilationOptions &co)
llvm::Value * codegenWindowRowPointer(const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
bool window_function_requires_peer_handling(const Analyzer::WindowFunction *window_func)
bool is_simple_count(const TargetInfo &target_info)
#define CHECK(condition)
Definition: Logger.h:203
bool is_geometry() const
Definition: sqltypes.h:501
static void resetWindowFunctionContext(Executor *executor)
int64_t get_initial_agg_val(const TargetInfo &target_info, const QueryMemoryDescriptor &query_mem_desc)
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:210
bool is_string() const
Definition: sqltypes.h:489
bool is_distinct
Definition: TargetInfo.h:45
void operator()(const Analyzer::Expr *target_expr, const Executor *executor, const CompilationOptions &co)
Definition: sqldefs.h:74
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
int get_physical_coord_cols() const
Definition: sqltypes.h:350
Definition: sqldefs.h:72
size_t getColOffInBytes(const size_t col_idx) const
bool is_columnar_projection(const QueryMemoryDescriptor &query_mem_desc)
void codegenSingleSlotSampleExpression(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
bool is_agg_domain_range_equivalent(const SQLAgg &agg_kind)
Definition: TargetInfo.h:65