OmniSciDB  addbbd5075
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ResultSetReductionJIT.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ResultSetReductionJIT.h"
20 
21 #include "CodeGenerator.h"
22 #include "DynamicWatchdog.h"
23 #include "Execute.h"
24 #include "IRCodegenUtils.h"
26 
27 #include "Shared/likely.h"
28 #include "Shared/mapdpath.h"
29 
30 #include <llvm/Bitcode/BitcodeReader.h>
31 #include <llvm/IR/Function.h>
32 #include <llvm/IR/IRBuilder.h>
33 #include <llvm/IR/Verifier.h>
34 #include <llvm/Support/SourceMgr.h>
35 #include <llvm/Support/raw_os_ostream.h>
36 
37 extern std::unique_ptr<llvm::Module> g_rt_module;
38 
40 
42 
43 namespace {
44 
45 // Error code to be returned when the watchdog timer triggers during the reduction.
46 const int32_t WATCHDOG_ERROR{-1};
47 // Use the interpreter, not the JIT, for a number of entries lower than the threshold.
48 const size_t INTERP_THRESHOLD{25};
49 
50 // Load the value stored at 'ptr' interpreted as 'ptr_type'.
51 Value* emit_load(Value* ptr, Type ptr_type, Function* function) {
52  return function->add<Load>(
53  function->add<Cast>(Cast::CastOp::BitCast, ptr, ptr_type, ""),
54  ptr->label() + "_loaded");
55 }
56 
// Load the value stored at 'ptr' as a 32-bit signed integer.
// Thin convenience wrapper over emit_load with an Int32Ptr view.
Value* emit_load_i32(Value* ptr, Function* function) {
  return emit_load(ptr, Type::Int32Ptr, function);
}
61 
// Load the value stored at 'ptr' as a 64-bit signed integer.
// Thin convenience wrapper over emit_load with an Int64Ptr view.
Value* emit_load_i64(Value* ptr, Function* function) {
  return emit_load(ptr, Type::Int64Ptr, function);
}
66 
67 // Read a 32- or 64-bit integer stored at 'ptr' and sign extend to 64-bit.
68 Value* emit_read_int_from_buff(Value* ptr, const int8_t compact_sz, Function* function) {
69  switch (compact_sz) {
70  case 8: {
71  return emit_load_i64(ptr, function);
72  }
73  case 4: {
74  const auto loaded_val = emit_load_i32(ptr, function);
75  return function->add<Cast>(Cast::CastOp::SExt, loaded_val, Type::Int64, "");
76  }
77  default: {
78  LOG(FATAL) << "Invalid byte width: " << compact_sz;
79  return nullptr;
80  }
81  }
82 }
83 
84 // Emit a runtime call to accumulate into the 'val_ptr' byte address the 'other_ptr'
85 // value when the type is specified as not null.
86 void emit_aggregate_one_value(const std::string& agg_kind,
87  Value* val_ptr,
88  Value* other_ptr,
89  const size_t chosen_bytes,
90  const TargetInfo& agg_info,
91  Function* ir_reduce_one_entry) {
92  const auto sql_type = get_compact_type(agg_info);
93  const auto dest_name = agg_kind + "_dest";
94  if (sql_type.is_fp()) {
95  if (chosen_bytes == sizeof(float)) {
96  const auto agg = ir_reduce_one_entry->add<Cast>(
97  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
98  const auto val = emit_load(other_ptr, Type::FloatPtr, ir_reduce_one_entry);
99  ir_reduce_one_entry->add<Call>(
100  "agg_" + agg_kind + "_float", std::vector<const Value*>{agg, val}, "");
101  } else {
102  CHECK_EQ(chosen_bytes, sizeof(double));
103  const auto agg = ir_reduce_one_entry->add<Cast>(
104  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
105  const auto val = emit_load(other_ptr, Type::DoublePtr, ir_reduce_one_entry);
106  ir_reduce_one_entry->add<Call>(
107  "agg_" + agg_kind + "_double", std::vector<const Value*>{agg, val}, "");
108  }
109  } else {
110  if (chosen_bytes == sizeof(int32_t)) {
111  const auto agg = ir_reduce_one_entry->add<Cast>(
112  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
113  const auto val = emit_load(other_ptr, Type::Int32Ptr, ir_reduce_one_entry);
114  ir_reduce_one_entry->add<Call>(
115  "agg_" + agg_kind + "_int32", std::vector<const Value*>{agg, val}, "");
116  } else {
117  CHECK_EQ(chosen_bytes, sizeof(int64_t));
118  const auto agg = ir_reduce_one_entry->add<Cast>(
119  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
120  const auto val = emit_load(other_ptr, Type::Int64Ptr, ir_reduce_one_entry);
121  ir_reduce_one_entry->add<Call>(
122  "agg_" + agg_kind, std::vector<const Value*>{agg, val}, "");
123  }
124  }
125 }
126 
127 // Same as above, but support nullable types as well.
128 void emit_aggregate_one_nullable_value(const std::string& agg_kind,
129  Value* val_ptr,
130  Value* other_ptr,
131  const int64_t init_val,
132  const size_t chosen_bytes,
133  const TargetInfo& agg_info,
134  Function* ir_reduce_one_entry) {
135  const auto dest_name = agg_kind + "_dest";
136  if (agg_info.skip_null_val) {
137  const auto sql_type = get_compact_type(agg_info);
138  if (sql_type.is_fp()) {
139  if (chosen_bytes == sizeof(float)) {
140  const auto agg = ir_reduce_one_entry->add<Cast>(
141  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
142  const auto val = emit_load(other_ptr, Type::FloatPtr, ir_reduce_one_entry);
143  const auto init_val_lv = ir_reduce_one_entry->addConstant<ConstantFP>(
144  *reinterpret_cast<const float*>(may_alias_ptr(&init_val)), Type::Float);
145  ir_reduce_one_entry->add<Call>("agg_" + agg_kind + "_float_skip_val",
146  std::vector<const Value*>{agg, val, init_val_lv},
147  "");
148  } else {
149  CHECK_EQ(chosen_bytes, sizeof(double));
150  const auto agg = ir_reduce_one_entry->add<Cast>(
151  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
152  const auto val = emit_load(other_ptr, Type::DoublePtr, ir_reduce_one_entry);
153  const auto init_val_lv = ir_reduce_one_entry->addConstant<ConstantFP>(
154  *reinterpret_cast<const double*>(may_alias_ptr(&init_val)), Type::Double);
155  ir_reduce_one_entry->add<Call>("agg_" + agg_kind + "_double_skip_val",
156  std::vector<const Value*>{agg, val, init_val_lv},
157  "");
158  }
159  } else {
160  if (chosen_bytes == sizeof(int32_t)) {
161  const auto agg = ir_reduce_one_entry->add<Cast>(
162  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
163  const auto val = emit_load(other_ptr, Type::Int32Ptr, ir_reduce_one_entry);
164  const auto init_val_lv =
165  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int32);
166  ir_reduce_one_entry->add<Call>("agg_" + agg_kind + "_int32_skip_val",
167  std::vector<const Value*>{agg, val, init_val_lv},
168  "");
169  } else {
170  CHECK_EQ(chosen_bytes, sizeof(int64_t));
171  const auto agg = ir_reduce_one_entry->add<Cast>(
172  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
173  const auto val = emit_load(other_ptr, Type::Int64Ptr, ir_reduce_one_entry);
174  const auto init_val_lv =
175  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64);
176  ir_reduce_one_entry->add<Call>("agg_" + agg_kind + "_skip_val",
177  std::vector<const Value*>{agg, val, init_val_lv},
178  "");
179  }
180  }
181  } else {
183  agg_kind, val_ptr, other_ptr, chosen_bytes, agg_info, ir_reduce_one_entry);
184  }
185 }
186 
187 // Emit code to accumulate the 'other_ptr' count into the 'val_ptr' destination.
189  Value* other_ptr,
190  const size_t chosen_bytes,
191  Function* ir_reduce_one_entry) {
192  const auto dest_name = "count_dest";
193  if (chosen_bytes == sizeof(int32_t)) {
194  const auto agg = ir_reduce_one_entry->add<Cast>(
195  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
196  const auto val = emit_load(other_ptr, Type::Int32Ptr, ir_reduce_one_entry);
197  ir_reduce_one_entry->add<Call>(
198  "agg_sum_int32", std::vector<const Value*>{agg, val}, "");
199  } else {
200  CHECK_EQ(chosen_bytes, sizeof(int64_t));
201  const auto agg = ir_reduce_one_entry->add<Cast>(
202  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
203  const auto val = emit_load(other_ptr, Type::Int64Ptr, ir_reduce_one_entry);
204  ir_reduce_one_entry->add<Call>("agg_sum", std::vector<const Value*>{agg, val}, "");
205  }
206 }
207 
208 // Emit code to load the value stored at the 'other_pi8' as an integer of the given width
209 // 'chosen_bytes' and write it to the 'slot_pi8' destination only if necessary (the
210 // existing value at destination is the initialization value).
212  Value* other_pi8,
213  const int64_t init_val,
214  const size_t chosen_bytes,
215  Function* ir_reduce_one_entry) {
216  const auto func_name = "write_projection_int" + std::to_string(chosen_bytes * 8);
217  if (chosen_bytes == sizeof(int32_t)) {
218  const auto proj_val = emit_load_i32(other_pi8, ir_reduce_one_entry);
219  ir_reduce_one_entry->add<Call>(
220  func_name,
221  std::vector<const Value*>{
222  slot_pi8,
223  proj_val,
224  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64)},
225  "");
226  } else {
227  CHECK_EQ(chosen_bytes, sizeof(int64_t));
228  const auto proj_val = emit_load_i64(other_pi8, ir_reduce_one_entry);
229  ir_reduce_one_entry->add<Call>(
230  func_name,
231  std::vector<const Value*>{
232  slot_pi8,
233  proj_val,
234  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64)},
235  "");
236  }
237 }
238 
239 // Emit code to load the value stored at the 'other_pi8' as an integer of the given width
240 // 'chosen_bytes' and write it to the 'slot_pi8' destination only if necessary (the
241 // existing value at destination is the initialization value).
243  Value* other_pi8,
244  const int64_t init_val,
245  const size_t chosen_bytes,
246  Function* ir_reduce_one_entry) {
247  if (chosen_bytes == sizeof(int32_t)) {
248  const auto func_name = "checked_single_agg_id_int32";
249  const auto proj_val = emit_load_i32(other_pi8, ir_reduce_one_entry);
250  const auto slot_pi32 = ir_reduce_one_entry->add<Cast>(
251  Cast::CastOp::BitCast, slot_pi8, Type::Int32Ptr, "");
252  return ir_reduce_one_entry->add<Call>(
253  func_name,
254  Type::Int32,
255  std::vector<const Value*>{
256  slot_pi32,
257  proj_val,
258  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int32)},
259  "");
260  } else {
261  const auto func_name = "checked_single_agg_id";
262  CHECK_EQ(chosen_bytes, sizeof(int64_t));
263  const auto proj_val = emit_load_i64(other_pi8, ir_reduce_one_entry);
264  const auto slot_pi64 = ir_reduce_one_entry->add<Cast>(
265  Cast::CastOp::BitCast, slot_pi8, Type::Int64Ptr, "");
266 
267  return ir_reduce_one_entry->add<Call>(
268  func_name,
269  Type::Int32,
270  std::vector<const Value*>{
271  slot_pi64,
272  proj_val,
273  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64)},
274  "");
275  }
276 }
277 
278 std::unique_ptr<Function> create_function(
279  const std::string name,
280  const std::vector<Function::NamedArg>& arg_types,
281  const Type ret_type,
282  const bool always_inline) {
283  return std::make_unique<Function>(name, arg_types, ret_type, always_inline);
284 }
285 
// Create the declaration for the 'is_empty_entry' function. Use private linkage since
// it's a helper only called from the generated code and mark it as always inline.
// Takes a single i8* row pointer and returns an i1 emptiness flag.
// NOTE(review): 'reduction_code' is unused here; presumably kept for signature
// symmetry with the other setup_* helpers -- confirm before removing.
std::unique_ptr<Function> setup_is_empty_entry(ReductionCode* reduction_code) {
  return create_function(
      "is_empty_entry", {{"row_ptr", Type::Int8Ptr}}, Type::Int1, /*always_inline=*/true);
}
292 
293 // Create the declaration for the 'reduce_one_entry' helper.
294 std::unique_ptr<Function> setup_reduce_one_entry(ReductionCode* reduction_code,
295  const QueryDescriptionType hash_type) {
296  std::string this_ptr_name;
297  std::string that_ptr_name;
298  switch (hash_type) {
300  this_ptr_name = "this_targets_ptr";
301  that_ptr_name = "that_targets_ptr";
302  break;
303  }
306  this_ptr_name = "this_row_ptr";
307  that_ptr_name = "that_row_ptr";
308  break;
309  }
310  default: {
311  LOG(FATAL) << "Unexpected query description type";
312  }
313  }
314  return create_function("reduce_one_entry",
315  {{this_ptr_name, Type::Int8Ptr},
316  {that_ptr_name, Type::Int8Ptr},
317  {"this_qmd", Type::VoidPtr},
318  {"that_qmd", Type::VoidPtr},
319  {"serialized_varlen_buffer_arg", Type::VoidPtr}},
320  Type::Int32,
321  /*always_inline=*/true);
322 }
323 
// Create the declaration for the 'reduce_one_entry_idx' helper. It receives both
// buffers plus the right-hand side entry index/count so the body can locate the
// rows; the opaque descriptor handles and the serialized varlen buffer are
// forwarded to the per-entry reduction. Returns an i32 error code.
std::unique_ptr<Function> setup_reduce_one_entry_idx(ReductionCode* reduction_code) {
  return create_function("reduce_one_entry_idx",
                         {{"this_buff", Type::Int8Ptr},
                          {"that_buff", Type::Int8Ptr},
                          {"that_entry_idx", Type::Int32},
                          {"that_entry_count", Type::Int32},
                          {"this_qmd_handle", Type::VoidPtr},
                          {"that_qmd_handle", Type::VoidPtr},
                          {"serialized_varlen_buffer", Type::VoidPtr}},
                         Type::Int32,
                         /*always_inline=*/true);
}
337 
// Create the declaration for the 'reduce_loop' entry point. Use external linkage, this is
// the public API of the generated code directly used from result set reduction.
// Iterates [start_index, end_index) over the right-hand side buffer; returns an
// i32 error code (non-zero aborts the reduction).
std::unique_ptr<Function> setup_reduce_loop(ReductionCode* reduction_code) {
  return create_function("reduce_loop",
                         {{"this_buff", Type::Int8Ptr},
                          {"that_buff", Type::Int8Ptr},
                          {"start_index", Type::Int32},
                          {"end_index", Type::Int32},
                          {"that_entry_count", Type::Int32},
                          {"this_qmd_handle", Type::VoidPtr},
                          {"that_qmd_handle", Type::VoidPtr},
                          {"serialized_varlen_buffer", Type::VoidPtr}},
                         Type::Int32,
                         /*always_inline=*/false);
}
353 
354 llvm::Function* create_llvm_function(const Function* function,
355  const CgenState* cgen_state) {
356  auto& ctx = cgen_state->context_;
357  std::vector<llvm::Type*> parameter_types;
358  const auto& arg_types = function->arg_types();
359  for (const auto& named_arg : arg_types) {
360  CHECK(named_arg.type != Type::Void);
361  parameter_types.push_back(llvm_type(named_arg.type, ctx));
362  }
363  const auto func_type = llvm::FunctionType::get(
364  llvm_type(function->ret_type(), ctx), parameter_types, false);
365  const auto linkage = function->always_inline() ? llvm::Function::PrivateLinkage
366  : llvm::Function::ExternalLinkage;
367  auto func =
368  llvm::Function::Create(func_type, linkage, function->name(), cgen_state->module_);
369  const auto arg_it = func->arg_begin();
370  for (size_t i = 0; i < arg_types.size(); ++i) {
371  const auto arg = &*(arg_it + i);
372  arg->setName(arg_types[i].name);
373  }
374  if (function->always_inline()) {
376  }
377  return func;
378 }
379 
380 // Setup the reduction function and helpers declarations, create a module and a code
381 // generation state object.
383  ReductionCode reduction_code{};
384  reduction_code.ir_is_empty = setup_is_empty_entry(&reduction_code);
385  reduction_code.ir_reduce_one_entry = setup_reduce_one_entry(&reduction_code, hash_type);
386  reduction_code.ir_reduce_one_entry_idx = setup_reduce_one_entry_idx(&reduction_code);
387  reduction_code.ir_reduce_loop = setup_reduce_loop(&reduction_code);
388  return reduction_code;
389 }
390 
392  return hash_type == QueryDescriptionType::GroupByBaselineHash ||
395 }
396 
397 // Variable length sample fast path (no serialized variable length buffer).
398 void varlen_buffer_sample(int8_t* this_ptr1,
399  int8_t* this_ptr2,
400  const int8_t* that_ptr1,
401  const int8_t* that_ptr2,
402  const int64_t init_val) {
403  const auto rhs_proj_col = *reinterpret_cast<const int64_t*>(that_ptr1);
404  if (rhs_proj_col != init_val) {
405  *reinterpret_cast<int64_t*>(this_ptr1) = rhs_proj_col;
406  }
407  CHECK(this_ptr2 && that_ptr2);
408  *reinterpret_cast<int64_t*>(this_ptr2) = *reinterpret_cast<const int64_t*>(that_ptr2);
409 }
410 
411 } // namespace
412 
414  const void* serialized_varlen_buffer_handle,
415  int8_t* this_ptr1,
416  int8_t* this_ptr2,
417  const int8_t* that_ptr1,
418  const int8_t* that_ptr2,
419  const int64_t init_val,
420  const int64_t length_to_elems) {
421  if (!serialized_varlen_buffer_handle) {
422  varlen_buffer_sample(this_ptr1, this_ptr2, that_ptr1, that_ptr2, init_val);
423  return;
424  }
425  const auto& serialized_varlen_buffer =
426  *reinterpret_cast<const std::vector<std::string>*>(serialized_varlen_buffer_handle);
427  if (!serialized_varlen_buffer.empty()) {
428  const auto rhs_proj_col = *reinterpret_cast<const int64_t*>(that_ptr1);
429  CHECK_LT(static_cast<size_t>(rhs_proj_col), serialized_varlen_buffer.size());
430  const auto& varlen_bytes_str = serialized_varlen_buffer[rhs_proj_col];
431  const auto str_ptr = reinterpret_cast<const int8_t*>(varlen_bytes_str.c_str());
432  *reinterpret_cast<int64_t*>(this_ptr1) = reinterpret_cast<const int64_t>(str_ptr);
433  *reinterpret_cast<int64_t*>(this_ptr2) =
434  static_cast<int64_t>(varlen_bytes_str.size() / length_to_elems);
435  } else {
436  varlen_buffer_sample(this_ptr1, this_ptr2, that_ptr1, that_ptr2, init_val);
437  }
438 }
439 
440 // Wrappers to be called from the generated code, sharing implementation with the rest of
441 // the system.
442 
443 extern "C" void count_distinct_set_union_jit_rt(const int64_t new_set_handle,
444  const int64_t old_set_handle,
445  const void* that_qmd_handle,
446  const void* this_qmd_handle,
447  const int64_t target_logical_idx) {
448  const auto that_qmd = reinterpret_cast<const QueryMemoryDescriptor*>(that_qmd_handle);
449  const auto this_qmd = reinterpret_cast<const QueryMemoryDescriptor*>(this_qmd_handle);
450  const auto& new_count_distinct_desc =
451  that_qmd->getCountDistinctDescriptor(target_logical_idx);
452  const auto& old_count_distinct_desc =
453  this_qmd->getCountDistinctDescriptor(target_logical_idx);
454  CHECK(old_count_distinct_desc.impl_type_ != CountDistinctImplType::Invalid);
455  CHECK(old_count_distinct_desc.impl_type_ == new_count_distinct_desc.impl_type_);
457  new_set_handle, old_set_handle, new_count_distinct_desc, old_count_distinct_desc);
458 }
459 
461  const int8_t* key,
462  const uint32_t key_count,
463  const void* this_qmd_handle,
464  const int8_t* that_buff,
465  const uint32_t that_entry_idx,
466  const uint32_t that_entry_count,
467  const uint32_t row_size_bytes,
468  int64_t** buff_out,
469  uint8_t* empty) {
470  const auto& this_qmd = *reinterpret_cast<const QueryMemoryDescriptor*>(this_qmd_handle);
471  const auto gvi = get_group_value_reduction(reinterpret_cast<int64_t*>(groups_buffer),
472  this_qmd.getEntryCount(),
473  reinterpret_cast<const int64_t*>(key),
474  key_count,
475  this_qmd.getEffectiveKeyWidth(),
476  this_qmd,
477  reinterpret_cast<const int64_t*>(that_buff),
478  that_entry_idx,
479  that_entry_count,
480  row_size_bytes >> 3);
481  *buff_out = gvi.first;
482  *empty = gvi.second;
483 }
484 
485 extern "C" uint8_t check_watchdog_rt(const size_t sample_seed) {
486  if (UNLIKELY(g_enable_dynamic_watchdog && (sample_seed & 0x3F) == 0 &&
487  dynamic_watchdog())) {
488  return true;
489  }
490  return false;
491 }
492 
494  const std::vector<TargetInfo>& targets,
495  const std::vector<int64_t>& target_init_vals)
496  : query_mem_desc_(query_mem_desc)
497  , targets_(targets)
498  , target_init_vals_(target_init_vals) {}
499 
500 // The code generated for a reduction between two result set buffers is structured in
501 // several functions and their IR is stored in the 'ReductionCode' structure. At a high
502 // level, the pseudocode is:
503 //
504 // func is_empty_func(row_ptr):
505 // ...
506 //
507 // func reduce_func_baseline(this_ptr, that_ptr):
508 // if is_empty_func(that_ptr):
509 // return
510 // for each target in the row:
511 // reduce target from that_ptr into this_ptr
512 //
513 // func reduce_func_perfect_hash(this_ptr, that_ptr):
514 // if is_empty_func(that_ptr):
515 // return
516 // for each target in the row:
517 // reduce target from that_ptr into this_ptr
518 //
519 // func reduce_func_idx(this_buff, that_buff, that_entry_index):
520 // that_ptr = that_result_set[that_entry_index]
521 // # Retrieval of 'this_ptr' is different between perfect hash and baseline.
522 // this_ptr = this_result_set[that_entry_index]
523 // or
524 // get_row(key(that_row_ptr), this_result_set_buffer)
525 // reduce_func_[baseline|perfect_hash](this_ptr, that_ptr)
526 //
527 // func reduce_loop(this_buff, that_buff, start_entry_index, end_entry_index):
528 // for that_entry_index in [start_entry_index, end_entry_index):
529 // reduce_func_idx(this_buff, that_buff, that_entry_index)
530 
532  const auto hash_type = query_mem_desc_.getQueryDescriptionType();
534  return {};
535  }
536  auto reduction_code = setup_functions_ir(hash_type);
537  isEmpty(reduction_code);
541  reduceOneEntryNoCollisions(reduction_code);
542  reduceOneEntryNoCollisionsIdx(reduction_code);
543  break;
544  }
546  reduceOneEntryBaseline(reduction_code);
547  reduceOneEntryBaselineIdx(reduction_code);
548  break;
549  }
550  default: {
551  LOG(FATAL) << "Unexpected query description type";
552  }
553  }
554  reduceLoop(reduction_code);
555  // For small result sets, avoid native code generation and use the interpreter instead.
558  return reduction_code;
559  }
560  std::lock_guard<std::mutex> reduction_guard(ReductionCode::s_reduction_mutex);
561  CodeCacheKey key{cacheKey()};
562  const auto val_ptr = s_code_cache.get(key);
563  if (val_ptr) {
564  return {reinterpret_cast<ReductionCode::FuncPtr>(std::get<0>(val_ptr->first.front())),
565  nullptr,
566  nullptr,
567  nullptr,
568  std::move(reduction_code.ir_is_empty),
569  std::move(reduction_code.ir_reduce_one_entry),
570  std::move(reduction_code.ir_reduce_one_entry_idx),
571  std::move(reduction_code.ir_reduce_loop)};
572  }
573  reduction_code.cgen_state.reset(new CgenState({}, false));
574  auto cgen_state = reduction_code.cgen_state.get();
575  std::unique_ptr<llvm::Module> module(runtime_module_shallow_copy(cgen_state));
576  cgen_state->module_ = module.get();
577  auto ir_is_empty = create_llvm_function(reduction_code.ir_is_empty.get(), cgen_state);
578  auto ir_reduce_one_entry =
579  create_llvm_function(reduction_code.ir_reduce_one_entry.get(), cgen_state);
580  auto ir_reduce_one_entry_idx =
581  create_llvm_function(reduction_code.ir_reduce_one_entry_idx.get(), cgen_state);
582  auto ir_reduce_loop =
583  create_llvm_function(reduction_code.ir_reduce_loop.get(), cgen_state);
584  std::unordered_map<const Function*, llvm::Function*> f;
585  f.emplace(reduction_code.ir_is_empty.get(), ir_is_empty);
586  f.emplace(reduction_code.ir_reduce_one_entry.get(), ir_reduce_one_entry);
587  f.emplace(reduction_code.ir_reduce_one_entry_idx.get(), ir_reduce_one_entry_idx);
588  f.emplace(reduction_code.ir_reduce_loop.get(), ir_reduce_loop);
589  translate_function(reduction_code.ir_is_empty.get(), ir_is_empty, reduction_code, f);
591  reduction_code.ir_reduce_one_entry.get(), ir_reduce_one_entry, reduction_code, f);
592  translate_function(reduction_code.ir_reduce_one_entry_idx.get(),
593  ir_reduce_one_entry_idx,
594  reduction_code,
595  f);
597  reduction_code.ir_reduce_loop.get(), ir_reduce_loop, reduction_code, f);
598  reduction_code.llvm_reduce_loop = ir_reduce_loop;
599  reduction_code.module = std::move(module);
600  return finalizeReductionCode(std::move(reduction_code),
601  ir_is_empty,
602  ir_reduce_one_entry,
603  ir_reduce_one_entry_idx,
604  key);
605 }
606 
608  // Clear stub cache to avoid crash caused by non-deterministic static destructor order
609  // of LLVM context and the cache.
612  g_rt_module = nullptr;
613 }
614 
615 void ResultSetReductionJIT::isEmpty(const ReductionCode& reduction_code) const {
616  auto ir_is_empty = reduction_code.ir_is_empty.get();
619  Value* key{nullptr};
620  Value* empty_key_val{nullptr};
621  const auto keys_ptr = ir_is_empty->arg(0);
626  CHECK_LT(static_cast<size_t>(query_mem_desc_.getTargetIdxForKey()),
627  target_init_vals_.size());
628  const int64_t target_slot_off =
630  const auto slot_ptr = ir_is_empty->add<GetElementPtr>(
631  keys_ptr,
632  ir_is_empty->addConstant<ConstantInt>(target_slot_off, Type::Int32),
633  "is_empty_slot_ptr");
634  const auto compact_sz =
636  key = emit_read_int_from_buff(slot_ptr, compact_sz, ir_is_empty);
637  empty_key_val = ir_is_empty->addConstant<ConstantInt>(
639  } else {
641  case 4: {
644  key = emit_load_i32(keys_ptr, ir_is_empty);
645  empty_key_val = ir_is_empty->addConstant<ConstantInt>(EMPTY_KEY_32, Type::Int32);
646  break;
647  }
648  case 8: {
649  key = emit_load_i64(keys_ptr, ir_is_empty);
650  empty_key_val = ir_is_empty->addConstant<ConstantInt>(EMPTY_KEY_64, Type::Int64);
651  break;
652  }
653  default:
654  LOG(FATAL) << "Invalid key width";
655  }
656  }
657  const auto ret =
658  ir_is_empty->add<ICmp>(ICmp::Predicate::EQ, key, empty_key_val, "is_key_empty");
659  ir_is_empty->add<Ret>(ret);
660 }
661 
663  const ReductionCode& reduction_code) const {
664  auto ir_reduce_one_entry = reduction_code.ir_reduce_one_entry.get();
665  const auto this_row_ptr = ir_reduce_one_entry->arg(0);
666  const auto that_row_ptr = ir_reduce_one_entry->arg(1);
667  const auto that_is_empty =
668  ir_reduce_one_entry->add<Call>(reduction_code.ir_is_empty.get(),
669  std::vector<const Value*>{that_row_ptr},
670  "that_is_empty");
671  ir_reduce_one_entry->add<ReturnEarly>(
672  that_is_empty, ir_reduce_one_entry->addConstant<ConstantInt>(0, Type::Int32), "");
673 
674  const auto key_bytes = get_key_bytes_rowwise(query_mem_desc_);
675  if (key_bytes) { // copy the key from right hand side
676  ir_reduce_one_entry->add<MemCpy>(
677  this_row_ptr,
678  that_row_ptr,
679  ir_reduce_one_entry->addConstant<ConstantInt>(key_bytes, Type::Int32));
680  }
681 
682  const auto key_bytes_with_padding = align_to_int64(key_bytes);
683  const auto key_bytes_lv =
684  ir_reduce_one_entry->addConstant<ConstantInt>(key_bytes_with_padding, Type::Int32);
685  const auto this_targets_start_ptr = ir_reduce_one_entry->add<GetElementPtr>(
686  this_row_ptr, key_bytes_lv, "this_targets_start");
687  const auto that_targets_start_ptr = ir_reduce_one_entry->add<GetElementPtr>(
688  that_row_ptr, key_bytes_lv, "that_targets_start");
689 
691  ir_reduce_one_entry, this_targets_start_ptr, that_targets_start_ptr);
692 }
693 
695  Function* ir_reduce_one_entry,
696  Value* this_targets_start_ptr,
697  Value* that_targets_start_ptr) const {
698  const auto& col_slot_context = query_mem_desc_.getColSlotContext();
699  Value* this_targets_ptr = this_targets_start_ptr;
700  Value* that_targets_ptr = that_targets_start_ptr;
701  size_t init_agg_val_idx = 0;
702  for (size_t target_logical_idx = 0; target_logical_idx < targets_.size();
703  ++target_logical_idx) {
704  const auto& target_info = targets_[target_logical_idx];
705  const auto& slots_for_col = col_slot_context.getSlotsForCol(target_logical_idx);
706  Value* this_ptr2{nullptr};
707  Value* that_ptr2{nullptr};
708 
709  bool two_slot_target{false};
710  if (target_info.is_agg &&
711  (target_info.agg_kind == kAVG ||
712  (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()))) {
713  // Note that this assumes if one of the slot pairs in a given target is an array,
714  // all slot pairs are arrays. Currently this is true for all geo targets, but we
715  // should better codify and store this information in the future
716  two_slot_target = true;
717  }
718 
719  for (size_t target_slot_idx = slots_for_col.front();
720  target_slot_idx < slots_for_col.back() + 1;
721  target_slot_idx += 2) {
722  const auto slot_off_val = query_mem_desc_.getPaddedSlotWidthBytes(target_slot_idx);
723  const auto slot_off =
724  ir_reduce_one_entry->addConstant<ConstantInt>(slot_off_val, Type::Int32);
725  if (UNLIKELY(two_slot_target)) {
726  const auto desc = "target_" + std::to_string(target_logical_idx) + "_second_slot";
727  this_ptr2 = ir_reduce_one_entry->add<GetElementPtr>(
728  this_targets_ptr, slot_off, "this_" + desc);
729  that_ptr2 = ir_reduce_one_entry->add<GetElementPtr>(
730  that_targets_ptr, slot_off, "that_" + desc);
731  }
732  reduceOneSlot(this_targets_ptr,
733  this_ptr2,
734  that_targets_ptr,
735  that_ptr2,
736  target_info,
737  target_logical_idx,
738  target_slot_idx,
739  init_agg_val_idx,
740  slots_for_col.front(),
741  ir_reduce_one_entry);
742  auto increment_agg_val_idx_maybe =
743  [&init_agg_val_idx, &target_logical_idx, this](const int slot_count) {
745  query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) < 0) {
746  init_agg_val_idx += slot_count;
747  }
748  };
749  if (target_logical_idx + 1 == targets_.size() &&
750  target_slot_idx + 1 >= slots_for_col.back()) {
751  break;
752  }
753  const auto next_desc =
754  "target_" + std::to_string(target_logical_idx + 1) + "_first_slot";
755  if (UNLIKELY(two_slot_target)) {
756  increment_agg_val_idx_maybe(2);
757  const auto two_slot_off = ir_reduce_one_entry->addConstant<ConstantInt>(
758  slot_off_val + query_mem_desc_.getPaddedSlotWidthBytes(target_slot_idx + 1),
759  Type::Int32);
760  this_targets_ptr = ir_reduce_one_entry->add<GetElementPtr>(
761  this_targets_ptr, two_slot_off, "this_" + next_desc);
762  that_targets_ptr = ir_reduce_one_entry->add<GetElementPtr>(
763  that_targets_ptr, two_slot_off, "that_" + next_desc);
764  } else {
765  increment_agg_val_idx_maybe(1);
766  this_targets_ptr = ir_reduce_one_entry->add<GetElementPtr>(
767  this_targets_ptr, slot_off, "this_" + next_desc);
768  that_targets_ptr = ir_reduce_one_entry->add<GetElementPtr>(
769  that_targets_ptr, slot_off, "that_" + next_desc);
770  }
771  }
772  }
773  ir_reduce_one_entry->add<Ret>(
774  ir_reduce_one_entry->addConstant<ConstantInt>(0, Type::Int32));
775 }
776 
778  const ReductionCode& reduction_code) const {
779  auto ir_reduce_one_entry = reduction_code.ir_reduce_one_entry.get();
780  const auto this_targets_ptr_arg = ir_reduce_one_entry->arg(0);
781  const auto that_targets_ptr_arg = ir_reduce_one_entry->arg(1);
782  Value* this_ptr1 = this_targets_ptr_arg;
783  Value* that_ptr1 = that_targets_ptr_arg;
784  size_t j = 0;
785  size_t init_agg_val_idx = 0;
786  for (size_t target_logical_idx = 0; target_logical_idx < targets_.size();
787  ++target_logical_idx) {
788  const auto& target_info = targets_[target_logical_idx];
789  Value* this_ptr2{nullptr};
790  Value* that_ptr2{nullptr};
791  if (target_info.is_agg &&
792  (target_info.agg_kind == kAVG ||
793  (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()))) {
794  const auto desc = "target_" + std::to_string(target_logical_idx) + "_second_slot";
795  const auto second_slot_rel_off =
796  ir_reduce_one_entry->addConstant<ConstantInt>(sizeof(int64_t), Type::Int32);
797  this_ptr2 = ir_reduce_one_entry->add<GetElementPtr>(
798  this_ptr1, second_slot_rel_off, "this_" + desc);
799  that_ptr2 = ir_reduce_one_entry->add<GetElementPtr>(
800  that_ptr1, second_slot_rel_off, "that_" + desc);
801  }
802  reduceOneSlot(this_ptr1,
803  this_ptr2,
804  that_ptr1,
805  that_ptr2,
806  target_info,
807  target_logical_idx,
808  j,
809  init_agg_val_idx,
810  j,
811  ir_reduce_one_entry);
812  if (target_logical_idx + 1 == targets_.size()) {
813  break;
814  }
816  init_agg_val_idx = advance_slot(init_agg_val_idx, target_info, false);
817  } else {
818  if (query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) < 0) {
819  init_agg_val_idx = advance_slot(init_agg_val_idx, target_info, false);
820  }
821  }
822  j = advance_slot(j, target_info, false);
823  const auto next_desc =
824  "target_" + std::to_string(target_logical_idx + 1) + "_first_slot";
825  auto next_slot_rel_off = ir_reduce_one_entry->addConstant<ConstantInt>(
826  init_agg_val_idx * sizeof(int64_t), Type::Int32);
827  this_ptr1 = ir_reduce_one_entry->add<GetElementPtr>(
828  this_targets_ptr_arg, next_slot_rel_off, next_desc);
829  that_ptr1 = ir_reduce_one_entry->add<GetElementPtr>(
830  that_targets_ptr_arg, next_slot_rel_off, next_desc);
831  }
832  ir_reduce_one_entry->add<Ret>(
833  ir_reduce_one_entry->addConstant<ConstantInt>(0, Type::Int32));
834 }
835 
// ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx
// NOTE(review): extraction artifact — the signature line (original line 836)
// and original lines 839-842 and 850 were dropped from this dump; all
// surviving tokens are preserved verbatim. Confirm gaps against upstream.
// Emits IR that reduces the entry at 'entry_idx' for a perfect-hash
// (no-collisions) row-wise layout: the entry's row lives at
// buff + entry_idx * row_bytes in both buffers, so this just forms the two
// row pointers and delegates to the ir_reduce_one_entry function.
 837  const ReductionCode& reduction_code) const {
 838  auto ir_reduce_one_entry_idx = reduction_code.ir_reduce_one_entry_idx.get();
// NOTE(review): original lines 839-842 missing here — presumably CHECKs on
// the query description type; verify against the original source.
 843  const auto this_buff = ir_reduce_one_entry_idx->arg(0);
 844  const auto that_buff = ir_reduce_one_entry_idx->arg(1);
 845  const auto entry_idx = ir_reduce_one_entry_idx->arg(2);
 846  const auto this_qmd_handle = ir_reduce_one_entry_idx->arg(4);
 847  const auto that_qmd_handle = ir_reduce_one_entry_idx->arg(5);
 848  const auto serialized_varlen_buffer_arg = ir_reduce_one_entry_idx->arg(6);
// Row width in bytes as an IR constant. NOTE(review): the constant's value
// expression (original line 850, likely get_row_bytes(query_mem_desc_)) is
// missing from this dump.
 849  const auto row_bytes = ir_reduce_one_entry_idx->addConstant<ConstantInt>(
// Byte offset of the entry: entry_idx * row_bytes.
 851  const auto row_off_in_bytes = ir_reduce_one_entry_idx->add<BinaryOperator>(
 852  BinaryOperator::BinaryOp::Mul, entry_idx, row_bytes, "row_off_in_bytes");
 853  const auto this_row_ptr = ir_reduce_one_entry_idx->add<GetElementPtr>(
 854  this_buff, row_off_in_bytes, "this_row_ptr");
 855  const auto that_row_ptr = ir_reduce_one_entry_idx->add<GetElementPtr>(
 856  that_buff, row_off_in_bytes, "that_row_ptr");
// Delegate the per-entry reduction and forward its return code.
 857  const auto reduce_rc = ir_reduce_one_entry_idx->add<Call>(
 858  reduction_code.ir_reduce_one_entry.get(),
 859  std::vector<const Value*>{this_row_ptr,
 860  that_row_ptr,
 861  this_qmd_handle,
 862  that_qmd_handle,
 863  serialized_varlen_buffer_arg},
 864  "");
 865  ir_reduce_one_entry_idx->add<Ret>(reduce_rc);
 866 }
867 
// ResultSetReductionJIT::reduceOneEntryBaselineIdx
// NOTE(review): extraction artifact — the signature line (original line 868)
// and original lines 871-874, 883 and 928 were dropped from this dump; all
// surviving tokens are preserved verbatim. Confirm gaps against upstream.
// Emits IR that reduces one entry of a baseline-hash layout: looks up (or
// inserts) the matching group in 'this' buffer via get_group_value_reduction_rt,
// then reduces the target slots of the two rows.
 869  const ReductionCode& reduction_code) const {
 870  auto ir_reduce_one_entry_idx = reduction_code.ir_reduce_one_entry_idx.get();
// NOTE(review): original lines 871-874 missing — presumably CHECKs on the
// query description type and key width; verify against the original source.
 875  const auto this_buff = ir_reduce_one_entry_idx->arg(0);
 876  const auto that_buff = ir_reduce_one_entry_idx->arg(1);
 877  const auto that_entry_idx = ir_reduce_one_entry_idx->arg(2);
 878  const auto that_entry_count = ir_reduce_one_entry_idx->arg(3);
 879  const auto this_qmd_handle = ir_reduce_one_entry_idx->arg(4);
 880  const auto that_qmd_handle = ir_reduce_one_entry_idx->arg(5);
 881  const auto serialized_varlen_buffer_arg = ir_reduce_one_entry_idx->arg(6);
// NOTE(review): the row-bytes value expression (original line 883) is missing.
 882  const auto row_bytes = ir_reduce_one_entry_idx->addConstant<ConstantInt>(
 884  const auto that_row_off_in_bytes = ir_reduce_one_entry_idx->add<BinaryOperator>(
 885  BinaryOperator::BinaryOp::Mul, that_entry_idx, row_bytes, "that_row_off_in_bytes");
 886  const auto that_row_ptr = ir_reduce_one_entry_idx->add<GetElementPtr>(
 887  that_buff, that_row_off_in_bytes, "that_row_ptr");
// Nothing to merge if the source entry is empty: return 0 early.
 888  const auto that_is_empty =
 889  ir_reduce_one_entry_idx->add<Call>(reduction_code.ir_is_empty.get(),
 890  std::vector<const Value*>{that_row_ptr},
 891  "that_is_empty");
 892  ir_reduce_one_entry_idx->add<ReturnEarly>(
 893  that_is_empty,
 894  ir_reduce_one_entry_idx->addConstant<ConstantInt>(0, Type::Int32),
 895  "");
 896  const auto key_count = query_mem_desc_.getGroupbyColCount();
// Stack slots used as out-parameters for the group-lookup runtime call.
 897  const auto one_element =
 898  ir_reduce_one_entry_idx->addConstant<ConstantInt>(1, Type::Int32);
 899  const auto this_targets_ptr_i64_ptr = ir_reduce_one_entry_idx->add<Alloca>(
 900  Type::Int64Ptr, one_element, "this_targets_ptr_out");
 901  const auto this_is_empty_ptr =
 902  ir_reduce_one_entry_idx->add<Alloca>(Type::Int8, one_element, "this_is_empty_out");
// Find (or create) the group slot in 'this' buffer matching that entry's key.
 903  ir_reduce_one_entry_idx->add<ExternalCall>(
 904  "get_group_value_reduction_rt",
 905  Type::Void,
 906  std::vector<const Value*>{
 907  this_buff,
 908  that_row_ptr,
 909  ir_reduce_one_entry_idx->addConstant<ConstantInt>(key_count, Type::Int32),
 910  this_qmd_handle,
 911  that_buff,
 912  that_entry_idx,
 913  that_entry_count,
 914  row_bytes,
 915  this_targets_ptr_i64_ptr,
 916  this_is_empty_ptr},
 917  "");
 918  const auto this_targets_ptr_i64 = ir_reduce_one_entry_idx->add<Load>(
 919  this_targets_ptr_i64_ptr, "this_targets_ptr_i64");
 920  auto this_is_empty =
 921  ir_reduce_one_entry_idx->add<Load>(this_is_empty_ptr, "this_is_empty");
 922  this_is_empty = ir_reduce_one_entry_idx->add<Cast>(
 923  Cast::CastOp::Trunc, this_is_empty, Type::Int1, "this_is_empty_bool");
// If no destination group was produced, bail out with 0.
 924  ir_reduce_one_entry_idx->add<ReturnEarly>(
 925  this_is_empty,
 926  ir_reduce_one_entry_idx->addConstant<ConstantInt>(0, Type::Int32),
 927  "");
// NOTE(review): original line 928 missing here — likely the definition of
// 'key_qw_count' used below; verify against the original source.
 929  const auto this_targets_ptr = ir_reduce_one_entry_idx->add<Cast>(
 930  Cast::CastOp::BitCast, this_targets_ptr_i64, Type::Int8Ptr, "this_targets_ptr");
// Skip past the key columns of the source row to reach its target slots.
 931  const auto key_byte_count = key_qw_count * sizeof(int64_t);
 932  const auto key_byte_count_lv =
 933  ir_reduce_one_entry_idx->addConstant<ConstantInt>(key_byte_count, Type::Int32);
 934  const auto that_targets_ptr = ir_reduce_one_entry_idx->add<GetElementPtr>(
 935  that_row_ptr, key_byte_count_lv, "that_targets_ptr");
// Reduce the two target areas and forward the return code.
 936  const auto reduce_rc = ir_reduce_one_entry_idx->add<Call>(
 937  reduction_code.ir_reduce_one_entry.get(),
 938  std::vector<const Value*>{this_targets_ptr,
 939  that_targets_ptr,
 940  this_qmd_handle,
 941  that_qmd_handle,
 942  serialized_varlen_buffer_arg},
 943  "");
 944  ir_reduce_one_entry_idx->add<Ret>(reduce_rc);
 945 }
946 
947 namespace {
948 
949 void generate_loop_body(For* for_loop,
950  Function* ir_reduce_loop,
951  Function* ir_reduce_one_entry_idx,
952  Value* this_buff,
953  Value* that_buff,
954  Value* start_index,
955  Value* that_entry_count,
956  Value* this_qmd_handle,
957  Value* that_qmd_handle,
958  Value* serialized_varlen_buffer) {
959  const auto that_entry_idx = for_loop->add<BinaryOperator>(
960  BinaryOperator::BinaryOp::Add, for_loop->iter(), start_index, "that_entry_idx");
961  const auto watchdog_sample_seed =
962  for_loop->add<Cast>(Cast::CastOp::SExt, that_entry_idx, Type::Int64, "");
963  const auto watchdog_triggered =
964  for_loop->add<ExternalCall>("check_watchdog_rt",
965  Type::Int8,
966  std::vector<const Value*>{watchdog_sample_seed},
967  "");
968  const auto watchdog_triggered_bool =
969  for_loop->add<ICmp>(ICmp::Predicate::NE,
970  watchdog_triggered,
971  ir_reduce_loop->addConstant<ConstantInt>(0, Type::Int8),
972  "");
973  for_loop->add<ReturnEarly>(
974  watchdog_triggered_bool,
975  ir_reduce_loop->addConstant<ConstantInt>(WATCHDOG_ERROR, Type::Int32),
976  "");
977  const auto reduce_rc =
978  for_loop->add<Call>(ir_reduce_one_entry_idx,
979  std::vector<const Value*>{this_buff,
980  that_buff,
981  that_entry_idx,
982  that_entry_count,
983  this_qmd_handle,
984  that_qmd_handle,
985  serialized_varlen_buffer},
986  "");
987 
988  auto reduce_rc_bool =
989  for_loop->add<ICmp>(ICmp::Predicate::NE,
990  reduce_rc,
991  ir_reduce_loop->addConstant<ConstantInt>(0, Type::Int32),
992  "");
993  for_loop->add<ReturnEarly>(reduce_rc_bool, reduce_rc, "");
994 }
995 
996 } // namespace
997 
998 void ResultSetReductionJIT::reduceLoop(const ReductionCode& reduction_code) const {
999  auto ir_reduce_loop = reduction_code.ir_reduce_loop.get();
1000  const auto this_buff_arg = ir_reduce_loop->arg(0);
1001  const auto that_buff_arg = ir_reduce_loop->arg(1);
1002  const auto start_index_arg = ir_reduce_loop->arg(2);
1003  const auto end_index_arg = ir_reduce_loop->arg(3);
1004  const auto that_entry_count_arg = ir_reduce_loop->arg(4);
1005  const auto this_qmd_handle_arg = ir_reduce_loop->arg(5);
1006  const auto that_qmd_handle_arg = ir_reduce_loop->arg(6);
1007  const auto serialized_varlen_buffer_arg = ir_reduce_loop->arg(7);
1008  For* for_loop =
1009  static_cast<For*>(ir_reduce_loop->add<For>(start_index_arg, end_index_arg, ""));
1010  generate_loop_body(for_loop,
1011  ir_reduce_loop,
1012  reduction_code.ir_reduce_one_entry_idx.get(),
1013  this_buff_arg,
1014  that_buff_arg,
1015  start_index_arg,
1016  that_entry_count_arg,
1017  this_qmd_handle_arg,
1018  that_qmd_handle_arg,
1019  serialized_varlen_buffer_arg);
1020  ir_reduce_loop->add<Ret>(ir_reduce_loop->addConstant<ConstantInt>(0, Type::Int32));
1021 }
1022 
// ResultSetReductionJIT::reduceOneSlot
// NOTE(review): extraction artifact — the signature's first line (original
// line 1023) and original lines 1033, 1060 and 1068 were dropped from this
// dump; all surviving tokens are preserved verbatim. Confirm gaps upstream.
// Dispatches the reduction of a single target slot: true aggregates go to
// reduceOneAggregateSlot, kSINGLE_VALUE uses a checked projection write, and
// everything else is a plain projection write (with extra varlen handling
// for kSAMPLE on variable-length types).
 1024  Value* this_ptr2,
 1025  Value* that_ptr1,
 1026  Value* that_ptr2,
 1027  const TargetInfo& target_info,
 1028  const size_t target_logical_idx,
 1029  const size_t target_slot_idx,
 1030  const size_t init_agg_val_idx,
 1031  const size_t first_slot_idx_for_target,
 1032  Function* ir_reduce_one_entry) const {
// Slots backed by a group-by key column need no reduction.
// NOTE(review): the enclosing condition (original line 1033, presumably a
// targetGroupbyIndicesSize() guard) is missing from this dump.
 1034  if (query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) >= 0) {
 1035  return;
 1036  }
 1037  }
 1038  const bool float_argument_input = takes_float_argument(target_info);
 1039  const auto chosen_bytes =
 1040  get_width_for_slot(target_slot_idx, float_argument_input, query_mem_desc_);
 1041  CHECK_LT(init_agg_val_idx, target_init_vals_.size());
 1042  auto init_val = target_init_vals_[init_agg_val_idx];
// Genuine aggregates (excluding SINGLE_VALUE/SAMPLE) use aggregate reduction.
 1043  if (target_info.is_agg &&
 1044  (target_info.agg_kind != kSINGLE_VALUE && target_info.agg_kind != kSAMPLE)) {
 1045  reduceOneAggregateSlot(this_ptr1,
 1046  this_ptr2,
 1047  that_ptr1,
 1048  that_ptr2,
 1049  target_info,
 1050  target_logical_idx,
 1051  target_slot_idx,
 1052  init_val,
 1053  chosen_bytes,
 1054  ir_reduce_one_entry);
 1055  } else if (target_info.agg_kind == kSINGLE_VALUE) {
// Checked write: a non-zero rc signals conflicting values for SINGLE_VALUE.
 1056  const auto checked_rc = emit_checked_write_projection(
 1057  this_ptr1, that_ptr1, init_val, chosen_bytes, ir_reduce_one_entry);
 1058 
// NOTE(review): the ICmp predicate line (original line 1060, presumably
// ICmp::Predicate::NE) is missing from this dump.
 1059  auto checked_rc_bool = ir_reduce_one_entry->add<ICmp>(
 1061  checked_rc,
 1062  ir_reduce_one_entry->addConstant<ConstantInt>(0, Type::Int32),
 1063  "");
 1064 
 1065  ir_reduce_one_entry->add<ReturnEarly>(checked_rc_bool, checked_rc, "");
 1066 
 1067  } else {
// NOTE(review): the call line (original line 1068, presumably
// emit_write_projection() given the argument list) is missing from this dump.
 1069  this_ptr1, that_ptr1, init_val, chosen_bytes, ir_reduce_one_entry);
// Variable-length SAMPLE targets additionally translate their offsets via
// the serialized varlen buffer.
 1070  if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()) {
 1071  CHECK(this_ptr2 && that_ptr2);
 1072  size_t length_to_elems{0};
 1073  if (target_info.sql_type.is_geometry()) {
 1074  // TODO: Assumes hard-coded sizes for geometry targets
 1075  length_to_elems = target_slot_idx == first_slot_idx_for_target ? 1 : 4;
 1076  } else {
 1077  const auto& elem_ti = target_info.sql_type.get_elem_type();
 1078  length_to_elems = target_info.sql_type.is_string() ? 1 : elem_ti.get_size();
 1079  }
 1080  const auto serialized_varlen_buffer_arg = ir_reduce_one_entry->arg(4);
 1081  ir_reduce_one_entry->add<ExternalCall>(
 1082  "serialized_varlen_buffer_sample",
 1083  Type::Void,
 1084  std::vector<const Value*>{
 1085  serialized_varlen_buffer_arg,
 1086  this_ptr1,
 1087  this_ptr2,
 1088  that_ptr1,
 1089  that_ptr2,
 1090  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64),
 1091  ir_reduce_one_entry->addConstant<ConstantInt>(length_to_elems,
 1092  Type::Int64)},
 1093  "");
 1094  }
 1095  }
 1096 }
1097 
// ResultSetReductionJIT::reduceOneAggregateSlot
// NOTE(review): extraction artifact — the signature's first line (original
// line 1098) and the four linked call lines (original lines 1113, 1130, 1140,
// 1150) were dropped from this dump; surviving tokens preserved verbatim.
// Emits the reduction for one true-aggregate slot, switching on the
// aggregate kind (COUNT/APPROX_COUNT_DISTINCT, AVG, SUM, MIN, MAX).
 1099  Value* this_ptr2,
 1100  Value* that_ptr1,
 1101  Value* that_ptr2,
 1102  const TargetInfo& target_info,
 1103  const size_t target_logical_idx,
 1104  const size_t target_slot_idx,
 1105  const int64_t init_val,
 1106  const int8_t chosen_bytes,
 1107  Function* ir_reduce_one_entry) const {
 1108  switch (target_info.agg_kind) {
 1109  case kCOUNT:
 1110  case kAPPROX_COUNT_DISTINCT: {
// Distinct counts merge their bitmap/set payloads instead of adding counts.
// NOTE(review): the call line (original line 1113, presumably
// reduceOneCountDistinctSlot() given the argument list) is missing.
 1111  if (is_distinct_target(target_info)) {
 1112  CHECK_EQ(static_cast<size_t>(chosen_bytes), sizeof(int64_t));
 1114  this_ptr1, that_ptr1, target_logical_idx, ir_reduce_one_entry);
 1115  break;
 1116  }
 1117  CHECK_EQ(int64_t(0), init_val);
 1118  emit_aggregate_one_count(this_ptr1, that_ptr1, chosen_bytes, ir_reduce_one_entry);
 1119  break;
 1120  }
 1121  case kAVG: {
 1122  // Ignore float argument compaction for count component for fear of its overflow
 1123  emit_aggregate_one_count(this_ptr2,
 1124  that_ptr2,
 1125  query_mem_desc_.getPaddedSlotWidthBytes(target_slot_idx),
 1126  ir_reduce_one_entry);
 1127  }
 1128  // fall thru
// NOTE(review): the call lines opening each case below (original lines 1130,
// 1140, 1150 — presumably emit_aggregate_one_nullable_value("sum"/"min"/"max",)
// are missing from this dump.
 1129  case kSUM: {
 1131  this_ptr1,
 1132  that_ptr1,
 1133  init_val,
 1134  chosen_bytes,
 1135  target_info,
 1136  ir_reduce_one_entry);
 1137  break;
 1138  }
 1139  case kMIN: {
 1141  this_ptr1,
 1142  that_ptr1,
 1143  init_val,
 1144  chosen_bytes,
 1145  target_info,
 1146  ir_reduce_one_entry);
 1147  break;
 1148  }
 1149  case kMAX: {
 1151  this_ptr1,
 1152  that_ptr1,
 1153  init_val,
 1154  chosen_bytes,
 1155  target_info,
 1156  ir_reduce_one_entry);
 1157  break;
 1158  }
 1159  default:
 1160  LOG(FATAL) << "Invalid aggregate type";
 1161  }
 1162 }
1163 
// ResultSetReductionJIT::reduceOneCountDistinctSlot
// NOTE(review): extraction artifact — the signature's first line (original
// line 1164) and original line 1169 (likely a CHECK on the count-distinct
// descriptor index) were dropped from this dump; tokens preserved verbatim.
// Emits a call into the runtime to union the 'that' distinct set into the
// 'this' distinct set for the given logical target.
 1165  Value* this_ptr1,
 1166  Value* that_ptr1,
 1167  const size_t target_logical_idx,
 1168  Function* ir_reduce_one_entry) const {
// Both slots hold opaque 64-bit handles to count-distinct sets.
 1170  const auto old_set_handle = emit_load_i64(this_ptr1, ir_reduce_one_entry);
 1171  const auto new_set_handle = emit_load_i64(that_ptr1, ir_reduce_one_entry);
 1172  const auto this_qmd_arg = ir_reduce_one_entry->arg(2);
 1173  const auto that_qmd_arg = ir_reduce_one_entry->arg(3);
// The runtime resolves descriptors from the two query-memory handles and
// merges new_set into old_set in place.
 1174  ir_reduce_one_entry->add<ExternalCall>(
 1175  "count_distinct_set_union_jit_rt",
 1176  Type::Void,
 1177  std::vector<const Value*>{
 1178  new_set_handle,
 1179  old_set_handle,
 1180  that_qmd_arg,
 1181  this_qmd_arg,
 1182  ir_reduce_one_entry->addConstant<ConstantInt>(target_logical_idx, Type::Int64)},
 1183  "");
 1184 }
1185 
// ResultSetReductionJIT::finalizeReductionCode
// NOTE(review): extraction artifact — the signature's first line (original
// line 1186) and original lines 1193, 1195 and 1203 were dropped from this
// dump; surviving tokens preserved verbatim. Confirm gaps against upstream.
// Compiles the generated LLVM reduction function to native CPU code, stores
// the resulting entry point in the ReductionCode, and registers the
// execution engine in the code cache under 'key'.
 1187  ReductionCode reduction_code,
 1188  const llvm::Function* ir_is_empty,
 1189  const llvm::Function* ir_reduce_one_entry,
 1190  const llvm::Function* ir_reduce_one_entry_idx,
 1191  const CodeCacheKey& key) const {
// NOTE(review): CompilationOptions initializer fields (original line 1193)
// are missing from this dump.
 1192  CompilationOptions co{
// Ownership of the module transfers to the execution engine below.
 1194  reduction_code.module.release();
// NOTE(review): the call line (original line 1195, presumably
// CodeGenerator::generateNativeCPUCode producing 'ee') is missing.
 1196  reduction_code.llvm_reduce_loop, {reduction_code.llvm_reduce_loop}, co);
 1197  reduction_code.func_ptr = reinterpret_cast<ReductionCode::FuncPtr>(
 1198  ee->getPointerToFunction(reduction_code.llvm_reduce_loop));
// Cache the (entry point, engine) pair so identical reductions reuse it.
 1199  auto cache_val =
 1200  std::make_tuple(reinterpret_cast<void*>(reduction_code.func_ptr), std::move(ee));
 1201  std::vector<std::tuple<void*, ExecutionEngineWrapper>> cache_vals;
 1202  cache_vals.emplace_back(std::move(cache_val));
// NOTE(review): the cache-insertion call line (original line 1203, presumably
// Executor::addCodeToCache(key,) is missing from this dump.
 1204  std::move(cache_vals),
 1205  reduction_code.llvm_reduce_loop->getParent(),
 1206  s_code_cache);
 1207  return reduction_code;
 1208 }
1209 
1210 namespace {
1211 
1212 std::string target_info_key(const TargetInfo& target_info) {
1213  return std::to_string(target_info.is_agg) + "\n" +
1214  std::to_string(target_info.agg_kind) + "\n" +
1215  target_info.sql_type.get_type_name() + "\n" +
1216  std::to_string(target_info.sql_type.get_notnull()) + "\n" +
1217  target_info.agg_arg_type.get_type_name() + "\n" +
1218  std::to_string(target_info.agg_arg_type.get_notnull()) + "\n" +
1219  std::to_string(target_info.skip_null_val) + "\n" +
1220  std::to_string(target_info.is_distinct);
1221 }
1222 
1223 } // namespace
1224 
1225 std::string ResultSetReductionJIT::cacheKey() const {
1226  std::vector<std::string> target_init_vals_strings;
1227  std::transform(target_init_vals_.begin(),
1228  target_init_vals_.end(),
1229  std::back_inserter(target_init_vals_strings),
1230  [](const int64_t v) { return std::to_string(v); });
1231  const auto target_init_vals_key =
1232  boost::algorithm::join(target_init_vals_strings, ", ");
1233  std::vector<std::string> targets_strings;
1234  std::transform(
1235  targets_.begin(),
1236  targets_.end(),
1237  std::back_inserter(targets_strings),
1238  [](const TargetInfo& target_info) { return target_info_key(target_info); });
1239  const auto targets_key = boost::algorithm::join(targets_strings, ", ");
1240  return query_mem_desc_.reductionKey() + "\n" + target_init_vals_key + "\n" +
1241  targets_key;
1242 }
void emit_aggregate_one_nullable_value(const std::string &agg_kind, Value *val_ptr, Value *other_ptr, const int64_t init_val, const size_t chosen_bytes, const TargetInfo &agg_info, Function *ir_reduce_one_entry)
QueryDescriptionType
Definition: Types.h:26
void clear()
Definition: LruCache.hpp:57
#define CHECK_EQ(x, y)
Definition: Logger.h:201
void reduceOneSlot(Value *this_ptr1, Value *this_ptr2, Value *that_ptr1, Value *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const size_t init_agg_val_idx, const size_t first_slot_idx_for_target, Function *ir_reduce_one_entry) const
const int32_t groups_buffer_size return groups_buffer
bool is_aggregate_query(const QueryDescriptionType hash_type)
std::unique_ptr< llvm::Module > module(runtime_module_shallow_copy(cgen_state))
void count_distinct_set_union(const int64_t new_set_handle, const int64_t old_set_handle, const CountDistinctDescriptor &new_count_distinct_desc, const CountDistinctDescriptor &old_count_distinct_desc)
__device__ bool dynamic_watchdog()
#define EMPTY_KEY_64
static void addCodeToCache(const CodeCacheKey &, std::vector< std::tuple< void *, ExecutionEngineWrapper >>, llvm::Module *, CodeCache &)
void count_distinct_set_union_jit_rt(const int64_t new_set_handle, const int64_t old_set_handle, const void *that_qmd_handle, const void *this_qmd_handle, const int64_t target_logical_idx)
const std::string & label() const
std::unique_ptr< llvm::Module > runtime_module_shallow_copy(CgenState *cgen_state)
void reduceOneEntryNoCollisions(const ReductionCode &reduction_code) const
void serialized_varlen_buffer_sample(const void *serialized_varlen_buffer_handle, int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const int64_t init_val, const int64_t length_to_elems)
void varlen_buffer_sample(int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const int64_t init_val)
std::unique_ptr< Function > ir_reduce_loop
Value * emit_read_int_from_buff(Value *ptr, const int8_t compact_sz, Function *function)
void reduceOneEntryBaselineIdx(const ReductionCode &reduction_code) const
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
#define LOG(tag)
Definition: Logger.h:188
void mark_function_always_inline(llvm::Function *func)
void get_group_value_reduction_rt(int8_t *groups_buffer, const int8_t *key, const uint32_t key_count, const void *this_qmd_handle, const int8_t *that_buff, const uint32_t that_entry_idx, const uint32_t that_entry_count, const uint32_t row_size_bytes, int64_t **buff_out, uint8_t *empty)
void reduceLoop(const ReductionCode &reduction_code) const
size_t get_byteoff_of_slot(const size_t slot_idx, const QueryMemoryDescriptor &query_mem_desc)
std::string join(T const &container, std::string const &delim)
llvm::Function * llvm_reduce_loop
void reduceOneEntryNoCollisionsIdx(const ReductionCode &reduction_code) const
#define CHECK_GE(x, y)
Definition: Logger.h:206
std::vector< std::string > CodeCacheKey
Definition: CodeCache.h:61
ReductionCode finalizeReductionCode(ReductionCode reduction_code, const llvm::Function *ir_is_empty, const llvm::Function *ir_reduce_one_entry, const llvm::Function *ir_reduce_one_entry_idx, const CodeCacheKey &key) const
size_t get_slot_off_quad(const QueryMemoryDescriptor &query_mem_desc)
std::string cacheKey() const
size_t getEffectiveKeyWidth() const
bool is_varlen() const
Definition: sqltypes.h:491
std::unique_ptr< Function > ir_reduce_one_entry
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:72
static ExecutionEngineWrapper generateNativeCPUCode(llvm::Function *func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
const std::vector< int64_t > target_init_vals_
void reduceOneAggregateSlot(Value *this_ptr1, Value *this_ptr2, Value *that_ptr1, Value *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const int64_t init_val, const int8_t chosen_bytes, Function *ir_reduce_one_entry) const
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:121
Value * add(Args &&...args)
bool skip_null_val
Definition: TargetInfo.h:44
const Value * emit_checked_write_projection(Value *slot_pi8, Value *other_pi8, const int64_t init_val, const size_t chosen_bytes, Function *ir_reduce_one_entry)
int8_t get_width_for_slot(const size_t target_slot_idx, const bool float_argument_input, const QueryMemoryDescriptor &query_mem_desc)
const int64_t const uint32_t const uint32_t key_qw_count
std::unique_ptr< Function > setup_reduce_one_entry_idx(ReductionCode *reduction_code)
int32_t(*)(int8_t *this_buff, const int8_t *that_buff, const int32_t start_entry_index, const int32_t end_entry_index, const int32_t that_entry_count, const void *this_qmd, const void *that_qmd, const void *serialized_varlen_buffer) FuncPtr
const QueryMemoryDescriptor query_mem_desc_
std::unique_ptr< Function > ir_is_empty
std::string to_string(char const *&&v)
SQLTypeInfo agg_arg_type
Definition: TargetInfo.h:43
void translate_function(const Function *function, llvm::Function *llvm_function, const ReductionCode &reduction_code, const std::unordered_map< const Function *, llvm::Function * > &f)
void emit_aggregate_one_value(const std::string &agg_kind, Value *val_ptr, Value *other_ptr, const size_t chosen_bytes, const TargetInfo &agg_info, Function *ir_reduce_one_entry)
Value * emit_load_i32(Value *ptr, Function *function)
std::string get_type_name() const
Definition: sqltypes.h:429
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
false auto cgen_state
llvm::Module * module_
Definition: CgenState.h:266
ResultSetReductionJIT(const QueryMemoryDescriptor &query_mem_desc, const std::vector< TargetInfo > &targets, const std::vector< int64_t > &target_init_vals)
llvm::LLVMContext & context_
Definition: CgenState.h:269
bool is_agg
Definition: TargetInfo.h:40
size_t advance_slot(const size_t j, const TargetInfo &target_info, const bool separate_varlen_storage)
CHECK(cgen_state)
void reduceOneCountDistinctSlot(Value *this_ptr1, Value *that_ptr1, const size_t target_logical_idx, Function *ir_reduce_one_entry) const
size_t getGroupbyColCount() const
GroupValueInfo get_group_value_reduction(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const size_t key_width, const QueryMemoryDescriptor &query_mem_desc, const int64_t *that_buff_i64, const size_t that_entry_idx, const size_t that_entry_count, const uint32_t row_size_quad)
size_t targetGroupbyIndicesSize() const
void generate_loop_body(For *for_loop, Function *ir_reduce_loop, Function *ir_reduce_one_entry_idx, Value *this_buff, Value *that_buff, Value *start_index, Value *that_entry_count, Value *this_qmd_handle, Value *that_qmd_handle, Value *serialized_varlen_buffer)
void emit_write_projection(Value *slot_pi8, Value *other_pi8, const int64_t init_val, const size_t chosen_bytes, Function *ir_reduce_one_entry)
std::unique_ptr< llvm::Module > g_rt_module
llvm::Function * create_llvm_function(const Function *function, const CgenState *cgen_state)
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:333
uint8_t check_watchdog_rt(const size_t sample_seed)
Definition: sqldefs.h:75
static std::mutex s_reduction_mutex
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:117
std::string target_info_key(const TargetInfo &target_info)
std::unique_ptr< Function > ir_reduce_one_entry_idx
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
ReductionCode setup_functions_ir(const QueryDescriptionType hash_type)
std::unique_ptr< Function > setup_is_empty_entry(ReductionCode *reduction_code)
SQLAgg agg_kind
Definition: TargetInfo.h:41
bool is_geometry() const
Definition: sqltypes.h:489
size_t getCountDistinctDescriptorsSize() const
void reduceOneEntryTargetsNoCollisions(Function *ir_reduce_one_entry, Value *this_targets_start_ptr, Value *that_targets_start_ptr) const
ssize_t getTargetGroupbyIndex(const size_t target_idx) const
QueryDescriptionType getQueryDescriptionType() const
#define UNLIKELY(x)
Definition: likely.h:20
llvm::Type * llvm_type(const Type type, llvm::LLVMContext &ctx)
ReductionCode codegen() const
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:203
size_t get_row_bytes(const QueryMemoryDescriptor &query_mem_desc)
Definition: sqldefs.h:76
std::unique_ptr< Function > create_function(const std::string name, const std::vector< Function::NamedArg > &arg_types, const Type ret_type, const bool always_inline)
const Value * iter() const
std::unique_ptr< Function > setup_reduce_one_entry(ReductionCode *reduction_code, const QueryDescriptionType hash_type)
void isEmpty(const ReductionCode &reduction_code) const
Value * emit_load_i64(Value *ptr, Function *function)
bool is_string() const
Definition: sqltypes.h:477
const ColSlotContext & getColSlotContext() const
#define EMPTY_KEY_32
std::unique_ptr< llvm::Module > module
void reduceOneEntryBaseline(const ReductionCode &reduction_code) const
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:659
Value * emit_load(Value *ptr, Type ptr_type, Function *function)
value_t * get(const key_t &key)
Definition: LruCache.hpp:39
bool is_distinct
Definition: TargetInfo.h:45
void emit_aggregate_one_count(Value *val_ptr, Value *other_ptr, const size_t chosen_bytes, Function *ir_reduce_one_entry)
Definition: sqldefs.h:74
Definition: sqldefs.h:72
std::unique_ptr< Function > setup_reduce_loop(ReductionCode *reduction_code)
size_t get_key_bytes_rowwise(const QueryMemoryDescriptor &query_mem_desc)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
std::string reductionKey() const
const Executor * getExecutor() const
int32_t getTargetIdxForKey() const
const std::vector< TargetInfo > targets_