OmniSciDB  0264ff685a
ResultSetReductionJIT.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ResultSetReductionJIT.h"
20 
21 #include "CodeGenerator.h"
22 #include "DynamicWatchdog.h"
23 #include "Execute.h"
24 #include "IRCodegenUtils.h"
26 #include "Shared/likely.h"
27 #include "Shared/quantile.h"
28 
29 #include <llvm/Bitcode/BitcodeReader.h>
30 #include <llvm/IR/Function.h>
31 #include <llvm/IR/IRBuilder.h>
32 #include <llvm/IR/Verifier.h>
33 #include <llvm/Support/SourceMgr.h>
34 #include <llvm/Support/raw_os_ostream.h>
35 
36 extern std::unique_ptr<llvm::Module> g_rt_module;
37 
39 
41 
42 namespace {
43 
// File-local helpers and constants for the reduction JIT (internal linkage).
44 // Error code to be returned when the watchdog timer triggers during the reduction.
45 const int32_t WATCHDOG_ERROR{-1};
46 // Use the interpreter, not the JIT, for a number of entries lower than the threshold.
47 const size_t INTERP_THRESHOLD{25};
48 
49 // Load the value stored at 'ptr' interpreted as 'ptr_type'.
50 Value* emit_load(Value* ptr, Type ptr_type, Function* function) {
51  return function->add<Load>(
52  function->add<Cast>(Cast::CastOp::BitCast, ptr, ptr_type, ""),
53  ptr->label() + "_loaded");
54 }
55 
56 // Load the value stored at 'ptr' as a 32-bit signed integer.
57 Value* emit_load_i32(Value* ptr, Function* function) {
58  return emit_load(ptr, Type::Int32Ptr, function);
59 }
60 
61 // Load the value stored at 'ptr' as a 64-bit signed integer.
62 Value* emit_load_i64(Value* ptr, Function* function) {
63  return emit_load(ptr, Type::Int64Ptr, function);
64 }
65 
66 // Read a 32- or 64-bit integer stored at 'ptr' and sign extend to 64-bit.
67 Value* emit_read_int_from_buff(Value* ptr, const int8_t compact_sz, Function* function) {
68  switch (compact_sz) {
69  case 8: {
70  return emit_load_i64(ptr, function);
71  }
72  case 4: {
73  const auto loaded_val = emit_load_i32(ptr, function);
74  return function->add<Cast>(Cast::CastOp::SExt, loaded_val, Type::Int64, "");
75  }
76  default: {
77  LOG(FATAL) << "Invalid byte width: " << compact_sz;
78  return nullptr;
79  }
80  }
81 }
82 
83 // Emit a runtime call to accumulate into the 'val_ptr' byte address the 'other_ptr'
84 // value when the type is specified as not null.
85 void emit_aggregate_one_value(const std::string& agg_kind,
86  Value* val_ptr,
87  Value* other_ptr,
88  const size_t chosen_bytes,
89  const TargetInfo& agg_info,
90  Function* ir_reduce_one_entry) {
91  const auto sql_type = get_compact_type(agg_info);
92  const auto dest_name = agg_kind + "_dest";
93  if (sql_type.is_fp()) {
94  if (chosen_bytes == sizeof(float)) {
95  const auto agg = ir_reduce_one_entry->add<Cast>(
96  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
97  const auto val = emit_load(other_ptr, Type::FloatPtr, ir_reduce_one_entry);
98  ir_reduce_one_entry->add<Call>(
99  "agg_" + agg_kind + "_float", std::vector<const Value*>{agg, val}, "");
100  } else {
101  CHECK_EQ(chosen_bytes, sizeof(double));
102  const auto agg = ir_reduce_one_entry->add<Cast>(
103  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
104  const auto val = emit_load(other_ptr, Type::DoublePtr, ir_reduce_one_entry);
105  ir_reduce_one_entry->add<Call>(
106  "agg_" + agg_kind + "_double", std::vector<const Value*>{agg, val}, "");
107  }
108  } else {
109  if (chosen_bytes == sizeof(int32_t)) {
110  const auto agg = ir_reduce_one_entry->add<Cast>(
111  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
112  const auto val = emit_load(other_ptr, Type::Int32Ptr, ir_reduce_one_entry);
113  ir_reduce_one_entry->add<Call>(
114  "agg_" + agg_kind + "_int32", std::vector<const Value*>{agg, val}, "");
115  } else {
116  CHECK_EQ(chosen_bytes, sizeof(int64_t));
117  const auto agg = ir_reduce_one_entry->add<Cast>(
118  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
119  const auto val = emit_load(other_ptr, Type::Int64Ptr, ir_reduce_one_entry);
120  ir_reduce_one_entry->add<Call>(
121  "agg_" + agg_kind, std::vector<const Value*>{agg, val}, "");
122  }
123  }
124 }
125 
126 // Same as above, but support nullable types as well.
127 void emit_aggregate_one_nullable_value(const std::string& agg_kind,
128  Value* val_ptr,
129  Value* other_ptr,
130  const int64_t init_val,
131  const size_t chosen_bytes,
132  const TargetInfo& agg_info,
133  Function* ir_reduce_one_entry) {
134  const auto dest_name = agg_kind + "_dest";
135  if (agg_info.skip_null_val) {
136  const auto sql_type = get_compact_type(agg_info);
137  if (sql_type.is_fp()) {
138  if (chosen_bytes == sizeof(float)) {
139  const auto agg = ir_reduce_one_entry->add<Cast>(
140  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
141  const auto val = emit_load(other_ptr, Type::FloatPtr, ir_reduce_one_entry);
142  const auto init_val_lv = ir_reduce_one_entry->addConstant<ConstantFP>(
143  *reinterpret_cast<const float*>(may_alias_ptr(&init_val)), Type::Float);
144  ir_reduce_one_entry->add<Call>("agg_" + agg_kind + "_float_skip_val",
145  std::vector<const Value*>{agg, val, init_val_lv},
146  "");
147  } else {
148  CHECK_EQ(chosen_bytes, sizeof(double));
149  const auto agg = ir_reduce_one_entry->add<Cast>(
150  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
151  const auto val = emit_load(other_ptr, Type::DoublePtr, ir_reduce_one_entry);
152  const auto init_val_lv = ir_reduce_one_entry->addConstant<ConstantFP>(
153  *reinterpret_cast<const double*>(may_alias_ptr(&init_val)), Type::Double);
154  ir_reduce_one_entry->add<Call>("agg_" + agg_kind + "_double_skip_val",
155  std::vector<const Value*>{agg, val, init_val_lv},
156  "");
157  }
158  } else {
159  if (chosen_bytes == sizeof(int32_t)) {
160  const auto agg = ir_reduce_one_entry->add<Cast>(
161  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
162  const auto val = emit_load(other_ptr, Type::Int32Ptr, ir_reduce_one_entry);
163  const auto init_val_lv =
164  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int32);
165  ir_reduce_one_entry->add<Call>("agg_" + agg_kind + "_int32_skip_val",
166  std::vector<const Value*>{agg, val, init_val_lv},
167  "");
168  } else {
169  CHECK_EQ(chosen_bytes, sizeof(int64_t));
170  const auto agg = ir_reduce_one_entry->add<Cast>(
171  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
172  const auto val = emit_load(other_ptr, Type::Int64Ptr, ir_reduce_one_entry);
173  const auto init_val_lv =
174  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64);
175  ir_reduce_one_entry->add<Call>("agg_" + agg_kind + "_skip_val",
176  std::vector<const Value*>{agg, val, init_val_lv},
177  "");
178  }
179  }
180  } else {
182  agg_kind, val_ptr, other_ptr, chosen_bytes, agg_info, ir_reduce_one_entry);
183  }
184 }
185 
186 // Emit code to accumulate the 'other_ptr' count into the 'val_ptr' destination.
188  Value* other_ptr,
189  const size_t chosen_bytes,
190  Function* ir_reduce_one_entry) {
191  const auto dest_name = "count_dest";
192  if (chosen_bytes == sizeof(int32_t)) {
193  const auto agg = ir_reduce_one_entry->add<Cast>(
194  Cast::CastOp::BitCast, val_ptr, Type::Int32Ptr, dest_name);
195  const auto val = emit_load(other_ptr, Type::Int32Ptr, ir_reduce_one_entry);
196  ir_reduce_one_entry->add<Call>(
197  "agg_sum_int32", std::vector<const Value*>{agg, val}, "");
198  } else {
199  CHECK_EQ(chosen_bytes, sizeof(int64_t));
200  const auto agg = ir_reduce_one_entry->add<Cast>(
201  Cast::CastOp::BitCast, val_ptr, Type::Int64Ptr, dest_name);
202  const auto val = emit_load(other_ptr, Type::Int64Ptr, ir_reduce_one_entry);
203  ir_reduce_one_entry->add<Call>("agg_sum", std::vector<const Value*>{agg, val}, "");
204  }
205 }
206 
207 // Emit code to load the value stored at the 'other_pi8' as an integer of the given width
208 // 'chosen_bytes' and write it to the 'slot_pi8' destination only if necessary (the
209 // existing value at destination is the initialization value).
211  Value* other_pi8,
212  const int64_t init_val,
213  const size_t chosen_bytes,
214  Function* ir_reduce_one_entry) {
215  const auto func_name = "write_projection_int" + std::to_string(chosen_bytes * 8);
216  if (chosen_bytes == sizeof(int32_t)) {
217  const auto proj_val = emit_load_i32(other_pi8, ir_reduce_one_entry);
218  ir_reduce_one_entry->add<Call>(
219  func_name,
220  std::vector<const Value*>{
221  slot_pi8,
222  proj_val,
223  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64)},
224  "");
225  } else {
226  CHECK_EQ(chosen_bytes, sizeof(int64_t));
227  const auto proj_val = emit_load_i64(other_pi8, ir_reduce_one_entry);
228  ir_reduce_one_entry->add<Call>(
229  func_name,
230  std::vector<const Value*>{
231  slot_pi8,
232  proj_val,
233  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64)},
234  "");
235  }
236 }
237 
238 // Emit code to load the value stored at the 'other_pi8' as an integer of the given width
239 // 'chosen_bytes' and write it to the 'slot_pi8' destination only if necessary (the
240 // existing value at destination is the initialization value).
242  Value* other_pi8,
243  const int64_t init_val,
244  const size_t chosen_bytes,
245  Function* ir_reduce_one_entry) {
246  if (chosen_bytes == sizeof(int32_t)) {
247  const auto func_name = "checked_single_agg_id_int32";
248  const auto proj_val = emit_load_i32(other_pi8, ir_reduce_one_entry);
249  const auto slot_pi32 = ir_reduce_one_entry->add<Cast>(
250  Cast::CastOp::BitCast, slot_pi8, Type::Int32Ptr, "");
251  return ir_reduce_one_entry->add<Call>(
252  func_name,
253  Type::Int32,
254  std::vector<const Value*>{
255  slot_pi32,
256  proj_val,
257  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int32)},
258  "");
259  } else {
260  const auto func_name = "checked_single_agg_id";
261  CHECK_EQ(chosen_bytes, sizeof(int64_t));
262  const auto proj_val = emit_load_i64(other_pi8, ir_reduce_one_entry);
263  const auto slot_pi64 = ir_reduce_one_entry->add<Cast>(
264  Cast::CastOp::BitCast, slot_pi8, Type::Int64Ptr, "");
265 
266  return ir_reduce_one_entry->add<Call>(
267  func_name,
268  Type::Int32,
269  std::vector<const Value*>{
270  slot_pi64,
271  proj_val,
272  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64)},
273  "");
274  }
275 }
276 
277 std::unique_ptr<Function> create_function(
278  const std::string name,
279  const std::vector<Function::NamedArg>& arg_types,
280  const Type ret_type,
281  const bool always_inline) {
282  return std::make_unique<Function>(name, arg_types, ret_type, always_inline);
283 }
284 
285 // Create the declaration for the 'is_empty_entry' function. Use private linkage since
286 // it's a helper only called from the generated code and mark it as always inline.
287 std::unique_ptr<Function> setup_is_empty_entry(ReductionCode* reduction_code) {
288  return create_function(
289  "is_empty_entry", {{"row_ptr", Type::Int8Ptr}}, Type::Int1, /*always_inline=*/true);
290 }
291 
292 // Create the declaration for the 'reduce_one_entry' helper.
293 std::unique_ptr<Function> setup_reduce_one_entry(ReductionCode* reduction_code,
294  const QueryDescriptionType hash_type) {
295  std::string this_ptr_name;
296  std::string that_ptr_name;
297  switch (hash_type) {
299  this_ptr_name = "this_targets_ptr";
300  that_ptr_name = "that_targets_ptr";
301  break;
302  }
305  this_ptr_name = "this_row_ptr";
306  that_ptr_name = "that_row_ptr";
307  break;
308  }
309  default: {
310  LOG(FATAL) << "Unexpected query description type";
311  }
312  }
313  return create_function("reduce_one_entry",
314  {{this_ptr_name, Type::Int8Ptr},
315  {that_ptr_name, Type::Int8Ptr},
316  {"this_qmd", Type::VoidPtr},
317  {"that_qmd", Type::VoidPtr},
318  {"serialized_varlen_buffer_arg", Type::VoidPtr}},
319  Type::Int32,
320  /*always_inline=*/true);
321 }
322 
323 // Create the declaration for the 'reduce_one_entry_idx' helper.
324 std::unique_ptr<Function> setup_reduce_one_entry_idx(ReductionCode* reduction_code) {
325  return create_function("reduce_one_entry_idx",
326  {{"this_buff", Type::Int8Ptr},
327  {"that_buff", Type::Int8Ptr},
328  {"that_entry_idx", Type::Int32},
329  {"that_entry_count", Type::Int32},
330  {"this_qmd_handle", Type::VoidPtr},
331  {"that_qmd_handle", Type::VoidPtr},
332  {"serialized_varlen_buffer", Type::VoidPtr}},
333  Type::Int32,
334  /*always_inline=*/true);
335 }
336 
337 // Create the declaration for the 'reduce_loop' entry point. Use external linkage, this is
338 // the public API of the generated code directly used from result set reduction.
339 std::unique_ptr<Function> setup_reduce_loop(ReductionCode* reduction_code) {
340  return create_function("reduce_loop",
341  {{"this_buff", Type::Int8Ptr},
342  {"that_buff", Type::Int8Ptr},
343  {"start_index", Type::Int32},
344  {"end_index", Type::Int32},
345  {"that_entry_count", Type::Int32},
346  {"this_qmd_handle", Type::VoidPtr},
347  {"that_qmd_handle", Type::VoidPtr},
348  {"serialized_varlen_buffer", Type::VoidPtr}},
349  Type::Int32,
350  /*always_inline=*/false);
351 }
352 
353 llvm::Function* create_llvm_function(const Function* function, CgenState* cgen_state) {
354  AUTOMATIC_IR_METADATA(cgen_state);
355  auto& ctx = cgen_state->context_;
356  std::vector<llvm::Type*> parameter_types;
357  const auto& arg_types = function->arg_types();
358  for (const auto& named_arg : arg_types) {
359  CHECK(named_arg.type != Type::Void);
360  parameter_types.push_back(llvm_type(named_arg.type, ctx));
361  }
362  const auto func_type = llvm::FunctionType::get(
363  llvm_type(function->ret_type(), ctx), parameter_types, false);
364  const auto linkage = function->always_inline() ? llvm::Function::PrivateLinkage
365  : llvm::Function::ExternalLinkage;
366  auto func =
367  llvm::Function::Create(func_type, linkage, function->name(), cgen_state->module_);
368  const auto arg_it = func->arg_begin();
369  for (size_t i = 0; i < arg_types.size(); ++i) {
370  const auto arg = &*(arg_it + i);
371  arg->setName(arg_types[i].name);
372  }
373  if (function->always_inline()) {
375  }
376  return func;
377 }
378 
379 // Setup the reduction function and helpers declarations, create a module and a code
380 // generation state object.
382  ReductionCode reduction_code{};
383  reduction_code.ir_is_empty = setup_is_empty_entry(&reduction_code);
384  reduction_code.ir_reduce_one_entry = setup_reduce_one_entry(&reduction_code, hash_type);
385  reduction_code.ir_reduce_one_entry_idx = setup_reduce_one_entry_idx(&reduction_code);
386  reduction_code.ir_reduce_loop = setup_reduce_loop(&reduction_code);
387  return reduction_code;
388 }
389 
391  return hash_type == QueryDescriptionType::GroupByBaselineHash ||
394 }
395 
396 // Variable length sample fast path (no serialized variable length buffer).
397 void varlen_buffer_sample(int8_t* this_ptr1,
398  int8_t* this_ptr2,
399  const int8_t* that_ptr1,
400  const int8_t* that_ptr2,
401  const int64_t init_val) {
402  const auto rhs_proj_col = *reinterpret_cast<const int64_t*>(that_ptr1);
403  if (rhs_proj_col != init_val) {
404  *reinterpret_cast<int64_t*>(this_ptr1) = rhs_proj_col;
405  }
406  CHECK(this_ptr2 && that_ptr2);
407  *reinterpret_cast<int64_t*>(this_ptr2) = *reinterpret_cast<const int64_t*>(that_ptr2);
408 }
409 
410 } // namespace
411 
413  const void* serialized_varlen_buffer_handle,
414  int8_t* this_ptr1,
415  int8_t* this_ptr2,
416  const int8_t* that_ptr1,
417  const int8_t* that_ptr2,
418  const int64_t init_val,
419  const int64_t length_to_elems) {
420  if (!serialized_varlen_buffer_handle) {
421  varlen_buffer_sample(this_ptr1, this_ptr2, that_ptr1, that_ptr2, init_val);
422  return;
423  }
424  const auto& serialized_varlen_buffer =
425  *reinterpret_cast<const std::vector<std::string>*>(serialized_varlen_buffer_handle);
426  if (!serialized_varlen_buffer.empty()) {
427  const auto rhs_proj_col = *reinterpret_cast<const int64_t*>(that_ptr1);
428  CHECK_LT(static_cast<size_t>(rhs_proj_col), serialized_varlen_buffer.size());
429  const auto& varlen_bytes_str = serialized_varlen_buffer[rhs_proj_col];
430  const auto str_ptr = reinterpret_cast<const int8_t*>(varlen_bytes_str.c_str());
431  *reinterpret_cast<int64_t*>(this_ptr1) = reinterpret_cast<const int64_t>(str_ptr);
432  *reinterpret_cast<int64_t*>(this_ptr2) =
433  static_cast<int64_t>(varlen_bytes_str.size() / length_to_elems);
434  } else {
435  varlen_buffer_sample(this_ptr1, this_ptr2, that_ptr1, that_ptr2, init_val);
436  }
437 }
438 
439 // Wrappers to be called from the generated code, sharing implementation with the rest of
440 // the system.
441 
442 extern "C" void count_distinct_set_union_jit_rt(const int64_t new_set_handle,
443  const int64_t old_set_handle,
444  const void* that_qmd_handle,
445  const void* this_qmd_handle,
446  const int64_t target_logical_idx) {
447  const auto that_qmd = reinterpret_cast<const QueryMemoryDescriptor*>(that_qmd_handle);
448  const auto this_qmd = reinterpret_cast<const QueryMemoryDescriptor*>(this_qmd_handle);
449  const auto& new_count_distinct_desc =
450  that_qmd->getCountDistinctDescriptor(target_logical_idx);
451  const auto& old_count_distinct_desc =
452  this_qmd->getCountDistinctDescriptor(target_logical_idx);
453  CHECK(old_count_distinct_desc.impl_type_ != CountDistinctImplType::Invalid);
454  CHECK(old_count_distinct_desc.impl_type_ == new_count_distinct_desc.impl_type_);
456  new_set_handle, old_set_handle, new_count_distinct_desc, old_count_distinct_desc);
457 }
458 
459 extern "C" void approx_median_jit_rt(const int64_t new_set_handle,
460  const int64_t old_set_handle,
461  const void* that_qmd_handle,
462  const void* this_qmd_handle,
463  const int64_t target_logical_idx) {
464  auto* accumulator = reinterpret_cast<quantile::TDigest*>(old_set_handle);
465  auto* incoming = reinterpret_cast<quantile::TDigest*>(new_set_handle);
466  accumulator->mergeTDigest(*incoming);
467 }
468 
469 extern "C" void get_group_value_reduction_rt(int8_t* groups_buffer,
470  const int8_t* key,
471  const uint32_t key_count,
472  const void* this_qmd_handle,
473  const int8_t* that_buff,
474  const uint32_t that_entry_idx,
475  const uint32_t that_entry_count,
476  const uint32_t row_size_bytes,
477  int64_t** buff_out,
478  uint8_t* empty) {
479  const auto& this_qmd = *reinterpret_cast<const QueryMemoryDescriptor*>(this_qmd_handle);
480  const auto gvi =
481  result_set::get_group_value_reduction(reinterpret_cast<int64_t*>(groups_buffer),
482  this_qmd.getEntryCount(),
483  reinterpret_cast<const int64_t*>(key),
484  key_count,
485  this_qmd.getEffectiveKeyWidth(),
486  this_qmd,
487  reinterpret_cast<const int64_t*>(that_buff),
488  that_entry_idx,
489  that_entry_count,
490  row_size_bytes >> 3);
491  *buff_out = gvi.first;
492  *empty = gvi.second;
493 }
494 
495 extern "C" uint8_t check_watchdog_rt(const size_t sample_seed) {
496  if (UNLIKELY(g_enable_dynamic_watchdog && (sample_seed & 0x3F) == 0 &&
497  dynamic_watchdog())) {
498  return true;
499  }
500  return false;
501 }
502 
504  const std::vector<TargetInfo>& targets,
505  const std::vector<int64_t>& target_init_vals)
506  : query_mem_desc_(query_mem_desc)
507  , targets_(targets)
508  , target_init_vals_(target_init_vals) {}
509 
510 // The code generated for a reduction between two result set buffers is structured in
511 // several functions and their IR is stored in the 'ReductionCode' structure. At a high
512 // level, the pseudocode is:
513 //
514 // func is_empty_func(row_ptr):
515 // ...
516 //
517 // func reduce_func_baseline(this_ptr, that_ptr):
518 // if is_empty_func(that_ptr):
519 // return
520 // for each target in the row:
521 // reduce target from that_ptr into this_ptr
522 //
523 // func reduce_func_perfect_hash(this_ptr, that_ptr):
524 // if is_empty_func(that_ptr):
525 // return
526 // for each target in the row:
527 // reduce target from that_ptr into this_ptr
528 //
529 // func reduce_func_idx(this_buff, that_buff, that_entry_index):
530 // that_ptr = that_result_set[that_entry_index]
531 // # Retrieval of 'this_ptr' is different between perfect hash and baseline.
532 // this_ptr = this_result_set[that_entry_index]
533 // or
534 // get_row(key(that_row_ptr), this_result_setBuffer)
535 // reduce_func_[baseline|perfect_hash](this_ptr, that_ptr)
536 //
537 // func reduce_loop(this_buff, that_buff, start_entry_index, end_entry_index):
538 // for that_entry_index in [start_entry_index, end_entry_index):
539 // reduce_func_idx(this_buff, that_buff, that_entry_index)
540 
// NOTE(review): this listing is a doxygen text dump; the extractor dropped several
// lines of this function (its signature — presumably
// `ReductionCode ResultSetReductionJIT::codegen() const` — the switch header and its
// case labels, the interpreter-threshold condition, and two `translate_function(`
// call heads). Recover them from the original source; do not hand-edit around gaps.
542  const auto hash_type = query_mem_desc_.getQueryDescriptionType();
544  return {};
545  }
546  auto reduction_code = setup_functions_ir(hash_type);
547  isEmpty(reduction_code);
// Dispatch on the layout: perfect hash / non-grouped vs baseline hash get different
// one-entry reduction bodies (the case labels were lost in extraction).
551  reduceOneEntryNoCollisions(reduction_code);
552  reduceOneEntryNoCollisionsIdx(reduction_code);
553  break;
554  }
556  reduceOneEntryBaseline(reduction_code);
557  reduceOneEntryBaselineIdx(reduction_code);
558  break;
559  }
560  default: {
561  LOG(FATAL) << "Unexpected query description type";
562  }
563  }
564  reduceLoop(reduction_code);
565  // For small result sets, avoid native code generation and use the interpreter instead.
568  return reduction_code;
569  }
// Serialize JIT compilation and probe the code cache before generating LLVM IR.
570  std::lock_guard<std::mutex> reduction_guard(ReductionCode::s_reduction_mutex);
571  CodeCacheKey key{cacheKey()};
572  const auto compilation_context = s_code_cache.get(key);
573  if (compilation_context) {
574  auto cpu_context =
575  std::dynamic_pointer_cast<CpuCompilationContext>(compilation_context->first);
576  CHECK(cpu_context);
// Cache hit: reuse the compiled entry point; keep the interpreter IR for fallback.
577  return {reinterpret_cast<ReductionCode::FuncPtr>(cpu_context->func()),
578  nullptr,
579  nullptr,
580  nullptr,
581  std::move(reduction_code.ir_is_empty),
582  std::move(reduction_code.ir_reduce_one_entry),
583  std::move(reduction_code.ir_reduce_one_entry_idx),
584  std::move(reduction_code.ir_reduce_loop)};
585  }
// Cache miss: shallow-copy the runtime module and lower each helper to LLVM IR.
586  reduction_code.cgen_state.reset(new CgenState({}, false));
587  auto cgen_state = reduction_code.cgen_state.get();
588  std::unique_ptr<llvm::Module> module = runtime_module_shallow_copy(cgen_state);
589  cgen_state->module_ = module.get();
590  AUTOMATIC_IR_METADATA(cgen_state);
591  auto ir_is_empty = create_llvm_function(reduction_code.ir_is_empty.get(), cgen_state);
592  auto ir_reduce_one_entry =
593  create_llvm_function(reduction_code.ir_reduce_one_entry.get(), cgen_state);
594  auto ir_reduce_one_entry_idx =
595  create_llvm_function(reduction_code.ir_reduce_one_entry_idx.get(), cgen_state);
596  auto ir_reduce_loop =
597  create_llvm_function(reduction_code.ir_reduce_loop.get(), cgen_state);
// Map each declaration to its LLVM function so calls between helpers resolve.
598  std::unordered_map<const Function*, llvm::Function*> f;
599  f.emplace(reduction_code.ir_is_empty.get(), ir_is_empty);
600  f.emplace(reduction_code.ir_reduce_one_entry.get(), ir_reduce_one_entry);
601  f.emplace(reduction_code.ir_reduce_one_entry_idx.get(), ir_reduce_one_entry_idx);
602  f.emplace(reduction_code.ir_reduce_loop.get(), ir_reduce_loop);
603  translate_function(reduction_code.ir_is_empty.get(), ir_is_empty, reduction_code, f);
605  reduction_code.ir_reduce_one_entry.get(), ir_reduce_one_entry, reduction_code, f);
606  translate_function(reduction_code.ir_reduce_one_entry_idx.get(),
607  ir_reduce_one_entry_idx,
608  reduction_code,
609  f);
611  reduction_code.ir_reduce_loop.get(), ir_reduce_loop, reduction_code, f);
612  reduction_code.llvm_reduce_loop = ir_reduce_loop;
613  reduction_code.module = std::move(module);
615  return finalizeReductionCode(std::move(reduction_code),
616  ir_is_empty,
617  ir_reduce_one_entry,
618  ir_reduce_one_entry_idx,
619  key);
620 }
621 
// NOTE(review): the doc extractor dropped this function's signature (original line
// 622 — presumably `void ResultSetReductionJIT::clearCache()`; confirm against the
// header) and the statements at lines 625-626 that clear the stub/code caches.
// Only the runtime-module release survives below.
623  // Clear stub cache to avoid crash caused by non-deterministic static destructor order
624  // of LLVM context and the cache.
// Drop the shared runtime LLVM module.
627  g_rt_module = nullptr;
628 }
629 
630 void ResultSetReductionJIT::isEmpty(const ReductionCode& reduction_code) const {
// Generates the body of 'is_empty_entry(row_ptr)': read the (first) key and compare
// it against the empty-key sentinel, returning the i1 comparison result.
// NOTE(review): the doc extractor dropped lines here (the keyless-hash branch
// header, several CHECKs, and the slot-width lookup); restore from the original.
631  auto ir_is_empty = reduction_code.ir_is_empty.get();
634  Value* key{nullptr};
635  Value* empty_key_val{nullptr};
636  const auto keys_ptr = ir_is_empty->arg(0);
// Keyless-hash path (branch header lost in extraction): read the target slot that
// stands in for the key and compare it against its initialization value.
641  CHECK_LT(static_cast<size_t>(query_mem_desc_.getTargetIdxForKey()),
642  target_init_vals_.size());
643  const int64_t target_slot_off = result_set::get_byteoff_of_slot(
645  const auto slot_ptr = ir_is_empty->add<GetElementPtr>(
646  keys_ptr,
647  ir_is_empty->addConstant<ConstantInt>(target_slot_off, Type::Int32),
648  "is_empty_slot_ptr");
649  const auto compact_sz =
651  key = emit_read_int_from_buff(slot_ptr, compact_sz, ir_is_empty);
652  empty_key_val = ir_is_empty->addConstant<ConstantInt>(
654  } else {
// Explicit-key path: dispatch on the effective key width (switch header lost).
656  case 4: {
659  key = emit_load_i32(keys_ptr, ir_is_empty);
660  empty_key_val = ir_is_empty->addConstant<ConstantInt>(EMPTY_KEY_32, Type::Int32);
661  break;
662  }
663  case 8: {
664  key = emit_load_i64(keys_ptr, ir_is_empty);
665  empty_key_val = ir_is_empty->addConstant<ConstantInt>(EMPTY_KEY_64, Type::Int64);
666  break;
667  }
668  default:
669  LOG(FATAL) << "Invalid key width";
670  }
671  }
// Emit 'key == empty_key_val' and return it from the generated function.
672  const auto ret =
673  ir_is_empty->add<ICmp>(ICmp::Predicate::EQ, key, empty_key_val, "is_key_empty");
674  ir_is_empty->add<Ret>(ret);
675 }
676 
678  const ReductionCode& reduction_code) const {
679  auto ir_reduce_one_entry = reduction_code.ir_reduce_one_entry.get();
680  const auto this_row_ptr = ir_reduce_one_entry->arg(0);
681  const auto that_row_ptr = ir_reduce_one_entry->arg(1);
682  const auto that_is_empty =
683  ir_reduce_one_entry->add<Call>(reduction_code.ir_is_empty.get(),
684  std::vector<const Value*>{that_row_ptr},
685  "that_is_empty");
686  ir_reduce_one_entry->add<ReturnEarly>(
687  that_is_empty, ir_reduce_one_entry->addConstant<ConstantInt>(0, Type::Int32), "");
688 
689  const auto key_bytes = get_key_bytes_rowwise(query_mem_desc_);
690  if (key_bytes) { // copy the key from right hand side
691  ir_reduce_one_entry->add<MemCpy>(
692  this_row_ptr,
693  that_row_ptr,
694  ir_reduce_one_entry->addConstant<ConstantInt>(key_bytes, Type::Int32));
695  }
696 
697  const auto key_bytes_with_padding = align_to_int64(key_bytes);
698  const auto key_bytes_lv =
699  ir_reduce_one_entry->addConstant<ConstantInt>(key_bytes_with_padding, Type::Int32);
700  const auto this_targets_start_ptr = ir_reduce_one_entry->add<GetElementPtr>(
701  this_row_ptr, key_bytes_lv, "this_targets_start");
702  const auto that_targets_start_ptr = ir_reduce_one_entry->add<GetElementPtr>(
703  that_row_ptr, key_bytes_lv, "that_targets_start");
704 
706  ir_reduce_one_entry, this_targets_start_ptr, that_targets_start_ptr);
707 }
708 
710  Function* ir_reduce_one_entry,
711  Value* this_targets_start_ptr,
712  Value* that_targets_start_ptr) const {
713  const auto& col_slot_context = query_mem_desc_.getColSlotContext();
714  Value* this_targets_ptr = this_targets_start_ptr;
715  Value* that_targets_ptr = that_targets_start_ptr;
716  size_t init_agg_val_idx = 0;
717  for (size_t target_logical_idx = 0; target_logical_idx < targets_.size();
718  ++target_logical_idx) {
719  const auto& target_info = targets_[target_logical_idx];
720  const auto& slots_for_col = col_slot_context.getSlotsForCol(target_logical_idx);
721  Value* this_ptr2{nullptr};
722  Value* that_ptr2{nullptr};
723 
724  bool two_slot_target{false};
725  if (target_info.is_agg &&
726  (target_info.agg_kind == kAVG ||
727  (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()))) {
728  // Note that this assumes if one of the slot pairs in a given target is an array,
729  // all slot pairs are arrays. Currently this is true for all geo targets, but we
730  // should better codify and store this information in the future
731  two_slot_target = true;
732  }
733 
734  for (size_t target_slot_idx = slots_for_col.front();
735  target_slot_idx < slots_for_col.back() + 1;
736  target_slot_idx += 2) {
737  const auto slot_off_val = query_mem_desc_.getPaddedSlotWidthBytes(target_slot_idx);
738  const auto slot_off =
739  ir_reduce_one_entry->addConstant<ConstantInt>(slot_off_val, Type::Int32);
740  if (UNLIKELY(two_slot_target)) {
741  const auto desc = "target_" + std::to_string(target_logical_idx) + "_second_slot";
742  this_ptr2 = ir_reduce_one_entry->add<GetElementPtr>(
743  this_targets_ptr, slot_off, "this_" + desc);
744  that_ptr2 = ir_reduce_one_entry->add<GetElementPtr>(
745  that_targets_ptr, slot_off, "that_" + desc);
746  }
747  reduceOneSlot(this_targets_ptr,
748  this_ptr2,
749  that_targets_ptr,
750  that_ptr2,
751  target_info,
752  target_logical_idx,
753  target_slot_idx,
754  init_agg_val_idx,
755  slots_for_col.front(),
756  ir_reduce_one_entry);
757  auto increment_agg_val_idx_maybe =
758  [&init_agg_val_idx, &target_logical_idx, this](const int slot_count) {
760  query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) < 0) {
761  init_agg_val_idx += slot_count;
762  }
763  };
764  if (target_logical_idx + 1 == targets_.size() &&
765  target_slot_idx + 1 >= slots_for_col.back()) {
766  break;
767  }
768  const auto next_desc =
769  "target_" + std::to_string(target_logical_idx + 1) + "_first_slot";
770  if (UNLIKELY(two_slot_target)) {
771  increment_agg_val_idx_maybe(2);
772  const auto two_slot_off = ir_reduce_one_entry->addConstant<ConstantInt>(
773  slot_off_val + query_mem_desc_.getPaddedSlotWidthBytes(target_slot_idx + 1),
774  Type::Int32);
775  this_targets_ptr = ir_reduce_one_entry->add<GetElementPtr>(
776  this_targets_ptr, two_slot_off, "this_" + next_desc);
777  that_targets_ptr = ir_reduce_one_entry->add<GetElementPtr>(
778  that_targets_ptr, two_slot_off, "that_" + next_desc);
779  } else {
780  increment_agg_val_idx_maybe(1);
781  this_targets_ptr = ir_reduce_one_entry->add<GetElementPtr>(
782  this_targets_ptr, slot_off, "this_" + next_desc);
783  that_targets_ptr = ir_reduce_one_entry->add<GetElementPtr>(
784  that_targets_ptr, slot_off, "that_" + next_desc);
785  }
786  }
787  }
788  ir_reduce_one_entry->add<Ret>(
789  ir_reduce_one_entry->addConstant<ConstantInt>(0, Type::Int32));
790 }
791 
// NOTE(review): body of ResultSetReductionJIT::reduceOneEntryBaseline; its
// signature line (original source line 792) is missing from this doxygen
// extract. Emits IR reducing one already-matched row-wise (baseline layout)
// entry of 'that' into 'this': it walks all targets, wiring a second slot
// pointer for AVG and varlen SAMPLE targets, and delegates per-slot work to
// reduceOneSlot.
793  const ReductionCode& reduction_code) const {
794  auto ir_reduce_one_entry = reduction_code.ir_reduce_one_entry.get();
// Args 0/1 point at the first target slot of the entry in each buffer.
795  const auto this_targets_ptr_arg = ir_reduce_one_entry->arg(0);
796  const auto that_targets_ptr_arg = ir_reduce_one_entry->arg(1);
797  Value* this_ptr1 = this_targets_ptr_arg;
798  Value* that_ptr1 = that_targets_ptr_arg;
// j: running physical slot index; init_agg_val_idx: index into target_init_vals_.
799  size_t j = 0;
800  size_t init_agg_val_idx = 0;
801  for (size_t target_logical_idx = 0; target_logical_idx < targets_.size();
802  ++target_logical_idx) {
803  const auto& target_info = targets_[target_logical_idx];
804  Value* this_ptr2{nullptr};
805  Value* that_ptr2{nullptr};
// AVG and variable-length SAMPLE targets use two consecutive 8-byte slots;
// expose the second slot via this_ptr2/that_ptr2 at +sizeof(int64_t).
806  if (target_info.is_agg &&
807  (target_info.agg_kind == kAVG ||
808  (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()))) {
809  const auto desc = "target_" + std::to_string(target_logical_idx) + "_second_slot";
810  const auto second_slot_rel_off =
811  ir_reduce_one_entry->addConstant<ConstantInt>(sizeof(int64_t), Type::Int32);
812  this_ptr2 = ir_reduce_one_entry->add<GetElementPtr>(
813  this_ptr1, second_slot_rel_off, "this_" + desc);
814  that_ptr2 = ir_reduce_one_entry->add<GetElementPtr>(
815  that_ptr1, second_slot_rel_off, "that_" + desc);
816  }
// Note: the same 'j' is passed as both target_slot_idx and
// first_slot_idx_for_target here.
817  reduceOneSlot(this_ptr1,
818  this_ptr2,
819  that_ptr1,
820  that_ptr2,
821  target_info,
822  target_logical_idx,
823  j,
824  init_agg_val_idx,
825  j,
826  ir_reduce_one_entry);
// No pointers to advance after the last target.
827  if (target_logical_idx + 1 == targets_.size()) {
828  break;
829  }
// NOTE(review): the condition opening this branch (original source line 830)
// is missing from this extract; judging by the else-branch it presumably
// tests whether target-groupby indices are in use -- confirm in full source.
831  init_agg_val_idx = advance_slot(init_agg_val_idx, target_info, false);
832  } else {
// Targets materialized in the group-by key (index >= 0) consume no init slot.
833  if (query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) < 0) {
834  init_agg_val_idx = advance_slot(init_agg_val_idx, target_info, false);
835  }
836  }
837  j = advance_slot(j, target_info, false);
838  const auto next_desc =
839  "target_" + std::to_string(target_logical_idx + 1) + "_first_slot";
// Next target offset is computed in bytes from the start of the targets
// region, using 8-byte slots (init_agg_val_idx * sizeof(int64_t)).
840  auto next_slot_rel_off = ir_reduce_one_entry->addConstant<ConstantInt>(
841  init_agg_val_idx * sizeof(int64_t), Type::Int32);
842  this_ptr1 = ir_reduce_one_entry->add<GetElementPtr>(
843  this_targets_ptr_arg, next_slot_rel_off, next_desc);
844  that_ptr1 = ir_reduce_one_entry->add<GetElementPtr>(
845  that_targets_ptr_arg, next_slot_rel_off, next_desc);
846  }
// Success: return 0.
847  ir_reduce_one_entry->add<Ret>(
848  ir_reduce_one_entry->addConstant<ConstantInt>(0, Type::Int32));
849 }
850 
// NOTE(review): body of ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx;
// the signature line (original source line 851) and lines 854-857 (likely
// CHECKs on the query description type) are missing from this extract.
// For the perfect-hash layout, entries at the same index in 'this' and 'that'
// correspond, so the row pointer is entry_idx * row_bytes in both buffers;
// the actual reduction is delegated to the one-entry function.
852  const ReductionCode& reduction_code) const {
853  auto ir_reduce_one_entry_idx = reduction_code.ir_reduce_one_entry_idx.get();
858  const auto this_buff = ir_reduce_one_entry_idx->arg(0);
859  const auto that_buff = ir_reduce_one_entry_idx->arg(1);
860  const auto entry_idx = ir_reduce_one_entry_idx->arg(2);
861  const auto this_qmd_handle = ir_reduce_one_entry_idx->arg(4);
862  const auto that_qmd_handle = ir_reduce_one_entry_idx->arg(5);
863  const auto serialized_varlen_buffer_arg = ir_reduce_one_entry_idx->arg(6);
// NOTE(review): the constant's value (original source line 865) is missing;
// presumably get_row_bytes(query_mem_desc_) -- confirm in full source.
864  const auto row_bytes = ir_reduce_one_entry_idx->addConstant<ConstantInt>(
// Widen the 32-bit entry index before the byte-offset multiply.
866  const auto entry_idx_64 = ir_reduce_one_entry_idx->add<Cast>(
867  Cast::CastOp::SExt, entry_idx, Type::Int64, "entry_idx_64");
868  const auto row_off_in_bytes = ir_reduce_one_entry_idx->add<BinaryOperator>(
869  BinaryOperator::BinaryOp::Mul, entry_idx_64, row_bytes, "row_off_in_bytes");
870  const auto this_row_ptr = ir_reduce_one_entry_idx->add<GetElementPtr>(
871  this_buff, row_off_in_bytes, "this_row_ptr");
872  const auto that_row_ptr = ir_reduce_one_entry_idx->add<GetElementPtr>(
873  that_buff, row_off_in_bytes, "that_row_ptr");
// Delegate to the one-entry reduction and forward its return code.
874  const auto reduce_rc = ir_reduce_one_entry_idx->add<Call>(
875  reduction_code.ir_reduce_one_entry.get(),
876  std::vector<const Value*>{this_row_ptr,
877  that_row_ptr,
878  this_qmd_handle,
879  that_qmd_handle,
880  serialized_varlen_buffer_arg},
881  "");
882  ir_reduce_one_entry_idx->add<Ret>(reduce_rc);
883 }
884 
// NOTE(review): body of ResultSetReductionJIT::reduceOneEntryBaselineIdx; the
// signature line (original source line 885), lines 888-891 (likely CHECKs)
// and line 900 (the row_bytes constant's value) are missing from this extract.
// For the baseline-hash layout, the entry at 'that_entry_idx' must be probed
// into 'this' buffer by key: skip empty source entries, locate/create the
// matching group via the runtime, then reduce the target slots.
886  const ReductionCode& reduction_code) const {
887  auto ir_reduce_one_entry_idx = reduction_code.ir_reduce_one_entry_idx.get();
892  const auto this_buff = ir_reduce_one_entry_idx->arg(0);
893  const auto that_buff = ir_reduce_one_entry_idx->arg(1);
894  const auto that_entry_idx = ir_reduce_one_entry_idx->arg(2);
895  const auto that_entry_count = ir_reduce_one_entry_idx->arg(3);
896  const auto this_qmd_handle = ir_reduce_one_entry_idx->arg(4);
897  const auto that_qmd_handle = ir_reduce_one_entry_idx->arg(5);
898  const auto serialized_varlen_buffer_arg = ir_reduce_one_entry_idx->arg(6);
899  const auto row_bytes = ir_reduce_one_entry_idx->addConstant<ConstantInt>(
// Byte offset of the source row: that_entry_idx (sign-extended) * row_bytes.
901  const auto that_entry_idx_64 = ir_reduce_one_entry_idx->add<Cast>(
902  Cast::CastOp::SExt, that_entry_idx, Type::Int64, "that_entry_idx_64");
903  const auto that_row_off_in_bytes =
904  ir_reduce_one_entry_idx->add<BinaryOperator>(BinaryOperator::BinaryOp::Mul,
905  that_entry_idx_64,
906  row_bytes,
907  "that_row_off_in_bytes");
908  const auto that_row_ptr = ir_reduce_one_entry_idx->add<GetElementPtr>(
909  that_buff, that_row_off_in_bytes, "that_row_ptr");
// An empty source entry contributes nothing: return 0 early.
910  const auto that_is_empty =
911  ir_reduce_one_entry_idx->add<Call>(reduction_code.ir_is_empty.get(),
912  std::vector<const Value*>{that_row_ptr},
913  "that_is_empty");
914  ir_reduce_one_entry_idx->add<ReturnEarly>(
915  that_is_empty,
916  ir_reduce_one_entry_idx->addConstant<ConstantInt>(0, Type::Int32),
917  "");
918  const auto key_count = query_mem_desc_.getGroupbyColCount();
// Stack slots used as out-parameters for the runtime group lookup below.
919  const auto one_element =
920  ir_reduce_one_entry_idx->addConstant<ConstantInt>(1, Type::Int32);
921  const auto this_targets_ptr_i64_ptr = ir_reduce_one_entry_idx->add<Alloca>(
922  Type::Int64Ptr, one_element, "this_targets_ptr_out");
923  const auto this_is_empty_ptr =
924  ir_reduce_one_entry_idx->add<Alloca>(Type::Int8, one_element, "this_is_empty_out");
// Runtime call: find (or insert) the group matching that_row_ptr's key in
// this_buff; writes the targets pointer and an "is empty" flag to the allocas.
925  ir_reduce_one_entry_idx->add<ExternalCall>(
926  "get_group_value_reduction_rt",
927  Type::Void,
928  std::vector<const Value*>{
929  this_buff,
930  that_row_ptr,
931  ir_reduce_one_entry_idx->addConstant<ConstantInt>(key_count, Type::Int32),
932  this_qmd_handle,
933  that_buff,
934  that_entry_idx,
935  that_entry_count,
936  row_bytes,
937  this_targets_ptr_i64_ptr,
938  this_is_empty_ptr},
939  "");
940  const auto this_targets_ptr_i64 = ir_reduce_one_entry_idx->add<Load>(
941  this_targets_ptr_i64_ptr, "this_targets_ptr_i64");
942  auto this_is_empty =
943  ir_reduce_one_entry_idx->add<Load>(this_is_empty_ptr, "this_is_empty");
944  this_is_empty = ir_reduce_one_entry_idx->add<Cast>(
945  Cast::CastOp::Trunc, this_is_empty, Type::Int1, "this_is_empty_bool");
// If the runtime flagged the destination slot empty, return 0 early.
946  ir_reduce_one_entry_idx->add<ReturnEarly>(
947  this_is_empty,
948  ir_reduce_one_entry_idx->addConstant<ConstantInt>(0, Type::Int32),
949  "");
// The source row's targets start after the key, key_qw_count * 8 bytes in.
950  const auto key_qw_count = get_slot_off_quad(query_mem_desc_);
951  const auto this_targets_ptr = ir_reduce_one_entry_idx->add<Cast>(
952  Cast::CastOp::BitCast, this_targets_ptr_i64, Type::Int8Ptr, "this_targets_ptr");
953  const auto key_byte_count = key_qw_count * sizeof(int64_t);
954  const auto key_byte_count_lv =
955  ir_reduce_one_entry_idx->addConstant<ConstantInt>(key_byte_count, Type::Int32);
956  const auto that_targets_ptr = ir_reduce_one_entry_idx->add<GetElementPtr>(
957  that_row_ptr, key_byte_count_lv, "that_targets_ptr");
// Reduce the matched target slots and forward the return code.
958  const auto reduce_rc = ir_reduce_one_entry_idx->add<Call>(
959  reduction_code.ir_reduce_one_entry.get(),
960  std::vector<const Value*>{this_targets_ptr,
961  that_targets_ptr,
962  this_qmd_handle,
963  that_qmd_handle,
964  serialized_varlen_buffer_arg},
965  "");
966  ir_reduce_one_entry_idx->add<Ret>(reduce_rc);
967 }
968 
969 namespace {
970 
971 void generate_loop_body(For* for_loop,
972  Function* ir_reduce_loop,
973  Function* ir_reduce_one_entry_idx,
974  Value* this_buff,
975  Value* that_buff,
976  Value* start_index,
977  Value* that_entry_count,
978  Value* this_qmd_handle,
979  Value* that_qmd_handle,
980  Value* serialized_varlen_buffer) {
981  const auto that_entry_idx = for_loop->add<BinaryOperator>(
982  BinaryOperator::BinaryOp::Add, for_loop->iter(), start_index, "that_entry_idx");
983  const auto watchdog_sample_seed =
984  for_loop->add<Cast>(Cast::CastOp::SExt, that_entry_idx, Type::Int64, "");
985  const auto watchdog_triggered =
986  for_loop->add<ExternalCall>("check_watchdog_rt",
987  Type::Int8,
988  std::vector<const Value*>{watchdog_sample_seed},
989  "");
990  const auto watchdog_triggered_bool =
991  for_loop->add<ICmp>(ICmp::Predicate::NE,
992  watchdog_triggered,
993  ir_reduce_loop->addConstant<ConstantInt>(0, Type::Int8),
994  "");
995  for_loop->add<ReturnEarly>(
996  watchdog_triggered_bool,
997  ir_reduce_loop->addConstant<ConstantInt>(WATCHDOG_ERROR, Type::Int32),
998  "");
999  const auto reduce_rc =
1000  for_loop->add<Call>(ir_reduce_one_entry_idx,
1001  std::vector<const Value*>{this_buff,
1002  that_buff,
1003  that_entry_idx,
1004  that_entry_count,
1005  this_qmd_handle,
1006  that_qmd_handle,
1007  serialized_varlen_buffer},
1008  "");
1009 
1010  auto reduce_rc_bool =
1011  for_loop->add<ICmp>(ICmp::Predicate::NE,
1012  reduce_rc,
1013  ir_reduce_loop->addConstant<ConstantInt>(0, Type::Int32),
1014  "");
1015  for_loop->add<ReturnEarly>(reduce_rc_bool, reduce_rc, "");
1016 }
1017 
1018 } // namespace
1019 
1020 void ResultSetReductionJIT::reduceLoop(const ReductionCode& reduction_code) const {
1021  auto ir_reduce_loop = reduction_code.ir_reduce_loop.get();
1022  const auto this_buff_arg = ir_reduce_loop->arg(0);
1023  const auto that_buff_arg = ir_reduce_loop->arg(1);
1024  const auto start_index_arg = ir_reduce_loop->arg(2);
1025  const auto end_index_arg = ir_reduce_loop->arg(3);
1026  const auto that_entry_count_arg = ir_reduce_loop->arg(4);
1027  const auto this_qmd_handle_arg = ir_reduce_loop->arg(5);
1028  const auto that_qmd_handle_arg = ir_reduce_loop->arg(6);
1029  const auto serialized_varlen_buffer_arg = ir_reduce_loop->arg(7);
1030  For* for_loop =
1031  static_cast<For*>(ir_reduce_loop->add<For>(start_index_arg, end_index_arg, ""));
1032  generate_loop_body(for_loop,
1033  ir_reduce_loop,
1034  reduction_code.ir_reduce_one_entry_idx.get(),
1035  this_buff_arg,
1036  that_buff_arg,
1037  start_index_arg,
1038  that_entry_count_arg,
1039  this_qmd_handle_arg,
1040  that_qmd_handle_arg,
1041  serialized_varlen_buffer_arg);
1042  ir_reduce_loop->add<Ret>(ir_reduce_loop->addConstant<ConstantInt>(0, Type::Int32));
1043 }
1044 
// NOTE(review): body of ResultSetReductionJIT::reduceOneSlot; the signature
// opener with the this_ptr1 parameter (original source line 1045), the guard
// condition at line 1055, the ICmp predicate at line 1082 and the projection
// call at line 1090 are missing from this extract -- confirm in full source.
// Emits the reduction of a single target slot, dispatching on the target's
// aggregate kind: true aggregates, SINGLE_VALUE (checked write), and
// projection/SAMPLE (plain write plus varlen handling).
1046  Value* this_ptr2,
1047  Value* that_ptr1,
1048  Value* that_ptr2,
1049  const TargetInfo& target_info,
1050  const size_t target_logical_idx,
1051  const size_t target_slot_idx,
1052  const size_t init_agg_val_idx,
1053  const size_t first_slot_idx_for_target,
1054  Function* ir_reduce_one_entry) const {
// Targets stored in the group-by key need no slot reduction.
1056  if (query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) >= 0) {
1057  return;
1058  }
1059  }
1060  const bool float_argument_input = takes_float_argument(target_info);
// Physical width of this slot in bytes, per the memory descriptor.
1061  const auto chosen_bytes = result_set::get_width_for_slot(
1062  target_slot_idx, float_argument_input, query_mem_desc_);
1063  CHECK_LT(init_agg_val_idx, target_init_vals_.size());
1064  auto init_val = target_init_vals_[init_agg_val_idx];
// True aggregates (everything but SINGLE_VALUE and SAMPLE) go through the
// per-aggregate-kind emitter.
1065  if (target_info.is_agg &&
1066  (target_info.agg_kind != kSINGLE_VALUE && target_info.agg_kind != kSAMPLE)) {
1067  reduceOneAggregateSlot(this_ptr1,
1068  this_ptr2,
1069  that_ptr1,
1070  that_ptr2,
1071  target_info,
1072  target_logical_idx,
1073  target_slot_idx,
1074  init_val,
1075  chosen_bytes,
1076  ir_reduce_one_entry);
// SINGLE_VALUE: the checked write fails (non-zero rc) when two different
// non-init values collide; propagate that rc to the caller.
1077  } else if (target_info.agg_kind == kSINGLE_VALUE) {
1078  const auto checked_rc = emit_checked_write_projection(
1079  this_ptr1, that_ptr1, init_val, chosen_bytes, ir_reduce_one_entry);
1080 
1081  auto checked_rc_bool = ir_reduce_one_entry->add<ICmp>(
1083  checked_rc,
1084  ir_reduce_one_entry->addConstant<ConstantInt>(0, Type::Int32),
1085  "");
1086 
1087  ir_reduce_one_entry->add<ReturnEarly>(checked_rc_bool, checked_rc, "");
1088 
// Projection / SAMPLE: plain write (call on missing line 1090, presumably
// emit_write_projection), plus varlen pointer/length fix-up for SAMPLE.
1089  } else {
1091  this_ptr1, that_ptr1, init_val, chosen_bytes, ir_reduce_one_entry);
1092  if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()) {
1093  CHECK(this_ptr2 && that_ptr2);
1094  size_t length_to_elems{0};
1095  if (target_info.sql_type.is_geometry()) {
1096  // TODO: Assumes hard-coded sizes for geometry targets
1097  length_to_elems = target_slot_idx == first_slot_idx_for_target ? 1 : 4;
1098  } else {
// Strings count bytes (1 per elem); arrays count whole elements.
1099  const auto& elem_ti = target_info.sql_type.get_elem_type();
1100  length_to_elems = target_info.sql_type.is_string() ? 1 : elem_ti.get_size();
1101  }
// The serialized varlen buffer handle is argument 4 of the one-entry function.
1102  const auto serialized_varlen_buffer_arg = ir_reduce_one_entry->arg(4);
1103  ir_reduce_one_entry->add<ExternalCall>(
1104  "serialized_varlen_buffer_sample",
1105  Type::Void,
1106  std::vector<const Value*>{
1107  serialized_varlen_buffer_arg,
1108  this_ptr1,
1109  this_ptr2,
1110  that_ptr1,
1111  that_ptr2,
1112  ir_reduce_one_entry->addConstant<ConstantInt>(init_val, Type::Int64),
1113  ir_reduce_one_entry->addConstant<ConstantInt>(length_to_elems,
1114  Type::Int64)},
1115  "");
1116  }
1117  }
1118 }
1119 
// NOTE(review): body of ResultSetReductionJIT::reduceOneAggregateSlot; the
// signature opener with this_ptr1 (original source line 1120) and the callee
// names at lines 1135, 1145, 1157, 1167 and 1177 are missing from this
// extract. Based on the member list they are reduceOneCountDistinctSlot,
// reduceOneApproxMedianSlot and emit_aggregate_one_nullable_value for
// sum/min/max respectively -- confirm in full source.
// Dispatches the reduction of one slot on the aggregate kind.
1121  Value* this_ptr2,
1122  Value* that_ptr1,
1123  Value* that_ptr2,
1124  const TargetInfo& target_info,
1125  const size_t target_logical_idx,
1126  const size_t target_slot_idx,
1127  const int64_t init_val,
1128  const int8_t chosen_bytes,
1129  Function* ir_reduce_one_entry) const {
1130  switch (target_info.agg_kind) {
1131  case kCOUNT:
1132  case kAPPROX_COUNT_DISTINCT: {
// Distinct counts merge the underlying bitmap/set, not a scalar counter.
1133  if (is_distinct_target(target_info)) {
1134  CHECK_EQ(static_cast<size_t>(chosen_bytes), sizeof(int64_t));
1136  this_ptr1, that_ptr1, target_logical_idx, ir_reduce_one_entry);
1137  break;
1138  }
// Plain COUNT requires a zero init value; simple add of the two counters.
1139  CHECK_EQ(int64_t(0), init_val);
1140  emit_aggregate_one_count(this_ptr1, that_ptr1, chosen_bytes, ir_reduce_one_entry);
1141  break;
1142  }
// Approximate median merges the two digest states via the runtime.
1143  case kAPPROX_MEDIAN:
1144  CHECK_EQ(chosen_bytes, static_cast<int8_t>(sizeof(int64_t)));
1146  this_ptr1, that_ptr1, target_logical_idx, ir_reduce_one_entry);
1147  break;
// AVG reduces its count component here, then falls through to kSUM for the
// sum component.
1148  case kAVG: {
1149  // Ignore float argument compaction for count component for fear of its overflow
1150  emit_aggregate_one_count(this_ptr2,
1151  that_ptr2,
1152  query_mem_desc_.getPaddedSlotWidthBytes(target_slot_idx),
1153  ir_reduce_one_entry);
1154  }
1155  // fall thru
1156  case kSUM: {
1158  this_ptr1,
1159  that_ptr1,
1160  init_val,
1161  chosen_bytes,
1162  target_info,
1163  ir_reduce_one_entry);
1164  break;
1165  }
1166  case kMIN: {
1168  this_ptr1,
1169  that_ptr1,
1170  init_val,
1171  chosen_bytes,
1172  target_info,
1173  ir_reduce_one_entry);
1174  break;
1175  }
1176  case kMAX: {
1178  this_ptr1,
1179  that_ptr1,
1180  init_val,
1181  chosen_bytes,
1182  target_info,
1183  ir_reduce_one_entry);
1184  break;
1185  }
1186  default:
1187  LOG(FATAL) << "Invalid aggregate type";
1188  }
1189 }
1190 
// NOTE(review): body of ResultSetReductionJIT::reduceOneCountDistinctSlot;
// the signature opener (original source line 1191) and line 1196 (likely a
// CHECK against getCountDistinctDescriptorsSize) are missing from this
// extract. Loads the distinct-set handles stored in both slots and calls the
// runtime to union 'that' ("new") set into 'this' ("old") set.
1192  Value* this_ptr1,
1193  Value* that_ptr1,
1194  const size_t target_logical_idx,
1195  Function* ir_reduce_one_entry) const {
// Slot contents are 64-bit handles to the count-distinct state.
1197  const auto old_set_handle = emit_load_i64(this_ptr1, ir_reduce_one_entry);
1198  const auto new_set_handle = emit_load_i64(that_ptr1, ir_reduce_one_entry);
1199  const auto this_qmd_arg = ir_reduce_one_entry->arg(2);
1200  const auto that_qmd_arg = ir_reduce_one_entry->arg(3);
// Note the argument order: new (that) first, old (this) second, with the
// matching descriptor handles in the same order.
1201  ir_reduce_one_entry->add<ExternalCall>(
1202  "count_distinct_set_union_jit_rt",
1203  Type::Void,
1204  std::vector<const Value*>{
1205  new_set_handle,
1206  old_set_handle,
1207  that_qmd_arg,
1208  this_qmd_arg,
1209  ir_reduce_one_entry->addConstant<ConstantInt>(target_logical_idx, Type::Int64)},
1210  "");
1211 }
1212 
// NOTE(review): body of ResultSetReductionJIT::reduceOneApproxMedianSlot; the
// signature opener (original source line 1213) and line 1218 (likely a CHECK)
// are missing from this extract. Mirrors reduceOneCountDistinctSlot: loads
// the 64-bit digest handles from both slots and calls the runtime to merge
// 'that' ("new") digest into 'this' ("old") digest.
1214  Value* this_ptr1,
1215  Value* that_ptr1,
1216  const size_t target_logical_idx,
1217  Function* ir_reduce_one_entry) const {
1219  const auto old_set_handle = emit_load_i64(this_ptr1, ir_reduce_one_entry);
1220  const auto new_set_handle = emit_load_i64(that_ptr1, ir_reduce_one_entry);
1221  const auto this_qmd_arg = ir_reduce_one_entry->arg(2);
1222  const auto that_qmd_arg = ir_reduce_one_entry->arg(3);
// Same new-before-old argument convention as the count-distinct union call.
1223  ir_reduce_one_entry->add<ExternalCall>(
1224  "approx_median_jit_rt",
1225  Type::Void,
1226  std::vector<const Value*>{
1227  new_set_handle,
1228  old_set_handle,
1229  that_qmd_arg,
1230  this_qmd_arg,
1231  ir_reduce_one_entry->addConstant<ConstantInt>(target_logical_idx, Type::Int64)},
1232  "");
1233 }
1234 
// NOTE(review): body of ResultSetReductionJIT::finalizeReductionCode; the
// signature opener (original source line 1235), the CompilationOptions
// initializer fields (line 1242), the native-codegen call opener (line 1256,
// presumably CodeGenerator::generateNativeCPUCode) and the cache-insert
// opener (line 1264, presumably addCodeToCache with 'key') are missing from
// this extract -- confirm in full source.
// JIT-compiles the reduce loop to native CPU code, records the function
// pointer and compilation context, and caches the result under 'key'.
1236  ReductionCode reduction_code,
1237  const llvm::Function* ir_is_empty,
1238  const llvm::Function* ir_reduce_one_entry,
1239  const llvm::Function* ir_reduce_one_entry_idx,
1240  const CodeCacheKey& key) const {
1241  CompilationOptions co{
1243 
// Release builds log the four reduction functions individually; debug builds
// log the whole (metadata-annotated) module instead.
1244 #ifdef NDEBUG
1245  LOG(IR) << "Reduction Loop:\n"
1246  << serialize_llvm_object(reduction_code.llvm_reduce_loop);
1247  LOG(IR) << "Reduction Is Empty Func:\n" << serialize_llvm_object(ir_is_empty);
1248  LOG(IR) << "Reduction One Entry Func:\n" << serialize_llvm_object(ir_reduce_one_entry);
1249  LOG(IR) << "Reduction One Entry Idx Func:\n"
1250  << serialize_llvm_object(ir_reduce_one_entry_idx);
1251 #else
1252  LOG(IR) << serialize_llvm_object(reduction_code.cgen_state->module_);
1253 #endif
1254 
// Give up unique ownership of the module: from here on its lifetime is
// presumably managed by the execution engine created below -- confirm.
1255  reduction_code.module.release();
1257  reduction_code.llvm_reduce_loop, {reduction_code.llvm_reduce_loop}, co);
1258  reduction_code.func_ptr = reinterpret_cast<ReductionCode::FuncPtr>(
1259  ee->getPointerToFunction(reduction_code.llvm_reduce_loop));
1260 
1261  auto cpu_compilation_context = std::make_shared<CpuCompilationContext>(std::move(ee));
1262  cpu_compilation_context->setFunctionPointer(reduction_code.llvm_reduce_loop);
1263  reduction_code.compilation_context = cpu_compilation_context;
// Insert the compiled context into the shared code cache (s_code_cache).
1265  reduction_code.compilation_context,
1266  reduction_code.llvm_reduce_loop->getParent(),
1267  s_code_cache);
1268  return reduction_code;
1269 }
1270 
1271 namespace {
1272 
1273 std::string target_info_key(const TargetInfo& target_info) {
1274  return std::to_string(target_info.is_agg) + "\n" +
1275  std::to_string(target_info.agg_kind) + "\n" +
1276  target_info.sql_type.get_type_name() + "\n" +
1277  std::to_string(target_info.sql_type.get_notnull()) + "\n" +
1278  target_info.agg_arg_type.get_type_name() + "\n" +
1279  std::to_string(target_info.agg_arg_type.get_notnull()) + "\n" +
1280  std::to_string(target_info.skip_null_val) + "\n" +
1281  std::to_string(target_info.is_distinct);
1282 }
1283 
1284 } // namespace
1285 
1286 std::string ResultSetReductionJIT::cacheKey() const {
1287  std::vector<std::string> target_init_vals_strings;
1288  std::transform(target_init_vals_.begin(),
1289  target_init_vals_.end(),
1290  std::back_inserter(target_init_vals_strings),
1291  [](const int64_t v) { return std::to_string(v); });
1292  const auto target_init_vals_key =
1293  boost::algorithm::join(target_init_vals_strings, ", ");
1294  std::vector<std::string> targets_strings;
1295  std::transform(
1296  targets_.begin(),
1297  targets_.end(),
1298  std::back_inserter(targets_strings),
1299  [](const TargetInfo& target_info) { return target_info_key(target_info); });
1300  const auto targets_key = boost::algorithm::join(targets_strings, ", ");
1301  return query_mem_desc_.reductionKey() + "\n" + target_init_vals_key + "\n" +
1302  targets_key;
1303 }
1304 
// NOTE(review): body of a codegen() method (two are declared in the member
// list; this one unconditionally uses the no-collisions path). The signature
// line (original source line 1305) and lines 1308, 1332 and 1338 (the latter
// two presumably `translate_function(` call openers) are missing from this
// extract -- confirm in full source.
// Builds the reduction IR (is-empty, one-entry, one-entry-idx, loop), lowers
// each into LLVM functions inside a shallow copy of the runtime module, and
// returns the assembled ReductionCode.
1306  const auto hash_type = query_mem_desc_.getQueryDescriptionType();
1307  auto reduction_code = setup_functions_ir(hash_type);
// Populate the four IR functions for the perfect-hash (no collisions) layout.
1309  isEmpty(reduction_code);
1310  reduceOneEntryNoCollisions(reduction_code);
1311  reduceOneEntryNoCollisionsIdx(reduction_code);
1312  reduceLoop(reduction_code);
1313  reduction_code.cgen_state.reset(new CgenState({}, false));
1314  auto cgen_state = reduction_code.cgen_state.get();
// Shallow-copy the runtime module so the generated functions can link against
// the runtime helpers.
1315  std::unique_ptr<llvm::Module> module(runtime_module_shallow_copy(cgen_state));
1316 
1317  cgen_state->module_ = module.get();
1318  AUTOMATIC_IR_METADATA(cgen_state);
// Create empty LLVM function declarations for each IR function...
1319  auto ir_is_empty = create_llvm_function(reduction_code.ir_is_empty.get(), cgen_state);
1320  auto ir_reduce_one_entry =
1321  create_llvm_function(reduction_code.ir_reduce_one_entry.get(), cgen_state);
1322  auto ir_reduce_one_entry_idx =
1323  create_llvm_function(reduction_code.ir_reduce_one_entry_idx.get(), cgen_state);
1324  auto ir_reduce_loop =
1325  create_llvm_function(reduction_code.ir_reduce_loop.get(), cgen_state);
// ...and record the IR -> LLVM mapping so calls can be resolved while
// translating bodies below.
1326  std::unordered_map<const Function*, llvm::Function*> f;
1327  f.emplace(reduction_code.ir_is_empty.get(), ir_is_empty);
1328  f.emplace(reduction_code.ir_reduce_one_entry.get(), ir_reduce_one_entry);
1329  f.emplace(reduction_code.ir_reduce_one_entry_idx.get(), ir_reduce_one_entry_idx);
1330  f.emplace(reduction_code.ir_reduce_loop.get(), ir_reduce_loop);
1331  translate_function(reduction_code.ir_is_empty.get(), ir_is_empty, reduction_code, f);
1333  reduction_code.ir_reduce_one_entry.get(), ir_reduce_one_entry, reduction_code, f);
1334  translate_function(reduction_code.ir_reduce_one_entry_idx.get(),
1335  ir_reduce_one_entry_idx,
1336  reduction_code,
1337  f);
1339  reduction_code.ir_reduce_loop.get(), ir_reduce_loop, reduction_code, f);
1340  reduction_code.llvm_reduce_loop = ir_reduce_loop;
1341  reduction_code.module = std::move(module);
1342  return reduction_code;
1343 }
GroupValueInfo get_group_value_reduction(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const size_t key_width, const QueryMemoryDescriptor &query_mem_desc, const int64_t *that_buff_i64, const size_t that_entry_idx, const size_t that_entry_count, const uint32_t row_size_quad)
void emit_aggregate_one_nullable_value(const std::string &agg_kind, Value *val_ptr, Value *other_ptr, const int64_t init_val, const size_t chosen_bytes, const TargetInfo &agg_info, Function *ir_reduce_one_entry)
std::unique_ptr< CgenState > cgen_state
void clear()
Definition: LruCache.hpp:60
#define CHECK_EQ(x, y)
Definition: Logger.h:205
Value * add(Args &&... args)
void reduceOneEntryBaseline(const ReductionCode &reduction_code) const
bool is_aggregate_query(const QueryDescriptionType hash_type)
void isEmpty(const ReductionCode &reduction_code) const
void reduceOneEntryNoCollisionsIdx(const ReductionCode &reduction_code) const
void count_distinct_set_union(const int64_t new_set_handle, const int64_t old_set_handle, const CountDistinctDescriptor &new_count_distinct_desc, const CountDistinctDescriptor &old_count_distinct_desc)
#define EMPTY_KEY_64
void count_distinct_set_union_jit_rt(const int64_t new_set_handle, const int64_t old_set_handle, const void *that_qmd_handle, const void *this_qmd_handle, const int64_t target_logical_idx)
bool is_string() const
Definition: sqltypes.h:478
std::unique_ptr< llvm::Module > runtime_module_shallow_copy(CgenState *cgen_state)
static ExecutionEngineWrapper generateNativeCPUCode(llvm::Function *func, const std::unordered_set< llvm::Function *> &live_funcs, const CompilationOptions &co)
void reduceOneApproxMedianSlot(Value *this_ptr1, Value *that_ptr1, const size_t target_logical_idx, Function *ir_reduce_one_entry) const
void serialized_varlen_buffer_sample(const void *serialized_varlen_buffer_handle, int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const int64_t init_val, const int64_t length_to_elems)
void reduceOneSlot(Value *this_ptr1, Value *this_ptr2, Value *that_ptr1, Value *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const size_t init_agg_val_idx, const size_t first_slot_idx_for_target, Function *ir_reduce_one_entry) const
std::shared_ptr< CompilationContext > compilation_context
void varlen_buffer_sample(int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const int64_t init_val)
std::unique_ptr< Function > ir_reduce_loop
Value * emit_read_int_from_buff(Value *ptr, const int8_t compact_sz, Function *function)
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
#define LOG(tag)
Definition: Logger.h:188
void mark_function_always_inline(llvm::Function *func)
void get_group_value_reduction_rt(int8_t *groups_buffer, const int8_t *key, const uint32_t key_count, const void *this_qmd_handle, const int8_t *that_buff, const uint32_t that_entry_idx, const uint32_t that_entry_count, const uint32_t row_size_bytes, int64_t **buff_out, uint8_t *empty)
std::string join(T const &container, std::string const &delim)
bool dynamic_watchdog()
size_t getCountDistinctDescriptorsSize() const
llvm::Function * llvm_reduce_loop
#define CHECK_GE(x, y)
Definition: Logger.h:210
void reduceOneEntryTargetsNoCollisions(Function *ir_reduce_one_entry, Value *this_targets_start_ptr, Value *that_targets_start_ptr) const
std::vector< std::string > CodeCacheKey
Definition: CodeCache.h:25
std::unique_ptr< llvm::Module > g_rt_module
size_t get_slot_off_quad(const QueryMemoryDescriptor &query_mem_desc)
std::unique_ptr< Function > ir_reduce_one_entry
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:77
const std::vector< int64_t > target_init_vals_
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:134
bool is_varlen() const
Definition: sqltypes.h:500
bool skip_null_val
Definition: TargetInfo.h:44
const Value * emit_checked_write_projection(Value *slot_pi8, Value *other_pi8, const int64_t init_val, const size_t chosen_bytes, Function *ir_reduce_one_entry)
std::unique_ptr< Function > setup_reduce_one_entry_idx(ReductionCode *reduction_code)
name
Definition: setup.py:35
int32_t(*)(int8_t *this_buff, const int8_t *that_buff, const int32_t start_entry_index, const int32_t end_entry_index, const int32_t that_entry_count, const void *this_qmd, const void *that_qmd, const void *serialized_varlen_buffer) FuncPtr
const QueryMemoryDescriptor query_mem_desc_
std::unique_ptr< Function > ir_is_empty
virtual ReductionCode codegen() const
std::string to_string(char const *&&v)
SQLTypeInfo agg_arg_type
Definition: TargetInfo.h:43
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
void emit_aggregate_one_value(const std::string &agg_kind, Value *val_ptr, Value *other_ptr, const size_t chosen_bytes, const TargetInfo &agg_info, Function *ir_reduce_one_entry)
const Executor * getExecutor() const
Value * emit_load_i32(Value *ptr, Function *function)
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:318
int8_t get_width_for_slot(const size_t target_slot_idx, const bool float_argument_input, const QueryMemoryDescriptor &query_mem_desc)
llvm::Module * module_
Definition: CgenState.h:322
ResultSetReductionJIT(const QueryMemoryDescriptor &query_mem_desc, const std::vector< TargetInfo > &targets, const std::vector< int64_t > &target_init_vals)
llvm::LLVMContext & context_
Definition: CgenState.h:331
size_t get_byteoff_of_slot(const size_t slot_idx, const QueryMemoryDescriptor &query_mem_desc)
bool is_agg
Definition: TargetInfo.h:40
size_t advance_slot(const size_t j, const TargetInfo &target_info, const bool separate_varlen_storage)
ReductionCode finalizeReductionCode(ReductionCode reduction_code, const llvm::Function *ir_is_empty, const llvm::Function *ir_reduce_one_entry, const llvm::Function *ir_reduce_one_entry_idx, const CodeCacheKey &key) const
void generate_loop_body(For *for_loop, Function *ir_reduce_loop, Function *ir_reduce_one_entry_idx, Value *this_buff, Value *that_buff, Value *start_index, Value *that_entry_count, Value *this_qmd_handle, Value *that_qmd_handle, Value *serialized_varlen_buffer)
void emit_write_projection(Value *slot_pi8, Value *other_pi8, const int64_t init_val, const size_t chosen_bytes, Function *ir_reduce_one_entry)
void reduceOneEntryNoCollisions(const ReductionCode &reduction_code) const
void translate_function(const Function *function, llvm::Function *llvm_function, const ReductionCode &reduction_code, const std::unordered_map< const Function *, llvm::Function *> &f)
DEVICE void mergeTDigest(TDigest &t_digest)
Definition: quantile.h:223
uint8_t check_watchdog_rt(const size_t sample_seed)
Definition: sqldefs.h:75
static std::mutex s_reduction_mutex
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:130
std::string target_info_key(const TargetInfo &target_info)
std::unique_ptr< Function > ir_reduce_one_entry_idx
ReductionCode setup_functions_ir(const QueryDescriptionType hash_type)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
size_t targetGroupbyIndicesSize() const
std::unique_ptr< Function > setup_is_empty_entry(ReductionCode *reduction_code)
SQLAgg agg_kind
Definition: TargetInfo.h:41
static void addCodeToCache(const CodeCacheKey &, std::shared_ptr< CompilationContext >, llvm::Module *, CodeCache &)
#define AUTOMATIC_IR_METADATA_DONE()
#define UNLIKELY(x)
Definition: likely.h:25
llvm::Type * llvm_type(const Type type, llvm::LLVMContext &ctx)
int32_t getTargetIdxForKey() const
void reduceOneCountDistinctSlot(Value *this_ptr1, Value *that_ptr1, const size_t target_logical_idx, Function *ir_reduce_one_entry) const
#define CHECK_LT(x, y)
Definition: Logger.h:207
bool is_geometry() const
Definition: sqltypes.h:490
std::string serialize_llvm_object(const T *llvm_obj)
std::string get_type_name() const
Definition: sqltypes.h:414
size_t get_row_bytes(const QueryMemoryDescriptor &query_mem_desc)
int64_t getTargetGroupbyIndex(const size_t target_idx) const
void reduceOneEntryBaselineIdx(const ReductionCode &reduction_code) const
void approx_median_jit_rt(const int64_t new_set_handle, const int64_t old_set_handle, const void *that_qmd_handle, const void *this_qmd_handle, const int64_t target_logical_idx)
Definition: sqldefs.h:76
std::unique_ptr< Function > create_function(const std::string name, const std::vector< Function::NamedArg > &arg_types, const Type ret_type, const bool always_inline)
std::string reductionKey() const
std::unique_ptr< Function > setup_reduce_one_entry(ReductionCode *reduction_code, const QueryDescriptionType hash_type)
Value * emit_load_i64(Value *ptr, Function *function)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:697
#define CHECK(condition)
Definition: Logger.h:197
const ColSlotContext & getColSlotContext() const
#define EMPTY_KEY_32
std::unique_ptr< llvm::Module > module
QueryDescriptionType
Definition: Types.h:26
const Value * iter() const
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
Value * emit_load(Value *ptr, Type ptr_type, Function *function)
value_t * get(const key_t &key)
Definition: LruCache.hpp:39
llvm::Function * create_llvm_function(const Function *function, CgenState *cgen_state)
bool is_distinct
Definition: TargetInfo.h:45
void reduceLoop(const ReductionCode &reduction_code) const
void emit_aggregate_one_count(Value *val_ptr, Value *other_ptr, const size_t chosen_bytes, Function *ir_reduce_one_entry)
QueryDescriptionType getQueryDescriptionType() const
Definition: sqldefs.h:74
virtual ReductionCode codegen() const
Definition: sqldefs.h:72
std::unique_ptr< Function > setup_reduce_loop(ReductionCode *reduction_code)
size_t get_key_bytes_rowwise(const QueryMemoryDescriptor &query_mem_desc)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
const std::string & label() const
size_t getEffectiveKeyWidth() const
void reduceOneAggregateSlot(Value *this_ptr1, Value *this_ptr2, Value *that_ptr1, Value *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const int64_t init_val, const int8_t chosen_bytes, Function *ir_reduce_one_entry) const
const std::vector< TargetInfo > targets_