OmniSciDB  5ade3759e0
ResultSetReductionJIT.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ResultSetReductionJIT.h"
18 
19 #include "CodeGenerator.h"
20 #include "DynamicWatchdog.h"
21 #include "Execute.h"
22 #include "IRCodegenUtils.h"
24 
25 #include "Shared/likely.h"
26 #include "Shared/mapdpath.h"
27 
28 #include <llvm/Bitcode/BitcodeReader.h>
29 #include <llvm/ExecutionEngine/Interpreter.h>
30 #include <llvm/IR/Function.h>
31 #include <llvm/IR/IRBuilder.h>
32 #include <llvm/IR/Verifier.h>
33 #include <llvm/Support/SourceMgr.h>
34 #include <llvm/Support/raw_os_ostream.h>
35 #include <llvm/Transforms/Utils/Cloning.h>
36 
37 extern std::unique_ptr<llvm::Module> g_rt_module;
38 
40 
42 
43 namespace {
44 
// Error code handed back by the reduction when the watchdog timer fires.
constexpr int32_t WATCHDOG_ERROR{-1};
// Entry-count threshold: with fewer entries than this, the LLVM interpreter is used
// rather than the JIT.
constexpr size_t INTERP_THRESHOLD{50};
50 
51 // Make a shallow copy (just declarations) of the runtime module. Function definitions are
52 // cloned only if they're used from the generated code.
53 std::unique_ptr<llvm::Module> runtime_module_shallow_copy(CgenState* cgen_state) {
54  return llvm::CloneModule(
55 #if LLVM_VERSION_MAJOR >= 7
56  *g_rt_module.get(),
57 #else
58  g_rt_module.get(),
59 #endif
60  cgen_state->vmap_,
61  [](const llvm::GlobalValue* gv) {
62  auto func = llvm::dyn_cast<llvm::Function>(gv);
63  if (!func) {
64  return true;
65  }
66  return (func->getLinkage() == llvm::GlobalValue::LinkageTypes::PrivateLinkage ||
67  func->getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage);
68  });
69 }
70 
71 // Load the value stored at 'ptr' as the given 'loaded_type'.
72 llvm::Value* emit_load(llvm::Value* ptr, llvm::Type* loaded_type, CgenState* cgen_state) {
73  return cgen_state->ir_builder_.CreateLoad(
74  cgen_state->ir_builder_.CreateBitCast(ptr, loaded_type),
75  ptr->getName() + "_loaded");
76 }
77 
78 // Load the value stored at 'ptr' as a 32-bit signed integer.
79 llvm::Value* emit_load_i32(llvm::Value* ptr, CgenState* cgen_state) {
80  const auto pi32_type = llvm::Type::getInt32PtrTy(cgen_state->context_);
81  return emit_load(ptr, pi32_type, cgen_state);
82 }
83 
84 // Load the value stored at 'ptr' as a 64-bit signed integer.
85 llvm::Value* emit_load_i64(llvm::Value* ptr, CgenState* cgen_state) {
86  const auto pi64_type = llvm::Type::getInt64PtrTy(cgen_state->context_);
87  return emit_load(ptr, pi64_type, cgen_state);
88 }
89 
90 // Read a 32- or 64-bit integer stored at 'ptr' and sign extend to 64-bit.
91 llvm::Value* emit_read_int_from_buff(llvm::Value* ptr,
92  const int8_t compact_sz,
93  CgenState* cgen_state) {
94  switch (compact_sz) {
95  case 8: {
96  return emit_load_i64(ptr, cgen_state);
97  }
98  case 4: {
99  const auto loaded_val = emit_load_i32(ptr, cgen_state);
100  auto& ctx = cgen_state->context_;
101  const auto i64_type = get_int_type(64, ctx);
102  return cgen_state->ir_builder_.CreateSExt(loaded_val, i64_type);
103  }
104  default: {
105  LOG(FATAL) << "Invalid byte width: " << compact_sz;
106  return nullptr;
107  }
108  }
109 }
110 
111 // Emit a runtime call to accumulate into the 'val_ptr' byte address the 'other_ptr'
112 // value when the type is specified as not null.
113 void emit_aggregate_one_value(const std::string& agg_kind,
114  llvm::Value* val_ptr,
115  llvm::Value* other_ptr,
116  const size_t chosen_bytes,
117  const TargetInfo& agg_info,
118  CgenState* cgen_state) {
119  const auto sql_type = get_compact_type(agg_info);
120  const auto pi32_type = llvm::Type::getInt32PtrTy(cgen_state->context_);
121  const auto pi64_type = llvm::Type::getInt64PtrTy(cgen_state->context_);
122  const auto pf32_type = llvm::Type::getFloatPtrTy(cgen_state->context_);
123  const auto pf64_type = llvm::Type::getDoublePtrTy(cgen_state->context_);
124  const auto dest_name = agg_kind + "_dest";
125  if (sql_type.is_fp()) {
126  if (chosen_bytes == sizeof(float)) {
127  const auto agg =
128  cgen_state->ir_builder_.CreateBitCast(val_ptr, pi32_type, dest_name);
129  const auto val = emit_load(other_ptr, pf32_type, cgen_state);
130  cgen_state->emitCall("agg_" + agg_kind + "_float", {agg, val});
131  } else {
132  CHECK_EQ(chosen_bytes, sizeof(double));
133  const auto agg =
134  cgen_state->ir_builder_.CreateBitCast(val_ptr, pi64_type, dest_name);
135  const auto val = emit_load(other_ptr, pf64_type, cgen_state);
136  cgen_state->emitCall("agg_" + agg_kind + "_double", {agg, val});
137  }
138  } else {
139  if (chosen_bytes == sizeof(int32_t)) {
140  const auto agg =
141  cgen_state->ir_builder_.CreateBitCast(val_ptr, pi32_type, dest_name);
142  const auto val = emit_load(other_ptr, pi32_type, cgen_state);
143  cgen_state->emitCall("agg_" + agg_kind + "_int32", {agg, val});
144  } else {
145  CHECK_EQ(chosen_bytes, sizeof(int64_t));
146  const auto agg =
147  cgen_state->ir_builder_.CreateBitCast(val_ptr, pi64_type, dest_name);
148  const auto val = emit_load(other_ptr, pi64_type, cgen_state);
149  cgen_state->emitCall("agg_" + agg_kind, {agg, val});
150  }
151  }
152 }
153 
154 // Same as above, but support nullable types as well.
155 void emit_aggregate_one_nullable_value(const std::string& agg_kind,
156  llvm::Value* val_ptr,
157  llvm::Value* other_ptr,
158  const int64_t init_val,
159  const size_t chosen_bytes,
160  const TargetInfo& agg_info,
161  CgenState* cgen_state) {
162  const auto dest_name = agg_kind + "_dest";
163  if (agg_info.skip_null_val) {
164  const auto pi32_type = llvm::Type::getInt32PtrTy(cgen_state->context_);
165  const auto pi64_type = llvm::Type::getInt64PtrTy(cgen_state->context_);
166  const auto pf32_type = llvm::Type::getFloatPtrTy(cgen_state->context_);
167  const auto pf64_type = llvm::Type::getDoublePtrTy(cgen_state->context_);
168  const auto sql_type = get_compact_type(agg_info);
169  if (sql_type.is_fp()) {
170  if (chosen_bytes == sizeof(float)) {
171  const auto agg =
172  cgen_state->ir_builder_.CreateBitCast(val_ptr, pi32_type, dest_name);
173  const auto val = emit_load(other_ptr, pf32_type, cgen_state);
174  const auto init_val_lv =
175  cgen_state->llFp(*reinterpret_cast<const float*>(may_alias_ptr(&init_val)));
176  cgen_state->emitCall("agg_" + agg_kind + "_float_skip_val",
177  {agg, val, init_val_lv});
178  } else {
179  CHECK_EQ(chosen_bytes, sizeof(double));
180  const auto agg =
181  cgen_state->ir_builder_.CreateBitCast(val_ptr, pi64_type, dest_name);
182  const auto val = emit_load(other_ptr, pf64_type, cgen_state);
183  const auto init_val_lv =
184  cgen_state->llFp(*reinterpret_cast<const double*>(may_alias_ptr(&init_val)));
185  cgen_state->emitCall("agg_" + agg_kind + "_double_skip_val",
186  {agg, val, init_val_lv});
187  }
188  } else {
189  if (chosen_bytes == sizeof(int32_t)) {
190  const auto agg =
191  cgen_state->ir_builder_.CreateBitCast(val_ptr, pi32_type, dest_name);
192  const auto val = emit_load(other_ptr, pi32_type, cgen_state);
193  const auto init_val_lv = cgen_state->llInt<int32_t>(init_val);
194  cgen_state->emitCall("agg_" + agg_kind + "_int32_skip_val",
195  {agg, val, init_val_lv});
196  } else {
197  CHECK_EQ(chosen_bytes, sizeof(int64_t));
198  const auto agg =
199  cgen_state->ir_builder_.CreateBitCast(val_ptr, pi64_type, dest_name);
200  const auto val = emit_load(other_ptr, pi64_type, cgen_state);
201  const auto init_val_lv = cgen_state->llInt<int64_t>(init_val);
202  cgen_state->emitCall("agg_" + agg_kind + "_skip_val", {agg, val, init_val_lv});
203  }
204  }
205  } else {
207  agg_kind, val_ptr, other_ptr, chosen_bytes, agg_info, cgen_state);
208  }
209 }
210 
211 // Emit code to accumulate the 'other_ptr' count into the 'val_ptr' destination.
212 void emit_aggregate_one_count(llvm::Value* val_ptr,
213  llvm::Value* other_ptr,
214  const size_t chosen_bytes,
215  CgenState* cgen_state) {
216  const auto dest_name = "count_dest";
217  if (chosen_bytes == sizeof(int32_t)) {
218  const auto pi32_type = llvm::Type::getInt32PtrTy(cgen_state->context_);
219  const auto agg = cgen_state->ir_builder_.CreateBitCast(val_ptr, pi32_type, dest_name);
220  const auto val = emit_load(other_ptr, pi32_type, cgen_state);
221  cgen_state->emitCall("agg_sum_int32", {agg, val});
222  } else {
223  CHECK_EQ(chosen_bytes, sizeof(int64_t));
224  const auto pi64_type = llvm::Type::getInt64PtrTy(cgen_state->context_);
225  const auto agg = cgen_state->ir_builder_.CreateBitCast(val_ptr, pi64_type, dest_name);
226  const auto val = emit_load(other_ptr, pi64_type, cgen_state);
227  cgen_state->emitCall("agg_sum", {agg, val});
228  }
229 }
230 
231 // Emit code to load the value stored at the 'other_pi8' as an integer of the given width
232 // 'chosen_bytes' and write it to the 'slot_pi8' destination only if necessary (the
233 // existing value at destination is the initialization value).
234 void emit_write_projection(llvm::Value* slot_pi8,
235  llvm::Value* other_pi8,
236  const int64_t init_val,
237  const size_t chosen_bytes,
238  CgenState* cgen_state) {
239  const auto func_name = "write_projection_int" + std::to_string(chosen_bytes * 8);
240  if (chosen_bytes == sizeof(int32_t)) {
241  const auto proj_val = emit_load_i32(other_pi8, cgen_state);
242  cgen_state->emitCall(func_name,
243  {slot_pi8, proj_val, cgen_state->llInt<int64_t>(init_val)});
244  } else {
245  CHECK_EQ(chosen_bytes, sizeof(int64_t));
246  const auto proj_val = emit_load_i64(other_pi8, cgen_state);
247  cgen_state->emitCall(func_name,
248  {slot_pi8, proj_val, cgen_state->llInt<int64_t>(init_val)});
249  }
250 }
251 
252 // Create the declaration for the 'is_empty_entry' function. Use private linkage since
253 // it's a helper only called from the generated code and mark it as always inline.
254 llvm::Function* setup_is_empty_entry(const CgenState* cgen_state) {
255  auto& ctx = cgen_state->context_;
256  const auto pi8_type = llvm::PointerType::get(get_int_type(8, ctx), 0);
257  const auto func_type = llvm::FunctionType::get(get_int_type(1, ctx), {pi8_type}, false);
258  auto func = llvm::Function::Create(
259  func_type, llvm::Function::PrivateLinkage, "is_empty_entry", cgen_state->module_);
260  const auto arg_it = func->arg_begin();
261  const auto row_ptr_arg = &*arg_it;
262  row_ptr_arg->setName("row_ptr");
264  return func;
265 }
266 
267 // Create the declaration for the 'reduce_one_entry' helper.
268 llvm::Function* setup_reduce_one_entry(const CgenState* cgen_state,
269  const QueryDescriptionType hash_type) {
270  auto& ctx = cgen_state->context_;
271  const auto pi8_type = llvm::PointerType::get(get_int_type(8, ctx), 0);
272  const auto pvoid_type = llvm::PointerType::get(llvm::Type::getVoidTy(ctx), 0);
273  const auto func_type =
274  llvm::FunctionType::get(llvm::Type::getVoidTy(ctx),
275  {pi8_type, pi8_type, pvoid_type, pvoid_type, pvoid_type},
276  false);
277  const auto func = llvm::Function::Create(
278  func_type, llvm::Function::PrivateLinkage, "reduce_one_entry", cgen_state->module_);
279  const auto arg_it = func->arg_begin();
280  switch (hash_type) {
282  const auto this_targets_ptr_arg = &*arg_it;
283  const auto that_targets_ptr_arg = &*(arg_it + 1);
284  this_targets_ptr_arg->setName("this_targets_ptr");
285  that_targets_ptr_arg->setName("that_targets_ptr");
286  break;
287  }
289  const auto this_row_ptr_arg = &*arg_it;
290  const auto that_row_ptr_arg = &*(arg_it + 1);
291  this_row_ptr_arg->setName("this_row_ptr");
292  that_row_ptr_arg->setName("that_row_ptr");
293  break;
294  }
295  default: {
296  LOG(FATAL) << "Unexpected query description type";
297  }
298  }
299  const auto this_qmd_arg = &*(arg_it + 2);
300  const auto that_qmd_arg = &*(arg_it + 3);
301  const auto serialized_varlen_buffer_arg = &*(arg_it + 4);
302  this_qmd_arg->setName("this_qmd");
303  that_qmd_arg->setName("that_qmd");
304  serialized_varlen_buffer_arg->setName("serialized_varlen_buffer_arg");
306  return func;
307 }
308 
309 // Create the declaration for the 'reduce_one_entry_idx' helper.
310 llvm::Function* setup_reduce_one_entry_idx(const CgenState* cgen_state) {
311  auto& ctx = cgen_state->context_;
312  const auto pi8_type = llvm::PointerType::get(get_int_type(8, ctx), 0);
313  const auto i32_type = get_int_type(32, ctx);
314  const auto pvoid_type = llvm::PointerType::get(llvm::Type::getVoidTy(ctx), 0);
315  const auto func_type = llvm::FunctionType::get(
316  llvm::Type::getVoidTy(ctx),
317  {pi8_type, pi8_type, i32_type, i32_type, pvoid_type, pvoid_type, pvoid_type},
318  false);
319  auto func = llvm::Function::Create(func_type,
320  llvm::Function::PrivateLinkage,
321  "reduce_one_entry_idx",
322  cgen_state->module_);
323  const auto arg_it = func->arg_begin();
324  const auto this_buff_arg = &*arg_it;
325  const auto that_buff_arg = &*(arg_it + 1);
326  const auto that_entry_idx_arg = &*(arg_it + 2);
327  const auto that_entry_count_arg = &*(arg_it + 3);
328  const auto this_qmd_handle_arg = &*(arg_it + 4);
329  const auto that_qmd_handle_arg = &*(arg_it + 5);
330  const auto serialized_varlen_buffer_arg = &*(arg_it + 6);
331  this_buff_arg->setName("this_buff");
332  that_buff_arg->setName("that_buff");
333  that_entry_idx_arg->setName("that_entry_idx");
334  that_entry_count_arg->setName("that_entry_count");
335  this_qmd_handle_arg->setName("this_qmd_handle");
336  that_qmd_handle_arg->setName("that_qmd_handle");
337  serialized_varlen_buffer_arg->setName("serialized_varlen_buffer");
339  return func;
340 }
341 
342 // Create the declaration for the 'reduce_loop' entry point. Use external linkage, this is
343 // the public API of the generated code directly used from result set reduction.
344 llvm::Function* setup_reduce_loop(const CgenState* cgen_state) {
345  auto& ctx = cgen_state->context_;
346  const auto pi8_type = llvm::PointerType::get(get_int_type(8, ctx), 0);
347  const auto i32_type = get_int_type(32, ctx);
348  const auto pvoid_type = llvm::PointerType::get(llvm::Type::getVoidTy(ctx), 0);
349  const auto func_type = llvm::FunctionType::get(i32_type,
350  {pi8_type,
351  pi8_type,
352  i32_type,
353  i32_type,
354  i32_type,
355  pvoid_type,
356  pvoid_type,
357  pvoid_type},
358  false);
359  auto func = llvm::Function::Create(
360  func_type, llvm::Function::ExternalLinkage, "reduce_loop", cgen_state->module_);
361  const auto arg_it = func->arg_begin();
362  const auto this_buff_arg = &*arg_it;
363  const auto that_buff_arg = &*(arg_it + 1);
364  const auto start_index_arg = &*(arg_it + 2);
365  const auto end_index_arg = &*(arg_it + 3);
366  const auto that_entry_count_arg = &*(arg_it + 4);
367  const auto this_qmd_handle_arg = &*(arg_it + 5);
368  const auto that_qmd_handle_arg = &*(arg_it + 6);
369  const auto serialized_varlen_buffer_arg = &*(arg_it + 7);
370  this_buff_arg->setName("this_buff");
371  that_buff_arg->setName("that_buff");
372  start_index_arg->setName("start_index");
373  end_index_arg->setName("end_index");
374  that_entry_count_arg->setName("that_entry_count");
375  this_qmd_handle_arg->setName("this_qmd_handle");
376  that_qmd_handle_arg->setName("that_qmd_handle");
377  serialized_varlen_buffer_arg->setName("serialized_varlen_buffer");
378  return func;
379 }
380 
381 // Setup the reduction function and helpers declarations, create a module and a code
382 // generation state object.
384  ReductionCode reduction_code{};
385  reduction_code.cgen_state.reset(new CgenState({}, false));
386  auto cgen_state = reduction_code.cgen_state.get();
387  std::unique_ptr<llvm::Module> module(runtime_module_shallow_copy(cgen_state));
388  cgen_state->module_ = module.get();
389  reduction_code.ir_is_empty = setup_is_empty_entry(cgen_state);
390  reduction_code.ir_reduce_one_entry = setup_reduce_one_entry(cgen_state, hash_type);
391  reduction_code.ir_reduce_one_entry_idx = setup_reduce_one_entry_idx(cgen_state);
392  reduction_code.ir_reduce_loop = setup_reduce_loop(cgen_state);
393  reduction_code.module = std::move(module);
394  return reduction_code;
395 }
396 
397 // When the number of entries is below 'INTERP_THRESHOLD', run the generated function in
398 // its IR form, without compiling to native code.
400  auto module = func->getParent();
401 
402  llvm::ExecutionEngine* execution_engine{nullptr};
403 
404  std::string err_str;
405  std::unique_ptr<llvm::Module> owner(module);
406  llvm::EngineBuilder eb(std::move(owner));
407  eb.setErrorStr(&err_str);
408  eb.setEngineKind(llvm::EngineKind::Interpreter);
409  execution_engine = eb.create();
410  CHECK(execution_engine);
411 
412  return ExecutionEngineWrapper(execution_engine);
413 }
414 
415 bool is_group_query(const QueryDescriptionType hash_type) {
416  return hash_type == QueryDescriptionType::GroupByBaselineHash ||
418 }
419 
420 // Emit an early return from a function when the provided 'cond' is true, which the caller
421 // code can use when entries are empty or the watchdog is triggered. For functions which
422 // return void, the specified error code is ignored. For functions which return an
423 // integer, the error code is returned.
424 void return_early(llvm::Value* cond,
425  const ReductionCode& reduction_code,
426  llvm::Function* func,
427  int error_code) {
428  auto cgen_state = reduction_code.cgen_state.get();
429  auto& ctx = cgen_state->context_;
430  const auto early_return = llvm::BasicBlock::Create(ctx, ".early_return", func, 0);
431  const auto do_reduction = llvm::BasicBlock::Create(ctx, ".do_reduction", func, 0);
432  cgen_state->ir_builder_.CreateCondBr(cond, early_return, do_reduction);
433  cgen_state->ir_builder_.SetInsertPoint(early_return);
434  if (func->getReturnType()->isVoidTy()) {
435  cgen_state->ir_builder_.CreateRetVoid();
436  } else {
437  cgen_state->ir_builder_.CreateRet(cgen_state->llInt<int32_t>(error_code));
438  }
439  cgen_state->ir_builder_.SetInsertPoint(do_reduction);
440 }
441 
442 // Variable length sample fast path (no serialized variable length buffer).
443 void varlen_buffer_sample(int8_t* this_ptr1,
444  int8_t* this_ptr2,
445  const int8_t* that_ptr1,
446  const int8_t* that_ptr2,
447  const int64_t init_val) {
448  const auto rhs_proj_col = *reinterpret_cast<const int64_t*>(that_ptr1);
449  if (rhs_proj_col != init_val) {
450  *reinterpret_cast<int64_t*>(this_ptr1) = rhs_proj_col;
451  }
452  CHECK(this_ptr2 && that_ptr2);
453  *reinterpret_cast<int64_t*>(this_ptr2) = *reinterpret_cast<const int64_t*>(that_ptr2);
454 }
455 
456 } // namespace
457 
459  const void* serialized_varlen_buffer_handle,
460  int8_t* this_ptr1,
461  int8_t* this_ptr2,
462  const int8_t* that_ptr1,
463  const int8_t* that_ptr2,
464  const int64_t init_val,
465  const int64_t length_to_elems) {
466  if (!serialized_varlen_buffer_handle) {
467  varlen_buffer_sample(this_ptr1, this_ptr2, that_ptr1, that_ptr2, init_val);
468  return;
469  }
470  const auto& serialized_varlen_buffer =
471  *reinterpret_cast<const std::vector<std::string>*>(serialized_varlen_buffer_handle);
472  if (!serialized_varlen_buffer.empty()) {
473  const auto rhs_proj_col = *reinterpret_cast<const int64_t*>(that_ptr1);
474  CHECK_LT(static_cast<size_t>(rhs_proj_col), serialized_varlen_buffer.size());
475  const auto& varlen_bytes_str = serialized_varlen_buffer[rhs_proj_col];
476  const auto str_ptr = reinterpret_cast<const int8_t*>(varlen_bytes_str.c_str());
477  *reinterpret_cast<int64_t*>(this_ptr1) = reinterpret_cast<const int64_t>(str_ptr);
478  *reinterpret_cast<int64_t*>(this_ptr2) =
479  static_cast<int64_t>(varlen_bytes_str.size() / length_to_elems);
480  } else {
481  varlen_buffer_sample(this_ptr1, this_ptr2, that_ptr1, that_ptr2, init_val);
482  }
483 }
484 
485 // Wrappers to be called from the generated code, sharing implementation with the rest of
486 // the system.
487 
488 extern "C" void count_distinct_set_union_jit_rt(const int64_t new_set_handle,
489  const int64_t old_set_handle,
490  const void* that_qmd_handle,
491  const void* this_qmd_handle,
492  const int64_t target_logical_idx) {
493  const auto that_qmd = reinterpret_cast<const QueryMemoryDescriptor*>(that_qmd_handle);
494  const auto this_qmd = reinterpret_cast<const QueryMemoryDescriptor*>(this_qmd_handle);
495  const auto& new_count_distinct_desc =
496  that_qmd->getCountDistinctDescriptor(target_logical_idx);
497  const auto& old_count_distinct_desc =
498  this_qmd->getCountDistinctDescriptor(target_logical_idx);
499  CHECK(old_count_distinct_desc.impl_type_ != CountDistinctImplType::Invalid);
500  CHECK(old_count_distinct_desc.impl_type_ == new_count_distinct_desc.impl_type_);
502  new_set_handle, old_set_handle, new_count_distinct_desc, old_count_distinct_desc);
503 }
504 
505 extern "C" void get_group_value_reduction_rt(int8_t* groups_buffer,
506  const int8_t* key,
507  const uint32_t key_count,
508  const void* this_qmd_handle,
509  const int8_t* that_buff,
510  const uint32_t that_entry_idx,
511  const uint32_t that_entry_count,
512  const uint32_t row_size_bytes,
513  int64_t** buff_out,
514  uint8_t* empty) {
515  const auto& this_qmd = *reinterpret_cast<const QueryMemoryDescriptor*>(this_qmd_handle);
516  const auto gvi = get_group_value_reduction(reinterpret_cast<int64_t*>(groups_buffer),
517  this_qmd.getEntryCount(),
518  reinterpret_cast<const int64_t*>(key),
519  key_count,
520  this_qmd.getEffectiveKeyWidth(),
521  this_qmd,
522  reinterpret_cast<const int64_t*>(that_buff),
523  that_entry_idx,
524  that_entry_count,
525  row_size_bytes >> 3);
526  *buff_out = gvi.first;
527  *empty = gvi.second;
528 }
529 
530 extern "C" uint8_t check_watchdog_rt(const size_t sample_seed) {
531  if (UNLIKELY(g_enable_dynamic_watchdog && (sample_seed & 0x3F) == 0 &&
532  dynamic_watchdog())) {
533  return true;
534  }
535  return false;
536 }
537 
539  const std::vector<TargetInfo>& targets,
540  const std::vector<int64_t>& target_init_vals)
541  : query_mem_desc_(query_mem_desc)
542  , targets_(targets)
543  , target_init_vals_(target_init_vals) {}
544 
545 // The code generated for a reduction between two result set buffers is structured in
546 // several functions and their IR is stored in the 'ReductionCode' structure. At a high
547 // level, the pseudocode is:
548 //
549 // func is_empty_func(row_ptr):
550 // ...
551 //
552 // func reduce_func_baseline(this_ptr, that_ptr):
553 // if is_empty_func(that_ptr):
554 // return
555 // for each target in the row:
556 // reduce target from that_ptr into this_ptr
557 //
558 // func reduce_func_perfect_hash(this_ptr, that_ptr):
559 // if is_empty_func(that_ptr):
560 // return
561 // for each target in the row:
562 // reduce target from that_ptr into this_ptr
563 //
564 // func reduce_func_idx(this_buff, that_buff, that_entry_index):
565 // that_ptr = that_result_set[that_entry_index]
566 // # Retrieval of 'this_ptr' is different between perfect hash and baseline.
567 // this_ptr = this_result_set[that_entry_index]
568 // or
569 // get_row(key(that_row_ptr), this_result_set_buffer)
570 // reduce_func_[baseline|perfect_hash](this_ptr, that_ptr)
571 //
572 // func reduce_loop(this_buff, that_buff, start_entry_index, end_entry_index):
573 // for that_entry_index in [start_entry_index, end_entry_index):
574 // reduce_func_idx(this_buff, that_buff, that_entry_index)
575 
577  std::lock_guard<std::mutex> reduction_guard(ReductionCode::s_reduction_mutex);
578  const auto hash_type = query_mem_desc_.getQueryDescriptionType();
579  if (query_mem_desc_.didOutputColumnar() || !is_group_query(hash_type)) {
580  return {};
581  }
582  auto reduction_code = setup_functions_ir(hash_type);
583  isEmpty(reduction_code);
586  reduceOneEntryNoCollisions(reduction_code);
587  reduceOneEntryNoCollisionsIdx(reduction_code);
588  break;
589  }
591  reduceOneEntryBaseline(reduction_code);
592  reduceOneEntryBaselineIdx(reduction_code);
593  break;
594  }
595  default: {
596  LOG(FATAL) << "Unexpected query description type";
597  }
598  }
599  reduceLoop(reduction_code);
600  return finalizeReductionCode(std::move(reduction_code));
601 }
602 
605  g_rt_module = nullptr;
606 }
607 
608 void ResultSetReductionJIT::isEmpty(const ReductionCode& reduction_code) const {
611  auto cgen_state = reduction_code.cgen_state.get();
612  auto& ctx = cgen_state->context_;
613  const auto bb_entry =
614  llvm::BasicBlock::Create(ctx, ".entry", reduction_code.ir_is_empty, 0);
615  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
616  llvm::Value* key{nullptr};
617  llvm::Value* empty_key_val{nullptr};
618  const auto arg_it = reduction_code.ir_is_empty->arg_begin();
619  const auto keys_ptr = &*arg_it;
624  CHECK_LT(static_cast<size_t>(query_mem_desc_.getTargetIdxForKey()),
625  target_init_vals_.size());
626  const auto target_slot_off =
628  const auto slot_ptr = cgen_state->ir_builder_.CreateGEP(
629  keys_ptr, cgen_state->llInt<int32_t>(target_slot_off), "is_empty_slot_ptr");
630  const auto compact_sz =
632  key = emit_read_int_from_buff(slot_ptr, compact_sz, cgen_state);
633  empty_key_val = cgen_state->llInt<int64_t>(
635  } else {
637  case 4: {
640  key = emit_load_i32(keys_ptr, cgen_state);
641  empty_key_val = cgen_state->llInt<int32_t>(EMPTY_KEY_32);
642  break;
643  }
644  case 8: {
645  key = emit_load_i64(keys_ptr, cgen_state);
646  empty_key_val = cgen_state->llInt<int64_t>(EMPTY_KEY_64);
647  break;
648  }
649  default:
650  LOG(FATAL) << "Invalid key width";
651  }
652  }
653  const auto ret =
654  cgen_state->ir_builder_.CreateICmpEQ(key, empty_key_val, "is_key_empty");
655  cgen_state->ir_builder_.CreateRet(ret);
656  verify_function_ir(reduction_code.ir_is_empty);
657 }
658 
660  const ReductionCode& reduction_code) const {
661  auto cgen_state = reduction_code.cgen_state.get();
662  const auto bb_entry = llvm::BasicBlock::Create(
663  cgen_state->context_, ".entry", reduction_code.ir_reduce_one_entry, 0);
664  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
665  const auto arg_it = reduction_code.ir_reduce_one_entry->arg_begin();
666  const auto this_row_ptr = &*arg_it;
667  const auto that_row_ptr = &*(arg_it + 1);
668  const auto that_is_empty = cgen_state->ir_builder_.CreateCall(
669  reduction_code.ir_is_empty, that_row_ptr, "that_is_empty");
670  return_early(that_is_empty, reduction_code, reduction_code.ir_reduce_one_entry, 0);
671 
672  const auto key_bytes = get_key_bytes_rowwise(query_mem_desc_);
673  if (key_bytes) { // copy the key from right hand side
674  cgen_state->ir_builder_.CreateMemCpy(
675  this_row_ptr, 0, that_row_ptr, 0, cgen_state->llInt<int32_t>(key_bytes));
676  }
677 
678  const auto key_bytes_with_padding = align_to_int64(key_bytes);
679  const auto key_bytes_lv = cgen_state->llInt<int32_t>(key_bytes_with_padding);
680  const auto this_targets_start_ptr =
681  cgen_state->ir_builder_.CreateGEP(this_row_ptr, key_bytes_lv, "this_targets_start");
682  const auto that_targets_start_ptr =
683  cgen_state->ir_builder_.CreateGEP(that_row_ptr, key_bytes_lv, "that_targets_start");
684 
686  reduction_code, this_targets_start_ptr, that_targets_start_ptr);
687  verify_function_ir(reduction_code.ir_reduce_one_entry);
688 }
689 
691  const ReductionCode& reduction_code,
692  llvm::Value* this_targets_start_ptr,
693  llvm::Value* that_targets_start_ptr) const {
694  auto cgen_state = reduction_code.cgen_state.get();
695  const auto& col_slot_context = query_mem_desc_.getColSlotContext();
696  llvm::Value* this_targets_ptr = this_targets_start_ptr;
697  llvm::Value* that_targets_ptr = that_targets_start_ptr;
698  size_t init_agg_val_idx = 0;
699  for (size_t target_logical_idx = 0; target_logical_idx < targets_.size();
700  ++target_logical_idx) {
701  const auto& target_info = targets_[target_logical_idx];
702  const auto& slots_for_col = col_slot_context.getSlotsForCol(target_logical_idx);
703  llvm::Value* this_ptr2{nullptr};
704  llvm::Value* that_ptr2{nullptr};
705 
706  bool two_slot_target{false};
707  if (target_info.is_agg &&
708  (target_info.agg_kind == kAVG ||
709  (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()))) {
710  // Note that this assumes if one of the slot pairs in a given target is an array,
711  // all slot pairs are arrays. Currently this is true for all geo targets, but we
712  // should better codify and store this information in the future
713  two_slot_target = true;
714  }
715 
716  for (size_t target_slot_idx = slots_for_col.front();
717  target_slot_idx < slots_for_col.back() + 1;
718  target_slot_idx += 2) {
719  const auto slot_off_val = query_mem_desc_.getPaddedSlotWidthBytes(target_slot_idx);
720  const auto slot_off = cgen_state->llInt<int32_t>(slot_off_val);
721  if (UNLIKELY(two_slot_target)) {
722  const auto desc = "target_" + std::to_string(target_logical_idx) + "_second_slot";
723  this_ptr2 =
724  cgen_state->ir_builder_.CreateGEP(this_targets_ptr, slot_off, "this_" + desc);
725  that_ptr2 =
726  cgen_state->ir_builder_.CreateGEP(that_targets_ptr, slot_off, "that_" + desc);
727  }
728  reduceOneSlot(this_targets_ptr,
729  this_ptr2,
730  that_targets_ptr,
731  that_ptr2,
732  target_info,
733  target_logical_idx,
734  target_slot_idx,
735  init_agg_val_idx,
736  slots_for_col.front(),
737  reduction_code);
738  auto increment_agg_val_idx_maybe =
739  [&init_agg_val_idx, &target_logical_idx, this](const int slot_count) {
741  query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) < 0) {
742  init_agg_val_idx += slot_count;
743  }
744  };
745  if (target_logical_idx + 1 == targets_.size() &&
746  target_slot_idx + 1 >= slots_for_col.back()) {
747  break;
748  }
749  const auto next_desc =
750  "target_" + std::to_string(target_logical_idx + 1) + "_first_slot";
751  if (UNLIKELY(two_slot_target)) {
752  increment_agg_val_idx_maybe(2);
753  const auto two_slot_off = cgen_state->llInt<int32_t>(
754  slot_off_val + query_mem_desc_.getPaddedSlotWidthBytes(target_slot_idx + 1));
755  this_targets_ptr = cgen_state->ir_builder_.CreateGEP(
756  this_targets_ptr, two_slot_off, "this_" + next_desc);
757  that_targets_ptr = cgen_state->ir_builder_.CreateGEP(
758  that_targets_ptr, two_slot_off, "that_" + next_desc);
759  } else {
760  increment_agg_val_idx_maybe(1);
761  this_targets_ptr = cgen_state->ir_builder_.CreateGEP(
762  this_targets_ptr, slot_off, "this_" + next_desc);
763  that_targets_ptr = cgen_state->ir_builder_.CreateGEP(
764  that_targets_ptr, slot_off, "that_" + next_desc);
765  }
766  }
767  }
768  reduction_code.cgen_state->ir_builder_.CreateRetVoid();
769  verify_function_ir(reduction_code.ir_reduce_one_entry);
770 }
771 
// Emits the body of reduction_code.ir_reduce_one_entry: visits every entry of
// targets_, emits the per-target reduction through reduceOneSlot, and moves a
// "this" and a "that" pointer from one target's first slot to the next one.
// The generated function returns void and is checked with verify_function_ir.
// NOTE(review): listing line 772, which carries the function name, is absent
// from this view; presumably this is ResultSetReductionJIT::reduceOneEntryBaseline
// - confirm against the repository.
773  const ReductionCode& reduction_code) const {
774  auto cgen_state = reduction_code.cgen_state.get();
775  const auto bb_entry = llvm::BasicBlock::Create(
776  cgen_state->context_, ".entry", reduction_code.ir_reduce_one_entry, 0);
777  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
     // Arguments 0 and 1 of ir_reduce_one_entry are used as the base pointers for
     // every GEP emitted below.
778  const auto arg_it = reduction_code.ir_reduce_one_entry->arg_begin();
779  const auto this_targets_ptr_arg = &*arg_it;
780  const auto that_targets_ptr_arg = &*(arg_it + 1);
781  llvm::Value* this_ptr1 = this_targets_ptr_arg;
782  llvm::Value* that_ptr1 = that_targets_ptr_arg;
     // j is handed to reduceOneSlot both as target_slot_idx and as
     // first_slot_idx_for_target; init_agg_val_idx is handed over as the index of
     // the initial value and also drives the pointer offset computed further down.
783  size_t j = 0;
784  size_t init_agg_val_idx = 0;
785  for (size_t target_logical_idx = 0; target_logical_idx < targets_.size();
786  ++target_logical_idx) {
787  const auto& target_info = targets_[target_logical_idx];
788  llvm::Value* this_ptr2{nullptr};
789  llvm::Value* that_ptr2{nullptr};
     // Only AVG and variable-length SAMPLE aggregates get second-slot pointers;
     // they are GEP'd from the first-slot pointers by sizeof(int64_t). All other
     // targets pass nullptr for both.
790  if (target_info.is_agg &&
791  (target_info.agg_kind == kAVG ||
792  (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()))) {
793  const auto desc = "target_" + std::to_string(target_logical_idx) + "_second_slot";
794  const auto second_slot_rel_off = cgen_state->llInt<int32_t>(sizeof(int64_t));
795  this_ptr2 = cgen_state->ir_builder_.CreateGEP(
796  this_ptr1, second_slot_rel_off, "this_" + desc);
797  that_ptr2 = cgen_state->ir_builder_.CreateGEP(
798  that_ptr1, second_slot_rel_off, "that_" + desc);
799  }
800  reduceOneSlot(this_ptr1,
801  this_ptr2,
802  that_ptr1,
803  that_ptr2,
804  target_info,
805  target_logical_idx,
806  j,
807  init_agg_val_idx,
808  j,
809  reduction_code);
     // After the last target there is no next slot to compute.
810  if (target_logical_idx + 1 == targets_.size()) {
811  break;
812  }
     // NOTE(review): listing line 813 is missing here - the `} else {` below has no
     // visible opening `if`. Both visible branches advance init_agg_val_idx with
     // advance_slot; the else branch does so only when getTargetGroupbyIndex is
     // negative for this target.
814  init_agg_val_idx = advance_slot(init_agg_val_idx, target_info, false);
815  } else {
816  if (query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) < 0) {
817  init_agg_val_idx = advance_slot(init_agg_val_idx, target_info, false);
818  }
819  }
820  j = advance_slot(j, target_info, false);
     // The next first-slot pointers are recomputed from the function arguments
     // (not from the previous pointers) at offset init_agg_val_idx * sizeof(int64_t).
     // Both GEPs are given the same value name, next_desc, without a this_/that_
     // prefix.
821  const auto next_desc =
822  "target_" + std::to_string(target_logical_idx + 1) + "_first_slot";
823  auto next_slot_rel_off =
824  cgen_state->llInt<int32_t>(init_agg_val_idx * sizeof(int64_t));
825  this_ptr1 = cgen_state->ir_builder_.CreateGEP(
826  this_targets_ptr_arg, next_slot_rel_off, next_desc);
827  that_ptr1 = cgen_state->ir_builder_.CreateGEP(
828  that_targets_ptr_arg, next_slot_rel_off, next_desc);
829  }
830  reduction_code.cgen_state->ir_builder_.CreateRetVoid();
831  verify_function_ir(reduction_code.ir_reduce_one_entry);
832 }
833 
// Emits the body of reduction_code.ir_reduce_one_entry_idx for the variant in
// which the same entry index addresses the row in both buffers: it computes the
// row pointer in each buffer as buffer + entry_idx * row_bytes and forwards both
// pointers, the two query-memory-descriptor handles and the serialized varlen
// buffer to ir_reduce_one_entry.
// NOTE(review): listing lines 834 (function name), 836-837 and 864 are absent
// from this view; presumably this is
// ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx - confirm against the
// repository.
835  const ReductionCode& reduction_code) const {
838  auto cgen_state = reduction_code.cgen_state.get();
839  auto& ctx = cgen_state->context_;
840  const auto bb_entry =
841  llvm::BasicBlock::Create(ctx, ".entry", reduction_code.ir_reduce_one_entry_idx, 0);
842  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
     // Arguments 0-2 and 4-6 are read; argument 3 is not used by this function.
843  const auto arg_it = reduction_code.ir_reduce_one_entry_idx->arg_begin();
844  const auto this_buff = &*arg_it;
845  const auto that_buff = &*(arg_it + 1);
846  const auto entry_idx = &*(arg_it + 2);
847  const auto this_qmd_handle = &*(arg_it + 4);
848  const auto that_qmd_handle = &*(arg_it + 5);
849  const auto serialized_varlen_buffer_arg = &*(arg_it + 6);
     // row_bytes is a compile-time constant taken from query_mem_desc_; the row
     // offset is computed once and applied to both buffers.
850  const auto row_bytes = cgen_state->llInt<int32_t>(get_row_bytes(query_mem_desc_));
851  const auto row_off_in_bytes =
852  cgen_state->ir_builder_.CreateMul(entry_idx, row_bytes, "row_off_in_bytes");
853  const auto this_row_ptr =
854  cgen_state->ir_builder_.CreateGEP(this_buff, row_off_in_bytes, "this_row_ptr");
855  const auto that_row_ptr =
856  cgen_state->ir_builder_.CreateGEP(that_buff, row_off_in_bytes, "that_row_ptr");
857  cgen_state->ir_builder_.CreateCall(reduction_code.ir_reduce_one_entry,
858  {this_row_ptr,
859  that_row_ptr,
860  this_qmd_handle,
861  that_qmd_handle,
862  serialized_varlen_buffer_arg});
863  cgen_state->ir_builder_.CreateRetVoid();
865 }
866 
// Emits the body of reduction_code.ir_reduce_one_entry_idx for the variant in
// which the destination row is looked up at run time. The generated code:
//   1. computes the source row pointer, that_buff + that_entry_idx * row_bytes;
//   2. calls ir_is_empty on it and hands the result to return_early;
//   3. calls the external get_group_value_reduction_rt, which reports the
//      destination targets pointer and an "empty" flag through two allocas;
//   4. hands the "empty" flag to return_early;
//   5. calls ir_reduce_one_entry on the destination targets pointer and on the
//      source row pointer advanced by key_byte_count.
// return_early is defined outside this chunk; presumably it emits a conditional
// return when its first argument is true - confirm.
// NOTE(review): listing lines 867 (function name), 869-872, 921 and 935 are
// absent from this view; presumably this is
// ResultSetReductionJIT::reduceOneEntryBaselineIdx - confirm against the
// repository.
868  const ReductionCode& reduction_code) const {
873  auto cgen_state = reduction_code.cgen_state.get();
874  auto& ctx = cgen_state->context_;
875  const auto bb_entry =
876  llvm::BasicBlock::Create(ctx, ".entry", reduction_code.ir_reduce_one_entry_idx, 0);
877  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
878  const auto arg_it = reduction_code.ir_reduce_one_entry_idx->arg_begin();
879  const auto this_buff = &*arg_it;
880  const auto that_buff = &*(arg_it + 1);
881  const auto that_entry_idx = &*(arg_it + 2);
882  const auto that_entry_count = &*(arg_it + 3);
883  const auto this_qmd_handle = &*(arg_it + 4);
884  const auto that_qmd_handle = &*(arg_it + 5);
885  const auto serialized_varlen_buffer_arg = &*(arg_it + 6);
886  const auto row_bytes = cgen_state->llInt<int32_t>(get_row_bytes(query_mem_desc_));
887  const auto that_row_off_in_bytes = cgen_state->ir_builder_.CreateMul(
888  that_entry_idx, row_bytes, "that_row_off_in_bytes");
889  const auto that_row_ptr =
890  cgen_state->ir_builder_.CreateGEP(that_buff, that_row_off_in_bytes, "that_row_ptr");
891  const auto that_is_empty = cgen_state->ir_builder_.CreateCall(
892  reduction_code.ir_is_empty, that_row_ptr, "that_is_empty");
893  return_early(that_is_empty, reduction_code, reduction_code.ir_reduce_one_entry_idx, 0);
894  const auto key_count = query_mem_desc_.getGroupbyColCount();
895  const auto pi64_type = llvm::Type::getInt64PtrTy(cgen_state->context_);
     // The "empty" flag is stored as an 8-bit integer and truncated to 1 bit
     // further down before it is used as a condition.
896  const auto bool_type = llvm::Type::getInt8Ty(cgen_state->context_);
     // Two single-element allocas act as out-parameters of the external call.
897  const auto this_targets_ptr_i64_ptr = cgen_state->ir_builder_.CreateAlloca(
898  pi64_type, cgen_state->llInt<int32_t>(1), "this_targets_ptr_out");
899  const auto this_is_empty_ptr = cgen_state->ir_builder_.CreateAlloca(
900  bool_type, cgen_state->llInt<int32_t>(1), "this_is_empty_out");
901  cgen_state->emitExternalCall("get_group_value_reduction_rt",
902  llvm::Type::getVoidTy(ctx),
903  {this_buff,
904  that_row_ptr,
905  cgen_state->llInt<int32_t>(key_count),
906  this_qmd_handle,
907  that_buff,
908  that_entry_idx,
909  that_entry_count,
910  row_bytes,
911  this_targets_ptr_i64_ptr,
912  this_is_empty_ptr});
913  const auto this_targets_ptr_i64 = cgen_state->ir_builder_.CreateLoad(
914  this_targets_ptr_i64_ptr, "this_targets_ptr_i64");
915  llvm::Value* this_is_empty =
916  cgen_state->ir_builder_.CreateLoad(this_is_empty_ptr, "this_is_empty");
917  this_is_empty = cgen_state->ir_builder_.CreateTrunc(
918  this_is_empty, get_int_type(1, ctx), "this_is_empty_bool");
919  return_early(this_is_empty, reduction_code, reduction_code.ir_reduce_one_entry_idx, 0);
920  const auto pi8_type = llvm::Type::getInt8PtrTy(cgen_state->context_);
     // NOTE(review): listing line 921 is missing here; key_qw_count, used two
     // statements below, has no visible definition in this view.
922  const auto this_targets_ptr = cgen_state->ir_builder_.CreateBitCast(
923  this_targets_ptr_i64, pi8_type, "this_targets_ptr");
     // The source targets pointer is the source row pointer advanced by
     // key_qw_count * sizeof(int64_t).
924  const auto key_byte_count = key_qw_count * sizeof(int64_t);
925  const auto key_byte_count_lv = cgen_state->llInt<int32_t>(key_byte_count);
926  const auto that_targets_ptr = cgen_state->ir_builder_.CreateGEP(
927  that_row_ptr, key_byte_count_lv, "that_targets_ptr");
928  cgen_state->ir_builder_.CreateCall(reduction_code.ir_reduce_one_entry,
929  {this_targets_ptr,
930  that_targets_ptr,
931  this_qmd_handle,
932  that_qmd_handle,
933  serialized_varlen_buffer_arg});
934  cgen_state->ir_builder_.CreateRetVoid();
936 }
937 
// File-local helper used by reduceLoop.
938 namespace {
939 
// Creates a ".loop_body" basic block in the function that currently holds the
// builder's insertion point, fills it in and returns it. The block:
//   - truncates `iterator` to 32 bits and adds `start_index` to obtain
//     that_entry_idx;
//   - when emit_watchdog_check is set, sign-extends that_entry_idx to 64 bits,
//     calls the external check_watchdog_rt with it, compares the 8-bit result
//     against zero and passes the comparison to return_early for ir_reduce_loop;
//   - calls ir_reduce_one_entry_idx with the two buffers, that_entry_idx,
//     that_entry_count, the two descriptor handles and the varlen buffer.
// The builder's insertion point is left wherever those emissions put it; no
// terminator is added to the block in the code visible here.
940 llvm::BasicBlock* generate_loop_body(const ReductionCode& reduction_code,
941  llvm::Value* this_buff,
942  llvm::Value* that_buff,
943  llvm::Value* iterator,
944  llvm::Value* start_index,
945  llvm::Value* that_entry_count,
946  llvm::Value* this_qmd_handle,
947  llvm::Value* that_qmd_handle,
948  llvm::Value* serialized_varlen_buffer,
949  bool emit_watchdog_check) {
950  auto cgen_state = reduction_code.cgen_state.get();
951  auto& ir_builder = cgen_state->ir_builder_;
952  auto& ctx = cgen_state->context_;
953  const auto loop_body_bb = llvm::BasicBlock::Create(
954  ctx, ".loop_body", ir_builder.GetInsertBlock()->getParent());
955  ir_builder.SetInsertPoint(loop_body_bb);
     // The loop iterator is relative to start_index; the absolute entry index is
     // their sum.
956  const auto loop_iter =
957  ir_builder.CreateTrunc(iterator, get_int_type(32, ctx), "relative_entry_idx");
958  const auto that_entry_idx =
959  ir_builder.CreateAdd(loop_iter, start_index, "that_entry_idx");
960  if (emit_watchdog_check) {
961  const auto watchdog_sample_seed =
962  ir_builder.CreateSExt(that_entry_idx, get_int_type(64, ctx));
963  const auto watchdog_triggered = cgen_state->emitExternalCall(
964  "check_watchdog_rt", get_int_type(8, ctx), {watchdog_sample_seed});
965  const auto watchdog_triggered_bool = cgen_state->ir_builder_.CreateICmpNE(
966  watchdog_triggered, cgen_state->llInt<int8_t>(0));
     // NOTE(review): listing line 970 (the last argument of this return_early
     // call and its closing parenthesis) is absent from this view; presumably it
     // passes WATCHDOG_ERROR - confirm against the repository.
967  return_early(watchdog_triggered_bool,
968  reduction_code,
969  reduction_code.ir_reduce_loop,
971  }
972  ir_builder.CreateCall(reduction_code.ir_reduce_one_entry_idx,
973  {this_buff,
974  that_buff,
975  that_entry_idx,
976  that_entry_count,
977  this_qmd_handle,
978  that_qmd_handle,
979  serialized_varlen_buffer});
980  return loop_body_bb;
981 }
982 
983 } // namespace
984 
// Emits the body of reduction_code.ir_reduce_loop. The generated function has an
// ".entry" block that computes the iteration count (end index minus start index,
// sign-extended to 64 bits), an ".exit" block that returns the 32-bit constant 0,
// and a loop produced by JoinLoop::codegen whose body comes from
// generate_loop_body. The entry block is finally terminated with a branch to the
// block returned by JoinLoop::codegen, and the function is checked with
// verify_function_ir.
985 void ResultSetReductionJIT::reduceLoop(const ReductionCode& reduction_code) const {
     // The eight arguments of ir_reduce_loop, in positional order.
986  const auto arg_it = reduction_code.ir_reduce_loop->arg_begin();
987  const auto this_buff_arg = &*arg_it;
988  const auto that_buff_arg = &*(arg_it + 1);
989  const auto start_index_arg = &*(arg_it + 2);
990  const auto end_index_arg = &*(arg_it + 3);
991  const auto that_entry_count_arg = &*(arg_it + 4);
992  const auto this_qmd_handle_arg = &*(arg_it + 5);
993  const auto that_qmd_handle_arg = &*(arg_it + 6);
994  const auto serialized_varlen_buffer_arg = &*(arg_it + 7);
995  auto cgen_state = reduction_code.cgen_state.get();
996  auto& ctx = cgen_state->context_;
997  const auto bb_entry =
998  llvm::BasicBlock::Create(ctx, ".entry", reduction_code.ir_reduce_loop, 0);
999  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
1000  const auto i64_type = get_int_type(64, cgen_state->context_);
1001  const auto iteration_count = cgen_state->ir_builder_.CreateSub(
1002  end_index_arg, start_index_arg, "iteration_count");
1003  const auto upper_bound = cgen_state->ir_builder_.CreateSExt(iteration_count, i64_type);
      // The exit block is filled in first; it is passed to JoinLoop::codegen below
      // as the loop's exit target.
1004  const auto bb_exit =
1005  llvm::BasicBlock::Create(ctx, ".exit", reduction_code.ir_reduce_loop);
1006  cgen_state->ir_builder_.SetInsertPoint(bb_exit);
1007  cgen_state->ir_builder_.CreateRet(cgen_state->llInt<int32_t>(0));
      // NOTE(review): listing lines 1009-1010 (the first constructor arguments of
      // join_loop) are absent from this view. The visible callback supplies a
      // domain whose upper_bound is the iteration count computed above.
1008  JoinLoop join_loop(
1011  [upper_bound](const std::vector<llvm::Value*>& v) {
1012  JoinLoopDomain domain{{0}};
1013  domain.upper_bound = upper_bound;
1014  return domain;
1015  },
1016  nullptr,
1017  nullptr,
1018  nullptr,
1019  "reduction_loop");
      // The body callback forwards the innermost iterator (iterators.back()) and
      // the captured function arguments to generate_loop_body. The watchdog check
      // is requested only when useInterpreter() returns false.
1020  const auto bb_loop_body = JoinLoop::codegen(
1021  {join_loop},
1022  [this,
1023  &reduction_code,
1024  this_buff_arg,
1025  that_buff_arg,
1026  start_index_arg,
1027  that_entry_count_arg,
1028  this_qmd_handle_arg,
1029  that_qmd_handle_arg,
1030  serialized_varlen_buffer_arg](const std::vector<llvm::Value*>& iterators) {
1031  return generate_loop_body(reduction_code,
1032  this_buff_arg,
1033  that_buff_arg,
1034  iterators.back(),
1035  start_index_arg,
1036  that_entry_count_arg,
1037  this_qmd_handle_arg,
1038  that_qmd_handle_arg,
1039  serialized_varlen_buffer_arg,
1040  !useInterpreter(reduction_code.cgen_state.get()));
1041  },
1042  nullptr,
1043  bb_exit,
1044  cgen_state->ir_builder_);
      // Return to the entry block, which still lacks a terminator, and jump into
      // the generated loop.
1045  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
1046  cgen_state->ir_builder_.CreateBr(bb_loop_body);
1047  verify_function_ir(reduction_code.ir_reduce_loop);
1048 }
1049 
// Emits the reduction code for a single target.
//   this_ptr1 / that_ptr1      first-slot pointers of the destination and source
//   this_ptr2 / that_ptr2      second-slot pointers; must be non-null for
//                              variable-length SAMPLE targets (CHECKed below)
//   target_info                description of the target being reduced
//   target_logical_idx         position of the target in the target list
//   target_slot_idx            slot index used to look up the slot width
//   init_agg_val_idx           index into target_init_vals_ (bounds-CHECKed)
//   first_slot_idx_for_target  compared with target_slot_idx for geometry targets
//   reduction_code             provides the code generation state and functions
// Aggregates other than SAMPLE are delegated to reduceOneAggregateSlot. Every
// other target goes through emit_write_projection, and variable-length SAMPLE
// targets additionally get a call to the external serialized_varlen_buffer_sample.
1050 void ResultSetReductionJIT::reduceOneSlot(llvm::Value* this_ptr1,
1051  llvm::Value* this_ptr2,
1052  llvm::Value* that_ptr1,
1053  llvm::Value* that_ptr2,
1054  const TargetInfo& target_info,
1055  const size_t target_logical_idx,
1056  const size_t target_slot_idx,
1057  const size_t init_agg_val_idx,
1058  const size_t first_slot_idx_for_target,
1059  const ReductionCode& reduction_code) const {
      // NOTE(review): listing line 1060 is missing here - the second closing brace
      // below has no visible opener, so an enclosing condition is not shown. The
      // visible part emits nothing for targets whose getTargetGroupbyIndex is
      // non-negative.
1061  if (query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) >= 0) {
1062  return;
1063  }
1064  }
1065  const bool float_argument_input = takes_float_argument(target_info);
1066  const auto chosen_bytes =
1067  get_width_for_slot(target_slot_idx, float_argument_input, query_mem_desc_);
1068  CHECK_LT(init_agg_val_idx, target_init_vals_.size());
1069  auto init_val = target_init_vals_[init_agg_val_idx];
1070  if (target_info.is_agg && target_info.agg_kind != kSAMPLE) {
1071  reduceOneAggregateSlot(this_ptr1,
1072  this_ptr2,
1073  that_ptr1,
1074  that_ptr2,
1075  target_info,
1076  target_logical_idx,
1077  target_slot_idx,
1078  init_val,
1079  chosen_bytes,
1080  reduction_code);
1081  } else {
1082  const auto cgen_state = reduction_code.cgen_state.get();
1083  emit_write_projection(this_ptr1, that_ptr1, init_val, chosen_bytes, cgen_state);
1084  if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()) {
1085  CHECK(this_ptr2 && that_ptr2);
      // length_to_elems is passed to the runtime call as a 64-bit constant:
      // geometry uses 1 when this is the target's first slot and 4 otherwise;
      // strings use 1; every other type uses the element type's size.
1086  size_t length_to_elems{0};
1087  if (target_info.sql_type.is_geometry()) {
1088  // TODO: Assumes hard-coded sizes for geometry targets
1089  length_to_elems = target_slot_idx == first_slot_idx_for_target ? 1 : 4;
1090  } else {
1091  const auto& elem_ti = target_info.sql_type.get_elem_type();
1092  length_to_elems = target_info.sql_type.is_string() ? 1 : elem_ti.get_size();
1093  }
      // Argument 4 of ir_reduce_one_entry is forwarded as the serialized varlen
      // buffer handle.
1094  const auto arg_it = reduction_code.ir_reduce_one_entry->arg_begin();
1095  const auto serialized_varlen_buffer_arg = &*(arg_it + 4);
1096  cgen_state->emitExternalCall("serialized_varlen_buffer_sample",
1097  llvm::Type::getVoidTy(cgen_state->context_),
1098  {serialized_varlen_buffer_arg,
1099  this_ptr1,
1100  this_ptr2,
1101  that_ptr1,
1102  that_ptr2,
1103  cgen_state->llInt<int64_t>(init_val),
1104  cgen_state->llInt<int64_t>(length_to_elems)});
1105  }
1106  }
1107 }
1108 
// Emits the reduction code for one aggregate target by switching on
// target_info.agg_kind:
//   kCOUNT / kAPPROX_COUNT_DISTINCT  distinct targets take a dedicated path, all
//                                    others use emit_aggregate_one_count
//   kAVG                             reduces the count held in the second slot,
//                                    then falls through to the kSUM case
//   kSUM / kMIN / kMAX               one call each with "sum" / "min" / "max"
//   anything else                    LOG(FATAL)
// NOTE(review): listing lines 1109 (function name), 1126, 1143, 1148 and 1153
// (callee names) are absent from this view; presumably this is
// ResultSetReductionJIT::reduceOneAggregateSlot - confirm against the repository.
1110  llvm::Value* this_ptr1,
1111  llvm::Value* this_ptr2,
1112  llvm::Value* that_ptr1,
1113  llvm::Value* that_ptr2,
1114  const TargetInfo& target_info,
1115  const size_t target_logical_idx,
1116  const size_t target_slot_idx,
1117  const int64_t init_val,
1118  const int8_t chosen_bytes,
1119  const ReductionCode& reduction_code) const {
1120  const auto cgen_state = reduction_code.cgen_state.get();
1121  switch (target_info.agg_kind) {
1122  case kCOUNT:
1123  case kAPPROX_COUNT_DISTINCT: {
1124  if (is_distinct_target(target_info)) {
      // Distinct targets must occupy a full 8-byte slot.
      // NOTE(review): the callee name (listing line 1126) is missing; the
      // arguments match reduceOneCountDistinctSlot - confirm.
1125  CHECK_EQ(static_cast<size_t>(chosen_bytes), sizeof(int64_t));
1127  this_ptr1, that_ptr1, target_logical_idx, reduction_code);
1128  break;
1129  }
      // A non-distinct count is required to start from an initial value of 0.
1130  CHECK_EQ(int64_t(0), init_val);
1131  emit_aggregate_one_count(this_ptr1, that_ptr1, chosen_bytes, cgen_state);
1132  break;
1133  }
1134  case kAVG: {
1135  // Ignore float argument compaction for count component for fear of its overflow
1136  emit_aggregate_one_count(this_ptr2,
1137  that_ptr2,
1138  query_mem_desc_.getPaddedSlotWidthBytes(target_slot_idx),
1139  cgen_state);
1140  }
1141  // fall thru
      // NOTE(review): in the three cases below the callee name (listing lines
      // 1143, 1148 and 1153) is missing; the seven arguments match
      // emit_aggregate_one_nullable_value - confirm.
1142  case kSUM: {
1144  "sum", this_ptr1, that_ptr1, init_val, chosen_bytes, target_info, cgen_state);
1145  break;
1146  }
1147  case kMIN: {
1149  "min", this_ptr1, that_ptr1, init_val, chosen_bytes, target_info, cgen_state);
1150  break;
1151  }
1152  case kMAX: {
1154  "max", this_ptr1, that_ptr1, init_val, chosen_bytes, target_info, cgen_state);
1155  break;
1156  }
1157  default:
1158  LOG(FATAL) << "Invalid aggregate type";
1159  }
1160 }
1161 
// Emits the reduction of one count-distinct target: loads a 64-bit handle from
// each of the two slot pointers and emits a call to the external
// count_distinct_set_union_jit_rt with those handles, the two query memory
// descriptor handles and the target's logical index.
// NOTE(review): listing lines 1162 (function name) and 1167 are absent from this
// view; presumably this is ResultSetReductionJIT::reduceOneCountDistinctSlot -
// confirm against the repository.
1163  llvm::Value* this_ptr1,
1164  llvm::Value* that_ptr1,
1165  const size_t target_logical_idx,
1166  const ReductionCode& reduction_code) const {
1168  const auto cgen_state = reduction_code.cgen_state.get();
      // "old" is the handle loaded from this_ptr1, "new" the one from that_ptr1.
1169  const auto old_set_handle = emit_load_i64(this_ptr1, cgen_state);
1170  const auto new_set_handle = emit_load_i64(that_ptr1, cgen_state);
      // Arguments 2 and 3 of ir_reduce_one_entry are the "this" and "that"
      // descriptor handles.
1171  const auto arg_it = reduction_code.ir_reduce_one_entry->arg_begin();
1172  const auto this_qmd_arg = &*(arg_it + 2);
1173  const auto that_qmd_arg = &*(arg_it + 3);
      // The "that"/new values are passed before the "this"/old values.
1174  cgen_state->emitExternalCall("count_distinct_set_union_jit_rt",
1175  llvm::Type::getVoidTy(cgen_state->context_),
1176  {new_set_handle,
1177  old_set_handle,
1178  that_qmd_arg,
1179  this_qmd_arg,
1180  cgen_state->llInt<int64_t>(target_logical_idx)});
1181 }
1182 
// Turns the generated IR into something executable and returns the updated
// ReductionCode.
//   - A cache key is built from the serialized IR of ir_is_empty,
//     ir_reduce_one_entry and ir_reduce_one_entry_idx (ir_reduce_loop is not part
//     of the key).
//   - On a cache hit, a ReductionCode is returned in which only the second member
//     (taken from element 1 of the cached tuple) and the last member (the cached
//     function pointer, element 0 of the tuple) are non-null.
//   - Otherwise an execution engine is created for ir_reduce_loop. With the
//     interpreter, func_ptr stays null and the engine is kept in
//     own_execution_engine; without it, func_ptr is taken from the engine and
//     the engine is moved into a cache entry.
// NOTE(review): listing lines 1183 (function name), 1203, 1208 and 1222 are
// absent from this view; presumably this is
// ResultSetReductionJIT::finalizeReductionCode - confirm against the repository.
1184  ReductionCode reduction_code) const {
1185  const auto key0 = serialize_llvm_object(reduction_code.ir_is_empty);
1186  const auto key1 = serialize_llvm_object(reduction_code.ir_reduce_one_entry);
1187  const auto key2 = serialize_llvm_object(reduction_code.ir_reduce_one_entry_idx);
1188  CodeCacheKey key{key0, key1, key2};
1189  const auto val_ptr = s_code_cache.get(key);
1190  if (val_ptr) {
1191  return {
1192  nullptr,
1193  std::get<1>(val_ptr->first.front()).get(),
1194  nullptr,
1195  nullptr,
1196  nullptr,
1197  nullptr,
1198  nullptr,
1199  nullptr,
1200  reinterpret_cast<ReductionCode::FuncPtr>(std::get<0>(val_ptr->first.front()))};
1201  }
      // NOTE(review): the initializer of `co` (listing line 1203) is not visible.
1202  CompilationOptions co{
      // The unique_ptr gives up the module here and the returned raw pointer is
      // discarded; presumably the engine created below takes over ownership -
      // confirm.
1204  reduction_code.module.release();
1205  const bool use_interp = useInterpreter(reduction_code.cgen_state.get());
      // NOTE(review): the callee of the non-interpreter branch (listing line 1208)
      // is missing; the arguments match generateNativeCPUCode - confirm.
1206  auto ee = use_interp
1207  ? create_interpreter_engine(reduction_code.ir_reduce_loop)
1209  reduction_code.ir_reduce_loop, {reduction_code.ir_reduce_loop}, co);
1210  reduction_code.func_ptr =
1211  use_interp ? nullptr
1212  : reinterpret_cast<ReductionCode::FuncPtr>(
1213  ee->getPointerToFunction(reduction_code.ir_reduce_loop));
      // The raw engine pointer is recorded before `ee` is moved from below.
1214  reduction_code.execution_engine = ee.get();
1215  if (use_interp) {
1216  reduction_code.own_execution_engine = std::move(ee);
1217  } else {
1218  std::tuple<void*, ExecutionEngineWrapper> cache_val =
1219  std::make_tuple(reinterpret_cast<void*>(reduction_code.func_ptr), std::move(ee));
1220  std::vector<std::tuple<void*, ExecutionEngineWrapper>> cache_vals;
1221  cache_vals.emplace_back(std::move(cache_val));
      // NOTE(review): the callee name and first argument (listing line 1222) are
      // missing; presumably this is an addCodeToCache call keyed by `key` -
      // confirm.
1223  std::move(cache_vals),
1224  reduction_code.ir_reduce_loop->getParent(),
1225  s_code_cache);
1226  }
1227  return reduction_code;
1228 }
1229 
// Decides whether the reduction should run through the LLVM interpreter rather
// than natively compiled code. The only operand visible in this view requires
// that the generated code contains no external calls
// (cgen_state->has_external_calls_ is false).
// NOTE(review): listing line 1236, which holds the `return` keyword and the
// first operand of the condition, is absent from this view.
1230 bool ResultSetReductionJIT::useInterpreter(const CgenState* cgen_state) const {
1231  // The LLVM interpreter uses llvm::Function* pointers as keys in a cache to quickly
1232  // resolve binding to external functions. That works if the functions are kept around
1233  // for the entire lifetime of the process. Unfortunately, that is incompatible with our
1234  // need to free functions and modules, because the pointers could be recycled and lead
1235  // to false hits in that cache.
1237  !cgen_state->has_external_calls_;
1238 }
llvm::BasicBlock * generate_loop_body(const ReductionCode &reduction_code, llvm::Value *this_buff, llvm::Value *that_buff, llvm::Value *iterator, llvm::Value *start_index, llvm::Value *that_entry_count, llvm::Value *this_qmd_handle, llvm::Value *that_qmd_handle, llvm::Value *serialized_varlen_buffer, bool emit_watchdog_check)
std::unique_ptr< CgenState > cgen_state
void clear()
Definition: LruCache.hpp:57
llvm::Function * setup_reduce_one_entry(const CgenState *cgen_state, const QueryDescriptionType hash_type)
#define CHECK_EQ(x, y)
Definition: Logger.h:195
llvm::Function * ir_reduce_one_entry
void reduceOneEntryBaseline(const ReductionCode &reduction_code) const
void isEmpty(const ReductionCode &reduction_code) const
void reduceOneEntryNoCollisionsIdx(const ReductionCode &reduction_code) const
void count_distinct_set_union(const int64_t new_set_handle, const int64_t old_set_handle, const CountDistinctDescriptor &new_count_distinct_desc, const CountDistinctDescriptor &old_count_distinct_desc)
llvm::ConstantFP * llFp(const float v) const
Definition: CgenState.h:251
#define EMPTY_KEY_64
static void addCodeToCache(const CodeCacheKey &, std::vector< std::tuple< void *, ExecutionEngineWrapper >>, llvm::Module *, CodeCache &)
void count_distinct_set_union_jit_rt(const int64_t new_set_handle, const int64_t old_set_handle, const void *that_qmd_handle, const void *this_qmd_handle, const int64_t target_logical_idx)
bool is_group_query(const QueryDescriptionType hash_type)
ssize_t getTargetGroupbyIndex(const size_t target_idx) const
static ExecutionEngineWrapper generateNativeCPUCode(llvm::Function *func, const std::unordered_set< llvm::Function *> &live_funcs, const CompilationOptions &co)
llvm::Function * ir_is_empty
void serialized_varlen_buffer_sample(const void *serialized_varlen_buffer_handle, int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const int64_t init_val, const int64_t length_to_elems)
void varlen_buffer_sample(int8_t *this_ptr1, int8_t *this_ptr2, const int8_t *that_ptr1, const int8_t *that_ptr2, const int64_t init_val)
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
#define LOG(tag)
Definition: Logger.h:182
void mark_function_always_inline(llvm::Function *func)
void get_group_value_reduction_rt(int8_t *groups_buffer, const int8_t *key, const uint32_t key_count, const void *this_qmd_handle, const int8_t *that_buff, const uint32_t that_entry_idx, const uint32_t that_entry_count, const uint32_t row_size_bytes, int64_t **buff_out, uint8_t *empty)
bool has_external_calls_
Definition: CgenState.h:285
llvm::IRBuilder ir_builder_
Definition: CgenState.h:268
ReductionCode finalizeReductionCode(ReductionCode reduction_code) const
size_t get_byteoff_of_slot(const size_t slot_idx, const QueryMemoryDescriptor &query_mem_desc)
void return_early(llvm::Value *cond, const ReductionCode &reduction_code, llvm::Function *func, int error_code)
bool dynamic_watchdog()
size_t getCountDistinctDescriptorsSize() const
bool is_varlen() const
Definition: sqltypes.h:464
#define CHECK_GE(x, y)
Definition: Logger.h:200
void emit_aggregate_one_nullable_value(const std::string &agg_kind, llvm::Value *val_ptr, llvm::Value *other_ptr, const int64_t init_val, const size_t chosen_bytes, const TargetInfo &agg_info, CgenState *cgen_state)
std::vector< std::string > CodeCacheKey
Definition: CodeCache.h:61
std::unique_ptr< llvm::Module > g_rt_module
size_t get_slot_off_quad(const QueryMemoryDescriptor &query_mem_desc)
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:70
const std::vector< int64_t > target_init_vals_
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:120
bool skip_null_val
Definition: TargetInfo.h:44
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
int8_t get_width_for_slot(const size_t target_slot_idx, const bool float_argument_input, const QueryMemoryDescriptor &query_mem_desc)
const int64_t const uint32_t const uint32_t key_qw_count
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value *> &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, llvm::IRBuilder<> &builder)
Definition: JoinLoop.cpp:45
llvm::Value * emit_load(llvm::Value *ptr, llvm::Type *loaded_type, CgenState *cgen_state)
int32_t(*)(int8_t *this_buff, const int8_t *that_buff, const int32_t start_entry_index, const int32_t end_entry_index, const int32_t that_entry_count, const void *this_qmd, const void *that_qmd, const void *serialized_varlen_buffer) FuncPtr
const QueryMemoryDescriptor query_mem_desc_
ReductionCode codegen() const
std::string to_string(char const *&&v)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
bool useInterpreter(const CgenState *cgen_state) const
llvm::Value * emit_read_int_from_buff(llvm::Value *ptr, const int8_t compact_sz, CgenState *cgen_state)
Definition: sqldefs.h:71
const SQLTypeInfo get_compact_type(const TargetInfo &target)
llvm::Module * module_
Definition: CgenState.h:263
ResultSetReductionJIT(const QueryMemoryDescriptor &query_mem_desc, const std::vector< TargetInfo > &targets, const std::vector< int64_t > &target_init_vals)
void verify_function_ir(const llvm::Function *func)
llvm::Value * emit_load_i32(llvm::Value *ptr, CgenState *cgen_state)
llvm::LLVMContext & context_
Definition: CgenState.h:266
bool is_agg
Definition: TargetInfo.h:40
size_t advance_slot(const size_t j, const TargetInfo &target_info, const bool separate_varlen_storage)
T v(const TargetValue &r)
std::unique_ptr< llvm::Module > runtime_module_shallow_copy(CgenState *cgen_state)
GroupValueInfo get_group_value_reduction(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const size_t key_width, const QueryMemoryDescriptor &query_mem_desc, const int64_t *that_buff_i64, const size_t that_entry_idx, const size_t that_entry_count, const uint32_t row_size_quad)
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
void reduceOneEntryNoCollisions(const ReductionCode &reduction_code) const
uint8_t check_watchdog_rt(const size_t sample_seed)
Definition: sqldefs.h:71
static std::mutex s_reduction_mutex
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:116
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:632
ReductionCode setup_functions_ir(const QueryDescriptionType hash_type)
llvm::Function * setup_is_empty_entry(const CgenState *cgen_state)
size_t targetGroupbyIndicesSize() const
SQLAgg agg_kind
Definition: TargetInfo.h:41
llvm::Function * ir_reduce_one_entry_idx
#define UNLIKELY(x)
Definition: likely.h:20
ExecutionEngineWrapper own_execution_engine
void emit_aggregate_one_count(llvm::Value *val_ptr, llvm::Value *other_ptr, const size_t chosen_bytes, CgenState *cgen_state)
int32_t getTargetIdxForKey() const
ExecutionEngineWrapper create_interpreter_engine(llvm::Function *func)
#define CHECK_LT(x, y)
Definition: Logger.h:197
llvm::Value * upper_bound
Definition: JoinLoop.h:43
std::string serialize_llvm_object(const T *llvm_obj)
size_t get_row_bytes(const QueryMemoryDescriptor &query_mem_desc)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value *> &args)
Definition: CgenState.cpp:134
void reduceOneEntryBaselineIdx(const ReductionCode &reduction_code) const
Definition: sqldefs.h:71
llvm::Function * setup_reduce_loop(const CgenState *cgen_state)
llvm::Value * emit_load_i64(llvm::Value *ptr, CgenState *cgen_state)
void reduceOneAggregateSlot(llvm::Value *this_ptr1, llvm::Value *this_ptr2, llvm::Value *that_ptr1, llvm::Value *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const int64_t init_val, const int8_t chosen_bytes, const ReductionCode &reduction_code) const
#define CHECK(condition)
Definition: Logger.h:187
const ColSlotContext & getColSlotContext() const
llvm::ValueToValueMapTy vmap_
Definition: CgenState.h:267
#define EMPTY_KEY_32
std::unique_ptr< llvm::Module > module
bool is_geometry() const
Definition: sqltypes.h:462
QueryDescriptionType
Definition: Types.h:26
void emit_aggregate_one_value(const std::string &agg_kind, llvm::Value *val_ptr, llvm::Value *other_ptr, const size_t chosen_bytes, const TargetInfo &agg_info, CgenState *cgen_state)
void reduceOneSlot(llvm::Value *this_ptr1, llvm::Value *this_ptr2, llvm::Value *that_ptr1, llvm::Value *that_ptr2, const TargetInfo &target_info, const size_t target_logical_idx, const size_t target_slot_idx, const size_t init_agg_val_idx, const size_t first_slot_idx_for_target, const ReductionCode &reduction_code) const
llvm::Function * ir_reduce_loop
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
value_t * get(const key_t &key)
Definition: LruCache.hpp:39
void reduceLoop(const ReductionCode &reduction_code) const
QueryDescriptionType getQueryDescriptionType() const
Definition: sqldefs.h:71
llvm::Function * setup_reduce_one_entry_idx(const CgenState *cgen_state)
Definition: sqldefs.h:71
bool is_string() const
Definition: sqltypes.h:450
size_t get_key_bytes_rowwise(const QueryMemoryDescriptor &query_mem_desc)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:247
void reduceOneEntryTargetsNoCollisions(const ReductionCode &reduction_code, llvm::Value *this_targets_start_ptr, llvm::Value *that_targets_start_ptr) const
size_t getEffectiveKeyWidth() const
void emit_write_projection(llvm::Value *slot_pi8, llvm::Value *other_pi8, const int64_t init_val, const size_t chosen_bytes, CgenState *cgen_state)
llvm::ExecutionEngine * execution_engine
void reduceOneCountDistinctSlot(llvm::Value *this_ptr1, llvm::Value *that_ptr1, const size_t target_logical_idx, const ReductionCode &reduction_code) const
const std::vector< TargetInfo > targets_