OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExtensionsIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Execute.h"
19 #include "ExtensionFunctions.hpp"
22 
23 #include <tuple>
24 
25 extern std::unique_ptr<llvm::Module> udf_gpu_module;
26 extern std::unique_ptr<llvm::Module> udf_cpu_module;
27 
28 namespace {
29 
30 llvm::StructType* get_buffer_struct_type(CgenState* cgen_state,
31  const std::string& ext_func_name,
32  size_t param_num,
33  llvm::Type* elem_type,
34  bool has_is_null) {
35  CHECK(elem_type);
36  CHECK(elem_type->isPointerTy());
37  llvm::StructType* generated_struct_type =
38  (has_is_null ? llvm::StructType::get(cgen_state->context_,
39  {elem_type,
40  llvm::Type::getInt64Ty(cgen_state->context_),
41  llvm::Type::getInt8Ty(cgen_state->context_)},
42  false)
43  : llvm::StructType::get(
44  cgen_state->context_,
45  {elem_type, llvm::Type::getInt64Ty(cgen_state->context_)},
46  false));
47  llvm::Function* udf_func = cgen_state->module_->getFunction(ext_func_name);
48  if (udf_func) {
49  // Compare expected array struct type with type from the function
50  // definition from the UDF module, but use the type from the
51  // module
52  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
53  CHECK_LE(param_num, udf_func_type->getNumParams());
54  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
55  CHECK(param_pointer_type->isPointerTy());
56  llvm::Type* param_type = param_pointer_type->getPointerElementType();
57  CHECK(param_type->isStructTy());
58  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
59  CHECK_GE(struct_type->getStructNumElements(),
60  generated_struct_type->getStructNumElements())
61  << serialize_llvm_object(struct_type);
62 
63  const auto expected_elems = generated_struct_type->elements();
64  const auto current_elems = struct_type->elements();
65  for (size_t i = 0; i < expected_elems.size(); i++) {
66  CHECK_EQ(expected_elems[i], current_elems[i])
67  << "[" << ::toString(expected_elems[i]) << ", " << ::toString(current_elems[i])
68  << "]";
69  }
70 
71  if (struct_type->isLiteral()) {
72  return struct_type;
73  }
74 
75  llvm::StringRef struct_name = struct_type->getStructName();
76 #if LLVM_VERSION_MAJOR >= 12
77  return struct_type->getTypeByName(cgen_state->context_, struct_name);
78 #else
79  return cgen_state->module_->getTypeByName(struct_name);
80 #endif
81  }
82  return generated_struct_type;
83 }
84 
86  llvm::LLVMContext& ctx) {
87  switch (ext_arg_type) {
88  case ExtArgumentType::Bool: // pass thru to Int8
90  return get_int_type(8, ctx);
92  return get_int_type(16, ctx);
94  return get_int_type(32, ctx);
96  return get_int_type(64, ctx);
98  return llvm::Type::getFloatTy(ctx);
100  return llvm::Type::getDoubleTy(ctx);
124  return llvm::Type::getVoidTy(ctx);
125  default:
126  CHECK(false);
127  }
128  CHECK(false);
129  return nullptr;
130 }
131 
133  CHECK(ll_type);
134  const auto bits = ll_type->getPrimitiveSizeInBits();
135 
136  if (ll_type->isFloatingPointTy()) {
137  switch (bits) {
138  case 32:
139  return SQLTypeInfo(kFLOAT, false);
140  case 64:
141  return SQLTypeInfo(kDOUBLE, false);
142  default:
143  LOG(FATAL) << "Unsupported llvm floating point type: " << bits
144  << ", only 32 and 64 bit floating point is supported.";
145  }
146  } else {
147  switch (bits) {
148  case 1:
149  return SQLTypeInfo(kBOOLEAN, false);
150  case 8:
151  return SQLTypeInfo(kTINYINT, false);
152  case 16:
153  return SQLTypeInfo(kSMALLINT, false);
154  case 32:
155  return SQLTypeInfo(kINT, false);
156  case 64:
157  return SQLTypeInfo(kBIGINT, false);
158  default:
159  LOG(FATAL) << "Unrecognized llvm type for SQL type: "
160  << bits; // TODO let's get the real name here
161  }
162  }
163  UNREACHABLE();
164  return SQLTypeInfo();
165 }
166 
168  llvm::LLVMContext& ctx) {
169  CHECK(ti.is_buffer());
170  if (ti.is_bytes()) {
171  return llvm::Type::getInt8PtrTy(ctx);
172  }
173 
174  const auto& elem_ti = ti.get_elem_type();
175  if (elem_ti.is_fp()) {
176  switch (elem_ti.get_size()) {
177  case 4:
178  return llvm::Type::getFloatPtrTy(ctx);
179  case 8:
180  return llvm::Type::getDoublePtrTy(ctx);
181  }
182  }
183 
184  if (elem_ti.is_boolean()) {
185  return llvm::Type::getInt8PtrTy(ctx);
186  }
187 
188  CHECK(elem_ti.is_integer());
189  switch (elem_ti.get_size()) {
190  case 1:
191  return llvm::Type::getInt8PtrTy(ctx);
192  case 2:
193  return llvm::Type::getInt16PtrTy(ctx);
194  case 4:
195  return llvm::Type::getInt32PtrTy(ctx);
196  case 8:
197  return llvm::Type::getInt64PtrTy(ctx);
198  }
199 
200  UNREACHABLE();
201  return nullptr;
202 }
203 
205  const auto& func_ti = function_oper->get_type_info();
206  for (size_t i = 0; i < function_oper->getArity(); ++i) {
207  const auto arg = function_oper->getArg(i);
208  const auto& arg_ti = arg->get_type_info();
209  if ((func_ti.is_array() && arg_ti.is_array()) ||
210  (func_ti.is_bytes() && arg_ti.is_bytes())) {
211  // If the function returns an array and any of the arguments are arrays, allow NULL
212  // scalars.
213  // TODO: Make this a property of the FunctionOper following `RETURN NULL ON NULL`
214  // semantics.
215  return false;
216  } else if (!arg_ti.get_notnull() && !arg_ti.is_buffer()) {
217  // Nullable geometry args will trigger a null check
218  return true;
219  } else {
220  continue;
221  }
222  }
223  return false;
224 }
225 
226 } // namespace
227 
229  int8_t* buffer) {
230  Executor* exec_ptr = reinterpret_cast<Executor*>(exec);
231  if (buffer != nullptr) {
232  exec_ptr->getRowSetMemoryOwner()->addVarlenBuffer(buffer);
233  }
234 }
235 
237  const Analyzer::FunctionOper* function_oper,
238  const CompilationOptions& co) {
240  ExtensionFunction ext_func_sig = [=]() {
242  try {
243  return bind_function(function_oper, /* is_gpu= */ true);
244  } catch (ExtensionFunctionBindingError& e) {
245  LOG(WARNING) << "codegenFunctionOper[GPU]: " << e.what() << " Redirecting "
246  << function_oper->getName() << " to run on CPU.";
247  throw QueryMustRunOnCpu();
248  }
249  } else {
250  try {
251  return bind_function(function_oper, /* is_gpu= */ false);
252  } catch (ExtensionFunctionBindingError& e) {
253  LOG(WARNING) << "codegenFunctionOper[CPU]: " << e.what();
254  throw;
255  }
256  }
257  }();
258 
259  const auto& ret_ti = function_oper->get_type_info();
260  CHECK(ret_ti.is_integer() || ret_ti.is_fp() || ret_ti.is_boolean() ||
261  ret_ti.is_buffer());
262  if (ret_ti.is_buffer() && co.device_type == ExecutorDeviceType::GPU) {
263  // TODO: This is not necessary for runtime UDFs because RBC does
264  // not generate GPU LLVM IR when the UDF is using Buffer objects.
265  // However, we cannot remove it until C++ UDFs can be defined for
266  // different devices independently.
267  throw QueryMustRunOnCpu();
268  }
269 
270  auto ret_ty = ext_arg_type_to_llvm_type(ext_func_sig.getRet(), cgen_state_->context_);
271  const auto current_bb = cgen_state_->ir_builder_.GetInsertBlock();
272  for (auto it : cgen_state_->ext_call_cache_) {
273  if (*it.foper == *function_oper) {
274  auto inst = llvm::dyn_cast<llvm::Instruction>(it.lv);
275  if (inst && inst->getParent() == current_bb) {
276  return it.lv;
277  }
278  }
279  }
280  std::vector<llvm::Value*> orig_arg_lvs;
281  std::vector<size_t> orig_arg_lvs_index;
282  std::unordered_map<llvm::Value*, llvm::Value*> const_arr_size;
283 
284  for (size_t i = 0; i < function_oper->getArity(); ++i) {
285  orig_arg_lvs_index.push_back(orig_arg_lvs.size());
286  const auto arg = function_oper->getArg(i);
287  const auto arg_cast = dynamic_cast<const Analyzer::UOper*>(arg);
288  const auto arg0 =
289  (arg_cast && arg_cast->get_optype() == kCAST) ? arg_cast->get_operand() : arg;
290  const auto array_expr_arg = dynamic_cast<const Analyzer::ArrayExpr*>(arg0);
291  auto is_local_alloc =
292  ret_ti.is_buffer() || (array_expr_arg && array_expr_arg->isLocalAlloc());
293  const auto& arg_ti = arg->get_type_info();
294  const auto arg_lvs = codegen(arg, true, co);
295  auto geo_uoper_arg = dynamic_cast<const Analyzer::GeoUOper*>(arg);
296  auto geo_binoper_arg = dynamic_cast<const Analyzer::GeoBinOper*>(arg);
297  auto geo_expr_arg = dynamic_cast<const Analyzer::GeoExpr*>(arg);
298  // TODO(adb / d): Assuming no const array cols for geo (for now)
299  if ((geo_uoper_arg || geo_binoper_arg) && arg_ti.is_geometry()) {
300  // Extract arr sizes and put them in the map, forward arr pointers
301  CHECK_EQ(2 * static_cast<size_t>(arg_ti.get_physical_coord_cols()), arg_lvs.size());
302  for (size_t i = 0; i < arg_lvs.size(); i++) {
303  auto arr = arg_lvs[i++];
304  auto size = arg_lvs[i];
305  orig_arg_lvs.push_back(arr);
306  const_arr_size[arr] = size;
307  }
308  } else if (geo_expr_arg && geo_expr_arg->get_type_info().is_geometry()) {
309  CHECK(geo_expr_arg->get_type_info().get_type() == kPOINT);
310  CHECK_EQ(arg_lvs.size(), size_t(2));
311  for (size_t j = 0; j < arg_lvs.size(); j++) {
312  orig_arg_lvs.push_back(arg_lvs[j]);
313  }
314  } else if (arg_ti.is_geometry()) {
315  CHECK_EQ(static_cast<size_t>(arg_ti.get_physical_coord_cols()), arg_lvs.size());
316  for (size_t j = 0; j < arg_lvs.size(); j++) {
317  orig_arg_lvs.push_back(arg_lvs[j]);
318  }
319  } else if (arg_ti.is_bytes()) {
320  CHECK_EQ(size_t(3), arg_lvs.size());
321  /* arg_lvs contains:
322  c = string_decode(&col_buf0, pos)
323  ptr = extract_str_ptr(c)
324  sz = extract_str_len(c)
325  */
326  for (size_t j = 0; j < arg_lvs.size(); j++) {
327  orig_arg_lvs.push_back(arg_lvs[j]);
328  }
329  } else {
330  if (arg_lvs.size() > 1) {
331  CHECK(arg_ti.is_array());
332  CHECK_EQ(size_t(2), arg_lvs.size());
333  const_arr_size[arg_lvs.front()] = arg_lvs.back();
334  } else {
335  CHECK_EQ(size_t(1), arg_lvs.size());
336  /* arg_lvs contains:
337  &col_buf1
338  */
339  if (is_local_alloc && arg_ti.get_size() > 0) {
340  const_arr_size[arg_lvs.front()] = cgen_state_->llInt(arg_ti.get_size());
341  }
342  }
343  orig_arg_lvs.push_back(arg_lvs.front());
344  }
345  }
346  // The extension function implementations don't handle NULL, they work under
347  // the assumption that the inputs are validated before calling them. Generate
348  // code to do the check at the call site: if any argument is NULL, return NULL
349  // without calling the function at all.
350  const auto [bbs, null_buffer_ptr] = beginArgsNullcheck(function_oper, orig_arg_lvs);
351  CHECK_GE(orig_arg_lvs.size(), function_oper->getArity());
352  // Arguments must be converted to the types the extension function can handle.
354  function_oper, &ext_func_sig, orig_arg_lvs, orig_arg_lvs_index, const_arr_size, co);
355 
356  llvm::Value* buffer_ret{nullptr};
357  if (ret_ti.is_buffer()) {
358  // codegen buffer return as first arg
359  CHECK(ret_ti.is_array() || ret_ti.is_bytes());
360  ret_ty = llvm::Type::getVoidTy(cgen_state_->context_);
361  const auto struct_ty = get_buffer_struct_type(
362  cgen_state_,
363  function_oper->getName(),
364  0,
366  /* has_is_null = */ ret_ti.is_array() || ret_ti.is_bytes());
367  buffer_ret = cgen_state_->ir_builder_.CreateAlloca(struct_ty);
368  args.insert(args.begin(), buffer_ret);
369  }
370 
371  const auto ext_call = cgen_state_->emitExternalCall(
372  ext_func_sig.getName(), ret_ty, args, {}, ret_ti.is_buffer());
373  auto ext_call_nullcheck = endArgsNullcheck(
374  bbs, ret_ti.is_buffer() ? buffer_ret : ext_call, null_buffer_ptr, function_oper);
375 
376  // Cast the return of the extension function to match the FunctionOper
377  if (!(ret_ti.is_buffer())) {
378  const auto extension_ret_ti = get_sql_type_from_llvm_type(ret_ty);
379  if (bbs.args_null_bb &&
380  extension_ret_ti.get_type() != function_oper->get_type_info().get_type() &&
381  // Skip i1-->i8 casts for ST_ functions.
382  // function_oper ret type is i1, extension ret type is 'upgraded' to i8
383  // during type deserialization to 'handle' NULL returns, hence i1-->i8.
384  // ST_ functions can't return NULLs, we just need to check arg nullness
385  // and if any args are NULL then ST_ function is not called
386  function_oper->getName().substr(0, 3) != std::string("ST_")) {
387  ext_call_nullcheck = codegenCast(ext_call_nullcheck,
388  extension_ret_ti,
389  function_oper->get_type_info(),
390  false,
391  co);
392  }
393  }
394 
395  cgen_state_->ext_call_cache_.push_back({function_oper, ext_call_nullcheck});
396  return ext_call_nullcheck;
397 }
398 
399 // Start the control flow needed for a call site check of NULL arguments.
400 std::tuple<CodeGenerator::ArgNullcheckBBs, llvm::Value*>
402  const std::vector<llvm::Value*>& orig_arg_lvs) {
404  llvm::BasicBlock* args_null_bb{nullptr};
405  llvm::BasicBlock* args_notnull_bb{nullptr};
406  llvm::BasicBlock* orig_bb = cgen_state_->ir_builder_.GetInsertBlock();
407  llvm::Value* null_array_alloca{nullptr};
408  // Only generate the check if required (at least one argument must be nullable).
409  if (ext_func_call_requires_nullcheck(function_oper)) {
410  const auto func_ti = function_oper->get_type_info();
411  if (func_ti.is_buffer()) {
412  const auto arr_struct_ty = get_buffer_struct_type(
413  cgen_state_,
414  function_oper->getName(),
415  0,
417  func_ti.is_array() || func_ti.is_bytes());
418  null_array_alloca = cgen_state_->ir_builder_.CreateAlloca(arr_struct_ty);
419  }
420  const auto args_notnull_lv = cgen_state_->ir_builder_.CreateNot(
421  codegenFunctionOperNullArg(function_oper, orig_arg_lvs));
422  args_notnull_bb = llvm::BasicBlock::Create(
423  cgen_state_->context_, "args_notnull", cgen_state_->current_func_);
424  args_null_bb = llvm::BasicBlock::Create(
426  cgen_state_->ir_builder_.CreateCondBr(args_notnull_lv, args_notnull_bb, args_null_bb);
427  cgen_state_->ir_builder_.SetInsertPoint(args_notnull_bb);
428  }
429  return std::make_tuple(
430  CodeGenerator::ArgNullcheckBBs{args_null_bb, args_notnull_bb, orig_bb},
431  null_array_alloca);
432 }
433 
434 // Wrap up the control flow needed for NULL argument handling.
436  const ArgNullcheckBBs& bbs,
437  llvm::Value* fn_ret_lv,
438  llvm::Value* null_array_ptr,
439  const Analyzer::FunctionOper* function_oper) {
441  if (bbs.args_null_bb) {
442  CHECK(bbs.args_notnull_bb);
443  cgen_state_->ir_builder_.CreateBr(bbs.args_null_bb);
444  cgen_state_->ir_builder_.SetInsertPoint(bbs.args_null_bb);
445 
446  llvm::PHINode* ext_call_phi{nullptr};
447  llvm::Value* null_lv{nullptr};
448  const auto func_ti = function_oper->get_type_info();
449  if (!func_ti.is_buffer()) {
450  // The pre-cast SQL equivalent of the type returned by the extension function.
451  const auto extension_ret_ti = get_sql_type_from_llvm_type(fn_ret_lv->getType());
452 
453  ext_call_phi = cgen_state_->ir_builder_.CreatePHI(
454  extension_ret_ti.is_fp()
455  ? get_fp_type(extension_ret_ti.get_size() * 8, cgen_state_->context_)
456  : get_int_type(extension_ret_ti.get_size() * 8, cgen_state_->context_),
457  2);
458 
459  null_lv =
460  extension_ret_ti.is_fp()
461  ? static_cast<llvm::Value*>(cgen_state_->inlineFpNull(extension_ret_ti))
462  : static_cast<llvm::Value*>(cgen_state_->inlineIntNull(extension_ret_ti));
463  } else {
464  const auto arr_struct_ty = get_buffer_struct_type(
465  cgen_state_,
466  function_oper->getName(),
467  0,
469  true);
470  ext_call_phi =
471  cgen_state_->ir_builder_.CreatePHI(llvm::PointerType::get(arr_struct_ty, 0), 2);
472 
473  CHECK(null_array_ptr);
474  const auto arr_null_bool =
475  cgen_state_->ir_builder_.CreateStructGEP(arr_struct_ty, null_array_ptr, 2);
476  cgen_state_->ir_builder_.CreateStore(
477  llvm::ConstantInt::get(get_int_type(8, cgen_state_->context_), 1),
478  arr_null_bool);
479 
480  const auto arr_null_size =
481  cgen_state_->ir_builder_.CreateStructGEP(arr_struct_ty, null_array_ptr, 1);
482  cgen_state_->ir_builder_.CreateStore(
483  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
484  arr_null_size);
485  }
486  ext_call_phi->addIncoming(fn_ret_lv, bbs.args_notnull_bb);
487  ext_call_phi->addIncoming(func_ti.is_buffer() ? null_array_ptr : null_lv,
488  bbs.orig_bb);
489 
490  return ext_call_phi;
491  }
492  return fn_ret_lv;
493 }
494 
495 namespace {
496 
498  const auto& ret_ti = function_oper->get_type_info();
499  if (!ret_ti.is_integer() && !ret_ti.is_fp()) {
500  return true;
501  }
502  for (size_t i = 0; i < function_oper->getArity(); ++i) {
503  const auto arg = function_oper->getArg(i);
504  const auto& arg_ti = arg->get_type_info();
505  if (!arg_ti.is_integer() && !arg_ti.is_fp()) {
506  return true;
507  }
508  }
509  return false;
510 }
511 
512 } // namespace
513 
516  const CompilationOptions& co) {
518  if (call_requires_custom_type_handling(function_oper)) {
519  // Some functions need the return type to be the same as the input type.
520  if (function_oper->getName() == "FLOOR" || function_oper->getName() == "CEIL") {
521  CHECK_EQ(size_t(1), function_oper->getArity());
522  const auto arg = function_oper->getArg(0);
523  const auto& arg_ti = arg->get_type_info();
524  CHECK(arg_ti.is_decimal());
525  const auto arg_lvs = codegen(arg, true, co);
526  CHECK_EQ(size_t(1), arg_lvs.size());
527  const auto arg_lv = arg_lvs.front();
528  CHECK(arg_lv->getType()->isIntegerTy(64));
530  std::tie(bbs, std::ignore) = beginArgsNullcheck(function_oper, {arg_lvs});
531  const std::string func_name =
532  (function_oper->getName() == "FLOOR") ? "decimal_floor" : "decimal_ceil";
533  const auto covar_result_lv = cgen_state_->emitCall(
534  func_name, {arg_lv, cgen_state_->llInt(exp_to_scale(arg_ti.get_scale()))});
535  const auto ret_ti = function_oper->get_type_info();
536  CHECK(ret_ti.is_decimal());
537  CHECK_EQ(0, ret_ti.get_scale());
538  const auto result_lv = cgen_state_->ir_builder_.CreateSDiv(
539  covar_result_lv, cgen_state_->llInt(exp_to_scale(arg_ti.get_scale())));
540  return endArgsNullcheck(bbs, result_lv, nullptr, function_oper);
541  } else if (function_oper->getName() == "ROUND" &&
542  function_oper->getArg(0)->get_type_info().is_decimal()) {
543  CHECK_EQ(size_t(2), function_oper->getArity());
544 
545  const auto arg0 = function_oper->getArg(0);
546  const auto& arg0_ti = arg0->get_type_info();
547  const auto arg0_lvs = codegen(arg0, true, co);
548  CHECK_EQ(size_t(1), arg0_lvs.size());
549  const auto arg0_lv = arg0_lvs.front();
550  CHECK(arg0_lv->getType()->isIntegerTy(64));
551 
552  const auto arg1 = function_oper->getArg(1);
553  const auto& arg1_ti = arg1->get_type_info();
554  CHECK(arg1_ti.is_integer());
555  const auto arg1_lvs = codegen(arg1, true, co);
556  auto arg1_lv = arg1_lvs.front();
557  if (arg1_ti.get_type() != kINT) {
558  arg1_lv = codegenCast(arg1_lv, arg1_ti, SQLTypeInfo(kINT, true), false, co);
559  }
560 
562  std::tie(bbs0, std::ignore) =
563  beginArgsNullcheck(function_oper, {arg0_lv, arg1_lvs.front()});
564 
565  const std::string func_name = "Round__4";
566  const auto ret_ti = function_oper->get_type_info();
567  CHECK(ret_ti.is_decimal());
568  const auto result_lv = cgen_state_->emitExternalCall(
569  func_name,
571  {arg0_lv, arg1_lv, cgen_state_->llInt(arg0_ti.get_scale())});
572 
573  return endArgsNullcheck(bbs0, result_lv, nullptr, function_oper);
574  }
575  throw std::runtime_error("Type combination not supported for function " +
576  function_oper->getName());
577  }
578  return codegenFunctionOper(function_oper, co);
579 }
580 
581 // Generates code which returns true iff at least one of the arguments is NULL.
583  const Analyzer::FunctionOper* function_oper,
584  const std::vector<llvm::Value*>& orig_arg_lvs) {
586  llvm::Value* one_arg_null =
587  llvm::ConstantInt::get(llvm::IntegerType::getInt1Ty(cgen_state_->context_), false);
588  size_t physical_coord_cols = 0;
589  for (size_t i = 0, j = 0; i < function_oper->getArity();
590  ++i, j += std::max(size_t(1), physical_coord_cols)) {
591  const auto arg = function_oper->getArg(i);
592  const auto& arg_ti = arg->get_type_info();
593  physical_coord_cols = arg_ti.get_physical_coord_cols();
594  if (arg_ti.get_notnull()) {
595  continue;
596  }
597  auto geo_expr_arg = dynamic_cast<const Analyzer::GeoExpr*>(arg);
598  if (geo_expr_arg && arg_ti.is_geometry()) {
599  CHECK(arg_ti.get_type() == kPOINT);
600  auto is_null_lv = cgen_state_->ir_builder_.CreateICmp(
601  llvm::CmpInst::ICMP_EQ,
602  orig_arg_lvs[j],
603  llvm::ConstantPointerNull::get( // TODO: centralize logic; in geo expr?
604  arg_ti.get_compression() == kENCODING_GEOINT
605  ? llvm::Type::getInt32PtrTy(cgen_state_->context_)
606  : llvm::Type::getDoublePtrTy(cgen_state_->context_)));
607  one_arg_null = cgen_state_->ir_builder_.CreateOr(one_arg_null, is_null_lv);
608  physical_coord_cols = 2; // number of lvs to advance
609  continue;
610  }
611 #ifdef ENABLE_GEOS
612  // If geo arg is coming from geos, skip the null check, assume it's a valid geo
613  if (arg_ti.is_geometry()) {
614  auto* coords_load = llvm::dyn_cast<llvm::LoadInst>(orig_arg_lvs[i]);
615  if (coords_load) {
616  continue;
617  }
618  }
619 #endif
620  if (arg_ti.is_geometry()) {
621  auto* coords_alloca = llvm::dyn_cast<llvm::AllocaInst>(orig_arg_lvs[j]);
622  auto* coords_phi = llvm::dyn_cast<llvm::PHINode>(orig_arg_lvs[j]);
623  if (coords_alloca || coords_phi) {
624  // TODO: null check dynamically generated geometries
625  continue;
626  }
627  }
628  if (arg_ti.is_buffer() || arg_ti.is_geometry()) {
629  // POINT [un]compressed coord check requires custom checker and chunk iterator
630  // Non-POINT NULL geographies will have a normally encoded null coord array
631  auto fname =
632  (arg_ti.get_type() == kPOINT) ? "point_coord_array_is_null" : "array_is_null";
633  auto is_null_lv = cgen_state_->emitExternalCall(
634  fname, get_int_type(1, cgen_state_->context_), {orig_arg_lvs[j], posArg(arg)});
635  one_arg_null = cgen_state_->ir_builder_.CreateOr(one_arg_null, is_null_lv);
636  continue;
637  }
638  CHECK(arg_ti.is_number() or arg_ti.is_boolean());
639  one_arg_null = cgen_state_->ir_builder_.CreateOr(
640  one_arg_null, codegenIsNullNumber(orig_arg_lvs[j], arg_ti));
641  }
642  return one_arg_null;
643 }
644 
645 llvm::Value* CodeGenerator::codegenCompression(const SQLTypeInfo& type_info) {
647  int32_t compression = (type_info.get_compression() == kENCODING_GEOINT &&
648  type_info.get_comp_param() == 32)
649  ? 1
650  : 0;
651 
652  return cgen_state_->llInt(compression);
653 }
654 
655 std::pair<llvm::Value*, llvm::Value*> CodeGenerator::codegenArrayBuff(
656  llvm::Value* chunk,
657  llvm::Value* row_pos,
658  SQLTypes array_type,
659  bool cast_and_extend) {
661  const auto elem_ti =
662  SQLTypeInfo(
663  SQLTypes::kARRAY, 0, 0, false, EncodingType::kENCODING_NONE, 0, array_type)
664  .get_elem_type();
665 
666  auto buff = cgen_state_->emitExternalCall(
667  "array_buff", llvm::Type::getInt32PtrTy(cgen_state_->context_), {chunk, row_pos});
668 
669  auto len = cgen_state_->emitExternalCall(
670  "array_size",
671  get_int_type(32, cgen_state_->context_),
672  {chunk, row_pos, cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
673 
674  if (cast_and_extend) {
675  buff = castArrayPointer(buff, elem_ti);
676  len =
677  cgen_state_->ir_builder_.CreateZExt(len, get_int_type(64, cgen_state_->context_));
678  }
679 
680  return std::make_pair(buff, len);
681 }
682 
683 void CodeGenerator::codegenBufferArgs(const std::string& ext_func_name,
684  size_t param_num,
685  llvm::Value* buffer_buf,
686  llvm::Value* buffer_size,
687  llvm::Value* buffer_null,
688  std::vector<llvm::Value*>& output_args) {
690  CHECK(buffer_buf);
691  CHECK(buffer_size);
692 
693  auto buffer_abstraction = get_buffer_struct_type(
694  cgen_state_, ext_func_name, param_num, buffer_buf->getType(), !!(buffer_null));
695  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(buffer_abstraction);
696 
697  auto buffer_buf_ptr =
698  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 0);
699  cgen_state_->ir_builder_.CreateStore(buffer_buf, buffer_buf_ptr);
700 
701  auto buffer_size_ptr =
702  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 1);
703  cgen_state_->ir_builder_.CreateStore(buffer_size, buffer_size_ptr);
704 
705  if (buffer_null) {
706  auto bool_extended_type = llvm::Type::getInt8Ty(cgen_state_->context_);
707  auto buffer_null_extended =
708  cgen_state_->ir_builder_.CreateZExt(buffer_null, bool_extended_type);
709  auto buffer_is_null_ptr =
710  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 2);
711  cgen_state_->ir_builder_.CreateStore(buffer_null_extended, buffer_is_null_ptr);
712  }
713  output_args.push_back(alloc_mem);
714 }
715 
716 llvm::StructType* CodeGenerator::createPointStructType(const std::string& udf_func_name,
717  size_t param_num) {
718  llvm::Module* module_for_lookup = cgen_state_->module_;
719  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
720 
721  llvm::StructType* generated_struct_type =
722  llvm::StructType::get(cgen_state_->context_,
723  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
724  llvm::Type::getInt32Ty(cgen_state_->context_),
725  llvm::Type::getInt32Ty(cgen_state_->context_),
726  llvm::Type::getInt32Ty(cgen_state_->context_),
727  llvm::Type::getInt32Ty(cgen_state_->context_)},
728  false);
729 
730  if (udf_func) {
731  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
732  CHECK(param_num < udf_func_type->getNumParams());
733  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
734  CHECK(param_pointer_type->isPointerTy());
735  llvm::Type* param_type = param_pointer_type->getPointerElementType();
736  CHECK(param_type->isStructTy());
737  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
738  CHECK(struct_type->getStructNumElements() == 5) << serialize_llvm_object(struct_type);
739  const auto expected_elems = generated_struct_type->elements();
740  const auto current_elems = struct_type->elements();
741  for (size_t i = 0; i < expected_elems.size(); i++) {
742  CHECK_EQ(expected_elems[i], current_elems[i]);
743  }
744  if (struct_type->isLiteral()) {
745  return struct_type;
746  }
747 
748  llvm::StringRef struct_name = struct_type->getStructName();
749 #if LLVM_VERSION_MAJOR >= 12
750  llvm::StructType* point_type =
751  struct_type->getTypeByName(cgen_state_->context_, struct_name);
752 #else
753  llvm::StructType* point_type = module_for_lookup->getTypeByName(struct_name);
754 #endif
755  CHECK(point_type);
756 
757  return (point_type);
758  }
759  return generated_struct_type;
760 }
761 
762 void CodeGenerator::codegenGeoPointArgs(const std::string& udf_func_name,
763  size_t param_num,
764  llvm::Value* point_buf,
765  llvm::Value* point_size,
766  llvm::Value* compression,
767  llvm::Value* input_srid,
768  llvm::Value* output_srid,
769  std::vector<llvm::Value*>& output_args) {
771  CHECK(point_buf);
772  CHECK(point_size);
773  CHECK(compression);
774  CHECK(input_srid);
775  CHECK(output_srid);
776 
777  auto point_abstraction = createPointStructType(udf_func_name, param_num);
778  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(point_abstraction, nullptr);
779 
780  auto point_buf_ptr =
781  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 0);
782  cgen_state_->ir_builder_.CreateStore(point_buf, point_buf_ptr);
783 
784  auto point_size_ptr =
785  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 1);
786  cgen_state_->ir_builder_.CreateStore(point_size, point_size_ptr);
787 
788  auto point_compression_ptr =
789  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 2);
790  cgen_state_->ir_builder_.CreateStore(compression, point_compression_ptr);
791 
792  auto input_srid_ptr =
793  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 3);
794  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
795 
796  auto output_srid_ptr =
797  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 4);
798  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
799 
800  output_args.push_back(alloc_mem);
801 }
802 
804  const std::string& udf_func_name,
805  size_t param_num) {
806  llvm::Module* module_for_lookup = cgen_state_->module_;
807  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
808 
809  llvm::StructType* generated_struct_type =
810  llvm::StructType::get(cgen_state_->context_,
811  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
812  llvm::Type::getInt32Ty(cgen_state_->context_),
813  llvm::Type::getInt32Ty(cgen_state_->context_),
814  llvm::Type::getInt32Ty(cgen_state_->context_),
815  llvm::Type::getInt32Ty(cgen_state_->context_)},
816  false);
817 
818  if (udf_func) {
819  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
820  CHECK(param_num < udf_func_type->getNumParams());
821  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
822  CHECK(param_pointer_type->isPointerTy());
823  llvm::Type* param_type = param_pointer_type->getPointerElementType();
824  CHECK(param_type->isStructTy());
825  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
826  CHECK(struct_type->isStructTy());
827  CHECK(struct_type->getStructNumElements() == 5);
828 
829  const auto expected_elems = generated_struct_type->elements();
830  const auto current_elems = struct_type->elements();
831  for (size_t i = 0; i < expected_elems.size(); i++) {
832  CHECK_EQ(expected_elems[i], current_elems[i]);
833  }
834  if (struct_type->isLiteral()) {
835  return struct_type;
836  }
837 
838  llvm::StringRef struct_name = struct_type->getStructName();
839 #if LLVM_VERSION_MAJOR >= 12
840  llvm::StructType* line_string_type =
841  struct_type->getTypeByName(cgen_state_->context_, struct_name);
842 #else
843  llvm::StructType* line_string_type = module_for_lookup->getTypeByName(struct_name);
844 #endif
845  CHECK(line_string_type);
846 
847  return (line_string_type);
848  }
849  return generated_struct_type;
850 }
851 
852 void CodeGenerator::codegenGeoLineStringArgs(const std::string& udf_func_name,
853  size_t param_num,
854  llvm::Value* line_string_buf,
855  llvm::Value* line_string_size,
856  llvm::Value* compression,
857  llvm::Value* input_srid,
858  llvm::Value* output_srid,
859  std::vector<llvm::Value*>& output_args) {
861  CHECK(line_string_buf);
862  CHECK(line_string_size);
863  CHECK(compression);
864  CHECK(input_srid);
865  CHECK(output_srid);
866 
867  auto line_string_abstraction = createLineStringStructType(udf_func_name, param_num);
868  auto alloc_mem =
869  cgen_state_->ir_builder_.CreateAlloca(line_string_abstraction, nullptr);
870 
871  auto line_string_buf_ptr =
872  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 0);
873  cgen_state_->ir_builder_.CreateStore(line_string_buf, line_string_buf_ptr);
874 
875  auto line_string_size_ptr =
876  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 1);
877  cgen_state_->ir_builder_.CreateStore(line_string_size, line_string_size_ptr);
878 
879  auto line_string_compression_ptr =
880  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 2);
881  cgen_state_->ir_builder_.CreateStore(compression, line_string_compression_ptr);
882 
883  auto input_srid_ptr =
884  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 3);
885  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
886 
887  auto output_srid_ptr =
888  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 4);
889  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
890 
891  output_args.push_back(alloc_mem);
892 }
893 
894 llvm::StructType* CodeGenerator::createPolygonStructType(const std::string& udf_func_name,
895  size_t param_num) {
896  llvm::Module* module_for_lookup = cgen_state_->module_;
897  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
898 
899  llvm::StructType* generated_struct_type =
900  llvm::StructType::get(cgen_state_->context_,
901  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
902  llvm::Type::getInt32Ty(cgen_state_->context_),
903  llvm::Type::getInt8PtrTy(cgen_state_->context_),
904  llvm::Type::getInt32Ty(cgen_state_->context_),
905  llvm::Type::getInt32Ty(cgen_state_->context_),
906  llvm::Type::getInt32Ty(cgen_state_->context_),
907  llvm::Type::getInt32Ty(cgen_state_->context_)},
908  false);
909 
910  if (udf_func) {
911  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
912  CHECK(param_num < udf_func_type->getNumParams());
913  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
914  CHECK(param_pointer_type->isPointerTy());
915  llvm::Type* param_type = param_pointer_type->getPointerElementType();
916  CHECK(param_type->isStructTy());
917  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
918 
919  CHECK(struct_type->isStructTy());
920  CHECK(struct_type->getStructNumElements() == 7);
921 
922  const auto expected_elems = generated_struct_type->elements();
923  const auto current_elems = struct_type->elements();
924  for (size_t i = 0; i < expected_elems.size(); i++) {
925  CHECK_EQ(expected_elems[i], current_elems[i]);
926  }
927  if (struct_type->isLiteral()) {
928  return struct_type;
929  }
930 
931  llvm::StringRef struct_name = struct_type->getStructName();
932 
933 #if LLVM_VERSION_MAJOR >= 12
934  llvm::StructType* polygon_type =
935  struct_type->getTypeByName(cgen_state_->context_, struct_name);
936 #else
937  llvm::StructType* polygon_type = module_for_lookup->getTypeByName(struct_name);
938 #endif
939  CHECK(polygon_type);
940 
941  return (polygon_type);
942  }
943  return generated_struct_type;
944 }
945 
946 void CodeGenerator::codegenGeoPolygonArgs(const std::string& udf_func_name,
947  size_t param_num,
948  llvm::Value* polygon_buf,
949  llvm::Value* polygon_size,
950  llvm::Value* ring_sizes_buf,
951  llvm::Value* num_rings,
952  llvm::Value* compression,
953  llvm::Value* input_srid,
954  llvm::Value* output_srid,
955  std::vector<llvm::Value*>& output_args) {
957  CHECK(polygon_buf);
958  CHECK(polygon_size);
959  CHECK(ring_sizes_buf);
960  CHECK(num_rings);
961  CHECK(compression);
962  CHECK(input_srid);
963  CHECK(output_srid);
964 
965  auto& builder = cgen_state_->ir_builder_;
966 
967  auto polygon_abstraction = createPolygonStructType(udf_func_name, param_num);
968  auto alloc_mem = builder.CreateAlloca(polygon_abstraction, nullptr);
969 
970  const auto polygon_buf_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 0);
971  builder.CreateStore(polygon_buf, polygon_buf_ptr);
972 
973  const auto polygon_size_ptr =
974  builder.CreateStructGEP(polygon_abstraction, alloc_mem, 1);
975  builder.CreateStore(polygon_size, polygon_size_ptr);
976 
977  const auto ring_sizes_buf_ptr =
978  builder.CreateStructGEP(polygon_abstraction, alloc_mem, 2);
979  const auto ring_sizes_ptr_ty =
980  llvm::dyn_cast<llvm::PointerType>(ring_sizes_buf_ptr->getType());
981  CHECK(ring_sizes_ptr_ty);
982  builder.CreateStore(
983  builder.CreateBitCast(ring_sizes_buf, ring_sizes_ptr_ty->getPointerElementType()),
984  ring_sizes_buf_ptr);
985 
986  const auto ring_size_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 3);
987  builder.CreateStore(num_rings, ring_size_ptr);
988 
989  const auto polygon_compression_ptr =
990  builder.CreateStructGEP(polygon_abstraction, alloc_mem, 4);
991  builder.CreateStore(compression, polygon_compression_ptr);
992 
993  const auto input_srid_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 5);
994  builder.CreateStore(input_srid, input_srid_ptr);
995 
996  const auto output_srid_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 6);
997  builder.CreateStore(output_srid, output_srid_ptr);
998 
999  output_args.push_back(alloc_mem);
1000 }
1001 
1003  const std::string& udf_func_name,
1004  size_t param_num) {
1005  llvm::Function* udf_func = cgen_state_->module_->getFunction(udf_func_name);
1006 
1007  llvm::StructType* generated_struct_type =
1008  llvm::StructType::get(cgen_state_->context_,
1009  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
1010  llvm::Type::getInt32Ty(cgen_state_->context_),
1011  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1012  llvm::Type::getInt32Ty(cgen_state_->context_),
1013  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1014  llvm::Type::getInt32Ty(cgen_state_->context_),
1015  llvm::Type::getInt32Ty(cgen_state_->context_),
1016  llvm::Type::getInt32Ty(cgen_state_->context_),
1017  llvm::Type::getInt32Ty(cgen_state_->context_)},
1018  false);
1019 
1020  if (udf_func) {
1021  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
1022  CHECK(param_num < udf_func_type->getNumParams());
1023  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
1024  CHECK(param_pointer_type->isPointerTy());
1025  llvm::Type* param_type = param_pointer_type->getPointerElementType();
1026  CHECK(param_type->isStructTy());
1027  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
1028  CHECK(struct_type->isStructTy());
1029  CHECK(struct_type->getStructNumElements() == 9);
1030  const auto expected_elems = generated_struct_type->elements();
1031  const auto current_elems = struct_type->elements();
1032  for (size_t i = 0; i < expected_elems.size(); i++) {
1033  CHECK_EQ(expected_elems[i], current_elems[i]);
1034  }
1035  if (struct_type->isLiteral()) {
1036  return struct_type;
1037  }
1038  llvm::StringRef struct_name = struct_type->getStructName();
1039 
1040 #if LLVM_VERSION_MAJOR >= 12
1041  llvm::StructType* polygon_type =
1042  struct_type->getTypeByName(cgen_state_->context_, struct_name);
1043 #else
1044  llvm::StructType* polygon_type = cgen_state_->module_->getTypeByName(struct_name);
1045 #endif
1046  CHECK(polygon_type);
1047 
1048  return (polygon_type);
1049  }
1050  return generated_struct_type;
1051 }
1052 
1053 void CodeGenerator::codegenGeoMultiPolygonArgs(const std::string& udf_func_name,
1054  size_t param_num,
1055  llvm::Value* polygon_coords,
1056  llvm::Value* polygon_coords_size,
1057  llvm::Value* ring_sizes_buf,
1058  llvm::Value* ring_sizes,
1059  llvm::Value* polygon_bounds,
1060  llvm::Value* polygon_bounds_sizes,
1061  llvm::Value* compression,
1062  llvm::Value* input_srid,
1063  llvm::Value* output_srid,
1064  std::vector<llvm::Value*>& output_args) {
1066  CHECK(polygon_coords);
1067  CHECK(polygon_coords_size);
1068  CHECK(ring_sizes_buf);
1069  CHECK(ring_sizes);
1070  CHECK(polygon_bounds);
1071  CHECK(polygon_bounds_sizes);
1072  CHECK(compression);
1073  CHECK(input_srid);
1074  CHECK(output_srid);
1075 
1076  auto& builder = cgen_state_->ir_builder_;
1077 
1078  auto multi_polygon_abstraction = createMultiPolygonStructType(udf_func_name, param_num);
1079  auto alloc_mem = builder.CreateAlloca(multi_polygon_abstraction, nullptr);
1080 
1081  const auto polygon_coords_ptr =
1082  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 0);
1083  builder.CreateStore(polygon_coords, polygon_coords_ptr);
1084 
1085  const auto polygon_coords_size_ptr =
1086  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 1);
1087  builder.CreateStore(polygon_coords_size, polygon_coords_size_ptr);
1088 
1089  const auto ring_sizes_buf_ptr =
1090  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 2);
1091  const auto ring_sizes_ptr_ty =
1092  llvm::dyn_cast<llvm::PointerType>(ring_sizes_buf_ptr->getType());
1093  CHECK(ring_sizes_ptr_ty);
1094  builder.CreateStore(
1095  builder.CreateBitCast(ring_sizes_buf, ring_sizes_ptr_ty->getPointerElementType()),
1096  ring_sizes_buf_ptr);
1097 
1098  const auto ring_sizes_ptr =
1099  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 3);
1100  builder.CreateStore(ring_sizes, ring_sizes_ptr);
1101 
1102  const auto polygon_bounds_buf_ptr =
1103  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 4);
1104  const auto bounds_ptr_ty =
1105  llvm::dyn_cast<llvm::PointerType>(polygon_bounds_buf_ptr->getType());
1106  CHECK(bounds_ptr_ty);
1107  builder.CreateStore(
1108  builder.CreateBitCast(polygon_bounds, bounds_ptr_ty->getPointerElementType()),
1109  polygon_bounds_buf_ptr);
1110 
1111  const auto polygon_bounds_sizes_ptr =
1112  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 5);
1113  builder.CreateStore(polygon_bounds_sizes, polygon_bounds_sizes_ptr);
1114 
1115  const auto polygon_compression_ptr =
1116  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 6);
1117  builder.CreateStore(compression, polygon_compression_ptr);
1118 
1119  const auto input_srid_ptr =
1120  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 7);
1121  builder.CreateStore(input_srid, input_srid_ptr);
1122 
1123  const auto output_srid_ptr =
1124  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 8);
1125  builder.CreateStore(output_srid, output_srid_ptr);
1126 
1127  output_args.push_back(alloc_mem);
1128 }
1129 
1130 // Generate CAST operations for arguments in `orig_arg_lvs` to the types required by
1131 // `ext_func_sig`.
//
// Walks the operands of `function_oper` and builds the list of LLVM values to pass to
// the extension function described by `ext_func_sig`.
//
// Parameters:
//   function_oper      - call expression; its arity bounds the loop and its operands
//                        supply the SQL type of each argument.
//   ext_func_sig       - must be non-null; getInputArgs() gives the expected argument
//                        kinds and getName() is forwarded to the struct-packing helpers.
//   orig_arg_lvs       - previously generated LLVM values for the operands.
//   orig_arg_lvs_index - for operand i, the index in `orig_arg_lvs` of its first value.
//   const_arr_size     - maps an array value to its length value; when an operand's
//                        value is a key here, the value itself is used as the buffer
//                        and the mapped value as the length.
//   co                 - forwarded to codegenCast for scalar operands.
// Returns the vector of argument values that were generated.
//
// NOTE(review): the embedded line numbering of this listing skips several lines inside
// this definition (the declarator above the parameter list, the callee of the
// "array_buff"/"array_size" calls, and one argument of some calls). Confirm those
// spots against the original file before relying on the text below.
1133  const Analyzer::FunctionOper* function_oper,
1134  const ExtensionFunction* ext_func_sig,
1135  const std::vector<llvm::Value*>& orig_arg_lvs,
1136  const std::vector<size_t>& orig_arg_lvs_index,
1137  const std::unordered_map<llvm::Value*, llvm::Value*>& const_arr_size,
1138  const CompilationOptions& co) {
1140  CHECK(ext_func_sig);
1141  const auto& ext_func_args = ext_func_sig->getInputArgs();
1142  CHECK_LE(function_oper->getArity(), ext_func_args.size());
1143  const auto func_ti = function_oper->get_type_info();
1144  std::vector<llvm::Value*> args;
1145  /*
1146  i: argument in RA for the function operand
1147  j: extra offset in ext_func_args
1148  k: origin_arg_lvs counter, equal to orig_arg_lvs_index[i]
1149  ij: ext_func_args counter, equal to i + j
1150  dj: offset when UDF implementation first argument corresponds to return value
1151  */
1152  for (size_t i = 0, j = 0, dj = (func_ti.is_buffer() ? 1 : 0);
1153  i < function_oper->getArity();
1154  ++i) {
1155  size_t k = orig_arg_lvs_index[i];
1156  size_t ij = i + j;
1157  const auto arg = function_oper->getArg(i);
1158  const auto ext_func_arg = ext_func_args[ij];
1159  const auto& arg_ti = arg->get_type_info();
1160  llvm::Value* arg_lv{nullptr};
1161  if (arg_ti.is_bytes()) {
      // Bytes operand: pointer and length are the two values following index k. They
      // are passed as a buffer struct built by codegenBufferArgs, with no is-null
      // value (nullptr).
1162  CHECK(ext_func_arg == ExtArgumentType::TextEncodingNone)
1163  << ::toString(ext_func_arg);
1164  const auto ptr_lv = orig_arg_lvs[k + 1];
1165  const auto len_lv = orig_arg_lvs[k + 2];
1166  auto& builder = cgen_state_->ir_builder_;
1167  auto string_buf_arg = builder.CreatePointerCast(
1168  ptr_lv, llvm::Type::getInt8PtrTy(cgen_state_->context_));
1169  auto string_size_arg =
1170  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1171  codegenBufferArgs(ext_func_sig->getName(),
1172  ij + dj,
1173  string_buf_arg,
1174  string_size_arg,
1175  nullptr,
1176  args);
1177  } else if (arg_ti.is_array()) {
      // Array operand: a value listed in const_arr_size is used directly together
      // with its mapped length; otherwise buffer and length come from the
      // "array_buff" / "array_size" calls.
1178  bool const_arr = (const_arr_size.count(orig_arg_lvs[k]) > 0);
1179  const auto elem_ti = arg_ti.get_elem_type();
1180  // TODO: switch to fast fixlen variants
1181  const auto ptr_lv = (const_arr)
1182  ? orig_arg_lvs[k]
1184  "array_buff",
1185  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1186  {orig_arg_lvs[k], posArg(arg)});
1187  const auto len_lv =
1188  (const_arr) ? const_arr_size.at(orig_arg_lvs[k])
1190  "array_size",
1192  {orig_arg_lvs[k],
1193  posArg(arg),
1194  cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
1195 
1196  if (is_ext_arg_type_pointer(ext_func_arg)) {
      // Pointer-style signature: push the typed element pointer and the length
      // zero-extended to 64 bits; j is advanced by one for the second value.
1197  args.push_back(castArrayPointer(ptr_lv, elem_ti));
1198  args.push_back(cgen_state_->ir_builder_.CreateZExt(
1199  len_lv, get_int_type(64, cgen_state_->context_)));
1200  j++;
1201  } else if (is_ext_arg_type_array(ext_func_arg)) {
      // Array-struct signature: pointer, 64-bit length and the result of
      // "array_is_null" are packed into one buffer struct.
1202  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1203  auto& builder = cgen_state_->ir_builder_;
1204  auto array_size_arg =
1205  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1206  auto array_null_arg =
1207  cgen_state_->emitExternalCall("array_is_null",
1209  {orig_arg_lvs[k], posArg(arg)});
1210  codegenBufferArgs(ext_func_sig->getName(),
1211  ij + dj,
1212  array_buf_arg,
1213  array_size_arg,
1214  array_null_arg,
1215  args);
1216  } else {
1217  UNREACHABLE();
1218  }
1219 
1220  } else if (arg_ti.is_geometry()) {
1221  auto geo_expr_arg = dynamic_cast<const Analyzer::GeoExpr*>(arg);
1222  if (geo_expr_arg) {
      // GeoExpr operand: push an i8* view of the value at k and the value at k + 1
      // sign-extended to 64 bits, advance j by one and go to the next operand.
1223  auto ptr_lv = cgen_state_->ir_builder_.CreateBitCast(
1224  orig_arg_lvs[k], llvm::Type::getInt8PtrTy(cgen_state_->context_));
1225  args.push_back(ptr_lv);
1226  // TODO: remove when we normalize extension functions geo sizes to int32
1227  auto size_lv = cgen_state_->ir_builder_.CreateSExt(
1228  orig_arg_lvs[k + 1], llvm::Type::getInt64Ty(cgen_state_->context_));
1229  args.push_back(size_lv);
1230  j++;
1231  continue;
1232  }
1233  // Coords
1234  bool const_arr = (const_arr_size.count(orig_arg_lvs[k]) > 0);
1235  // NOTE(adb): We're generating code to handle the TINYINT array only -- the actual
1236  // geo encoding (or lack thereof) does not matter here
1237  const auto elem_ti = SQLTypeInfo(SQLTypes::kARRAY,
1238  0,
1239  0,
1240  false,
1242  0,
1244  .get_elem_type();
1245  llvm::Value* ptr_lv;
1246  llvm::Value* len_lv;
1247  int32_t fixlen = -1;
      // For a POINT column operand whose physical column 1 has an array type, take
      // that column type's size as the fixed coords length.
1248  if (arg_ti.get_type() == kPOINT) {
1249  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(arg);
1250  if (col_var) {
1251  const auto coords_cd = executor()->getPhysicalColumnDescriptor(col_var, 1);
1252  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1253  fixlen = coords_cd->columnType.get_size();
1254  }
1255  }
1256  }
1257  if (fixlen > 0) {
      // Fixed-length coords: buffer via "fast_fixlen_array_buff", length is the
      // constant found above.
1258  ptr_lv =
1259  cgen_state_->emitExternalCall("fast_fixlen_array_buff",
1260  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1261  {orig_arg_lvs[k], posArg(arg)});
1262  len_lv = cgen_state_->llInt(int32_t(fixlen));
1263  } else {
1264  // TODO: remove const_arr and related code if it's not needed
1265  ptr_lv = (const_arr) ? orig_arg_lvs[k]
1267  "array_buff",
1268  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1269  {orig_arg_lvs[k], posArg(arg)});
1270  len_lv = (const_arr)
1271  ? const_arr_size.at(orig_arg_lvs[k])
1273  "array_size",
1275  {orig_arg_lvs[k],
1276  posArg(arg),
1277  cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
1278  }
1279 
1280  if (is_ext_arg_type_geo(ext_func_arg)) {
      // Geo-struct signature: only POINT and LINESTRING are packed in this branch;
      // POLYGON and MULTIPOLYGON structs are built in the switch further down.
1281  if (arg_ti.get_type() == kPOINT || arg_ti.get_type() == kLINESTRING) {
1282  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1283  auto compression_val = codegenCompression(arg_ti);
1284  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1285  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1286 
1287  if (arg_ti.get_type() == kPOINT) {
1288  CHECK_EQ(k, ij);
1289  codegenGeoPointArgs(ext_func_sig->getName(),
1290  ij + dj,
1291  array_buf_arg,
1292  len_lv,
1293  compression_val,
1294  input_srid_val,
1295  output_srid_val,
1296  args);
1297  } else {
1298  CHECK_EQ(k, ij);
1299  codegenGeoLineStringArgs(ext_func_sig->getName(),
1300  ij + dj,
1301  array_buf_arg,
1302  len_lv,
1303  compression_val,
1304  input_srid_val,
1305  output_srid_val,
1306  args);
1307  }
1308  }
1309  } else {
      // Non-struct signature: coords are pushed as a typed pointer plus the length
      // zero-extended to 64 bits; j is advanced by one for the second value.
1310  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1311  args.push_back(castArrayPointer(ptr_lv, elem_ti));
1312  args.push_back(cgen_state_->ir_builder_.CreateZExt(
1313  len_lv, get_int_type(64, cgen_state_->context_)));
1314  j++;
1315  }
1316 
      // Per-type handling of the remaining components (ring sizes, polygon bounds).
1317  switch (arg_ti.get_type()) {
1318  case kPOINT:
1319  case kLINESTRING:
1320  break;
1321  case kPOLYGON: {
1322  if (ext_func_arg == ExtArgumentType::GeoPolygon) {
1323  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1324  auto compression_val = codegenCompression(arg_ti);
1325  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1326  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1327 
1328  auto [ring_size_buff, ring_size] =
1329  codegenArrayBuff(orig_arg_lvs[k + 1],
1330  posArg(arg),
1332  /*cast_and_extend=*/false);
1333  CHECK_EQ(k, ij);
1334  codegenGeoPolygonArgs(ext_func_sig->getName(),
1335  ij + dj,
1336  array_buf_arg,
1337  len_lv,
1338  ring_size_buff,
1339  ring_size,
1340  compression_val,
1341  input_srid_val,
1342  output_srid_val,
1343  args);
1344  } else {
1345  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1346  // Ring Sizes
1347  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 1]) > 0;
1348  auto [ring_size_buff, ring_size] =
1349  (const_arr) ? std::make_pair(orig_arg_lvs[k + 1],
1350  const_arr_size.at(orig_arg_lvs[k + 1]))
1351  : codegenArrayBuff(orig_arg_lvs[k + 1],
1352  posArg(arg),
1354  /*cast_and_extend=*/true);
1355  args.push_back(ring_size_buff);
1356  args.push_back(ring_size);
      // Two more values were pushed for this operand.
1357  j += 2;
1358  }
1359  break;
1360  }
1361  case kMULTIPOLYGON: {
1362  if (ext_func_arg == ExtArgumentType::GeoMultiPolygon) {
1363  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1364  auto compression_val = codegenCompression(arg_ti);
1365  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1366  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1367 
1368  auto [ring_size_buff, ring_size] =
1369  codegenArrayBuff(orig_arg_lvs[k + 1],
1370  posArg(arg),
1372  /*cast_and_extend=*/false);
1373 
1374  auto [poly_bounds_buff, poly_bounds_size] =
1375  codegenArrayBuff(orig_arg_lvs[k + 2],
1376  posArg(arg),
1378  /*cast_and_extend=*/false);
1379  CHECK_EQ(k, ij);
1380  codegenGeoMultiPolygonArgs(ext_func_sig->getName(),
1381  ij + dj,
1382  array_buf_arg,
1383  len_lv,
1384  ring_size_buff,
1385  ring_size,
1386  poly_bounds_buff,
1387  poly_bounds_size,
1388  compression_val,
1389  input_srid_val,
1390  output_srid_val,
1391  args);
1392  } else {
1393  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1394  // Ring Sizes
1395  {
1396  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 1]) > 0;
1397  auto [ring_size_buff, ring_size] =
1398  (const_arr) ? std::make_pair(orig_arg_lvs[k + 1],
1399  const_arr_size.at(orig_arg_lvs[k + 1]))
1400  : codegenArrayBuff(orig_arg_lvs[k + 1],
1401  posArg(arg),
1403  /*cast_and_extend=*/true);
1404 
1405  args.push_back(ring_size_buff);
1406  args.push_back(ring_size);
1407  }
1408  // Poly Rings
1409  {
1410  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 2]) > 0;
1411  auto [poly_bounds_buff, poly_bounds_size] =
1412  (const_arr)
1413  ? std::make_pair(orig_arg_lvs[k + 2],
1414  const_arr_size.at(orig_arg_lvs[k + 2]))
1415  : codegenArrayBuff(
1416  orig_arg_lvs[k + 2], posArg(arg), SQLTypes::kINT, true);
1417 
1418  args.push_back(poly_bounds_buff);
1419  args.push_back(poly_bounds_size);
1420  }
      // Four more values were pushed for this operand.
1421  j += 4;
1422  }
1423  break;
1424  }
1425  default:
1426  CHECK(false);
1427  }
1428  } else {
      // Scalar operand: cast only when the SQL type differs from the type the
      // signature asks for, then verify the LLVM type matches the signature.
1429  CHECK(is_ext_arg_type_scalar(ext_func_arg));
1430  const auto arg_target_ti = ext_arg_type_to_type_info(ext_func_arg);
1431  if (arg_ti.get_type() != arg_target_ti.get_type()) {
1432  arg_lv = codegenCast(orig_arg_lvs[k], arg_ti, arg_target_ti, false, co);
1433  } else {
1434  arg_lv = orig_arg_lvs[k];
1435  }
1436  CHECK_EQ(arg_lv->getType(),
1437  ext_arg_type_to_llvm_type(ext_func_arg, cgen_state_->context_));
1438  args.push_back(arg_lv);
1439  }
1440  }
1441  return args;
1442 }
1443 
1444 llvm::Value* CodeGenerator::castArrayPointer(llvm::Value* ptr,
1445  const SQLTypeInfo& elem_ti) {
1447  if (elem_ti.get_type() == kFLOAT) {
1448  return cgen_state_->ir_builder_.CreatePointerCast(
1449  ptr, llvm::Type::getFloatPtrTy(cgen_state_->context_));
1450  }
1451  if (elem_ti.get_type() == kDOUBLE) {
1452  return cgen_state_->ir_builder_.CreatePointerCast(
1453  ptr, llvm::Type::getDoublePtrTy(cgen_state_->context_));
1454  }
1455  CHECK(elem_ti.is_integer() || elem_ti.is_boolean() ||
1456  (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT));
1457  switch (elem_ti.get_size()) {
1458  case 1:
1459  return cgen_state_->ir_builder_.CreatePointerCast(
1460  ptr, llvm::Type::getInt8PtrTy(cgen_state_->context_));
1461  case 2:
1462  return cgen_state_->ir_builder_.CreatePointerCast(
1463  ptr, llvm::Type::getInt16PtrTy(cgen_state_->context_));
1464  case 4:
1465  return cgen_state_->ir_builder_.CreatePointerCast(
1466  ptr, llvm::Type::getInt32PtrTy(cgen_state_->context_));
1467  case 8:
1468  return cgen_state_->ir_builder_.CreatePointerCast(
1469  ptr, llvm::Type::getInt64PtrTy(cgen_state_->context_));
1470  default:
1471  CHECK(false);
1472  }
1473  return nullptr;
1474 }
llvm::StructType * createLineStringStructType(const std::string &udf_func_name, size_t param_num)
void codegenGeoMultiPolygonArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *polygon_coords, llvm::Value *polygon_coords_size, llvm::Value *ring_sizes_buf, llvm::Value *ring_sizes, llvm::Value *polygon_bounds, llvm::Value *polygon_bounds_sizes, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define CHECK_EQ(x, y)
Definition: Logger.h:230
llvm::StructType * get_buffer_struct_type(CgenState *cgen_state, const std::string &ext_func_name, size_t param_num, llvm::Type *elem_type, bool has_is_null)
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
bool is_ext_arg_type_scalar(const ExtArgumentType ext_arg_type)
llvm::BasicBlock * args_notnull_bb
size_t getArity() const
Definition: Analyzer.h:2169
SQLTypes
Definition: sqltypes.h:38
std::unique_ptr< llvm::Module > udf_gpu_module
CgenState * cgen_state_
const ExtArgumentType getRet() const
void codegenGeoPolygonArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *polygon_buf, llvm::Value *polygon_size, llvm::Value *ring_sizes_buf, llvm::Value *num_rings, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define LOG(tag)
Definition: Logger.h:216
std::vector< llvm::Value * > codegenFunctionOperCastArgs(const Analyzer::FunctionOper *, const ExtensionFunction *, const std::vector< llvm::Value * > &, const std::vector< size_t > &, const std::unordered_map< llvm::Value *, llvm::Value * > &, const CompilationOptions &)
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Definition: CgenState.h:217
llvm::Value * codegenFunctionOperNullArg(const Analyzer::FunctionOper *, const std::vector< llvm::Value * > &)
llvm::IRBuilder ir_builder_
Definition: CgenState.h:361
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:515
llvm::Value * castArrayPointer(llvm::Value *ptr, const SQLTypeInfo &elem_ti)
#define UNREACHABLE()
Definition: Logger.h:266
#define CHECK_GE(x, y)
Definition: Logger.h:235
Definition: sqldefs.h:48
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
llvm::StructType * createPointStructType(const std::string &udf_func_name, size_t param_num)
bool call_requires_custom_type_handling(const Analyzer::FunctionOper *function_oper)
const std::string getName(bool keep_suffix=true) const
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool ext_func_call_requires_nullcheck(const Analyzer::FunctionOper *function_oper)
SQLTypeInfo get_sql_type_from_llvm_type(const llvm::Type *ll_type)
std::vector< FunctionOperValue > ext_call_cache_
Definition: CgenState.h:367
void codegenBufferArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *buffer_buf, llvm::Value *buffer_size, llvm::Value *buffer_is_null, std::vector< llvm::Value * > &output_args)
RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
std::pair< llvm::Value *, llvm::Value * > codegenArrayBuff(llvm::Value *chunk, llvm::Value *row_pos, SQLTypes array_type, bool cast_and_extend)
llvm::Module * module_
Definition: CgenState.h:350
Supported runtime functions management and retrieval.
llvm::LLVMContext & context_
Definition: CgenState.h:359
llvm::Function * current_func_
Definition: CgenState.h:353
std::tuple< ArgNullcheckBBs, llvm::Value * > beginArgsNullcheck(const Analyzer::FunctionOper *function_oper, const std::vector< llvm::Value * > &orig_arg_lvs)
bool is_integer() const
Definition: sqltypes.h:512
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:64
bool is_ext_arg_type_geo(const ExtArgumentType ext_arg_type)
bool is_ext_arg_type_array(const ExtArgumentType ext_arg_type)
llvm::Value * codegenFunctionOper(const Analyzer::FunctionOper *, const CompilationOptions &)
llvm::Type * get_llvm_type_from_sql_array_type(const SQLTypeInfo ti, llvm::LLVMContext &ctx)
bool is_boolean() const
Definition: sqltypes.h:517
llvm::BasicBlock * args_null_bb
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Type * ext_arg_type_to_llvm_type(const ExtArgumentType ext_arg_type, llvm::LLVMContext &ctx)
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1448
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
Argument type based extension function binding.
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:81
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:215
bool is_buffer() const
Definition: sqltypes.h:528
ExecutorDeviceType device_type
void codegenGeoPointArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *point_buf, llvm::Value *point_size, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define RUNTIME_EXPORT
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK_LE(x, y)
Definition: Logger.h:233
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::string serialize_llvm_object(const T *llvm_obj)
llvm::StructType * createPolygonStructType(const std::string &udf_func_name, size_t param_num)
const Analyzer::Expr * getArg(const size_t i) const
Definition: Analyzer.h:2171
const Expr * get_operand() const
Definition: Analyzer.h:378
llvm::Value * endArgsNullcheck(const ArgNullcheckBBs &, llvm::Value *, llvm::Value *, const Analyzer::FunctionOper *)
const std::vector< ExtArgumentType > & getInputArgs() const
std::unique_ptr< llvm::Module > udf_cpu_module
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:338
llvm::Value * codegenFunctionOperWithCustomTypeHandling(const Analyzer::FunctionOperWithCustomTypeHandling *, const CompilationOptions &)
bool is_bytes() const
Definition: sqltypes.h:525
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:296
#define CHECK(condition)
Definition: Logger.h:222
llvm::Value * codegenIsNullNumber(llvm::Value *, const SQLTypeInfo &)
Definition: LogicalIR.cpp:409
uint64_t exp_to_scale(const unsigned exp)
llvm::Value * codegenCompression(const SQLTypeInfo &type_info)
llvm::Value * codegenCast(const Analyzer::UOper *, const CompilationOptions &)
Definition: CastIR.cpp:21
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:176
Definition: sqltypes.h:45
bool is_string() const
Definition: sqltypes.h:510
std::string getName() const
Definition: Analyzer.h:2167
void codegenGeoLineStringArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *line_string_buf, llvm::Value *line_string_size, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
bool is_ext_arg_type_pointer(const ExtArgumentType ext_arg_type)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:865
bool is_decimal() const
Definition: sqltypes.h:513
int get_physical_coord_cols() const
Definition: sqltypes.h:375
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)
Definition: CgenState.cpp:102
Executor * executor() const
llvm::StructType * createMultiPolygonStructType(const std::string &udf_func_name, size_t param_num)