OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExtensionsIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Execute.h"
19 #include "ExtensionFunctions.hpp"
22 
23 #include <tuple>
24 
25 extern std::unique_ptr<llvm::Module> udf_gpu_module;
26 extern std::unique_ptr<llvm::Module> udf_cpu_module;
27 
28 namespace {
29 
30 llvm::StructType* get_buffer_struct_type(CgenState* cgen_state,
31  const std::string& ext_func_name,
32  size_t param_num,
33  llvm::Type* elem_type,
34  bool has_is_null) {
35  CHECK(elem_type);
36  CHECK(elem_type->isPointerTy());
37  llvm::StructType* generated_struct_type =
38  (has_is_null ? llvm::StructType::get(cgen_state->context_,
39  {elem_type,
40  llvm::Type::getInt64Ty(cgen_state->context_),
41  llvm::Type::getInt8Ty(cgen_state->context_)},
42  false)
43  : llvm::StructType::get(
44  cgen_state->context_,
45  {elem_type, llvm::Type::getInt64Ty(cgen_state->context_)},
46  false));
47  llvm::Function* udf_func = cgen_state->module_->getFunction(ext_func_name);
48  if (udf_func) {
49  // Compare expected array struct type with type from the function
50  // definition from the UDF module, but use the type from the
51  // module
52  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
53  CHECK_LE(param_num, udf_func_type->getNumParams());
54  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
55  CHECK(param_pointer_type->isPointerTy());
56  llvm::Type* param_type = param_pointer_type->getPointerElementType();
57  CHECK(param_type->isStructTy());
58  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
59  CHECK_GE(struct_type->getStructNumElements(),
60  generated_struct_type->getStructNumElements())
61  << serialize_llvm_object(struct_type);
62 
63  const auto expected_elems = generated_struct_type->elements();
64  const auto current_elems = struct_type->elements();
65  for (size_t i = 0; i < expected_elems.size(); i++) {
66  CHECK_EQ(expected_elems[i], current_elems[i])
67  << "[" << ::toString(expected_elems[i]) << ", " << ::toString(current_elems[i])
68  << "]";
69  }
70 
71  if (struct_type->isLiteral()) {
72  return struct_type;
73  }
74 
75  llvm::StringRef struct_name = struct_type->getStructName();
76 #if LLVM_VERSION_MAJOR >= 12
77  return struct_type->getTypeByName(cgen_state->context_, struct_name);
78 #else
79  return cgen_state->module_->getTypeByName(struct_name);
80 #endif
81  }
82  return generated_struct_type;
83 }
84 
86  llvm::LLVMContext& ctx) {
87  switch (ext_arg_type) {
88  case ExtArgumentType::Bool: // pass thru to Int8
90  return get_int_type(8, ctx);
92  return get_int_type(16, ctx);
94  return get_int_type(32, ctx);
96  return get_int_type(64, ctx);
98  return llvm::Type::getFloatTy(ctx);
100  return llvm::Type::getDoubleTy(ctx);
123  return llvm::Type::getVoidTy(ctx);
124  default:
125  CHECK(false);
126  }
127  CHECK(false);
128  return nullptr;
129 }
130 
132  CHECK(ll_type);
133  const auto bits = ll_type->getPrimitiveSizeInBits();
134 
135  if (ll_type->isFloatingPointTy()) {
136  switch (bits) {
137  case 32:
138  return SQLTypeInfo(kFLOAT, false);
139  case 64:
140  return SQLTypeInfo(kDOUBLE, false);
141  default:
142  LOG(FATAL) << "Unsupported llvm floating point type: " << bits
143  << ", only 32 and 64 bit floating point is supported.";
144  }
145  } else {
146  switch (bits) {
147  case 1:
148  return SQLTypeInfo(kBOOLEAN, false);
149  case 8:
150  return SQLTypeInfo(kTINYINT, false);
151  case 16:
152  return SQLTypeInfo(kSMALLINT, false);
153  case 32:
154  return SQLTypeInfo(kINT, false);
155  case 64:
156  return SQLTypeInfo(kBIGINT, false);
157  default:
158  LOG(FATAL) << "Unrecognized llvm type for SQL type: "
159  << bits; // TODO let's get the real name here
160  }
161  }
162  UNREACHABLE();
163  return SQLTypeInfo();
164 }
165 
167  llvm::LLVMContext& ctx) {
168  CHECK(ti.is_buffer());
169  if (ti.is_bytes()) {
170  return llvm::Type::getInt8PtrTy(ctx);
171  }
172 
173  const auto& elem_ti = ti.get_elem_type();
174  if (elem_ti.is_fp()) {
175  switch (elem_ti.get_size()) {
176  case 4:
177  return llvm::Type::getFloatPtrTy(ctx);
178  case 8:
179  return llvm::Type::getDoublePtrTy(ctx);
180  }
181  }
182 
183  if (elem_ti.is_boolean()) {
184  return llvm::Type::getInt8PtrTy(ctx);
185  }
186 
187  CHECK(elem_ti.is_integer());
188  switch (elem_ti.get_size()) {
189  case 1:
190  return llvm::Type::getInt8PtrTy(ctx);
191  case 2:
192  return llvm::Type::getInt16PtrTy(ctx);
193  case 4:
194  return llvm::Type::getInt32PtrTy(ctx);
195  case 8:
196  return llvm::Type::getInt64PtrTy(ctx);
197  }
198 
199  UNREACHABLE();
200  return nullptr;
201 }
202 
204  const auto& func_ti = function_oper->get_type_info();
205  for (size_t i = 0; i < function_oper->getArity(); ++i) {
206  const auto arg = function_oper->getArg(i);
207  const auto& arg_ti = arg->get_type_info();
208  if ((func_ti.is_array() && arg_ti.is_array()) ||
209  (func_ti.is_bytes() && arg_ti.is_bytes())) {
210  // If the function returns an array and any of the arguments are arrays, allow NULL
211  // scalars.
212  // TODO: Make this a property of the FunctionOper following `RETURN NULL ON NULL`
213  // semantics.
214  return false;
215  } else if (!arg_ti.get_notnull() && !arg_ti.is_buffer()) {
216  // Nullable geometry args will trigger a null check
217  return true;
218  } else {
219  continue;
220  }
221  }
222  return false;
223 }
224 
225 } // namespace
226 
228  int8_t* buffer) {
229  Executor* exec_ptr = reinterpret_cast<Executor*>(exec);
230  if (buffer != nullptr) {
231  exec_ptr->getRowSetMemoryOwner()->addVarlenBuffer(buffer);
232  }
233 }
234 
236  const Analyzer::FunctionOper* function_oper,
237  const CompilationOptions& co) {
239  ExtensionFunction ext_func_sig = [=]() {
241  try {
242  return bind_function(function_oper, /* is_gpu= */ true);
243  } catch (ExtensionFunctionBindingError& e) {
244  LOG(WARNING) << "codegenFunctionOper[GPU]: " << e.what() << " Redirecting "
245  << function_oper->getName() << " to run on CPU.";
246  throw QueryMustRunOnCpu();
247  }
248  } else {
249  try {
250  return bind_function(function_oper, /* is_gpu= */ false);
251  } catch (ExtensionFunctionBindingError& e) {
252  LOG(WARNING) << "codegenFunctionOper[CPU]: " << e.what();
253  throw;
254  }
255  }
256  }();
257 
258  const auto& ret_ti = function_oper->get_type_info();
259  CHECK(ret_ti.is_integer() || ret_ti.is_fp() || ret_ti.is_boolean() ||
260  ret_ti.is_buffer());
261  if (ret_ti.is_buffer() && co.device_type == ExecutorDeviceType::GPU) {
262  // TODO: This is not necessary for runtime UDFs because RBC does
263  // not generated GPU LLVM IR when the UDF is using Buffer objects.
264  // However, we cannot remove it until C++ UDFs can be defined for
265  // different devices independently.
266  throw QueryMustRunOnCpu();
267  }
268 
269  auto ret_ty = ext_arg_type_to_llvm_type(ext_func_sig.getRet(), cgen_state_->context_);
270  const auto current_bb = cgen_state_->ir_builder_.GetInsertBlock();
271  for (auto it : cgen_state_->ext_call_cache_) {
272  if (*it.foper == *function_oper) {
273  auto inst = llvm::dyn_cast<llvm::Instruction>(it.lv);
274  if (inst && inst->getParent() == current_bb) {
275  return it.lv;
276  }
277  }
278  }
279  std::vector<llvm::Value*> orig_arg_lvs;
280  std::vector<size_t> orig_arg_lvs_index;
281  std::unordered_map<llvm::Value*, llvm::Value*> const_arr_size;
282 
283  for (size_t i = 0; i < function_oper->getArity(); ++i) {
284  orig_arg_lvs_index.push_back(orig_arg_lvs.size());
285  const auto arg = function_oper->getArg(i);
286  const auto arg_cast = dynamic_cast<const Analyzer::UOper*>(arg);
287  const auto arg0 =
288  (arg_cast && arg_cast->get_optype() == kCAST) ? arg_cast->get_operand() : arg;
289  const auto array_expr_arg = dynamic_cast<const Analyzer::ArrayExpr*>(arg0);
290  auto is_local_alloc =
291  ret_ti.is_buffer() || (array_expr_arg && array_expr_arg->isLocalAlloc());
292  const auto& arg_ti = arg->get_type_info();
293  const auto arg_lvs = codegen(arg, true, co);
294  auto geo_uoper_arg = dynamic_cast<const Analyzer::GeoUOper*>(arg);
295  auto geo_binoper_arg = dynamic_cast<const Analyzer::GeoBinOper*>(arg);
296  auto geo_expr_arg = dynamic_cast<const Analyzer::GeoExpr*>(arg);
297  // TODO(adb / d): Assuming no const array cols for geo (for now)
298  if ((geo_uoper_arg || geo_binoper_arg) && arg_ti.is_geometry()) {
299  // Extract arr sizes and put them in the map, forward arr pointers
300  CHECK_EQ(2 * static_cast<size_t>(arg_ti.get_physical_coord_cols()), arg_lvs.size());
301  for (size_t i = 0; i < arg_lvs.size(); i++) {
302  auto arr = arg_lvs[i++];
303  auto size = arg_lvs[i];
304  orig_arg_lvs.push_back(arr);
305  const_arr_size[arr] = size;
306  }
307  } else if (geo_expr_arg && geo_expr_arg->get_type_info().is_geometry()) {
308  CHECK(geo_expr_arg->get_type_info().get_type() == kPOINT);
309  CHECK_EQ(arg_lvs.size(), size_t(2));
310  for (size_t j = 0; j < arg_lvs.size(); j++) {
311  orig_arg_lvs.push_back(arg_lvs[j]);
312  }
313  } else if (arg_ti.is_geometry()) {
314  CHECK_EQ(static_cast<size_t>(arg_ti.get_physical_coord_cols()), arg_lvs.size());
315  for (size_t j = 0; j < arg_lvs.size(); j++) {
316  orig_arg_lvs.push_back(arg_lvs[j]);
317  }
318  } else if (arg_ti.is_bytes()) {
319  CHECK_EQ(size_t(3), arg_lvs.size());
320  /* arg_lvs contains:
321  c = string_decode(&col_buf0, pos)
322  ptr = extract_str_ptr(c)
323  sz = extract_str_len(c)
324  */
325  for (size_t j = 0; j < arg_lvs.size(); j++) {
326  orig_arg_lvs.push_back(arg_lvs[j]);
327  }
328  } else {
329  if (arg_lvs.size() > 1) {
330  CHECK(arg_ti.is_array());
331  CHECK_EQ(size_t(2), arg_lvs.size());
332  const_arr_size[arg_lvs.front()] = arg_lvs.back();
333  } else {
334  CHECK_EQ(size_t(1), arg_lvs.size());
335  /* arg_lvs contains:
336  &col_buf1
337  */
338  if (is_local_alloc && arg_ti.get_size() > 0) {
339  const_arr_size[arg_lvs.front()] = cgen_state_->llInt(arg_ti.get_size());
340  }
341  }
342  orig_arg_lvs.push_back(arg_lvs.front());
343  }
344  }
345  // The extension function implementations don't handle NULL, they work under
346  // the assumption that the inputs are validated before calling them. Generate
347  // code to do the check at the call site: if any argument is NULL, return NULL
348  // without calling the function at all.
349  const auto [bbs, null_buffer_ptr] = beginArgsNullcheck(function_oper, orig_arg_lvs);
350  CHECK_GE(orig_arg_lvs.size(), function_oper->getArity());
351  // Arguments must be converted to the types the extension function can handle.
353  function_oper, &ext_func_sig, orig_arg_lvs, orig_arg_lvs_index, const_arr_size, co);
354 
355  llvm::Value* buffer_ret{nullptr};
356  if (ret_ti.is_buffer()) {
357  // codegen buffer return as first arg
358  CHECK(ret_ti.is_array() || ret_ti.is_bytes());
359  ret_ty = llvm::Type::getVoidTy(cgen_state_->context_);
360  const auto struct_ty = get_buffer_struct_type(
361  cgen_state_,
362  function_oper->getName(),
363  0,
365  /* has_is_null = */ ret_ti.is_array() || ret_ti.is_bytes());
366  buffer_ret = cgen_state_->ir_builder_.CreateAlloca(struct_ty);
367  args.insert(args.begin(), buffer_ret);
368  }
369 
370  const auto ext_call = cgen_state_->emitExternalCall(
371  ext_func_sig.getName(), ret_ty, args, {}, ret_ti.is_buffer());
372  auto ext_call_nullcheck = endArgsNullcheck(
373  bbs, ret_ti.is_buffer() ? buffer_ret : ext_call, null_buffer_ptr, function_oper);
374 
375  // Cast the return of the extension function to match the FunctionOper
376  if (!(ret_ti.is_buffer())) {
377  const auto extension_ret_ti = get_sql_type_from_llvm_type(ret_ty);
378  if (bbs.args_null_bb &&
379  extension_ret_ti.get_type() != function_oper->get_type_info().get_type() &&
380  // Skip i1-->i8 casts for ST_ functions.
381  // function_oper ret type is i1, extension ret type is 'upgraded' to i8
382  // during type deserialization to 'handle' NULL returns, hence i1-->i8.
383  // ST_ functions can't return NULLs, we just need to check arg nullness
384  // and if any args are NULL then ST_ function is not called
385  function_oper->getName().substr(0, 3) != std::string("ST_")) {
386  ext_call_nullcheck = codegenCast(ext_call_nullcheck,
387  extension_ret_ti,
388  function_oper->get_type_info(),
389  false,
390  co);
391  }
392  }
393 
394  cgen_state_->ext_call_cache_.push_back({function_oper, ext_call_nullcheck});
395  return ext_call_nullcheck;
396 }
397 
398 // Start the control flow needed for a call site check of NULL arguments.
399 std::tuple<CodeGenerator::ArgNullcheckBBs, llvm::Value*>
401  const std::vector<llvm::Value*>& orig_arg_lvs) {
403  llvm::BasicBlock* args_null_bb{nullptr};
404  llvm::BasicBlock* args_notnull_bb{nullptr};
405  llvm::BasicBlock* orig_bb = cgen_state_->ir_builder_.GetInsertBlock();
406  llvm::Value* null_array_alloca{nullptr};
407  // Only generate the check if required (at least one argument must be nullable).
408  if (ext_func_call_requires_nullcheck(function_oper)) {
409  const auto func_ti = function_oper->get_type_info();
410  if (func_ti.is_buffer()) {
411  const auto arr_struct_ty = get_buffer_struct_type(
412  cgen_state_,
413  function_oper->getName(),
414  0,
416  func_ti.is_array() || func_ti.is_bytes());
417  null_array_alloca = cgen_state_->ir_builder_.CreateAlloca(arr_struct_ty);
418  }
419  const auto args_notnull_lv = cgen_state_->ir_builder_.CreateNot(
420  codegenFunctionOperNullArg(function_oper, orig_arg_lvs));
421  args_notnull_bb = llvm::BasicBlock::Create(
422  cgen_state_->context_, "args_notnull", cgen_state_->current_func_);
423  args_null_bb = llvm::BasicBlock::Create(
425  cgen_state_->ir_builder_.CreateCondBr(args_notnull_lv, args_notnull_bb, args_null_bb);
426  cgen_state_->ir_builder_.SetInsertPoint(args_notnull_bb);
427  }
428  return std::make_tuple(
429  CodeGenerator::ArgNullcheckBBs{args_null_bb, args_notnull_bb, orig_bb},
430  null_array_alloca);
431 }
432 
433 // Wrap up the control flow needed for NULL argument handling.
435  const ArgNullcheckBBs& bbs,
436  llvm::Value* fn_ret_lv,
437  llvm::Value* null_array_ptr,
438  const Analyzer::FunctionOper* function_oper) {
440  if (bbs.args_null_bb) {
441  CHECK(bbs.args_notnull_bb);
442  cgen_state_->ir_builder_.CreateBr(bbs.args_null_bb);
443  cgen_state_->ir_builder_.SetInsertPoint(bbs.args_null_bb);
444 
445  llvm::PHINode* ext_call_phi{nullptr};
446  llvm::Value* null_lv{nullptr};
447  const auto func_ti = function_oper->get_type_info();
448  if (!func_ti.is_buffer()) {
449  // The pre-cast SQL equivalent of the type returned by the extension function.
450  const auto extension_ret_ti = get_sql_type_from_llvm_type(fn_ret_lv->getType());
451 
452  ext_call_phi = cgen_state_->ir_builder_.CreatePHI(
453  extension_ret_ti.is_fp()
454  ? get_fp_type(extension_ret_ti.get_size() * 8, cgen_state_->context_)
455  : get_int_type(extension_ret_ti.get_size() * 8, cgen_state_->context_),
456  2);
457 
458  null_lv =
459  extension_ret_ti.is_fp()
460  ? static_cast<llvm::Value*>(cgen_state_->inlineFpNull(extension_ret_ti))
461  : static_cast<llvm::Value*>(cgen_state_->inlineIntNull(extension_ret_ti));
462  } else {
463  const auto arr_struct_ty = get_buffer_struct_type(
464  cgen_state_,
465  function_oper->getName(),
466  0,
468  true);
469  ext_call_phi =
470  cgen_state_->ir_builder_.CreatePHI(llvm::PointerType::get(arr_struct_ty, 0), 2);
471 
472  CHECK(null_array_ptr);
473  const auto arr_null_bool =
474  cgen_state_->ir_builder_.CreateStructGEP(arr_struct_ty, null_array_ptr, 2);
475  cgen_state_->ir_builder_.CreateStore(
476  llvm::ConstantInt::get(get_int_type(8, cgen_state_->context_), 1),
477  arr_null_bool);
478 
479  const auto arr_null_size =
480  cgen_state_->ir_builder_.CreateStructGEP(arr_struct_ty, null_array_ptr, 1);
481  cgen_state_->ir_builder_.CreateStore(
482  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
483  arr_null_size);
484  }
485  ext_call_phi->addIncoming(fn_ret_lv, bbs.args_notnull_bb);
486  ext_call_phi->addIncoming(func_ti.is_buffer() ? null_array_ptr : null_lv,
487  bbs.orig_bb);
488 
489  return ext_call_phi;
490  }
491  return fn_ret_lv;
492 }
493 
494 namespace {
495 
497  const auto& ret_ti = function_oper->get_type_info();
498  if (!ret_ti.is_integer() && !ret_ti.is_fp()) {
499  return true;
500  }
501  for (size_t i = 0; i < function_oper->getArity(); ++i) {
502  const auto arg = function_oper->getArg(i);
503  const auto& arg_ti = arg->get_type_info();
504  if (!arg_ti.is_integer() && !arg_ti.is_fp()) {
505  return true;
506  }
507  }
508  return false;
509 }
510 
511 } // namespace
512 
515  const CompilationOptions& co) {
517  if (call_requires_custom_type_handling(function_oper)) {
518  // Some functions need the return type to be the same as the input type.
519  if (function_oper->getName() == "FLOOR" || function_oper->getName() == "CEIL") {
520  CHECK_EQ(size_t(1), function_oper->getArity());
521  const auto arg = function_oper->getArg(0);
522  const auto& arg_ti = arg->get_type_info();
523  CHECK(arg_ti.is_decimal());
524  const auto arg_lvs = codegen(arg, true, co);
525  CHECK_EQ(size_t(1), arg_lvs.size());
526  const auto arg_lv = arg_lvs.front();
527  CHECK(arg_lv->getType()->isIntegerTy(64));
529  std::tie(bbs, std::ignore) = beginArgsNullcheck(function_oper, {arg_lvs});
530  const std::string func_name =
531  (function_oper->getName() == "FLOOR") ? "decimal_floor" : "decimal_ceil";
532  const auto covar_result_lv = cgen_state_->emitCall(
533  func_name, {arg_lv, cgen_state_->llInt(exp_to_scale(arg_ti.get_scale()))});
534  const auto ret_ti = function_oper->get_type_info();
535  CHECK(ret_ti.is_decimal());
536  CHECK_EQ(0, ret_ti.get_scale());
537  const auto result_lv = cgen_state_->ir_builder_.CreateSDiv(
538  covar_result_lv, cgen_state_->llInt(exp_to_scale(arg_ti.get_scale())));
539  return endArgsNullcheck(bbs, result_lv, nullptr, function_oper);
540  } else if (function_oper->getName() == "ROUND" &&
541  function_oper->getArg(0)->get_type_info().is_decimal()) {
542  CHECK_EQ(size_t(2), function_oper->getArity());
543 
544  const auto arg0 = function_oper->getArg(0);
545  const auto& arg0_ti = arg0->get_type_info();
546  const auto arg0_lvs = codegen(arg0, true, co);
547  CHECK_EQ(size_t(1), arg0_lvs.size());
548  const auto arg0_lv = arg0_lvs.front();
549  CHECK(arg0_lv->getType()->isIntegerTy(64));
550 
551  const auto arg1 = function_oper->getArg(1);
552  const auto& arg1_ti = arg1->get_type_info();
553  CHECK(arg1_ti.is_integer());
554  const auto arg1_lvs = codegen(arg1, true, co);
555  auto arg1_lv = arg1_lvs.front();
556  if (arg1_ti.get_type() != kINT) {
557  arg1_lv = codegenCast(arg1_lv, arg1_ti, SQLTypeInfo(kINT, true), false, co);
558  }
559 
561  std::tie(bbs0, std::ignore) =
562  beginArgsNullcheck(function_oper, {arg0_lv, arg1_lvs.front()});
563 
564  const std::string func_name = "Round__4";
565  const auto ret_ti = function_oper->get_type_info();
566  CHECK(ret_ti.is_decimal());
567  const auto result_lv = cgen_state_->emitExternalCall(
568  func_name,
570  {arg0_lv, arg1_lv, cgen_state_->llInt(arg0_ti.get_scale())});
571 
572  return endArgsNullcheck(bbs0, result_lv, nullptr, function_oper);
573  }
574  throw std::runtime_error("Type combination not supported for function " +
575  function_oper->getName());
576  }
577  return codegenFunctionOper(function_oper, co);
578 }
579 
580 // Generates code which returns true iff at least one of the arguments is NULL.
582  const Analyzer::FunctionOper* function_oper,
583  const std::vector<llvm::Value*>& orig_arg_lvs) {
585  llvm::Value* one_arg_null =
586  llvm::ConstantInt::get(llvm::IntegerType::getInt1Ty(cgen_state_->context_), false);
587  size_t physical_coord_cols = 0;
588  for (size_t i = 0, j = 0; i < function_oper->getArity();
589  ++i, j += std::max(size_t(1), physical_coord_cols)) {
590  const auto arg = function_oper->getArg(i);
591  const auto& arg_ti = arg->get_type_info();
592  physical_coord_cols = arg_ti.get_physical_coord_cols();
593  if (arg_ti.get_notnull()) {
594  continue;
595  }
596  auto geo_expr_arg = dynamic_cast<const Analyzer::GeoExpr*>(arg);
597  if (geo_expr_arg && arg_ti.is_geometry()) {
598  CHECK(arg_ti.get_type() == kPOINT);
599  auto is_null_lv = cgen_state_->ir_builder_.CreateICmp(
600  llvm::CmpInst::ICMP_EQ,
601  orig_arg_lvs[j],
602  llvm::ConstantPointerNull::get( // TODO: centralize logic; in geo expr?
603  arg_ti.get_compression() == kENCODING_GEOINT
604  ? llvm::Type::getInt32PtrTy(cgen_state_->context_)
605  : llvm::Type::getDoublePtrTy(cgen_state_->context_)));
606  one_arg_null = cgen_state_->ir_builder_.CreateOr(one_arg_null, is_null_lv);
607  physical_coord_cols = 2; // number of lvs to advance
608  continue;
609  }
610 #ifdef ENABLE_GEOS
611  // If geo arg is coming from geos, skip the null check, assume it's a valid geo
612  if (arg_ti.is_geometry()) {
613  auto* coords_load = llvm::dyn_cast<llvm::LoadInst>(orig_arg_lvs[i]);
614  if (coords_load) {
615  continue;
616  }
617  }
618 #endif
619  if (arg_ti.is_geometry()) {
620  auto* coords_alloca = llvm::dyn_cast<llvm::AllocaInst>(orig_arg_lvs[j]);
621  auto* coords_phi = llvm::dyn_cast<llvm::PHINode>(orig_arg_lvs[j]);
622  if (coords_alloca || coords_phi) {
623  // TODO: null check dynamically generated geometries
624  continue;
625  }
626  }
627  if (arg_ti.is_buffer() || arg_ti.is_geometry()) {
628  // POINT [un]compressed coord check requires custom checker and chunk iterator
629  // Non-POINT NULL geographies will have a normally encoded null coord array
630  auto fname =
631  (arg_ti.get_type() == kPOINT) ? "point_coord_array_is_null" : "array_is_null";
632  auto is_null_lv = cgen_state_->emitExternalCall(
633  fname, get_int_type(1, cgen_state_->context_), {orig_arg_lvs[j], posArg(arg)});
634  one_arg_null = cgen_state_->ir_builder_.CreateOr(one_arg_null, is_null_lv);
635  continue;
636  }
637  CHECK(arg_ti.is_number() or arg_ti.is_boolean());
638  one_arg_null = cgen_state_->ir_builder_.CreateOr(
639  one_arg_null, codegenIsNullNumber(orig_arg_lvs[j], arg_ti));
640  }
641  return one_arg_null;
642 }
643 
644 llvm::Value* CodeGenerator::codegenCompression(const SQLTypeInfo& type_info) {
646  int32_t compression = (type_info.get_compression() == kENCODING_GEOINT &&
647  type_info.get_comp_param() == 32)
648  ? 1
649  : 0;
650 
651  return cgen_state_->llInt(compression);
652 }
653 
654 std::pair<llvm::Value*, llvm::Value*> CodeGenerator::codegenArrayBuff(
655  llvm::Value* chunk,
656  llvm::Value* row_pos,
657  SQLTypes array_type,
658  bool cast_and_extend) {
660  const auto elem_ti =
661  SQLTypeInfo(
662  SQLTypes::kARRAY, 0, 0, false, EncodingType::kENCODING_NONE, 0, array_type)
663  .get_elem_type();
664 
665  auto buff = cgen_state_->emitExternalCall(
666  "array_buff", llvm::Type::getInt32PtrTy(cgen_state_->context_), {chunk, row_pos});
667 
668  auto len = cgen_state_->emitExternalCall(
669  "array_size",
670  get_int_type(32, cgen_state_->context_),
671  {chunk, row_pos, cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
672 
673  if (cast_and_extend) {
674  buff = castArrayPointer(buff, elem_ti);
675  len =
676  cgen_state_->ir_builder_.CreateZExt(len, get_int_type(64, cgen_state_->context_));
677  }
678 
679  return std::make_pair(buff, len);
680 }
681 
682 void CodeGenerator::codegenBufferArgs(const std::string& ext_func_name,
683  size_t param_num,
684  llvm::Value* buffer_buf,
685  llvm::Value* buffer_size,
686  llvm::Value* buffer_null,
687  std::vector<llvm::Value*>& output_args) {
689  CHECK(buffer_buf);
690  CHECK(buffer_size);
691 
692  auto buffer_abstraction = get_buffer_struct_type(
693  cgen_state_, ext_func_name, param_num, buffer_buf->getType(), !!(buffer_null));
694  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(buffer_abstraction);
695 
696  auto buffer_buf_ptr =
697  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 0);
698  cgen_state_->ir_builder_.CreateStore(buffer_buf, buffer_buf_ptr);
699 
700  auto buffer_size_ptr =
701  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 1);
702  cgen_state_->ir_builder_.CreateStore(buffer_size, buffer_size_ptr);
703 
704  if (buffer_null) {
705  auto bool_extended_type = llvm::Type::getInt8Ty(cgen_state_->context_);
706  auto buffer_null_extended =
707  cgen_state_->ir_builder_.CreateZExt(buffer_null, bool_extended_type);
708  auto buffer_is_null_ptr =
709  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 2);
710  cgen_state_->ir_builder_.CreateStore(buffer_null_extended, buffer_is_null_ptr);
711  }
712  output_args.push_back(alloc_mem);
713 }
714 
715 llvm::StructType* CodeGenerator::createPointStructType(const std::string& udf_func_name,
716  size_t param_num) {
717  llvm::Module* module_for_lookup = cgen_state_->module_;
718  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
719 
720  llvm::StructType* generated_struct_type =
721  llvm::StructType::get(cgen_state_->context_,
722  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
723  llvm::Type::getInt32Ty(cgen_state_->context_),
724  llvm::Type::getInt32Ty(cgen_state_->context_),
725  llvm::Type::getInt32Ty(cgen_state_->context_),
726  llvm::Type::getInt32Ty(cgen_state_->context_)},
727  false);
728 
729  if (udf_func) {
730  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
731  CHECK(param_num < udf_func_type->getNumParams());
732  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
733  CHECK(param_pointer_type->isPointerTy());
734  llvm::Type* param_type = param_pointer_type->getPointerElementType();
735  CHECK(param_type->isStructTy());
736  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
737  CHECK(struct_type->getStructNumElements() == 5) << serialize_llvm_object(struct_type);
738  const auto expected_elems = generated_struct_type->elements();
739  const auto current_elems = struct_type->elements();
740  for (size_t i = 0; i < expected_elems.size(); i++) {
741  CHECK_EQ(expected_elems[i], current_elems[i]);
742  }
743  if (struct_type->isLiteral()) {
744  return struct_type;
745  }
746 
747  llvm::StringRef struct_name = struct_type->getStructName();
748 #if LLVM_VERSION_MAJOR >= 12
749  llvm::StructType* point_type =
750  struct_type->getTypeByName(cgen_state_->context_, struct_name);
751 #else
752  llvm::StructType* point_type = module_for_lookup->getTypeByName(struct_name);
753 #endif
754  CHECK(point_type);
755 
756  return (point_type);
757  }
758  return generated_struct_type;
759 }
760 
761 void CodeGenerator::codegenGeoPointArgs(const std::string& udf_func_name,
762  size_t param_num,
763  llvm::Value* point_buf,
764  llvm::Value* point_size,
765  llvm::Value* compression,
766  llvm::Value* input_srid,
767  llvm::Value* output_srid,
768  std::vector<llvm::Value*>& output_args) {
770  CHECK(point_buf);
771  CHECK(point_size);
772  CHECK(compression);
773  CHECK(input_srid);
774  CHECK(output_srid);
775 
776  auto point_abstraction = createPointStructType(udf_func_name, param_num);
777  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(point_abstraction, nullptr);
778 
779  auto point_buf_ptr =
780  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 0);
781  cgen_state_->ir_builder_.CreateStore(point_buf, point_buf_ptr);
782 
783  auto point_size_ptr =
784  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 1);
785  cgen_state_->ir_builder_.CreateStore(point_size, point_size_ptr);
786 
787  auto point_compression_ptr =
788  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 2);
789  cgen_state_->ir_builder_.CreateStore(compression, point_compression_ptr);
790 
791  auto input_srid_ptr =
792  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 3);
793  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
794 
795  auto output_srid_ptr =
796  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 4);
797  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
798 
799  output_args.push_back(alloc_mem);
800 }
801 
803  const std::string& udf_func_name,
804  size_t param_num) {
805  llvm::Module* module_for_lookup = cgen_state_->module_;
806  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
807 
808  llvm::StructType* generated_struct_type =
809  llvm::StructType::get(cgen_state_->context_,
810  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
811  llvm::Type::getInt32Ty(cgen_state_->context_),
812  llvm::Type::getInt32Ty(cgen_state_->context_),
813  llvm::Type::getInt32Ty(cgen_state_->context_),
814  llvm::Type::getInt32Ty(cgen_state_->context_)},
815  false);
816 
817  if (udf_func) {
818  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
819  CHECK(param_num < udf_func_type->getNumParams());
820  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
821  CHECK(param_pointer_type->isPointerTy());
822  llvm::Type* param_type = param_pointer_type->getPointerElementType();
823  CHECK(param_type->isStructTy());
824  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
825  CHECK(struct_type->isStructTy());
826  CHECK(struct_type->getStructNumElements() == 5);
827 
828  const auto expected_elems = generated_struct_type->elements();
829  const auto current_elems = struct_type->elements();
830  for (size_t i = 0; i < expected_elems.size(); i++) {
831  CHECK_EQ(expected_elems[i], current_elems[i]);
832  }
833  if (struct_type->isLiteral()) {
834  return struct_type;
835  }
836 
837  llvm::StringRef struct_name = struct_type->getStructName();
838 #if LLVM_VERSION_MAJOR >= 12
839  llvm::StructType* line_string_type =
840  struct_type->getTypeByName(cgen_state_->context_, struct_name);
841 #else
842  llvm::StructType* line_string_type = module_for_lookup->getTypeByName(struct_name);
843 #endif
844  CHECK(line_string_type);
845 
846  return (line_string_type);
847  }
848  return generated_struct_type;
849 }
850 
851 void CodeGenerator::codegenGeoLineStringArgs(const std::string& udf_func_name,
852  size_t param_num,
853  llvm::Value* line_string_buf,
854  llvm::Value* line_string_size,
855  llvm::Value* compression,
856  llvm::Value* input_srid,
857  llvm::Value* output_srid,
858  std::vector<llvm::Value*>& output_args) {
860  CHECK(line_string_buf);
861  CHECK(line_string_size);
862  CHECK(compression);
863  CHECK(input_srid);
864  CHECK(output_srid);
865 
866  auto line_string_abstraction = createLineStringStructType(udf_func_name, param_num);
867  auto alloc_mem =
868  cgen_state_->ir_builder_.CreateAlloca(line_string_abstraction, nullptr);
869 
870  auto line_string_buf_ptr =
871  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 0);
872  cgen_state_->ir_builder_.CreateStore(line_string_buf, line_string_buf_ptr);
873 
874  auto line_string_size_ptr =
875  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 1);
876  cgen_state_->ir_builder_.CreateStore(line_string_size, line_string_size_ptr);
877 
878  auto line_string_compression_ptr =
879  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 2);
880  cgen_state_->ir_builder_.CreateStore(compression, line_string_compression_ptr);
881 
882  auto input_srid_ptr =
883  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 3);
884  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
885 
886  auto output_srid_ptr =
887  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 4);
888  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
889 
890  output_args.push_back(alloc_mem);
891 }
892 
893 llvm::StructType* CodeGenerator::createPolygonStructType(const std::string& udf_func_name,
894  size_t param_num) {
895  llvm::Module* module_for_lookup = cgen_state_->module_;
896  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
897 
898  llvm::StructType* generated_struct_type =
899  llvm::StructType::get(cgen_state_->context_,
900  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
901  llvm::Type::getInt32Ty(cgen_state_->context_),
902  llvm::Type::getInt8PtrTy(cgen_state_->context_),
903  llvm::Type::getInt32Ty(cgen_state_->context_),
904  llvm::Type::getInt32Ty(cgen_state_->context_),
905  llvm::Type::getInt32Ty(cgen_state_->context_),
906  llvm::Type::getInt32Ty(cgen_state_->context_)},
907  false);
908 
909  if (udf_func) {
910  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
911  CHECK(param_num < udf_func_type->getNumParams());
912  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
913  CHECK(param_pointer_type->isPointerTy());
914  llvm::Type* param_type = param_pointer_type->getPointerElementType();
915  CHECK(param_type->isStructTy());
916  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
917 
918  CHECK(struct_type->isStructTy());
919  CHECK(struct_type->getStructNumElements() == 7);
920 
921  const auto expected_elems = generated_struct_type->elements();
922  const auto current_elems = struct_type->elements();
923  for (size_t i = 0; i < expected_elems.size(); i++) {
924  CHECK_EQ(expected_elems[i], current_elems[i]);
925  }
926  if (struct_type->isLiteral()) {
927  return struct_type;
928  }
929 
930  llvm::StringRef struct_name = struct_type->getStructName();
931 
932 #if LLVM_VERSION_MAJOR >= 12
933  llvm::StructType* polygon_type =
934  struct_type->getTypeByName(cgen_state_->context_, struct_name);
935 #else
936  llvm::StructType* polygon_type = module_for_lookup->getTypeByName(struct_name);
937 #endif
938  CHECK(polygon_type);
939 
940  return (polygon_type);
941  }
942  return generated_struct_type;
943 }
944 
945 void CodeGenerator::codegenGeoPolygonArgs(const std::string& udf_func_name,
946  size_t param_num,
947  llvm::Value* polygon_buf,
948  llvm::Value* polygon_size,
949  llvm::Value* ring_sizes_buf,
950  llvm::Value* num_rings,
951  llvm::Value* compression,
952  llvm::Value* input_srid,
953  llvm::Value* output_srid,
954  std::vector<llvm::Value*>& output_args) {
956  CHECK(polygon_buf);
957  CHECK(polygon_size);
958  CHECK(ring_sizes_buf);
959  CHECK(num_rings);
960  CHECK(compression);
961  CHECK(input_srid);
962  CHECK(output_srid);
963 
964  auto& builder = cgen_state_->ir_builder_;
965 
966  auto polygon_abstraction = createPolygonStructType(udf_func_name, param_num);
967  auto alloc_mem = builder.CreateAlloca(polygon_abstraction, nullptr);
968 
969  const auto polygon_buf_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 0);
970  builder.CreateStore(polygon_buf, polygon_buf_ptr);
971 
972  const auto polygon_size_ptr =
973  builder.CreateStructGEP(polygon_abstraction, alloc_mem, 1);
974  builder.CreateStore(polygon_size, polygon_size_ptr);
975 
976  const auto ring_sizes_buf_ptr =
977  builder.CreateStructGEP(polygon_abstraction, alloc_mem, 2);
978  const auto ring_sizes_ptr_ty =
979  llvm::dyn_cast<llvm::PointerType>(ring_sizes_buf_ptr->getType());
980  CHECK(ring_sizes_ptr_ty);
981  builder.CreateStore(
982  builder.CreateBitCast(ring_sizes_buf, ring_sizes_ptr_ty->getPointerElementType()),
983  ring_sizes_buf_ptr);
984 
985  const auto ring_size_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 3);
986  builder.CreateStore(num_rings, ring_size_ptr);
987 
988  const auto polygon_compression_ptr =
989  builder.CreateStructGEP(polygon_abstraction, alloc_mem, 4);
990  builder.CreateStore(compression, polygon_compression_ptr);
991 
992  const auto input_srid_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 5);
993  builder.CreateStore(input_srid, input_srid_ptr);
994 
995  const auto output_srid_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 6);
996  builder.CreateStore(output_srid, output_srid_ptr);
997 
998  output_args.push_back(alloc_mem);
999 }
1000 
1002  const std::string& udf_func_name,
1003  size_t param_num) {
1004  llvm::Function* udf_func = cgen_state_->module_->getFunction(udf_func_name);
1005 
1006  llvm::StructType* generated_struct_type =
1007  llvm::StructType::get(cgen_state_->context_,
1008  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
1009  llvm::Type::getInt32Ty(cgen_state_->context_),
1010  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1011  llvm::Type::getInt32Ty(cgen_state_->context_),
1012  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1013  llvm::Type::getInt32Ty(cgen_state_->context_),
1014  llvm::Type::getInt32Ty(cgen_state_->context_),
1015  llvm::Type::getInt32Ty(cgen_state_->context_),
1016  llvm::Type::getInt32Ty(cgen_state_->context_)},
1017  false);
1018 
1019  if (udf_func) {
1020  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
1021  CHECK(param_num < udf_func_type->getNumParams());
1022  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
1023  CHECK(param_pointer_type->isPointerTy());
1024  llvm::Type* param_type = param_pointer_type->getPointerElementType();
1025  CHECK(param_type->isStructTy());
1026  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
1027  CHECK(struct_type->isStructTy());
1028  CHECK(struct_type->getStructNumElements() == 9);
1029  const auto expected_elems = generated_struct_type->elements();
1030  const auto current_elems = struct_type->elements();
1031  for (size_t i = 0; i < expected_elems.size(); i++) {
1032  CHECK_EQ(expected_elems[i], current_elems[i]);
1033  }
1034  if (struct_type->isLiteral()) {
1035  return struct_type;
1036  }
1037  llvm::StringRef struct_name = struct_type->getStructName();
1038 
1039 #if LLVM_VERSION_MAJOR >= 12
1040  llvm::StructType* polygon_type =
1041  struct_type->getTypeByName(cgen_state_->context_, struct_name);
1042 #else
1043  llvm::StructType* polygon_type = cgen_state_->module_->getTypeByName(struct_name);
1044 #endif
1045  CHECK(polygon_type);
1046 
1047  return (polygon_type);
1048  }
1049  return generated_struct_type;
1050 }
1051 
1052 void CodeGenerator::codegenGeoMultiPolygonArgs(const std::string& udf_func_name,
1053  size_t param_num,
1054  llvm::Value* polygon_coords,
1055  llvm::Value* polygon_coords_size,
1056  llvm::Value* ring_sizes_buf,
1057  llvm::Value* ring_sizes,
1058  llvm::Value* polygon_bounds,
1059  llvm::Value* polygon_bounds_sizes,
1060  llvm::Value* compression,
1061  llvm::Value* input_srid,
1062  llvm::Value* output_srid,
1063  std::vector<llvm::Value*>& output_args) {
1065  CHECK(polygon_coords);
1066  CHECK(polygon_coords_size);
1067  CHECK(ring_sizes_buf);
1068  CHECK(ring_sizes);
1069  CHECK(polygon_bounds);
1070  CHECK(polygon_bounds_sizes);
1071  CHECK(compression);
1072  CHECK(input_srid);
1073  CHECK(output_srid);
1074 
1075  auto& builder = cgen_state_->ir_builder_;
1076 
1077  auto multi_polygon_abstraction = createMultiPolygonStructType(udf_func_name, param_num);
1078  auto alloc_mem = builder.CreateAlloca(multi_polygon_abstraction, nullptr);
1079 
1080  const auto polygon_coords_ptr =
1081  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 0);
1082  builder.CreateStore(polygon_coords, polygon_coords_ptr);
1083 
1084  const auto polygon_coords_size_ptr =
1085  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 1);
1086  builder.CreateStore(polygon_coords_size, polygon_coords_size_ptr);
1087 
1088  const auto ring_sizes_buf_ptr =
1089  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 2);
1090  const auto ring_sizes_ptr_ty =
1091  llvm::dyn_cast<llvm::PointerType>(ring_sizes_buf_ptr->getType());
1092  CHECK(ring_sizes_ptr_ty);
1093  builder.CreateStore(
1094  builder.CreateBitCast(ring_sizes_buf, ring_sizes_ptr_ty->getPointerElementType()),
1095  ring_sizes_buf_ptr);
1096 
1097  const auto ring_sizes_ptr =
1098  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 3);
1099  builder.CreateStore(ring_sizes, ring_sizes_ptr);
1100 
1101  const auto polygon_bounds_buf_ptr =
1102  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 4);
1103  const auto bounds_ptr_ty =
1104  llvm::dyn_cast<llvm::PointerType>(polygon_bounds_buf_ptr->getType());
1105  CHECK(bounds_ptr_ty);
1106  builder.CreateStore(
1107  builder.CreateBitCast(polygon_bounds, bounds_ptr_ty->getPointerElementType()),
1108  polygon_bounds_buf_ptr);
1109 
1110  const auto polygon_bounds_sizes_ptr =
1111  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 5);
1112  builder.CreateStore(polygon_bounds_sizes, polygon_bounds_sizes_ptr);
1113 
1114  const auto polygon_compression_ptr =
1115  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 6);
1116  builder.CreateStore(compression, polygon_compression_ptr);
1117 
1118  const auto input_srid_ptr =
1119  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 7);
1120  builder.CreateStore(input_srid, input_srid_ptr);
1121 
1122  const auto output_srid_ptr =
1123  builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 8);
1124  builder.CreateStore(output_srid, output_srid_ptr);
1125 
1126  output_args.push_back(alloc_mem);
1127 }
1128 
1129 // Generate CAST operations for arguments in `orig_arg_lvs` to the types required by
1130 // `ext_func_sig`.
//
// Walks the operands of `function_oper` in order and appends to the returned vector the
// LLVM value(s) to pass for each operand, chosen from the operand's SQL type and the
// matching entry of `ext_func_sig->getArgs()`:
//  - bytes operands: packed into a buffer struct via codegenBufferArgs();
//  - array operands: either a (pointer, 64-bit length) pair or a buffer struct;
//  - geometry operands: either a geo struct (codegenGeo*Args) or flattened
//    (pointer, length) pairs;
//  - anything else: a scalar, run through codegenCast() when the SQL types differ.
// `const_arr_size` maps an array value to its length value; when an operand's value is a
// key of that map, the value itself is used as the buffer pointer.
//
// NOTE(review): this listing has dropped several lines of this function (the embedded
// line numbers skip), starting with the definition's first line, which presumably reads
// `std::vector<llvm::Value*> CodeGenerator::codegenFunctionOperCastArgs(` -- TODO
// confirm. Expressions next to a skipped number are incomplete as shown; check them
// against the original file before relying on them.
1132  const Analyzer::FunctionOper* function_oper,
1133  const ExtensionFunction* ext_func_sig,
1134  const std::vector<llvm::Value*>& orig_arg_lvs,
1135  const std::vector<size_t>& orig_arg_lvs_index,
1136  const std::unordered_map<llvm::Value*, llvm::Value*>& const_arr_size,
1137  const CompilationOptions& co) {
1139  CHECK(ext_func_sig);
1140  const auto& ext_func_args = ext_func_sig->getArgs();
// The signature may list more entries than the operator has operands (see `j` below).
1141  CHECK_LE(function_oper->getArity(), ext_func_args.size());
1142  const auto func_ti = function_oper->get_type_info();
1143  std::vector<llvm::Value*> args;
1144  /*
1145  i: argument in RA for the function operand
1146  j: extra offset in ext_func_args
1147  k: origin_arg_lvs counter, equal to orig_arg_lvs_index[i]
1148  ij: ext_func_args counter, equal to i + j
1149  dj: offset when UDF implementation first argument corresponds to return value
1150  */
1151  for (size_t i = 0, j = 0, dj = (func_ti.is_buffer() ? 1 : 0);
1152  i < function_oper->getArity();
1153  ++i) {
1154  size_t k = orig_arg_lvs_index[i];
1155  size_t ij = i + j;
1156  const auto arg = function_oper->getArg(i);
1157  const auto ext_func_arg = ext_func_args[ij];
1158  const auto& arg_ti = arg->get_type_info();
1159  llvm::Value* arg_lv{nullptr};
1160  if (arg_ti.is_bytes()) {
// Bytes operand: pointer and length are read from slots k + 1 and k + 2, the pointer is
// cast to i8*, the length zero-extended to 64 bits, and both are packed into a buffer
// struct with no is-null value.
1161  CHECK(ext_func_arg == ExtArgumentType::TextEncodingNone)
1162  << ::toString(ext_func_arg);
1163  const auto ptr_lv = orig_arg_lvs[k + 1];
1164  const auto len_lv = orig_arg_lvs[k + 2];
1165  auto& builder = cgen_state_->ir_builder_;
1166  auto string_buf_arg = builder.CreatePointerCast(
1167  ptr_lv, llvm::Type::getInt8PtrTy(cgen_state_->context_));
1168  auto string_size_arg =
1169  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1170  codegenBufferArgs(ext_func_sig->getName(),
1171  ij + dj,
1172  string_buf_arg,
1173  string_size_arg,
1174  nullptr,
1175  args);
1176  } else if (arg_ti.is_array()) {
// Array operand. `const_arr` is true when the operand's value has a known length
// recorded in `const_arr_size`.
1177  bool const_arr = (const_arr_size.count(orig_arg_lvs[k]) > 0);
1178  const auto elem_ti = arg_ti.get_elem_type();
1179  // TODO: switch to fast fixlen variants
// NOTE(review): the two conditionals below each lack their `:` alternative in this
// listing (numbers 1182, 1188 and 1190 are skipped) -- presumably calls that fetch
// "array_buff" / "array_size" at runtime; confirm against the original file.
1180  const auto ptr_lv = (const_arr)
1181  ? orig_arg_lvs[k]
1183  "array_buff",
1184  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1185  {orig_arg_lvs[k], posArg(arg)});
1186  const auto len_lv =
1187  (const_arr) ? const_arr_size.at(orig_arg_lvs[k])
1189  "array_size",
1191  {orig_arg_lvs[k],
1192  posArg(arg),
1193  cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
1194
1195  if (is_ext_arg_type_pointer(ext_func_arg)) {
// Pointer-style signature: pass the typed pointer and the 64-bit length as two separate
// arguments, and bump the extra offset `j` accordingly.
1196  args.push_back(castArrayPointer(ptr_lv, elem_ti));
1197  args.push_back(cgen_state_->ir_builder_.CreateZExt(
1198  len_lv, get_int_type(64, cgen_state_->context_)));
1199  j++;
1200  } else if (is_ext_arg_type_array(ext_func_arg)) {
// Array-struct signature: pack pointer, 64-bit length and the result of the
// "array_is_null" call into a buffer struct.
1201  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1202  auto& builder = cgen_state_->ir_builder_;
1203  auto array_size_arg =
1204  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
// NOTE(review): number 1207 (presumably the call's return-type argument) is skipped here.
1205  auto array_null_arg =
1206  cgen_state_->emitExternalCall("array_is_null",
1208  {orig_arg_lvs[k], posArg(arg)});
1209  codegenBufferArgs(ext_func_sig->getName(),
1210  ij + dj,
1211  array_buf_arg,
1212  array_size_arg,
1213  array_null_arg,
1214  args);
1215  } else {
1216  UNREACHABLE();
1217  }
1218
1219  } else if (arg_ti.is_geometry()) {
// Geometry operand. A GeoExpr operand is passed as an i8* pointer (slot k) plus a
// size (slot k + 1) sign-extended to 64 bits, and the rest of the geometry handling is
// skipped for it.
1220  auto geo_expr_arg = dynamic_cast<const Analyzer::GeoExpr*>(arg);
1221  if (geo_expr_arg) {
1222  auto ptr_lv = cgen_state_->ir_builder_.CreateBitCast(
1223  orig_arg_lvs[k], llvm::Type::getInt8PtrTy(cgen_state_->context_));
1224  args.push_back(ptr_lv);
1225  // TODO: remove when we normalize extension functions geo sizes to int32
1226  auto size_lv = cgen_state_->ir_builder_.CreateSExt(
1227  orig_arg_lvs[k + 1], llvm::Type::getInt64Ty(cgen_state_->context_));
1228  args.push_back(size_lv);
1229  j++;
1230  continue;
1231  }
1232  // Coords
1233  bool const_arr = (const_arr_size.count(orig_arg_lvs[k]) > 0);
1234  // NOTE(adb): We're generating code to handle the TINYINT array only -- the actual
1235  // geo encoding (or lack thereof) does not matter here
// NOTE(review): two constructor arguments (numbers 1240 and 1242) are skipped in this
// listing; going by the NOTE above the element type is presumably TINYINT -- confirm.
1236  const auto elem_ti = SQLTypeInfo(SQLTypes::kARRAY,
1237  0,
1238  0,
1239  false,
1241  0,
1243  .get_elem_type();
1244  llvm::Value* ptr_lv;
1245  llvm::Value* len_lv;
// `fixlen` stays -1 unless the operand is a POINT column variable whose physical column
// descriptor (index 1) has ARRAY type; then it holds that column type's size.
1246  int32_t fixlen = -1;
1247  if (arg_ti.get_type() == kPOINT) {
1248  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(arg);
1249  if (col_var) {
1250  const auto coords_cd = executor()->getPhysicalColumnDescriptor(col_var, 1);
1251  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1252  fixlen = coords_cd->columnType.get_size();
1253  }
1254  }
1255  }
1256  if (fixlen > 0) {
// Known fixed length: fetch the buffer through "fast_fixlen_array_buff" and use the
// length as a constant.
1257  ptr_lv =
1258  cgen_state_->emitExternalCall("fast_fixlen_array_buff",
1259  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1260  {orig_arg_lvs[k], posArg(arg)});
1261  len_lv = cgen_state_->llInt(int32_t(fixlen));
1262  } else {
1263  // TODO: remove const_arr and related code if it's not needed
// NOTE(review): as in the array branch, the `:` alternatives of both conditionals are
// missing from this listing (numbers 1265, 1271 and 1273 are skipped).
1264  ptr_lv = (const_arr) ? orig_arg_lvs[k]
1266  "array_buff",
1267  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1268  {orig_arg_lvs[k], posArg(arg)});
1269  len_lv = (const_arr)
1270  ? const_arr_size.at(orig_arg_lvs[k])
1272  "array_size",
1274  {orig_arg_lvs[k],
1275  posArg(arg),
1276  cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
1277  }
1278
// Coordinates: for a geo-struct signature only POINT and LINESTRING are packed here
// (POLYGON and MULTIPOLYGON structs are built in the switch further down); otherwise
// the coordinates are passed as a typed pointer plus a 64-bit length.
1279  if (is_ext_arg_type_geo(ext_func_arg)) {
1280  if (arg_ti.get_type() == kPOINT || arg_ti.get_type() == kLINESTRING) {
1281  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1282  auto compression_val = codegenCompression(arg_ti);
1283  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1284  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1285
1286  if (arg_ti.get_type() == kPOINT) {
1287  CHECK_EQ(k, ij);
1288  codegenGeoPointArgs(ext_func_sig->getName(),
1289  ij + dj,
1290  array_buf_arg,
1291  len_lv,
1292  compression_val,
1293  input_srid_val,
1294  output_srid_val,
1295  args);
1296  } else {
1297  CHECK_EQ(k, ij);
1298  codegenGeoLineStringArgs(ext_func_sig->getName(),
1299  ij + dj,
1300  array_buf_arg,
1301  len_lv,
1302  compression_val,
1303  input_srid_val,
1304  output_srid_val,
1305  args);
1306  }
1307  }
1308  } else {
1309  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1310  args.push_back(castArrayPointer(ptr_lv, elem_ti));
1311  args.push_back(cgen_state_->ir_builder_.CreateZExt(
1312  len_lv, get_int_type(64, cgen_state_->context_)));
1313  j++;
1314  }
1315
// Remaining physical columns of the geometry: nothing more for POINT / LINESTRING, ring
// sizes (slot k + 1) for POLYGON, ring sizes plus polygon bounds (slot k + 2) for
// MULTIPOLYGON. Any other geometry type is a fatal error.
1316  switch (arg_ti.get_type()) {
1317  case kPOINT:
1318  case kLINESTRING:
1319  break;
1320  case kPOLYGON: {
1321  if (ext_func_arg == ExtArgumentType::GeoPolygon) {
1322  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1323  auto compression_val = codegenCompression(arg_ti);
1324  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1325  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1326
// NOTE(review): number 1330 (presumably the array-type argument of codegenArrayBuff) is
// skipped in this listing.
1327  auto [ring_size_buff, ring_size] =
1328  codegenArrayBuff(orig_arg_lvs[k + 1],
1329  posArg(arg),
1331  /*cast_and_extend=*/false);
1332  CHECK_EQ(k, ij);
1333  codegenGeoPolygonArgs(ext_func_sig->getName(),
1334  ij + dj,
1335  array_buf_arg,
1336  len_lv,
1337  ring_size_buff,
1338  ring_size,
1339  compression_val,
1340  input_srid_val,
1341  output_srid_val,
1342  args);
1343  } else {
1344  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1345  // Ring Sizes
// NOTE(review): number 1352 (presumably the array-type argument) is skipped below.
1346  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 1]) > 0;
1347  auto [ring_size_buff, ring_size] =
1348  (const_arr) ? std::make_pair(orig_arg_lvs[k + 1],
1349  const_arr_size.at(orig_arg_lvs[k + 1]))
1350  : codegenArrayBuff(orig_arg_lvs[k + 1],
1351  posArg(arg),
1353  /*cast_and_extend=*/true);
1354  args.push_back(ring_size_buff);
1355  args.push_back(ring_size);
1356  j += 2;
1357  }
1358  break;
1359  }
1360  case kMULTIPOLYGON: {
1361  if (ext_func_arg == ExtArgumentType::GeoMultiPolygon) {
1362  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1363  auto compression_val = codegenCompression(arg_ti);
1364  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1365  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1366
// NOTE(review): numbers 1370 and 1376 (presumably the array-type arguments) are skipped
// in the two calls below.
1367  auto [ring_size_buff, ring_size] =
1368  codegenArrayBuff(orig_arg_lvs[k + 1],
1369  posArg(arg),
1371  /*cast_and_extend=*/false);
1372
1373  auto [poly_bounds_buff, poly_bounds_size] =
1374  codegenArrayBuff(orig_arg_lvs[k + 2],
1375  posArg(arg),
1377  /*cast_and_extend=*/false);
1378  CHECK_EQ(k, ij);
1379  codegenGeoMultiPolygonArgs(ext_func_sig->getName(),
1380  ij + dj,
1381  array_buf_arg,
1382  len_lv,
1383  ring_size_buff,
1384  ring_size,
1385  poly_bounds_buff,
1386  poly_bounds_size,
1387  compression_val,
1388  input_srid_val,
1389  output_srid_val,
1390  args);
1391  } else {
1392  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1393  // Ring Sizes
1394  {
// NOTE(review): number 1401 (presumably the array-type argument) is skipped below.
1395  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 1]) > 0;
1396  auto [ring_size_buff, ring_size] =
1397  (const_arr) ? std::make_pair(orig_arg_lvs[k + 1],
1398  const_arr_size.at(orig_arg_lvs[k + 1]))
1399  : codegenArrayBuff(orig_arg_lvs[k + 1],
1400  posArg(arg),
1402  /*cast_and_extend=*/true);
1403
1404  args.push_back(ring_size_buff);
1405  args.push_back(ring_size);
1406  }
1407  // Poly Rings
1408  {
1409  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 2]) > 0;
1410  auto [poly_bounds_buff, poly_bounds_size] =
1411  (const_arr)
1412  ? std::make_pair(orig_arg_lvs[k + 2],
1413  const_arr_size.at(orig_arg_lvs[k + 2]))
1414  : codegenArrayBuff(
1415  orig_arg_lvs[k + 2], posArg(arg), SQLTypes::kINT, true);
1416
1417  args.push_back(poly_bounds_buff);
1418  args.push_back(poly_bounds_size);
1419  }
1420  j += 4;
1421  }
1422  break;
1423  }
1424  default:
1425  CHECK(false);
1426  }
1427  } else {
// Scalar operand: cast only when the operand's SQL type differs from the type the
// signature asks for, then verify that the LLVM type matches the signature.
1428  CHECK(is_ext_arg_type_scalar(ext_func_arg));
1429  const auto arg_target_ti = ext_arg_type_to_type_info(ext_func_arg);
1430  if (arg_ti.get_type() != arg_target_ti.get_type()) {
1431  arg_lv = codegenCast(orig_arg_lvs[k], arg_ti, arg_target_ti, false, co);
1432  } else {
1433  arg_lv = orig_arg_lvs[k];
1434  }
1435  CHECK_EQ(arg_lv->getType(),
1436  ext_arg_type_to_llvm_type(ext_func_arg, cgen_state_->context_));
1437  args.push_back(arg_lv);
1438  }
1439  }
1440  return args;
1441 }
1442 
1443 llvm::Value* CodeGenerator::castArrayPointer(llvm::Value* ptr,
1444  const SQLTypeInfo& elem_ti) {
1446  if (elem_ti.get_type() == kFLOAT) {
1447  return cgen_state_->ir_builder_.CreatePointerCast(
1448  ptr, llvm::Type::getFloatPtrTy(cgen_state_->context_));
1449  }
1450  if (elem_ti.get_type() == kDOUBLE) {
1451  return cgen_state_->ir_builder_.CreatePointerCast(
1452  ptr, llvm::Type::getDoublePtrTy(cgen_state_->context_));
1453  }
1454  CHECK(elem_ti.is_integer() || elem_ti.is_boolean() ||
1455  (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT));
1456  switch (elem_ti.get_size()) {
1457  case 1:
1458  return cgen_state_->ir_builder_.CreatePointerCast(
1459  ptr, llvm::Type::getInt8PtrTy(cgen_state_->context_));
1460  case 2:
1461  return cgen_state_->ir_builder_.CreatePointerCast(
1462  ptr, llvm::Type::getInt16PtrTy(cgen_state_->context_));
1463  case 4:
1464  return cgen_state_->ir_builder_.CreatePointerCast(
1465  ptr, llvm::Type::getInt32PtrTy(cgen_state_->context_));
1466  case 8:
1467  return cgen_state_->ir_builder_.CreatePointerCast(
1468  ptr, llvm::Type::getInt64PtrTy(cgen_state_->context_));
1469  default:
1470  CHECK(false);
1471  }
1472  return nullptr;
1473 }
llvm::StructType * createLineStringStructType(const std::string &udf_func_name, size_t param_num)
void codegenGeoMultiPolygonArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *polygon_coords, llvm::Value *polygon_coords_size, llvm::Value *ring_sizes_buf, llvm::Value *ring_sizes, llvm::Value *polygon_bounds, llvm::Value *polygon_bounds_sizes, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define CHECK_EQ(x, y)
Definition: Logger.h:217
llvm::StructType * get_buffer_struct_type(CgenState *cgen_state, const std::string &ext_func_name, size_t param_num, llvm::Type *elem_type, bool has_is_null)
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
const std::vector< ExtArgumentType > & getArgs() const
bool is_ext_arg_type_scalar(const ExtArgumentType ext_arg_type)
std::string toString(const ExtArgumentType &sig_type)
llvm::BasicBlock * args_notnull_bb
size_t getArity() const
Definition: Analyzer.h:1515
SQLTypes
Definition: sqltypes.h:38
std::unique_ptr< llvm::Module > udf_gpu_module
CgenState * cgen_state_
const ExtArgumentType getRet() const
void codegenGeoPolygonArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *polygon_buf, llvm::Value *polygon_size, llvm::Value *ring_sizes_buf, llvm::Value *num_rings, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define LOG(tag)
Definition: Logger.h:203
std::vector< llvm::Value * > codegenFunctionOperCastArgs(const Analyzer::FunctionOper *, const ExtensionFunction *, const std::vector< llvm::Value * > &, const std::vector< size_t > &, const std::unordered_map< llvm::Value *, llvm::Value * > &, const CompilationOptions &)
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Definition: CgenState.h:228
llvm::Value * codegenFunctionOperNullArg(const Analyzer::FunctionOper *, const std::vector< llvm::Value * > &)
llvm::IRBuilder ir_builder_
Definition: CgenState.h:340
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:512
llvm::Value * castArrayPointer(llvm::Value *ptr, const SQLTypeInfo &elem_ti)
#define UNREACHABLE()
Definition: Logger.h:253
#define CHECK_GE(x, y)
Definition: Logger.h:222
Definition: sqldefs.h:49
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
llvm::StructType * createPointStructType(const std::string &udf_func_name, size_t param_num)
bool call_requires_custom_type_handling(const Analyzer::FunctionOper *function_oper)
const std::string getName(bool keep_suffix=true) const
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool ext_func_call_requires_nullcheck(const Analyzer::FunctionOper *function_oper)
SQLTypeInfo get_sql_type_from_llvm_type(const llvm::Type *ll_type)
std::vector< FunctionOperValue > ext_call_cache_
Definition: CgenState.h:346
void codegenBufferArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *buffer_buf, llvm::Value *buffer_size, llvm::Value *buffer_is_null, std::vector< llvm::Value * > &output_args)
RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
std::pair< llvm::Value *, llvm::Value * > codegenArrayBuff(llvm::Value *chunk, llvm::Value *row_pos, SQLTypes array_type, bool cast_and_extend)
llvm::Module * module_
Definition: CgenState.h:329
llvm::LLVMContext & context_
Definition: CgenState.h:338
llvm::Function * current_func_
Definition: CgenState.h:332
std::tuple< ArgNullcheckBBs, llvm::Value * > beginArgsNullcheck(const Analyzer::FunctionOper *function_oper, const std::vector< llvm::Value * > &orig_arg_lvs)
bool is_integer() const
Definition: sqltypes.h:511
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:29
bool is_ext_arg_type_geo(const ExtArgumentType ext_arg_type)
bool is_ext_arg_type_array(const ExtArgumentType ext_arg_type)
llvm::Value * codegenFunctionOper(const Analyzer::FunctionOper *, const CompilationOptions &)
llvm::Type * get_llvm_type_from_sql_array_type(const SQLTypeInfo ti, llvm::LLVMContext &ctx)
bool is_boolean() const
Definition: sqltypes.h:516
llvm::BasicBlock * args_null_bb
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Type * ext_arg_type_to_llvm_type(const ExtArgumentType ext_arg_type, llvm::LLVMContext &ctx)
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:77
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:175
bool is_buffer() const
Definition: sqltypes.h:527
ExecutorDeviceType device_type
void codegenGeoPointArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *point_buf, llvm::Value *point_size, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define RUNTIME_EXPORT
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK_LE(x, y)
Definition: Logger.h:220
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::string serialize_llvm_object(const T *llvm_obj)
llvm::StructType * createPolygonStructType(const std::string &udf_func_name, size_t param_num)
const Analyzer::Expr * getArg(const size_t i) const
Definition: Analyzer.h:1517
const Expr * get_operand() const
Definition: Analyzer.h:370
llvm::Value * endArgsNullcheck(const ArgNullcheckBBs &, llvm::Value *, llvm::Value *, const Analyzer::FunctionOper *)
std::unique_ptr< llvm::Module > udf_cpu_module
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:338
llvm::Value * codegenFunctionOperWithCustomTypeHandling(const Analyzer::FunctionOperWithCustomTypeHandling *, const CompilationOptions &)
bool is_bytes() const
Definition: sqltypes.h:524
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:307
#define CHECK(condition)
Definition: Logger.h:209
llvm::Value * codegenIsNullNumber(llvm::Value *, const SQLTypeInfo &)
Definition: LogicalIR.cpp:409
uint64_t exp_to_scale(const unsigned exp)
llvm::Value * codegenCompression(const SQLTypeInfo &type_info)
llvm::Value * codegenCast(const Analyzer::UOper *, const CompilationOptions &)
Definition: CastIR.cpp:20
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:174
Definition: sqltypes.h:45
bool is_string() const
Definition: sqltypes.h:509
std::string getName() const
Definition: Analyzer.h:1513
void codegenGeoLineStringArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *line_string_buf, llvm::Value *line_string_size, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
bool is_ext_arg_type_pointer(const ExtArgumentType ext_arg_type)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:850
bool is_decimal() const
Definition: sqltypes.h:512
int get_physical_coord_cols() const
Definition: sqltypes.h:365
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)
Definition: CgenState.cpp:67
Executor * executor() const
llvm::StructType * createMultiPolygonStructType(const std::string &udf_func_name, size_t param_num)