OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ExtensionsIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Execute.h"
19 #include "ExtensionFunctions.hpp"
23 
24 #include <tuple>
25 
26 extern std::unique_ptr<llvm::Module> udf_gpu_module;
27 extern std::unique_ptr<llvm::Module> udf_cpu_module;
28 
29 namespace {
30 
31 llvm::StructType* get_buffer_struct_type(CgenState* cgen_state,
32  const std::string& ext_func_name,
33  size_t param_num,
34  llvm::Type* elem_type,
35  bool has_is_null) {
36  CHECK(elem_type);
37  CHECK(elem_type->isPointerTy());
38  llvm::StructType* generated_struct_type =
39  (has_is_null ? llvm::StructType::get(cgen_state->context_,
40  {elem_type,
41  llvm::Type::getInt64Ty(cgen_state->context_),
42  llvm::Type::getInt8Ty(cgen_state->context_)},
43  false)
44  : llvm::StructType::get(
45  cgen_state->context_,
46  {elem_type, llvm::Type::getInt64Ty(cgen_state->context_)},
47  false));
48  llvm::Function* udf_func = cgen_state->module_->getFunction(ext_func_name);
49  if (udf_func) {
50  // Compare expected array struct type with type from the function
51  // definition from the UDF module, but use the type from the
52  // module
53  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
54  CHECK_LE(param_num, udf_func_type->getNumParams());
55  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
56  CHECK(param_pointer_type->isPointerTy());
57  llvm::Type* param_type = param_pointer_type->getPointerElementType();
58  CHECK(param_type->isStructTy());
59  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
60  CHECK_GE(struct_type->getStructNumElements(),
61  generated_struct_type->getStructNumElements())
62  << serialize_llvm_object(struct_type);
63 
64  const auto expected_elems = generated_struct_type->elements();
65  const auto current_elems = struct_type->elements();
66  for (size_t i = 0; i < expected_elems.size(); i++) {
67  CHECK_EQ(expected_elems[i], current_elems[i])
68  << "[" << ::toString(expected_elems[i]) << ", " << ::toString(current_elems[i])
69  << "]";
70  }
71 
72  if (struct_type->isLiteral()) {
73  return struct_type;
74  }
75 
76  llvm::StringRef struct_name = struct_type->getStructName();
77  return cgen_state->module_->getTypeByName(struct_name);
78  }
79  return generated_struct_type;
80 }
81 
83  llvm::LLVMContext& ctx) {
84  switch (ext_arg_type) {
85  case ExtArgumentType::Bool: // pass thru to Int8
87  return get_int_type(8, ctx);
89  return get_int_type(16, ctx);
91  return get_int_type(32, ctx);
93  return get_int_type(64, ctx);
95  return llvm::Type::getFloatTy(ctx);
97  return llvm::Type::getDoubleTy(ctx);
120  return llvm::Type::getVoidTy(ctx);
121  default:
122  CHECK(false);
123  }
124  CHECK(false);
125  return nullptr;
126 }
127 
129  CHECK(ll_type);
130  const auto bits = ll_type->getPrimitiveSizeInBits();
131 
132  if (ll_type->isFloatingPointTy()) {
133  switch (bits) {
134  case 32:
135  return SQLTypeInfo(kFLOAT, false);
136  case 64:
137  return SQLTypeInfo(kDOUBLE, false);
138  default:
139  LOG(FATAL) << "Unsupported llvm floating point type: " << bits
140  << ", only 32 and 64 bit floating point is supported.";
141  }
142  } else {
143  switch (bits) {
144  case 1:
145  return SQLTypeInfo(kBOOLEAN, false);
146  case 8:
147  return SQLTypeInfo(kTINYINT, false);
148  case 16:
149  return SQLTypeInfo(kSMALLINT, false);
150  case 32:
151  return SQLTypeInfo(kINT, false);
152  case 64:
153  return SQLTypeInfo(kBIGINT, false);
154  default:
155  LOG(FATAL) << "Unrecognized llvm type for SQL type: "
156  << bits; // TODO let's get the real name here
157  }
158  }
159  UNREACHABLE();
160  return SQLTypeInfo();
161 }
162 
164  llvm::LLVMContext& ctx) {
165  CHECK(ti.is_buffer());
166  if (ti.is_bytes()) {
167  return llvm::Type::getInt8PtrTy(ctx);
168  }
169 
170  const auto& elem_ti = ti.get_elem_type();
171  if (elem_ti.is_fp()) {
172  switch (elem_ti.get_size()) {
173  case 4:
174  return llvm::Type::getFloatPtrTy(ctx);
175  case 8:
176  return llvm::Type::getDoublePtrTy(ctx);
177  }
178  }
179 
180  if (elem_ti.is_boolean()) {
181  return llvm::Type::getInt8PtrTy(ctx);
182  }
183 
184  CHECK(elem_ti.is_integer());
185  switch (elem_ti.get_size()) {
186  case 1:
187  return llvm::Type::getInt8PtrTy(ctx);
188  case 2:
189  return llvm::Type::getInt16PtrTy(ctx);
190  case 4:
191  return llvm::Type::getInt32PtrTy(ctx);
192  case 8:
193  return llvm::Type::getInt64PtrTy(ctx);
194  }
195 
196  UNREACHABLE();
197  return nullptr;
198 }
199 
201  const auto& func_ti = function_oper->get_type_info();
202  for (size_t i = 0; i < function_oper->getArity(); ++i) {
203  const auto arg = function_oper->getArg(i);
204  const auto& arg_ti = arg->get_type_info();
205  if ((func_ti.is_array() && arg_ti.is_array()) ||
206  (func_ti.is_bytes() && arg_ti.is_bytes())) {
207  // If the function returns an array and any of the arguments are arrays, allow NULL
208  // scalars.
209  // TODO: Make this a property of the FunctionOper following `RETURN NULL ON NULL`
210  // semantics.
211  return false;
212  } else if (!arg_ti.get_notnull() && !arg_ti.is_buffer()) {
213  // Nullable geometry args will trigger a null check
214  return true;
215  } else {
216  continue;
217  }
218  }
219  return false;
220 }
221 
222 } // namespace
223 
225  int8_t* buffer) {
226  Executor* exec_ptr = reinterpret_cast<Executor*>(exec);
227  if (buffer != nullptr) {
228  exec_ptr->getRowSetMemoryOwner()->addVarlenBuffer(buffer);
229  }
230 }
231 
233  const Analyzer::FunctionOper* function_oper,
234  const CompilationOptions& co) {
236  ExtensionFunction ext_func_sig = [=]() {
238  try {
239  return bind_function(function_oper, /* is_gpu= */ true);
240  } catch (ExtensionFunctionBindingError& e) {
241  LOG(WARNING) << "codegenFunctionOper[GPU]: " << e.what() << " Redirecting "
242  << function_oper->getName() << " to run on CPU.";
243  throw QueryMustRunOnCpu();
244  }
245  } else {
246  try {
247  return bind_function(function_oper, /* is_gpu= */ false);
248  } catch (ExtensionFunctionBindingError& e) {
249  LOG(WARNING) << "codegenFunctionOper[CPU]: " << e.what();
250  throw;
251  }
252  }
253  }();
254 
255  const auto& ret_ti = function_oper->get_type_info();
256  CHECK(ret_ti.is_integer() || ret_ti.is_fp() || ret_ti.is_boolean() ||
257  ret_ti.is_buffer());
258  if (ret_ti.is_buffer() && co.device_type == ExecutorDeviceType::GPU) {
259  // TODO: This is not necessary for runtime UDFs because RBC does
260  // not generate GPU LLVM IR when the UDF is using Buffer objects.
261  // However, we cannot remove it until C++ UDFs can be defined for
262  // different devices independently.
263  throw QueryMustRunOnCpu();
264  }
265 
266  auto ret_ty = ext_arg_type_to_llvm_type(ext_func_sig.getRet(), cgen_state_->context_);
267  const auto current_bb = cgen_state_->ir_builder_.GetInsertBlock();
268  for (auto it : cgen_state_->ext_call_cache_) {
269  if (*it.foper == *function_oper) {
270  auto inst = llvm::dyn_cast<llvm::Instruction>(it.lv);
271  if (inst && inst->getParent() == current_bb) {
272  return it.lv;
273  }
274  }
275  }
276  std::vector<llvm::Value*> orig_arg_lvs;
277  std::vector<size_t> orig_arg_lvs_index;
278  std::unordered_map<llvm::Value*, llvm::Value*> const_arr_size;
279 
280  for (size_t i = 0; i < function_oper->getArity(); ++i) {
281  orig_arg_lvs_index.push_back(orig_arg_lvs.size());
282  const auto arg = function_oper->getArg(i);
283  const auto arg_cast = dynamic_cast<const Analyzer::UOper*>(arg);
284  const auto arg0 =
285  (arg_cast && arg_cast->get_optype() == kCAST) ? arg_cast->get_operand() : arg;
286  const auto array_expr_arg = dynamic_cast<const Analyzer::ArrayExpr*>(arg0);
287  auto is_local_alloc =
288  ret_ti.is_buffer() || (array_expr_arg && array_expr_arg->isLocalAlloc());
289  const auto& arg_ti = arg->get_type_info();
290  const auto arg_lvs = codegen(arg, true, co);
291  auto geo_uoper_arg = dynamic_cast<const Analyzer::GeoUOper*>(arg);
292  auto geo_binoper_arg = dynamic_cast<const Analyzer::GeoBinOper*>(arg);
293  // TODO(adb / d): Assuming no const array cols for geo (for now)
294  if ((geo_uoper_arg || geo_binoper_arg) && arg_ti.is_geometry()) {
295  // Extract arr sizes and put them in the map, forward arr pointers
296  CHECK_EQ(2 * static_cast<size_t>(arg_ti.get_physical_coord_cols()), arg_lvs.size());
297  for (size_t i = 0; i < arg_lvs.size(); i++) {
298  auto arr = arg_lvs[i++];
299  auto size = arg_lvs[i];
300  orig_arg_lvs.push_back(arr);
301  const_arr_size[arr] = size;
302  }
303  } else if (arg_ti.is_geometry()) {
304  CHECK_EQ(static_cast<size_t>(arg_ti.get_physical_coord_cols()), arg_lvs.size());
305  for (size_t j = 0; j < arg_lvs.size(); j++) {
306  orig_arg_lvs.push_back(arg_lvs[j]);
307  }
308  } else if (arg_ti.is_bytes()) {
309  CHECK_EQ(size_t(3), arg_lvs.size());
310  /* arg_lvs contains:
311  c = string_decode(&col_buf0, pos)
312  ptr = extract_str_ptr(c)
313  sz = extract_str_len(c)
314  */
315  for (size_t j = 0; j < arg_lvs.size(); j++) {
316  orig_arg_lvs.push_back(arg_lvs[j]);
317  }
318  } else {
319  if (arg_lvs.size() > 1) {
320  CHECK(arg_ti.is_array());
321  CHECK_EQ(size_t(2), arg_lvs.size());
322  const_arr_size[arg_lvs.front()] = arg_lvs.back();
323  } else {
324  CHECK_EQ(size_t(1), arg_lvs.size());
325  /* arg_lvs contains:
326  &col_buf1
327  */
328  if (is_local_alloc && arg_ti.get_size() > 0) {
329  const_arr_size[arg_lvs.front()] = cgen_state_->llInt(arg_ti.get_size());
330  }
331  }
332  orig_arg_lvs.push_back(arg_lvs.front());
333  }
334  }
335  // The extension function implementations don't handle NULL, they work under
336  // the assumption that the inputs are validated before calling them. Generate
337  // code to do the check at the call site: if any argument is NULL, return NULL
338  // without calling the function at all.
339  const auto [bbs, null_buffer_ptr] = beginArgsNullcheck(function_oper, orig_arg_lvs);
340  CHECK_GE(orig_arg_lvs.size(), function_oper->getArity());
341  // Arguments must be converted to the types the extension function can handle.
343  function_oper, &ext_func_sig, orig_arg_lvs, orig_arg_lvs_index, const_arr_size, co);
344 
345  llvm::Value* buffer_ret{nullptr};
346  if (ret_ti.is_buffer()) {
347  // codegen buffer return as first arg
348  CHECK(ret_ti.is_array() || ret_ti.is_bytes());
349  ret_ty = llvm::Type::getVoidTy(cgen_state_->context_);
350  const auto struct_ty = get_buffer_struct_type(
351  cgen_state_,
352  function_oper->getName(),
353  0,
355  /* has_is_null = */ ret_ti.is_array() || ret_ti.is_bytes());
356  buffer_ret = cgen_state_->ir_builder_.CreateAlloca(struct_ty);
357  args.insert(args.begin(), buffer_ret);
358  }
359 
360  const auto ext_call = cgen_state_->emitExternalCall(
361  ext_func_sig.getName(), ret_ty, args, {}, ret_ti.is_buffer());
362  auto ext_call_nullcheck = endArgsNullcheck(
363  bbs, ret_ti.is_buffer() ? buffer_ret : ext_call, null_buffer_ptr, function_oper);
364 
365  // Cast the return of the extension function to match the FunctionOper
366  if (!(ret_ti.is_buffer())) {
367  const auto extension_ret_ti = get_sql_type_from_llvm_type(ret_ty);
368  if (bbs.args_null_bb &&
369  extension_ret_ti.get_type() != function_oper->get_type_info().get_type() &&
370  // Skip i1-->i8 casts for ST_ functions.
371  // function_oper ret type is i1, extension ret type is 'upgraded' to i8
372  // during type deserialization to 'handle' NULL returns, hence i1-->i8.
373  // ST_ functions can't return NULLs, we just need to check arg nullness
374  // and if any args are NULL then ST_ function is not called
375  function_oper->getName().substr(0, 3) != std::string("ST_")) {
376  ext_call_nullcheck = codegenCast(ext_call_nullcheck,
377  extension_ret_ti,
378  function_oper->get_type_info(),
379  false,
380  co);
381  }
382  }
383 
384  cgen_state_->ext_call_cache_.push_back({function_oper, ext_call_nullcheck});
385  return ext_call_nullcheck;
386 }
387 
388 // Start the control flow needed for a call site check of NULL arguments.
389 std::tuple<CodeGenerator::ArgNullcheckBBs, llvm::Value*>
391  const std::vector<llvm::Value*>& orig_arg_lvs) {
393  llvm::BasicBlock* args_null_bb{nullptr};
394  llvm::BasicBlock* args_notnull_bb{nullptr};
395  llvm::BasicBlock* orig_bb = cgen_state_->ir_builder_.GetInsertBlock();
396  llvm::Value* null_array_alloca{nullptr};
397  // Only generate the check if required (at least one argument must be nullable).
398  if (ext_func_call_requires_nullcheck(function_oper)) {
399  const auto func_ti = function_oper->get_type_info();
400  if (func_ti.is_buffer()) {
401  const auto arr_struct_ty = get_buffer_struct_type(
402  cgen_state_,
403  function_oper->getName(),
404  0,
406  func_ti.is_array() || func_ti.is_bytes());
407  null_array_alloca = cgen_state_->ir_builder_.CreateAlloca(arr_struct_ty);
408  }
409  const auto args_notnull_lv = cgen_state_->ir_builder_.CreateNot(
410  codegenFunctionOperNullArg(function_oper, orig_arg_lvs));
411  args_notnull_bb = llvm::BasicBlock::Create(
412  cgen_state_->context_, "args_notnull", cgen_state_->current_func_);
413  args_null_bb = llvm::BasicBlock::Create(
415  cgen_state_->ir_builder_.CreateCondBr(args_notnull_lv, args_notnull_bb, args_null_bb);
416  cgen_state_->ir_builder_.SetInsertPoint(args_notnull_bb);
417  }
418  return std::make_tuple(
419  CodeGenerator::ArgNullcheckBBs{args_null_bb, args_notnull_bb, orig_bb},
420  null_array_alloca);
421 }
422 
423 // Wrap up the control flow needed for NULL argument handling.
425  const ArgNullcheckBBs& bbs,
426  llvm::Value* fn_ret_lv,
427  llvm::Value* null_array_ptr,
428  const Analyzer::FunctionOper* function_oper) {
430  if (bbs.args_null_bb) {
431  CHECK(bbs.args_notnull_bb);
432  cgen_state_->ir_builder_.CreateBr(bbs.args_null_bb);
433  cgen_state_->ir_builder_.SetInsertPoint(bbs.args_null_bb);
434 
435  llvm::PHINode* ext_call_phi{nullptr};
436  llvm::Value* null_lv{nullptr};
437  const auto func_ti = function_oper->get_type_info();
438  if (!func_ti.is_buffer()) {
439  // The pre-cast SQL equivalent of the type returned by the extension function.
440  const auto extension_ret_ti = get_sql_type_from_llvm_type(fn_ret_lv->getType());
441 
442  ext_call_phi = cgen_state_->ir_builder_.CreatePHI(
443  extension_ret_ti.is_fp()
444  ? get_fp_type(extension_ret_ti.get_size() * 8, cgen_state_->context_)
445  : get_int_type(extension_ret_ti.get_size() * 8, cgen_state_->context_),
446  2);
447 
448  null_lv =
449  extension_ret_ti.is_fp()
450  ? static_cast<llvm::Value*>(cgen_state_->inlineFpNull(extension_ret_ti))
451  : static_cast<llvm::Value*>(cgen_state_->inlineIntNull(extension_ret_ti));
452  } else {
453  const auto arr_struct_ty = get_buffer_struct_type(
454  cgen_state_,
455  function_oper->getName(),
456  0,
458  true);
459  ext_call_phi =
460  cgen_state_->ir_builder_.CreatePHI(llvm::PointerType::get(arr_struct_ty, 0), 2);
461 
462  CHECK(null_array_ptr);
463  const auto arr_null_bool =
464  cgen_state_->ir_builder_.CreateStructGEP(arr_struct_ty, null_array_ptr, 2);
465  cgen_state_->ir_builder_.CreateStore(
466  llvm::ConstantInt::get(get_int_type(8, cgen_state_->context_), 1),
467  arr_null_bool);
468 
469  const auto arr_null_size =
470  cgen_state_->ir_builder_.CreateStructGEP(arr_struct_ty, null_array_ptr, 1);
471  cgen_state_->ir_builder_.CreateStore(
472  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
473  arr_null_size);
474  }
475  ext_call_phi->addIncoming(fn_ret_lv, bbs.args_notnull_bb);
476  ext_call_phi->addIncoming(func_ti.is_buffer() ? null_array_ptr : null_lv,
477  bbs.orig_bb);
478 
479  return ext_call_phi;
480  }
481  return fn_ret_lv;
482 }
483 
484 namespace {
485 
487  const auto& ret_ti = function_oper->get_type_info();
488  if (!ret_ti.is_integer() && !ret_ti.is_fp()) {
489  return true;
490  }
491  for (size_t i = 0; i < function_oper->getArity(); ++i) {
492  const auto arg = function_oper->getArg(i);
493  const auto& arg_ti = arg->get_type_info();
494  if (!arg_ti.is_integer() && !arg_ti.is_fp()) {
495  return true;
496  }
497  }
498  return false;
499 }
500 
501 } // namespace
502 
505  const CompilationOptions& co) {
507  if (call_requires_custom_type_handling(function_oper)) {
508  // Some functions need the return type to be the same as the input type.
509  if (function_oper->getName() == "FLOOR" || function_oper->getName() == "CEIL") {
510  CHECK_EQ(size_t(1), function_oper->getArity());
511  const auto arg = function_oper->getArg(0);
512  const auto& arg_ti = arg->get_type_info();
513  CHECK(arg_ti.is_decimal());
514  const auto arg_lvs = codegen(arg, true, co);
515  CHECK_EQ(size_t(1), arg_lvs.size());
516  const auto arg_lv = arg_lvs.front();
517  CHECK(arg_lv->getType()->isIntegerTy(64));
519  std::tie(bbs, std::ignore) = beginArgsNullcheck(function_oper, {arg_lvs});
520  const std::string func_name =
521  (function_oper->getName() == "FLOOR") ? "decimal_floor" : "decimal_ceil";
522  const auto covar_result_lv = cgen_state_->emitCall(
523  func_name, {arg_lv, cgen_state_->llInt(exp_to_scale(arg_ti.get_scale()))});
524  const auto ret_ti = function_oper->get_type_info();
525  CHECK(ret_ti.is_decimal());
526  CHECK_EQ(0, ret_ti.get_scale());
527  const auto result_lv = cgen_state_->ir_builder_.CreateSDiv(
528  covar_result_lv, cgen_state_->llInt(exp_to_scale(arg_ti.get_scale())));
529  return endArgsNullcheck(bbs, result_lv, nullptr, function_oper);
530  } else if (function_oper->getName() == "ROUND" &&
531  function_oper->getArg(0)->get_type_info().is_decimal()) {
532  CHECK_EQ(size_t(2), function_oper->getArity());
533 
534  const auto arg0 = function_oper->getArg(0);
535  const auto& arg0_ti = arg0->get_type_info();
536  const auto arg0_lvs = codegen(arg0, true, co);
537  CHECK_EQ(size_t(1), arg0_lvs.size());
538  const auto arg0_lv = arg0_lvs.front();
539  CHECK(arg0_lv->getType()->isIntegerTy(64));
540 
541  const auto arg1 = function_oper->getArg(1);
542  const auto& arg1_ti = arg1->get_type_info();
543  CHECK(arg1_ti.is_integer());
544  const auto arg1_lvs = codegen(arg1, true, co);
545  auto arg1_lv = arg1_lvs.front();
546  if (arg1_ti.get_type() != kINT) {
547  arg1_lv = codegenCast(arg1_lv, arg1_ti, SQLTypeInfo(kINT, true), false, co);
548  }
549 
551  std::tie(bbs0, std::ignore) =
552  beginArgsNullcheck(function_oper, {arg0_lv, arg1_lvs.front()});
553 
554  const std::string func_name = "Round__4";
555  const auto ret_ti = function_oper->get_type_info();
556  CHECK(ret_ti.is_decimal());
557  const auto result_lv = cgen_state_->emitExternalCall(
558  func_name,
560  {arg0_lv, arg1_lv, cgen_state_->llInt(arg0_ti.get_scale())});
561 
562  return endArgsNullcheck(bbs0, result_lv, nullptr, function_oper);
563  }
564  throw std::runtime_error("Type combination not supported for function " +
565  function_oper->getName());
566  }
567  return codegenFunctionOper(function_oper, co);
568 }
569 
570 // Generates code which returns true iff at least one of the arguments is NULL.
572  const Analyzer::FunctionOper* function_oper,
573  const std::vector<llvm::Value*>& orig_arg_lvs) {
575  llvm::Value* one_arg_null =
576  llvm::ConstantInt::get(llvm::IntegerType::getInt1Ty(cgen_state_->context_), false);
577  size_t physical_coord_cols = 0;
578  for (size_t i = 0, j = 0; i < function_oper->getArity();
579  ++i, j += std::max(size_t(1), physical_coord_cols)) {
580  const auto arg = function_oper->getArg(i);
581  const auto& arg_ti = arg->get_type_info();
582  physical_coord_cols = arg_ti.get_physical_coord_cols();
583  if (arg_ti.get_notnull()) {
584  continue;
585  }
586 #ifdef ENABLE_GEOS
587  // If geo arg is coming from geos, skip the null check, assume it's a valid geo
588  if (arg_ti.is_geometry()) {
589  auto* coords_load = llvm::dyn_cast<llvm::LoadInst>(orig_arg_lvs[i]);
590  if (coords_load) {
591  continue;
592  }
593  }
594 #endif
595  if (arg_ti.is_buffer() || arg_ti.is_geometry()) {
596  // POINT [un]compressed coord check requires custom checker and chunk iterator
597  // Non-POINT NULL geographies will have a normally encoded null coord array
598  auto fname =
599  (arg_ti.get_type() == kPOINT) ? "point_coord_array_is_null" : "array_is_null";
600  auto is_null_lv = cgen_state_->emitExternalCall(
601  fname, get_int_type(1, cgen_state_->context_), {orig_arg_lvs[j], posArg(arg)});
602  one_arg_null = cgen_state_->ir_builder_.CreateOr(one_arg_null, is_null_lv);
603  continue;
604  }
605  CHECK(arg_ti.is_number() or arg_ti.is_boolean());
606  one_arg_null = cgen_state_->ir_builder_.CreateOr(
607  one_arg_null, codegenIsNullNumber(orig_arg_lvs[j], arg_ti));
608  }
609  return one_arg_null;
610 }
611 
// Emits the constant that tells callers how the coordinates of `type_info` are
// compressed: 1 when the type uses kENCODING_GEOINT with a compression parameter
// of 32, and 0 for every other combination. The constant is produced by
// cgen_state_->llInt from an int32_t value.
// NOTE(review): listing line 613 is absent from this extract, so one statement
// of the body is not visible here.
612 llvm::Value* CodeGenerator::codegenCompression(const SQLTypeInfo& type_info) {
614  int32_t compression = (type_info.get_compression() == kENCODING_GEOINT &&
615  type_info.get_comp_param() == 32)
616  ? 1
617  : 0;
618 
619  return cgen_state_->llInt(compression);
620 }
621 
// Emits calls to the external functions "array_buff" and "array_size" for the
// array found at `row_pos` in `chunk` and returns the pair (buffer, length).
//
// chunk, row_pos    values forwarded unchanged to both external calls
// array_type        element SQL type; used as the subtype of a temporary kARRAY
//                   SQLTypeInfo from which the element type info is derived
// cast_and_extend   when true, the buffer pointer is passed through
//                   castArrayPointer for the element type and the length is
//                   zero-extended to a 64-bit integer; when false the raw
//                   i32* buffer and 32-bit length are returned as emitted
// NOTE(review): listing line 627 is absent from this extract, so one statement
// of the body is not visible here.
622 std::pair<llvm::Value*, llvm::Value*> CodeGenerator::codegenArrayBuff(
623  llvm::Value* chunk,
624  llvm::Value* row_pos,
625  SQLTypes array_type,
626  bool cast_and_extend) {
// Build an array type info only to obtain the matching element type info.
628  const auto elem_ti =
629  SQLTypeInfo(
630  SQLTypes::kARRAY, 0, 0, false, EncodingType::kENCODING_NONE, 0, array_type)
631  .get_elem_type();
632 
633  auto buff = cgen_state_->emitExternalCall(
634  "array_buff", llvm::Type::getInt32PtrTy(cgen_state_->context_), {chunk, row_pos});
635 
// "array_size" additionally receives log2 of the element's logical size in bytes.
636  auto len = cgen_state_->emitExternalCall(
637  "array_size",
638  get_int_type(32, cgen_state_->context_),
639  {chunk, row_pos, cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
640 
641  if (cast_and_extend) {
642  buff = castArrayPointer(buff, elem_ti);
643  len =
644  cgen_state_->ir_builder_.CreateZExt(len, get_int_type(64, cgen_state_->context_));
645  }
646 
647  return std::make_pair(buff, len);
648 }
649 
// Packs a buffer argument into a stack-allocated struct and appends a pointer to
// that struct to `output_args`.
//
// ext_func_name, param_num  identify the target function parameter; forwarded to
//                           get_buffer_struct_type to select the struct type
// buffer_buf                buffer pointer, stored in struct member 0 (required)
// buffer_size               buffer size, stored in struct member 1 (required)
// buffer_null               optional null flag; when non-null the struct type is
//                           requested with the extra member and the flag is
//                           zero-extended to i8 and stored in member 2
// output_args               receives the alloca holding the filled struct
// NOTE(review): listing line 656 is absent from this extract, so one statement
// of the body is not visible here.
650 void CodeGenerator::codegenBufferArgs(const std::string& ext_func_name,
651  size_t param_num,
652  llvm::Value* buffer_buf,
653  llvm::Value* buffer_size,
654  llvm::Value* buffer_null,
655  std::vector<llvm::Value*>& output_args) {
657  CHECK(buffer_buf);
658  CHECK(buffer_size);
659 
// The struct's first member type is taken from the buffer pointer itself; the
// third member is only requested when a null flag was supplied.
660  auto buffer_abstraction = get_buffer_struct_type(
661  cgen_state_, ext_func_name, param_num, buffer_buf->getType(), !!(buffer_null));
662  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(buffer_abstraction);
663 
664  auto buffer_buf_ptr =
665  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 0);
666  cgen_state_->ir_builder_.CreateStore(buffer_buf, buffer_buf_ptr);
667 
668  auto buffer_size_ptr =
669  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 1);
670  cgen_state_->ir_builder_.CreateStore(buffer_size, buffer_size_ptr);
671 
672  if (buffer_null) {
// Widen the flag to i8 before storing it in member 2.
673  auto bool_extended_type = llvm::Type::getInt8Ty(cgen_state_->context_);
674  auto buffer_null_extended =
675  cgen_state_->ir_builder_.CreateZExt(buffer_null, bool_extended_type);
676  auto buffer_is_null_ptr =
677  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 2);
678  cgen_state_->ir_builder_.CreateStore(buffer_null_extended, buffer_is_null_ptr);
679  }
680  output_args.push_back(alloc_mem);
681 }
682 
683 llvm::StructType* CodeGenerator::createPointStructType(const std::string& udf_func_name,
684  size_t param_num) {
685  llvm::Module* module_for_lookup = cgen_state_->module_;
686  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
687 
688  llvm::StructType* generated_struct_type =
689  llvm::StructType::get(cgen_state_->context_,
690  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
691  llvm::Type::getInt64Ty(cgen_state_->context_),
692  llvm::Type::getInt32Ty(cgen_state_->context_),
693  llvm::Type::getInt32Ty(cgen_state_->context_),
694  llvm::Type::getInt32Ty(cgen_state_->context_)},
695  false);
696 
697  if (udf_func) {
698  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
699  CHECK(param_num < udf_func_type->getNumParams());
700  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
701  CHECK(param_pointer_type->isPointerTy());
702  llvm::Type* param_type = param_pointer_type->getPointerElementType();
703  CHECK(param_type->isStructTy());
704  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
705  CHECK(struct_type->getStructNumElements() == 5) << serialize_llvm_object(struct_type);
706  const auto expected_elems = generated_struct_type->elements();
707  const auto current_elems = struct_type->elements();
708  for (size_t i = 0; i < expected_elems.size(); i++) {
709  CHECK_EQ(expected_elems[i], current_elems[i]);
710  }
711  if (struct_type->isLiteral()) {
712  return struct_type;
713  }
714 
715  llvm::StringRef struct_name = struct_type->getStructName();
716  llvm::StructType* point_type = module_for_lookup->getTypeByName(struct_name);
717  CHECK(point_type);
718 
719  return (point_type);
720  }
721  return generated_struct_type;
722 }
723 
// Packs the components of a geo point argument into a stack-allocated struct and
// appends a pointer to that struct to `output_args`.
//
// udf_func_name, param_num  forwarded to createPointStructType to select the
//                           struct type for the target function parameter
// point_buf                 stored in struct member 0
// point_size                stored in struct member 1
// compression               stored in struct member 2
// input_srid                stored in struct member 3
// output_srid               stored in struct member 4
// output_args               receives the alloca holding the filled struct
// All five value arguments are required (CHECKed non-null).
// NOTE(review): listing line 732 is absent from this extract, so one statement
// of the body is not visible here.
724 void CodeGenerator::codegenGeoPointArgs(const std::string& udf_func_name,
725  size_t param_num,
726  llvm::Value* point_buf,
727  llvm::Value* point_size,
728  llvm::Value* compression,
729  llvm::Value* input_srid,
730  llvm::Value* output_srid,
731  std::vector<llvm::Value*>& output_args) {
733  CHECK(point_buf);
734  CHECK(point_size);
735  CHECK(compression);
736  CHECK(input_srid);
737  CHECK(output_srid);
738 
739  auto point_abstraction = createPointStructType(udf_func_name, param_num);
740  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(point_abstraction, nullptr);
741 
// Fill the struct member by member, in declaration order.
742  auto point_buf_ptr =
743  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 0);
744  cgen_state_->ir_builder_.CreateStore(point_buf, point_buf_ptr);
745 
746  auto point_size_ptr =
747  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 1);
748  cgen_state_->ir_builder_.CreateStore(point_size, point_size_ptr);
749 
750  auto point_compression_ptr =
751  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 2);
752  cgen_state_->ir_builder_.CreateStore(compression, point_compression_ptr);
753 
754  auto input_srid_ptr =
755  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 3);
756  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
757 
758  auto output_srid_ptr =
759  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 4);
760  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
761 
762  output_args.push_back(alloc_mem);
763 }
764 
766  const std::string& udf_func_name,
767  size_t param_num) {
768  llvm::Module* module_for_lookup = cgen_state_->module_;
769  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
770 
771  llvm::StructType* generated_struct_type =
772  llvm::StructType::get(cgen_state_->context_,
773  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
774  llvm::Type::getInt64Ty(cgen_state_->context_),
775  llvm::Type::getInt32Ty(cgen_state_->context_),
776  llvm::Type::getInt32Ty(cgen_state_->context_),
777  llvm::Type::getInt32Ty(cgen_state_->context_)},
778  false);
779 
780  if (udf_func) {
781  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
782  CHECK(param_num < udf_func_type->getNumParams());
783  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
784  CHECK(param_pointer_type->isPointerTy());
785  llvm::Type* param_type = param_pointer_type->getPointerElementType();
786  CHECK(param_type->isStructTy());
787  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
788  CHECK(struct_type->isStructTy());
789  CHECK(struct_type->getStructNumElements() == 5);
790 
791  const auto expected_elems = generated_struct_type->elements();
792  const auto current_elems = struct_type->elements();
793  for (size_t i = 0; i < expected_elems.size(); i++) {
794  CHECK_EQ(expected_elems[i], current_elems[i]);
795  }
796  if (struct_type->isLiteral()) {
797  return struct_type;
798  }
799 
800  llvm::StringRef struct_name = struct_type->getStructName();
801  llvm::StructType* line_string_type = module_for_lookup->getTypeByName(struct_name);
802  CHECK(line_string_type);
803 
804  return (line_string_type);
805  }
806  return generated_struct_type;
807 }
808 
// Packs the components of a geo linestring argument into a stack-allocated
// struct and appends a pointer to that struct to `output_args`.
//
// udf_func_name, param_num  forwarded to createLineStringStructType to select
//                           the struct type for the target function parameter
// line_string_buf           stored in struct member 0
// line_string_size          stored in struct member 1
// compression               stored in struct member 2
// input_srid                stored in struct member 3
// output_srid               stored in struct member 4
// output_args               receives the alloca holding the filled struct
// All five value arguments are required (CHECKed non-null).
// NOTE(review): listing line 817 is absent from this extract, so one statement
// of the body is not visible here.
809 void CodeGenerator::codegenGeoLineStringArgs(const std::string& udf_func_name,
810  size_t param_num,
811  llvm::Value* line_string_buf,
812  llvm::Value* line_string_size,
813  llvm::Value* compression,
814  llvm::Value* input_srid,
815  llvm::Value* output_srid,
816  std::vector<llvm::Value*>& output_args) {
818  CHECK(line_string_buf);
819  CHECK(line_string_size);
820  CHECK(compression);
821  CHECK(input_srid);
822  CHECK(output_srid);
823 
824  auto line_string_abstraction = createLineStringStructType(udf_func_name, param_num);
825  auto alloc_mem =
826  cgen_state_->ir_builder_.CreateAlloca(line_string_abstraction, nullptr);
827 
// Fill the struct member by member, in declaration order.
828  auto line_string_buf_ptr =
829  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 0);
830  cgen_state_->ir_builder_.CreateStore(line_string_buf, line_string_buf_ptr);
831 
832  auto line_string_size_ptr =
833  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 1);
834  cgen_state_->ir_builder_.CreateStore(line_string_size, line_string_size_ptr);
835 
836  auto line_string_compression_ptr =
837  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 2);
838  cgen_state_->ir_builder_.CreateStore(compression, line_string_compression_ptr);
839 
840  auto input_srid_ptr =
841  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 3);
842  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
843 
844  auto output_srid_ptr =
845  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 4);
846  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
847 
848  output_args.push_back(alloc_mem);
849 }
850 
851 llvm::StructType* CodeGenerator::createPolygonStructType(const std::string& udf_func_name,
852  size_t param_num) {
853  llvm::Module* module_for_lookup = cgen_state_->module_;
854  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
855 
856  llvm::StructType* generated_struct_type =
857  llvm::StructType::get(cgen_state_->context_,
858  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
859  llvm::Type::getInt64Ty(cgen_state_->context_),
860  llvm::Type::getInt32PtrTy(cgen_state_->context_),
861  llvm::Type::getInt64Ty(cgen_state_->context_),
862  llvm::Type::getInt32Ty(cgen_state_->context_),
863  llvm::Type::getInt32Ty(cgen_state_->context_),
864  llvm::Type::getInt32Ty(cgen_state_->context_)},
865  false);
866 
867  if (udf_func) {
868  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
869  CHECK(param_num < udf_func_type->getNumParams());
870  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
871  CHECK(param_pointer_type->isPointerTy());
872  llvm::Type* param_type = param_pointer_type->getPointerElementType();
873  CHECK(param_type->isStructTy());
874  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
875 
876  CHECK(struct_type->isStructTy());
877  CHECK(struct_type->getStructNumElements() == 7);
878 
879  const auto expected_elems = generated_struct_type->elements();
880  const auto current_elems = struct_type->elements();
881  for (size_t i = 0; i < expected_elems.size(); i++) {
882  CHECK_EQ(expected_elems[i], current_elems[i]);
883  }
884  if (struct_type->isLiteral()) {
885  return struct_type;
886  }
887 
888  llvm::StringRef struct_name = struct_type->getStructName();
889 
890  llvm::StructType* polygon_type = module_for_lookup->getTypeByName(struct_name);
891  CHECK(polygon_type);
892 
893  return (polygon_type);
894  }
895  return generated_struct_type;
896 }
897 
// Packs a polygon argument into an alloca'd struct for an extension-function call.
//
// The struct type is obtained from createPolygonStructType(udf_func_name, param_num).
// The seven input values are stored into fields 0 through 6 in parameter order
// (polygon_buf, polygon_size, ring_sizes_buf, num_rings, compression, input_srid,
// output_srid), and the pointer produced by the alloca is appended to output_args.
// Every input value is CHECKed to be non-null on entry.
898 void CodeGenerator::codegenGeoPolygonArgs(const std::string& udf_func_name,
899  size_t param_num,
900  llvm::Value* polygon_buf,
901  llvm::Value* polygon_size,
902  llvm::Value* ring_sizes_buf,
903  llvm::Value* num_rings,
904  llvm::Value* compression,
905  llvm::Value* input_srid,
906  llvm::Value* output_srid,
907  std::vector<llvm::Value*>& output_args) {
909  CHECK(polygon_buf);
910  CHECK(polygon_size);
911  CHECK(ring_sizes_buf);
912  CHECK(num_rings);
913  CHECK(compression);
914  CHECK(input_srid);
915  CHECK(output_srid);
916 
  // Reserve one struct instance at the current insertion point of the IR builder.
917  auto polygon_abstraction = createPolygonStructType(udf_func_name, param_num);
918  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(polygon_abstraction, nullptr);
919 
  // Field 0: polygon coordinate buffer.
920  auto polygon_buf_ptr =
921  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 0);
922  cgen_state_->ir_builder_.CreateStore(polygon_buf, polygon_buf_ptr);
923 
  // Field 1: size of the coordinate buffer.
924  auto polygon_size_ptr =
925  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 1);
926  cgen_state_->ir_builder_.CreateStore(polygon_size, polygon_size_ptr);
927 
  // Field 2: ring sizes buffer.
928  auto ring_sizes_buf_ptr =
929  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 2);
930  cgen_state_->ir_builder_.CreateStore(ring_sizes_buf, ring_sizes_buf_ptr);
931 
  // Field 3: number of rings.
932  auto ring_size_ptr =
933  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 3);
934  cgen_state_->ir_builder_.CreateStore(num_rings, ring_size_ptr);
935 
  // Field 4: compression value.
936  auto polygon_compression_ptr =
937  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 4);
938  cgen_state_->ir_builder_.CreateStore(compression, polygon_compression_ptr);
939 
  // Field 5: input SRID.
940  auto input_srid_ptr =
941  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 5);
942  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
943 
  // Field 6: output SRID.
944  auto output_srid_ptr =
945  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 6);
946  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
947 
  // The call receives the pointer to the populated struct.
948  output_args.push_back(alloc_mem);
949 }
950 
952  const std::string& udf_func_name,
953  size_t param_num) {
  // Resolves the LLVM struct type used to hand a multipolygon argument to the
  // function named `udf_func_name`. The expected layout is the literal struct
  // {i8*, i64, i32*, i64, i32*, i64, i32, i32, i32}; codegenGeoMultiPolygonArgs fills
  // those fields with coords, coords size, ring sizes buffer, ring sizes, polygon
  // bounds, polygon bounds sizes, compression, input SRID and output SRID.
954  llvm::Function* udf_func = cgen_state_->module_->getFunction(udf_func_name);
955  llvm::Module* module_for_lookup = cgen_state_->module_;
956 
957  llvm::StructType* generated_struct_type =
958  llvm::StructType::get(cgen_state_->context_,
959  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
960  llvm::Type::getInt64Ty(cgen_state_->context_),
961  llvm::Type::getInt32PtrTy(cgen_state_->context_),
962  llvm::Type::getInt64Ty(cgen_state_->context_),
963  llvm::Type::getInt32PtrTy(cgen_state_->context_),
964  llvm::Type::getInt64Ty(cgen_state_->context_),
965  llvm::Type::getInt32Ty(cgen_state_->context_),
966  llvm::Type::getInt32Ty(cgen_state_->context_),
967  llvm::Type::getInt32Ty(cgen_state_->context_)},
968  false);
969 
  // When the module contains the function, validate its parameter `param_num`
  // against the expected layout and return the function's own struct type.
970  if (udf_func) {
971  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
972  CHECK(param_num < udf_func_type->getNumParams());
973  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
974  CHECK(param_pointer_type->isPointerTy());
975  llvm::Type* param_type = param_pointer_type->getPointerElementType();
976  CHECK(param_type->isStructTy());
977  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
978  CHECK(struct_type->isStructTy());
979  CHECK(struct_type->getStructNumElements() == 9);
  // Element types must match the expected layout one by one.
980  const auto expected_elems = generated_struct_type->elements();
981  const auto current_elems = struct_type->elements();
982  for (size_t i = 0; i < expected_elems.size(); i++) {
983  CHECK_EQ(expected_elems[i], current_elems[i]);
984  }
  // A literal struct has no name to look up, so it is returned directly.
985  if (struct_type->isLiteral()) {
986  return struct_type;
987  }
988  llvm::StringRef struct_name = struct_type->getStructName();
989 
  // Named struct: re-resolve it by name through the module.
990  llvm::StructType* polygon_type = module_for_lookup->getTypeByName(struct_name);
991  CHECK(polygon_type);
992 
993  return (polygon_type);
994  }
  // Function not found in the module: fall back to the literal layout.
995  return generated_struct_type;
996 }
997 
// Packs a multipolygon argument into an alloca'd struct for an extension-function call.
//
// The struct type is obtained from createMultiPolygonStructType(udf_func_name,
// param_num). The nine input values are stored into fields 0 through 8 in parameter
// order (polygon_coords, polygon_coords_size, ring_sizes_buf, ring_sizes,
// polygon_bounds, polygon_bounds_sizes, compression, input_srid, output_srid), and the
// pointer produced by the alloca is appended to output_args.
// Every input value is CHECKed to be non-null on entry.
998 void CodeGenerator::codegenGeoMultiPolygonArgs(const std::string& udf_func_name,
999  size_t param_num,
1000  llvm::Value* polygon_coords,
1001  llvm::Value* polygon_coords_size,
1002  llvm::Value* ring_sizes_buf,
1003  llvm::Value* ring_sizes,
1004  llvm::Value* polygon_bounds,
1005  llvm::Value* polygon_bounds_sizes,
1006  llvm::Value* compression,
1007  llvm::Value* input_srid,
1008  llvm::Value* output_srid,
1009  std::vector<llvm::Value*>& output_args) {
1011  CHECK(polygon_coords);
1012  CHECK(polygon_coords_size);
1013  CHECK(ring_sizes_buf);
1014  CHECK(ring_sizes);
1015  CHECK(polygon_bounds);
1016  CHECK(polygon_bounds_sizes);
1017  CHECK(compression);
1018  CHECK(input_srid);
1019  CHECK(output_srid);
1020 
  // Reserve one struct instance at the current insertion point of the IR builder.
1021  auto multi_polygon_abstraction = createMultiPolygonStructType(udf_func_name, param_num);
1022  auto alloc_mem =
1023  cgen_state_->ir_builder_.CreateAlloca(multi_polygon_abstraction, nullptr);
1024 
  // Field 0: coordinate buffer.
1025  auto polygon_coords_ptr =
1026  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 0);
1027  cgen_state_->ir_builder_.CreateStore(polygon_coords, polygon_coords_ptr);
1028 
  // Field 1: size of the coordinate buffer.
1029  auto polygon_coords_size_ptr =
1030  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 1);
1031  cgen_state_->ir_builder_.CreateStore(polygon_coords_size, polygon_coords_size_ptr);
1032 
  // Field 2: ring sizes buffer.
1033  auto ring_sizes_buf_ptr =
1034  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 2);
1035  cgen_state_->ir_builder_.CreateStore(ring_sizes_buf, ring_sizes_buf_ptr);
1036 
  // Field 3: ring sizes value.
1037  auto ring_sizes_ptr =
1038  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 3);
1039  cgen_state_->ir_builder_.CreateStore(ring_sizes, ring_sizes_ptr);
1040 
  // Field 4: polygon bounds buffer.
1041  auto polygon_bounds_buf_ptr =
1042  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 4);
1043  cgen_state_->ir_builder_.CreateStore(polygon_bounds, polygon_bounds_buf_ptr);
1044 
  // Field 5: polygon bounds sizes value.
1045  auto polygon_bounds_sizes_ptr =
1046  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 5);
1047  cgen_state_->ir_builder_.CreateStore(polygon_bounds_sizes, polygon_bounds_sizes_ptr);
1048 
  // Field 6: compression value.
1049  auto polygon_compression_ptr =
1050  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 6);
1051  cgen_state_->ir_builder_.CreateStore(compression, polygon_compression_ptr);
1052 
  // Field 7: input SRID.
1053  auto input_srid_ptr =
1054  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 7);
1055  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
1056 
  // Field 8: output SRID.
1057  auto output_srid_ptr =
1058  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 8);
1059  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
1060 
  // The call receives the pointer to the populated struct.
1061  output_args.push_back(alloc_mem);
1062 }
1063 
1064 // Generate CAST operations for arguments in `orig_arg_lvs` to the types required by
1065 // `ext_func_sig`.
1067  const Analyzer::FunctionOper* function_oper,
1068  const ExtensionFunction* ext_func_sig,
1069  const std::vector<llvm::Value*>& orig_arg_lvs,
1070  const std::vector<size_t>& orig_arg_lvs_index,
1071  const std::unordered_map<llvm::Value*, llvm::Value*>& const_arr_size,
1072  const CompilationOptions& co) {
  // For each operand of `function_oper`, appends to `args` the LLVM value(s) that
  // operand contributes to the extension-function call:
  //  - bytes operands: a buffer struct built by codegenBufferArgs;
  //  - array operands: either a (pointer, 64-bit size) pair or a buffer struct that
  //    also carries the result of `array_is_null`;
  //  - geometry operands: a geo struct, or flattened (pointer, size) pairs when the
  //    declared argument is PInt8;
  //  - all other operands: the scalar value, cast when its SQL type differs from the
  //    type the signature declares.
  // Returns the assembled argument list.
1074  CHECK(ext_func_sig);
1075  const auto& ext_func_args = ext_func_sig->getArgs();
1076  CHECK_LE(function_oper->getArity(), ext_func_args.size());
1077  const auto func_ti = function_oper->get_type_info();
1078  std::vector<llvm::Value*> args;
1079  /*
1080  i: argument in RA for the function operand
1081  j: extra offset in ext_func_args
1082  k: origin_arg_lvs counter, equal to orig_arg_lvs_index[i]
1083  ij: ext_func_args counter, equal to i + j
1084  dj: offset when UDF implementation first argument corresponds to return value
1085  */
  // j grows whenever one operand consumes more than one ext_func_args slot; dj is 1
  // only when the operator's result type is a buffer, and is added to the parameter
  // index handed to the struct-building helpers.
1086  for (size_t i = 0, j = 0, dj = (func_ti.is_buffer() ? 1 : 0);
1087  i < function_oper->getArity();
1088  ++i) {
1089  size_t k = orig_arg_lvs_index[i];
1090  size_t ij = i + j;
1091  const auto arg = function_oper->getArg(i);
1092  const auto ext_func_arg = ext_func_args[ij];
1093  const auto& arg_ti = arg->get_type_info();
1094  llvm::Value* arg_lv{nullptr};
1095  if (arg_ti.is_bytes()) {
  // Bytes operand: only TextEncodingNone is accepted. The pointer and length are
  // read from the two values following index k.
1096  CHECK(ext_func_arg == ExtArgumentType::TextEncodingNone)
1097  << ::toString(ext_func_arg);
1098  const auto ptr_lv = orig_arg_lvs[k + 1];
1099  const auto len_lv = orig_arg_lvs[k + 2];
1100  auto& builder = cgen_state_->ir_builder_;
1101  auto string_buf_arg = builder.CreatePointerCast(
1102  ptr_lv, llvm::Type::getInt8PtrTy(cgen_state_->context_));
1103  auto string_size_arg =
1104  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
  // No null flag is supplied for the string buffer (nullptr).
1105  codegenBufferArgs(ext_func_sig->getName(),
1106  ij + dj,
1107  string_buf_arg,
1108  string_size_arg,
1109  nullptr,
1110  args);
1111  } else if (arg_ti.is_array()) {
  // Array operand: a value registered in const_arr_size is used directly as the
  // buffer, with its size taken from that map.
1112  bool const_arr = (const_arr_size.count(orig_arg_lvs[k]) > 0);
1113  const auto elem_ti = arg_ti.get_elem_type();
1114  // TODO: switch to fast fixlen variants
1115  const auto ptr_lv = (const_arr)
1116  ? orig_arg_lvs[k]
1118  "array_buff",
1119  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1120  {orig_arg_lvs[k], posArg(arg)});
1121  const auto len_lv =
1122  (const_arr) ? const_arr_size.at(orig_arg_lvs[k])
1124  "array_size",
1126  {orig_arg_lvs[k],
1127  posArg(arg),
1128  cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
1129 
1130  if (is_ext_arg_type_pointer(ext_func_arg)) {
  // Pointer-style argument: the typed element pointer and the 64-bit length are
  // passed as two separate arguments, consuming one extra ext_func_args slot.
1131  args.push_back(castArrayPointer(ptr_lv, elem_ti));
1132  args.push_back(cgen_state_->ir_builder_.CreateZExt(
1133  len_lv, get_int_type(64, cgen_state_->context_)));
1134  j++;
1135  } else if (is_ext_arg_type_array(ext_func_arg)) {
  // Array-style argument: pointer, size and null flag are packed into one buffer
  // struct by codegenBufferArgs.
1136  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1137  auto& builder = cgen_state_->ir_builder_;
1138  auto array_size_arg =
1139  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1140  auto array_null_arg =
1141  cgen_state_->emitExternalCall("array_is_null",
1143  {orig_arg_lvs[k], posArg(arg)});
1144  codegenBufferArgs(ext_func_sig->getName(),
1145  ij + dj,
1146  array_buf_arg,
1147  array_size_arg,
1148  array_null_arg,
1149  args);
1150  } else {
1151  UNREACHABLE();
1152  }
1153 
1154  } else if (arg_ti.is_geometry()) {
1155  // Coords
1156  bool const_arr = (const_arr_size.count(orig_arg_lvs[k]) > 0);
1157  // NOTE(adb): We're generating code to handle the TINYINT array only -- the actual
1158  // geo encoding (or lack thereof) does not matter here
1159  const auto elem_ti = SQLTypeInfo(SQLTypes::kARRAY,
1160  0,
1161  0,
1162  false,
1164  0,
1166  .get_elem_type();
1167  llvm::Value* ptr_lv;
1168  llvm::Value* len_lv;
  // fixlen stays -1 unless the operand is a POINT column variable whose descriptor
  // from getPhysicalColumnDescriptor(col_var, 1) has array type; it then holds that
  // column type's get_size().
1169  int32_t fixlen = -1;
1170  if (arg_ti.get_type() == kPOINT) {
1171  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(arg);
1172  if (col_var) {
1173  const auto coords_cd = executor()->getPhysicalColumnDescriptor(col_var, 1);
1174  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1175  fixlen = coords_cd->columnType.get_size();
1176  }
1177  }
1178  }
1179  if (fixlen > 0) {
  // Known fixed length: fetch the buffer with fast_fixlen_array_buff and use the
  // length as a constant.
1180  ptr_lv =
1181  cgen_state_->emitExternalCall("fast_fixlen_array_buff",
1182  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1183  {orig_arg_lvs[k], posArg(arg)});
1184  len_lv = cgen_state_->llInt(int64_t(fixlen));
1185  } else {
1186  // TODO: remove const_arr and related code if it's not needed
1187  ptr_lv = (const_arr) ? orig_arg_lvs[k]
1189  "array_buff",
1190  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1191  {orig_arg_lvs[k], posArg(arg)});
1192  len_lv = (const_arr)
1193  ? const_arr_size.at(orig_arg_lvs[k])
1195  "array_size",
1197  {orig_arg_lvs[k],
1198  posArg(arg),
1199  cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
1200  }
1201 
  // POINT and LINESTRING geo structs are built here; POLYGON and MULTIPOLYGON geo
  // structs are built in the switch further down.
1202  if (is_ext_arg_type_geo(ext_func_arg)) {
1203  if (arg_ti.get_type() == kPOINT || arg_ti.get_type() == kLINESTRING) {
1204  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1205  auto& builder = cgen_state_->ir_builder_;
1206  auto array_size_arg =
1207  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1208  auto compression_val = codegenCompression(arg_ti);
1209  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1210  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1211 
1212  if (arg_ti.get_type() == kPOINT) {
1213  CHECK_EQ(k, ij);
1214  codegenGeoPointArgs(ext_func_sig->getName(),
1215  ij + dj,
1216  array_buf_arg,
1217  array_size_arg,
1218  compression_val,
1219  input_srid_val,
1220  output_srid_val,
1221  args);
1222  } else {
1223  CHECK_EQ(k, ij);
1224  codegenGeoLineStringArgs(ext_func_sig->getName(),
1225  ij + dj,
1226  array_buf_arg,
1227  array_size_arg,
1228  compression_val,
1229  input_srid_val,
1230  output_srid_val,
1231  args);
1232  }
1233  }
1234  } else {
  // Non-geo declared argument: it must be PInt8, and the coords are passed as a
  // pointer plus 64-bit size, consuming one extra ext_func_args slot.
1235  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1236  args.push_back(castArrayPointer(ptr_lv, elem_ti));
1237  args.push_back(cgen_state_->ir_builder_.CreateZExt(
1238  len_lv, get_int_type(64, cgen_state_->context_)));
1239  j++;
1240  }
1241 
  // Additional per-type handling; POINT and LINESTRING need nothing further.
1242  switch (arg_ti.get_type()) {
1243  case kPOINT:
1244  case kLINESTRING:
1245  break;
1246  case kPOLYGON: {
1247  if (ext_func_arg == ExtArgumentType::GeoPolygon) {
  // Struct form: coords plus the ring sizes read from index k + 1.
1248  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1249  auto& builder = cgen_state_->ir_builder_;
1250  auto array_size_arg =
1251  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1252  auto compression_val = codegenCompression(arg_ti);
1253  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1254  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1255 
1256  auto [ring_size_buff, ring_size] =
1257  codegenArrayBuff(orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true);
1258  CHECK_EQ(k, ij);
1259  codegenGeoPolygonArgs(ext_func_sig->getName(),
1260  ij + dj,
1261  array_buf_arg,
1262  array_size_arg,
1263  ring_size_buff,
1264  ring_size,
1265  compression_val,
1266  input_srid_val,
1267  output_srid_val,
1268  args);
1269  } else {
  // Flattened form: the coords pair was already pushed above; the ring sizes
  // buffer and size follow, consuming two more ext_func_args slots.
1270  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1271  // Ring Sizes
1272  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 1]) > 0;
1273  auto [ring_size_buff, ring_size] =
1274  (const_arr) ? std::make_pair(orig_arg_lvs[k + 1],
1275  const_arr_size.at(orig_arg_lvs[k + 1]))
1276  : codegenArrayBuff(
1277  orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true);
1278  args.push_back(ring_size_buff);
1279  args.push_back(ring_size);
1280  j += 2;
1281  }
1282  break;
1283  }
1284  case kMULTIPOLYGON: {
1285  if (ext_func_arg == ExtArgumentType::GeoMultiPolygon) {
  // Struct form: coords, ring sizes (index k + 1) and polygon bounds (index k + 2).
1286  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1287  auto& builder = cgen_state_->ir_builder_;
1288  auto array_size_arg =
1289  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1290  auto compression_val = codegenCompression(arg_ti);
1291  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1292  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1293 
1294  auto [ring_size_buff, ring_size] =
1295  codegenArrayBuff(orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true);
1296 
1297  auto [poly_bounds_buff, poly_bounds_size] =
1298  codegenArrayBuff(orig_arg_lvs[k + 2], posArg(arg), SQLTypes::kINT, true);
1299  CHECK_EQ(k, ij);
1300  codegenGeoMultiPolygonArgs(ext_func_sig->getName(),
1301  ij + dj,
1302  array_buf_arg,
1303  array_size_arg,
1304  ring_size_buff,
1305  ring_size,
1306  poly_bounds_buff,
1307  poly_bounds_size,
1308  compression_val,
1309  input_srid_val,
1310  output_srid_val,
1311  args);
1312  } else {
  // Flattened form: the coords pair was already pushed above; ring sizes and
  // polygon bounds add two (buffer, size) pairs, consuming four more slots.
1313  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1314  // Ring Sizes
1315  {
1316  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 1]) > 0;
1317  auto [ring_size_buff, ring_size] =
1318  (const_arr)
1319  ? std::make_pair(orig_arg_lvs[k + 1],
1320  const_arr_size.at(orig_arg_lvs[k + 1]))
1321  : codegenArrayBuff(
1322  orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true);
1323 
1324  args.push_back(ring_size_buff);
1325  args.push_back(ring_size);
1326  }
1327  // Poly Rings
1328  {
1329  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 2]) > 0;
1330  auto [poly_bounds_buff, poly_bounds_size] =
1331  (const_arr)
1332  ? std::make_pair(orig_arg_lvs[k + 2],
1333  const_arr_size.at(orig_arg_lvs[k + 2]))
1334  : codegenArrayBuff(
1335  orig_arg_lvs[k + 2], posArg(arg), SQLTypes::kINT, true);
1336 
1337  args.push_back(poly_bounds_buff);
1338  args.push_back(poly_bounds_size);
1339  }
1340  j += 4;
1341  }
1342  break;
1343  }
1344  default:
  // Any other geometry type is rejected.
1345  CHECK(false);
1346  }
1347  } else {
  // Scalar operand: cast only when the SQL type tags differ, then verify that the
  // resulting LLVM type is the one the signature expects.
1348  CHECK(is_ext_arg_type_scalar(ext_func_arg));
1349  const auto arg_target_ti = ext_arg_type_to_type_info(ext_func_arg);
1350  if (arg_ti.get_type() != arg_target_ti.get_type()) {
1351  arg_lv = codegenCast(orig_arg_lvs[k], arg_ti, arg_target_ti, false, co);
1352  } else {
1353  arg_lv = orig_arg_lvs[k];
1354  }
1355  CHECK_EQ(arg_lv->getType(),
1356  ext_arg_type_to_llvm_type(ext_func_arg, cgen_state_->context_));
1357  args.push_back(arg_lv);
1358  }
1359  }
1360  return args;
1361 }
1362 
// Casts `ptr` to a pointer to the LLVM type matching the array element type
// `elem_ti`: float* for kFLOAT, double* for kDOUBLE, otherwise an integer pointer
// whose width is chosen from elem_ti.get_size() (1, 2, 4 or 8 bytes).
// Apart from float and double, only integer, boolean and dictionary-encoded string
// elements are accepted; anything else fails a CHECK.
1363 llvm::Value* CodeGenerator::castArrayPointer(llvm::Value* ptr,
1364  const SQLTypeInfo& elem_ti) {
1366  if (elem_ti.get_type() == kFLOAT) {
1367  return cgen_state_->ir_builder_.CreatePointerCast(
1368  ptr, llvm::Type::getFloatPtrTy(cgen_state_->context_));
1369  }
1370  if (elem_ti.get_type() == kDOUBLE) {
1371  return cgen_state_->ir_builder_.CreatePointerCast(
1372  ptr, llvm::Type::getDoublePtrTy(cgen_state_->context_));
1373  }
  // Everything past this point is addressed through an integer pointer.
1374  CHECK(elem_ti.is_integer() || elem_ti.is_boolean() ||
1375  (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT));
  // Choose the integer pointer width from the element size in bytes.
1376  switch (elem_ti.get_size()) {
1377  case 1:
1378  return cgen_state_->ir_builder_.CreatePointerCast(
1379  ptr, llvm::Type::getInt8PtrTy(cgen_state_->context_));
1380  case 2:
1381  return cgen_state_->ir_builder_.CreatePointerCast(
1382  ptr, llvm::Type::getInt16PtrTy(cgen_state_->context_));
1383  case 4:
1384  return cgen_state_->ir_builder_.CreatePointerCast(
1385  ptr, llvm::Type::getInt32PtrTy(cgen_state_->context_));
1386  case 8:
1387  return cgen_state_->ir_builder_.CreatePointerCast(
1388  ptr, llvm::Type::getInt64PtrTy(cgen_state_->context_));
1389  default:
  // Unsupported element size.
1390  CHECK(false);
1391  }
  // NOTE(review): presumably reached only if CHECK(false) returns; confirm whether
  // CHECK is fatal in all build modes.
1392  return nullptr;
1393 }
llvm::StructType * createLineStringStructType(const std::string &udf_func_name, size_t param_num)
void codegenGeoMultiPolygonArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *polygon_coords, llvm::Value *polygon_coords_size, llvm::Value *ring_sizes_buf, llvm::Value *ring_sizes, llvm::Value *polygon_bounds, llvm::Value *polygon_bounds_sizes, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define CHECK_EQ(x, y)
Definition: Logger.h:205
llvm::StructType * get_buffer_struct_type(CgenState *cgen_state, const std::string &ext_func_name, size_t param_num, llvm::Type *elem_type, bool has_is_null)
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
const std::vector< ExtArgumentType > & getArgs() const
std::string toString(const ExtArgumentType &sig_type)
llvm::BasicBlock * args_notnull_bb
size_t getArity() const
Definition: Analyzer.h:1360
SQLTypes
Definition: sqltypes.h:37
std::unique_ptr< llvm::Module > udf_gpu_module
bool is_ext_arg_type_geo(const ExtArgumentType ext_arg_type)
CgenState * cgen_state_
const ExtArgumentType getRet() const
void codegenGeoPolygonArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *polygon_buf, llvm::Value *polygon_size, llvm::Value *ring_sizes_buf, llvm::Value *num_rings, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define LOG(tag)
Definition: Logger.h:188
std::vector< llvm::Value * > codegenFunctionOperCastArgs(const Analyzer::FunctionOper *, const ExtensionFunction *, const std::vector< llvm::Value * > &, const std::vector< size_t > &, const std::unordered_map< llvm::Value *, llvm::Value * > &, const CompilationOptions &)
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Definition: CgenState.h:222
llvm::Value * codegenFunctionOperNullArg(const Analyzer::FunctionOper *, const std::vector< llvm::Value * > &)
llvm::IRBuilder ir_builder_
Definition: CgenState.h:329
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:512
llvm::Value * castArrayPointer(llvm::Value *ptr, const SQLTypeInfo &elem_ti)
#define UNREACHABLE()
Definition: Logger.h:241
#define CHECK_GE(x, y)
Definition: Logger.h:210
Definition: sqldefs.h:49
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
llvm::StructType * createPointStructType(const std::string &udf_func_name, size_t param_num)
bool call_requires_custom_type_handling(const Analyzer::FunctionOper *function_oper)
const std::string getName(bool keep_suffix=true) const
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool ext_func_call_requires_nullcheck(const Analyzer::FunctionOper *function_oper)
SQLTypeInfo get_sql_type_from_llvm_type(const llvm::Type *ll_type)
std::vector< FunctionOperValue > ext_call_cache_
Definition: CgenState.h:335
void codegenBufferArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *buffer_buf, llvm::Value *buffer_size, llvm::Value *buffer_is_null, std::vector< llvm::Value * > &output_args)
RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
std::pair< llvm::Value *, llvm::Value * > codegenArrayBuff(llvm::Value *chunk, llvm::Value *row_pos, SQLTypes array_type, bool cast_and_extend)
llvm::Module * module_
Definition: CgenState.h:318
llvm::LLVMContext & context_
Definition: CgenState.h:327
llvm::Function * current_func_
Definition: CgenState.h:321
std::tuple< ArgNullcheckBBs, llvm::Value * > beginArgsNullcheck(const Analyzer::FunctionOper *function_oper, const std::vector< llvm::Value * > &orig_arg_lvs)
bool is_integer() const
Definition: sqltypes.h:490
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:28
llvm::Value * codegenFunctionOper(const Analyzer::FunctionOper *, const CompilationOptions &)
llvm::Type * get_llvm_type_from_sql_array_type(const SQLTypeInfo ti, llvm::LLVMContext &ctx)
bool is_boolean() const
Definition: sqltypes.h:495
llvm::BasicBlock * args_null_bb
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Type * ext_arg_type_to_llvm_type(const ExtArgumentType ext_arg_type, llvm::LLVMContext &ctx)
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:174
bool is_buffer() const
Definition: sqltypes.h:506
ExecutorDeviceType device_type
void codegenGeoPointArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *point_buf, llvm::Value *point_size, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define RUNTIME_EXPORT
bool is_ext_arg_type_array(const ExtArgumentType ext_arg_type)
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:28
#define CHECK_LE(x, y)
Definition: Logger.h:208
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
std::string serialize_llvm_object(const T *llvm_obj)
bool is_ext_arg_type_scalar(const ExtArgumentType ext_arg_type)
llvm::StructType * createPolygonStructType(const std::string &udf_func_name, size_t param_num)
const Analyzer::Expr * getArg(const size_t i) const
Definition: Analyzer.h:1362
const Expr * get_operand() const
Definition: Analyzer.h:371
llvm::Value * endArgsNullcheck(const ArgNullcheckBBs &, llvm::Value *, llvm::Value *, const Analyzer::FunctionOper *)
std::unique_ptr< llvm::Module > udf_cpu_module
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:323
llvm::Value * codegenFunctionOperWithCustomTypeHandling(const Analyzer::FunctionOperWithCustomTypeHandling *, const CompilationOptions &)
bool is_bytes() const
Definition: sqltypes.h:503
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:300
#define CHECK(condition)
Definition: Logger.h:197
llvm::Value * codegenIsNullNumber(llvm::Value *, const SQLTypeInfo &)
Definition: LogicalIR.cpp:409
uint64_t exp_to_scale(const unsigned exp)
llvm::Value * codegenCompression(const SQLTypeInfo &type_info)
llvm::Value * codegenCast(const Analyzer::UOper *, const CompilationOptions &)
Definition: CastIR.cpp:20
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:178
Definition: sqltypes.h:44
bool is_string() const
Definition: sqltypes.h:488
std::string getName() const
Definition: Analyzer.h:1358
void codegenGeoLineStringArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *line_string_buf, llvm::Value *line_string_size, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:712
bool is_decimal() const
Definition: sqltypes.h:491
int get_physical_coord_cols() const
Definition: sqltypes.h:350
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)
Definition: CgenState.cpp:66
bool is_ext_arg_type_pointer(const ExtArgumentType ext_arg_type)
Executor * executor() const
llvm::StructType * createMultiPolygonStructType(const std::string &udf_func_name, size_t param_num)