OmniSciDB  bf83d84833
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ExtensionsIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Execute.h"
19 #include "ExtensionFunctions.hpp"
23 
24 #include <tuple>
25 
// Owning handles to LLVM modules, declared here and defined elsewhere.
// NOTE(review): going by the names these hold UDF code for the GPU and CPU targets
// respectively; neither is referenced in the visible part of this listing -- confirm usage.
26 extern std::unique_ptr<llvm::Module> udf_gpu_module;
27 extern std::unique_ptr<llvm::Module> udf_cpu_module;
28 
29 namespace {
30 
31 llvm::StructType* get_buffer_struct_type(CgenState* cgen_state,
32  const std::string& ext_func_name,
33  size_t param_num,
34  llvm::Type* elem_type,
35  bool has_is_null) {
36  CHECK(elem_type);
37  CHECK(elem_type->isPointerTy());
38  llvm::StructType* generated_struct_type =
39  (has_is_null ? llvm::StructType::get(cgen_state->context_,
40  {elem_type,
41  llvm::Type::getInt64Ty(cgen_state->context_),
42  llvm::Type::getInt8Ty(cgen_state->context_)},
43  false)
44  : llvm::StructType::get(
45  cgen_state->context_,
46  {elem_type, llvm::Type::getInt64Ty(cgen_state->context_)},
47  false));
48  llvm::Function* udf_func = cgen_state->module_->getFunction(ext_func_name);
49  if (udf_func) {
50  // Compare expected array struct type with type from the function
51  // definition from the UDF module, but use the type from the
52  // module
53  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
54  CHECK_LE(param_num, udf_func_type->getNumParams());
55  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
56  CHECK(param_pointer_type->isPointerTy());
57  llvm::Type* param_type = param_pointer_type->getPointerElementType();
58  CHECK(param_type->isStructTy());
59  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
60  CHECK_GE(struct_type->getStructNumElements(),
61  generated_struct_type->getStructNumElements())
62  << serialize_llvm_object(struct_type);
63 
64  const auto expected_elems = generated_struct_type->elements();
65  const auto current_elems = struct_type->elements();
66  for (size_t i = 0; i < expected_elems.size(); i++) {
67  CHECK_EQ(expected_elems[i], current_elems[i])
68  << "[" << ::toString(expected_elems[i]) << ", " << ::toString(current_elems[i])
69  << "]";
70  }
71 
72  if (struct_type->isLiteral()) {
73  return struct_type;
74  }
75 
76  llvm::StringRef struct_name = struct_type->getStructName();
77  return cgen_state->module_->getTypeByName(struct_name);
78  }
79  return generated_struct_type;
80 }
81 
// NOTE(review): the opening line of this definition (return type, name, first
// parameter) is absent from this listing -- the embedded numbering jumps from 81 to
// 83 -- and so are most `case` labels below (gaps at 86, 88, 90, ... and 98-112).
// Presumably this is the ext_arg_type_to_llvm_type helper called further down;
// confirm against the original file before editing.
// Visible behaviour: switches on `ext_arg_type` and returns an 8/16/32/64-bit integer,
// float, double or void LLVM type created in `ctx`. Any value without a case trips
// CHECK(false); the trailing `return nullptr` only follows a CHECK(false).
83  llvm::LLVMContext& ctx) {
84  switch (ext_arg_type) {
85  case ExtArgumentType::Bool: // pass thru to Int8
87  return get_int_type(8, ctx);
89  return get_int_type(16, ctx);
91  return get_int_type(32, ctx);
93  return get_int_type(64, ctx);
95  return llvm::Type::getFloatTy(ctx);
97  return llvm::Type::getDoubleTy(ctx);
113  return llvm::Type::getVoidTy(ctx);
114  default:
115  CHECK(false);
116  }
117  CHECK(false);
118  return nullptr;
119 }
120 
// NOTE(review): the signature line (embedded number 121) is absent from this listing.
// Presumably this is the get_sql_type_from_llvm_type helper called further down with
// an llvm::Type*; confirm against the original file.
// Visible behaviour: maps the LLVM type `ll_type` to a SQLTypeInfo using only its
// primitive bit width -- floating point 32/64 -> kFLOAT/kDOUBLE, anything else
// 1/8/16/32/64 -> kBOOLEAN/kTINYINT/kSMALLINT/kINT/kBIGINT. Other widths are logged
// as FATAL. The second constructor argument is always `false`.
122  CHECK(ll_type);
123  const auto bits = ll_type->getPrimitiveSizeInBits();
124 
125  if (ll_type->isFloatingPointTy()) {
126  switch (bits) {
127  case 32:
128  return SQLTypeInfo(kFLOAT, false);
129  case 64:
130  return SQLTypeInfo(kDOUBLE, false);
131  default:
132  LOG(FATAL) << "Unsupported llvm floating point type: " << bits
133  << ", only 32 and 64 bit floating point is supported.";
134  }
135  } else {
// Every non-floating-point type is classified by width alone.
136  switch (bits) {
137  case 1:
138  return SQLTypeInfo(kBOOLEAN, false);
139  case 8:
140  return SQLTypeInfo(kTINYINT, false);
141  case 16:
142  return SQLTypeInfo(kSMALLINT, false);
143  case 32:
144  return SQLTypeInfo(kINT, false);
145  case 64:
146  return SQLTypeInfo(kBIGINT, false);
147  default:
148  LOG(FATAL) << "Unrecognized llvm type for SQL type: "
149  << bits; // TODO let's get the real name here
150  }
151  }
152  UNREACHABLE();
153  return SQLTypeInfo();
154 }
155 
// NOTE(review): the opening line of this definition (embedded number 156) is absent
// from this listing, so its name and first parameter are not visible here.
// Visible behaviour: given a buffer type `ti`, returns the LLVM pointer type of its
// elements in `ctx` -- i8* for bytes; float*/double* for 4/8-byte floating point
// elements; i8* for boolean elements; i8*/i16*/i32*/i64* for 1/2/4/8-byte integers.
157  llvm::LLVMContext& ctx) {
158  CHECK(ti.is_buffer());
159  if (ti.is_bytes()) {
160  return llvm::Type::getInt8PtrTy(ctx);
161  }
162 
163  const auto& elem_ti = ti.get_elem_type();
164  if (elem_ti.is_fp()) {
165  switch (elem_ti.get_size()) {
166  case 4:
167  return llvm::Type::getFloatPtrTy(ctx);
168  case 8:
169  return llvm::Type::getDoublePtrTy(ctx);
170  }
// A floating point element of any other size falls through to the checks below
// and fails the is_integer() CHECK.
171  }
172 
173  if (elem_ti.is_boolean()) {
174  return llvm::Type::getInt8PtrTy(ctx);
175  }
176 
177  CHECK(elem_ti.is_integer());
178  switch (elem_ti.get_size()) {
179  case 1:
180  return llvm::Type::getInt8PtrTy(ctx);
181  case 2:
182  return llvm::Type::getInt16PtrTy(ctx);
183  case 4:
184  return llvm::Type::getInt32PtrTy(ctx);
185  case 8:
186  return llvm::Type::getInt64PtrTy(ctx);
187  }
188 
189  UNREACHABLE();
190  return nullptr;
191 }
192 
// NOTE(review): the signature line (embedded number 193) is absent from this listing.
// Presumably this is ext_func_call_requires_nullcheck, called further down with a
// FunctionOper pointer; confirm against the original file.
// Visible behaviour: walks the arguments of `function_oper` in order and decides on
// the first one that matches a rule:
//  - function and argument are both arrays, or both bytes -> false
//  - argument is nullable and is not a buffer             -> true
// If no argument matches either rule the result is false.
194  const auto& func_ti = function_oper->get_type_info();
195  for (size_t i = 0; i < function_oper->getArity(); ++i) {
196  const auto arg = function_oper->getArg(i);
197  const auto& arg_ti = arg->get_type_info();
198  if ((func_ti.is_array() && arg_ti.is_array()) ||
199  (func_ti.is_bytes() && arg_ti.is_bytes())) {
200  // If the function returns an array and any of the arguments are arrays, allow NULL
201  // scalars.
202  // TODO: Make this a property of the FunctionOper following `RETURN NULL ON NULL`
203  // semantics.
204  return false;
205  } else if (!arg_ti.get_notnull() && !arg_ti.is_buffer()) {
// The condition covers every nullable non-buffer argument, not only geometry.
206  // Nullable geometry args will trigger a null check
207  return true;
208  } else {
209  continue;
210  }
211  }
212  return false;
213 }
214 
215 } // namespace
216 
217 extern "C" void register_buffer_with_executor_rsm(int64_t exec, int8_t* buffer) {
218  Executor* exec_ptr = reinterpret_cast<Executor*>(exec);
219  if (buffer != nullptr) {
220  exec_ptr->getRowSetMemoryOwner()->addVarlenBuffer(buffer);
221  }
222 }
223 
// Generates the LLVM IR for a call to the extension function behind `function_oper`
// and returns the resulting value (for buffer returns: a pointer to the result struct).
// NOTE(review): several lines of this definition are absent from this listing -- the
// embedded numbering skips 224 (start of the signature), 227, 229, 334 and 346.
// The comments below describe only what is visible.
225  const Analyzer::FunctionOper* function_oper,
226  const CompilationOptions& co) {
// Bind the operator to a concrete extension function signature. The `if` selecting
// the first branch is one of the missing lines (229). In the first branch a binding
// error is logged and turned into QueryMustRunOnCpu; in the `else` branch it is
// logged and rethrown.
228  ExtensionFunction ext_func_sig = [=]() {
230  try {
231  return bind_function(function_oper, /* is_gpu= */ true);
232  } catch (ExtensionFunctionBindingError& e) {
233  LOG(WARNING) << "codegenFunctionOper[GPU]: " << e.what() << " Redirecting "
234  << function_oper->getName() << " to run on CPU.";
235  throw QueryMustRunOnCpu();
236  }
237  } else {
238  try {
239  return bind_function(function_oper, /* is_gpu= */ false);
240  } catch (ExtensionFunctionBindingError& e) {
241  LOG(WARNING) << "codegenFunctionOper[CPU]: " << e.what();
242  throw;
243  }
244  }
245  }();
246 
247  const auto& ret_ti = function_oper->get_type_info();
248  CHECK(ret_ti.is_integer() || ret_ti.is_fp() || ret_ti.is_boolean() ||
249  ret_ti.is_buffer());
250  if (ret_ti.is_buffer() && co.device_type == ExecutorDeviceType::GPU) {
251  // TODO: This is not necessary for runtime UDFs because RBC does
252  // not generated GPU LLVM IR when the UDF is using Buffer objects.
253  // However, we cannot remove it until C++ UDFs can be defined for
254  // different devices independently.
255  throw QueryMustRunOnCpu();
256  }
257 
258  auto ret_ty = ext_arg_type_to_llvm_type(ext_func_sig.getRet(), cgen_state_->context_);
259  const auto current_bb = cgen_state_->ir_builder_.GetInsertBlock();
// Reuse an earlier call for an equal FunctionOper, but only when the cached value
// is an instruction that lives in the current basic block.
260  for (auto it : cgen_state_->ext_call_cache_) {
261  if (*it.foper == *function_oper) {
262  auto inst = llvm::dyn_cast<llvm::Instruction>(it.lv);
263  if (inst && inst->getParent() == current_bb) {
264  return it.lv;
265  }
266  }
267  }
// orig_arg_lvs:       flattened argument values, in argument order
// orig_arg_lvs_index: for each argument, the index of its first value in orig_arg_lvs
// const_arr_size:     array pointer value -> size value, where the size is known here
268  std::vector<llvm::Value*> orig_arg_lvs;
269  std::vector<size_t> orig_arg_lvs_index;
270  std::unordered_map<llvm::Value*, llvm::Value*> const_arr_size;
271 
272  for (size_t i = 0; i < function_oper->getArity(); ++i) {
273  orig_arg_lvs_index.push_back(orig_arg_lvs.size());
274  const auto arg = function_oper->getArg(i);
// Look through a CAST wrapper to see whether the underlying expression is an ArrayExpr.
275  const auto arg_cast = dynamic_cast<const Analyzer::UOper*>(arg);
276  const auto arg0 =
277  (arg_cast && arg_cast->get_optype() == kCAST) ? arg_cast->get_operand() : arg;
278  const auto array_expr_arg = dynamic_cast<const Analyzer::ArrayExpr*>(arg0);
279  auto is_local_alloc =
280  ret_ti.is_buffer() || (array_expr_arg && array_expr_arg->isLocalAlloc());
281  const auto& arg_ti = arg->get_type_info();
282  const auto arg_lvs = codegen(arg, true, co);
283  auto geo_uoper_arg = dynamic_cast<const Analyzer::GeoUOper*>(arg);
284  auto geo_binoper_arg = dynamic_cast<const Analyzer::GeoBinOper*>(arg);
285  // TODO(adb / d): Assuming no const array cols for geo (for now)
286  if ((geo_uoper_arg || geo_binoper_arg) && arg_ti.is_geometry()) {
287  // Extract arr sizes and put them in the map, forward arr pointers
288  CHECK_EQ(2 * static_cast<size_t>(arg_ti.get_physical_coord_cols()), arg_lvs.size());
// arg_lvs holds (array, size) pairs: the i++ in the body plus the loop's own i++
// step through it two entries at a time. This `i` shadows the outer argument index.
289  for (size_t i = 0; i < arg_lvs.size(); i++) {
290  auto arr = arg_lvs[i++];
291  auto size = arg_lvs[i];
292  orig_arg_lvs.push_back(arr);
293  const_arr_size[arr] = size;
294  }
295  } else if (arg_ti.is_geometry()) {
296  CHECK_EQ(static_cast<size_t>(arg_ti.get_physical_coord_cols()), arg_lvs.size());
297  for (size_t j = 0; j < arg_lvs.size(); j++) {
298  orig_arg_lvs.push_back(arg_lvs[j]);
299  }
300  } else if (arg_ti.is_bytes()) {
301  CHECK_EQ(size_t(3), arg_lvs.size());
302  /* arg_lvs contains:
303  c = string_decode(&col_buf0, pos)
304  ptr = extract_str_ptr(c)
305  sz = extract_str_len(c)
306  */
307  for (size_t j = 0; j < arg_lvs.size(); j++) {
308  orig_arg_lvs.push_back(arg_lvs[j]);
309  }
310  } else {
311  if (arg_lvs.size() > 1) {
// Two values for an array argument: front is the array, back is its size.
312  CHECK(arg_ti.is_array());
313  CHECK_EQ(size_t(2), arg_lvs.size());
314  const_arr_size[arg_lvs.front()] = arg_lvs.back();
315  } else {
316  CHECK_EQ(size_t(1), arg_lvs.size());
317  /* arg_lvs contains:
318  &col_buf1
319  */
320  if (is_local_alloc && arg_ti.get_size() > 0) {
321  const_arr_size[arg_lvs.front()] = cgen_state_->llInt(arg_ti.get_size());
322  }
323  }
324  orig_arg_lvs.push_back(arg_lvs.front());
325  }
326  }
327  // The extension function implementations don't handle NULL, they work under
328  // the assumption that the inputs are validated before calling them. Generate
329  // code to do the check at the call site: if any argument is NULL, return NULL
330  // without calling the function at all.
331  const auto [bbs, null_buffer_ptr] = beginArgsNullcheck(function_oper, orig_arg_lvs);
332  CHECK_GE(orig_arg_lvs.size(), function_oper->getArity());
333  // Arguments must be converted to the types the extension function can handle.
// NOTE(review): line 334, which starts the statement declaring `args`, is missing
// here; only its argument list follows.
335  function_oper, &ext_func_sig, orig_arg_lvs, orig_arg_lvs_index, const_arr_size, co);
336 
337  llvm::Value* buffer_ret{nullptr};
338  if (ret_ti.is_buffer()) {
339  // codegen buffer return as first arg
340  CHECK(ret_ti.is_array() || ret_ti.is_bytes());
// The call itself returns void; the result is written through the struct pointer
// that is prepended to the argument list.
341  ret_ty = llvm::Type::getVoidTy(cgen_state_->context_);
342  const auto struct_ty = get_buffer_struct_type(
343  cgen_state_,
344  function_oper->getName(),
345  0,
// NOTE(review): line 346 (the elem_type argument) is missing from this listing.
347  /* has_is_null = */ ret_ti.is_array() || ret_ti.is_bytes());
348  buffer_ret = cgen_state_->ir_builder_.CreateAlloca(struct_ty);
349  args.insert(args.begin(), buffer_ret);
350  }
351 
352  const auto ext_call = cgen_state_->emitExternalCall(
353  ext_func_sig.getName(), ret_ty, args, {}, ret_ti.is_buffer());
354  auto ext_call_nullcheck = endArgsNullcheck(
355  bbs, ret_ti.is_buffer() ? buffer_ret : ext_call, null_buffer_ptr, function_oper);
356 
357  // Cast the return of the extension function to match the FunctionOper
358  if (!(ret_ti.is_buffer())) {
359  const auto extension_ret_ti = get_sql_type_from_llvm_type(ret_ty);
// The cast is only emitted when a null check was generated (bbs.args_null_bb is set).
360  if (bbs.args_null_bb &&
361  extension_ret_ti.get_type() != function_oper->get_type_info().get_type() &&
362  // Skip i1-->i8 casts for ST_ functions.
363  // function_oper ret type is i1, extension ret type is 'upgraded' to i8
364  // during type deserialization to 'handle' NULL returns, hence i1-->i8.
365  // ST_ functions can't return NULLs, we just need to check arg nullness
366  // and if any args are NULL then ST_ function is not called
367  function_oper->getName().substr(0, 3) != std::string("ST_")) {
368  ext_call_nullcheck = codegenCast(ext_call_nullcheck,
369  extension_ret_ti,
370  function_oper->get_type_info(),
371  false,
372  co);
373  }
374  }
375 
// Remember the result so a later equal FunctionOper can reuse it (see the lookup above).
376  cgen_state_->ext_call_cache_.push_back({function_oper, ext_call_nullcheck});
377  return ext_call_nullcheck;
378 }
379 
380 // Start the control flow needed for a call site check of NULL arguments.
// Returns the basic blocks of the check together with an alloca for a "null" buffer
// struct. The alloca is null unless a check is generated and the function returns a
// buffer. When no check is required both args_null_bb and args_notnull_bb stay null
// and the insert point is left unchanged.
// NOTE(review): lines 382 (function name and first parameter), 384, 397 and 406 are
// absent from this listing; presumably this is CodeGenerator::beginArgsNullcheck.
381 std::tuple<CodeGenerator::ArgNullcheckBBs, llvm::Value*>
383  const std::vector<llvm::Value*>& orig_arg_lvs) {
385  llvm::BasicBlock* args_null_bb{nullptr};
386  llvm::BasicBlock* args_notnull_bb{nullptr};
387  llvm::BasicBlock* orig_bb = cgen_state_->ir_builder_.GetInsertBlock();
388  llvm::Value* null_array_alloca{nullptr};
389  // Only generate the check if required (at least one argument must be nullable).
390  if (ext_func_call_requires_nullcheck(function_oper)) {
391  const auto func_ti = function_oper->get_type_info();
392  if (func_ti.is_buffer()) {
393  const auto arr_struct_ty = get_buffer_struct_type(
394  cgen_state_,
395  function_oper->getName(),
396  0,
// NOTE(review): line 397 (the elem_type argument) is missing from this listing.
398  func_ti.is_array() || func_ti.is_bytes());
399  null_array_alloca = cgen_state_->ir_builder_.CreateAlloca(arr_struct_ty);
400  }
// Branch to args_notnull when no argument is NULL, to args_null otherwise, and
// continue emitting code in args_notnull.
401  const auto args_notnull_lv = cgen_state_->ir_builder_.CreateNot(
402  codegenFunctionOperNullArg(function_oper, orig_arg_lvs));
403  args_notnull_bb = llvm::BasicBlock::Create(
404  cgen_state_->context_, "args_notnull", cgen_state_->current_func_);
405  args_null_bb = llvm::BasicBlock::Create(
// NOTE(review): line 406 (the arguments of this Create call) is missing from this listing.
407  cgen_state_->ir_builder_.CreateCondBr(args_notnull_lv, args_notnull_bb, args_null_bb);
408  cgen_state_->ir_builder_.SetInsertPoint(args_notnull_bb);
409  }
410  return std::make_tuple(
411  CodeGenerator::ArgNullcheckBBs{args_null_bb, args_notnull_bb, orig_bb},
412  null_array_alloca);
413 }
414 
415 // Wrap up the control flow needed for NULL argument handling.
// When a null check was started (bbs.args_null_bb is set) this branches from the
// current block into args_null_bb and returns a PHI there that selects between the
// function result `fn_ret_lv` and a NULL value. Otherwise `fn_ret_lv` is returned as is.
// NOTE(review): lines 416 (start of the signature), 421 and 449 are absent from this
// listing; presumably this is CodeGenerator::endArgsNullcheck.
417  const ArgNullcheckBBs& bbs,
418  llvm::Value* fn_ret_lv,
419  llvm::Value* null_array_ptr,
420  const Analyzer::FunctionOper* function_oper) {
422  if (bbs.args_null_bb) {
423  CHECK(bbs.args_notnull_bb);
424  cgen_state_->ir_builder_.CreateBr(bbs.args_null_bb);
425  cgen_state_->ir_builder_.SetInsertPoint(bbs.args_null_bb);
426 
427  llvm::PHINode* ext_call_phi{nullptr};
428  llvm::Value* null_lv{nullptr};
429  const auto func_ti = function_oper->get_type_info();
430  if (!func_ti.is_buffer()) {
431  // The pre-cast SQL equivalent of the type returned by the extension function.
432  const auto extension_ret_ti = get_sql_type_from_llvm_type(fn_ret_lv->getType());
433 
// Scalar result: the PHI type and the inline NULL constant are both derived from
// the SQL type of the value the function returned.
434  ext_call_phi = cgen_state_->ir_builder_.CreatePHI(
435  extension_ret_ti.is_fp()
436  ? get_fp_type(extension_ret_ti.get_size() * 8, cgen_state_->context_)
437  : get_int_type(extension_ret_ti.get_size() * 8, cgen_state_->context_),
438  2);
439 
440  null_lv =
441  extension_ret_ti.is_fp()
442  ? static_cast<llvm::Value*>(cgen_state_->inlineFpNull(extension_ret_ti))
443  : static_cast<llvm::Value*>(cgen_state_->inlineIntNull(extension_ret_ti));
444  } else {
445  const auto arr_struct_ty = get_buffer_struct_type(
446  cgen_state_,
447  function_oper->getName(),
448  0,
// NOTE(review): line 449 (the elem_type argument) is missing from this listing.
450  true);
451  ext_call_phi =
452  cgen_state_->ir_builder_.CreatePHI(llvm::PointerType::get(arr_struct_ty, 0), 2);
453 
// Buffer result: mark the pre-allocated null struct as NULL (member 2 = 1) with
// size 0 (member 1 = 0).
// NOTE(review): the constant stored into member 2 is built with get_int_type(1, ...),
// while get_buffer_struct_type lays that member out as i8 -- confirm the widths agree.
454  CHECK(null_array_ptr);
455  const auto arr_null_bool =
456  cgen_state_->ir_builder_.CreateStructGEP(arr_struct_ty, null_array_ptr, 2);
457  cgen_state_->ir_builder_.CreateStore(
458  llvm::ConstantInt::get(get_int_type(1, cgen_state_->context_), 1),
459  arr_null_bool);
460 
461  const auto arr_null_size =
462  cgen_state_->ir_builder_.CreateStructGEP(arr_struct_ty, null_array_ptr, 1);
463  cgen_state_->ir_builder_.CreateStore(
464  llvm::ConstantInt::get(get_int_type(64, cgen_state_->context_), 0),
465  arr_null_size);
466  }
// Incoming from args_notnull: the real result. Incoming from orig_bb (the block
// recorded before the check was started): the NULL value.
467  ext_call_phi->addIncoming(fn_ret_lv, bbs.args_notnull_bb);
468  ext_call_phi->addIncoming(func_ti.is_buffer() ? null_array_ptr : null_lv,
469  bbs.orig_bb);
470 
471  return ext_call_phi;
472  }
473  return fn_ret_lv;
474 }
475 
476 namespace {
477 
// NOTE(review): the signature line (embedded number 478) is absent from this listing.
// Presumably this is call_requires_custom_type_handling, used further down with a
// FunctionOper pointer; confirm against the original file.
// Visible behaviour: returns true when the return type, or the type of any argument,
// is neither integer nor floating point; false otherwise.
479  const auto& ret_ti = function_oper->get_type_info();
480  if (!ret_ti.is_integer() && !ret_ti.is_fp()) {
481  return true;
482  }
483  for (size_t i = 0; i < function_oper->getArity(); ++i) {
484  const auto arg = function_oper->getArg(i);
485  const auto& arg_ti = arg->get_type_info();
486  if (!arg_ti.is_integer() && !arg_ti.is_fp()) {
487  return true;
488  }
489  }
490  return false;
491 }
492 
493 } // namespace
494 
// Code generation entry point for function operators whose types need special
// treatment. Operators that pass call_requires_custom_type_handling are handled here
// for FLOOR/CEIL and decimal ROUND, and rejected with std::runtime_error otherwise.
// Everything else is forwarded to codegenFunctionOper.
// NOTE(review): lines 495-496 (start of the signature), 498, 510, 542 and 551 are
// absent from this listing, so the function name and the declarations of `bbs` and
// `bbs0` are not visible here.
497  const CompilationOptions& co) {
499  if (call_requires_custom_type_handling(function_oper)) {
500  // Some functions need the return type to be the same as the input type.
501  if (function_oper->getName() == "FLOOR" || function_oper->getName() == "CEIL") {
502  CHECK_EQ(size_t(1), function_oper->getArity());
503  const auto arg = function_oper->getArg(0);
504  const auto& arg_ti = arg->get_type_info();
505  CHECK(arg_ti.is_decimal());
506  const auto arg_lvs = codegen(arg, true, co);
507  CHECK_EQ(size_t(1), arg_lvs.size());
508  const auto arg_lv = arg_lvs.front();
509  CHECK(arg_lv->getType()->isIntegerTy(64));
511  std::tie(bbs, std::ignore) = beginArgsNullcheck(function_oper, {arg_lvs});
512  const std::string func_name =
513  (function_oper->getName() == "FLOOR") ? "decimal_floor" : "decimal_ceil";
// The helper gets the decimal value and exp_to_scale(scale); its result is then
// divided by the same factor, matching the CHECK below that the return scale is 0.
514  const auto covar_result_lv = cgen_state_->emitCall(
515  func_name, {arg_lv, cgen_state_->llInt(exp_to_scale(arg_ti.get_scale()))});
516  const auto ret_ti = function_oper->get_type_info();
517  CHECK(ret_ti.is_decimal());
518  CHECK_EQ(0, ret_ti.get_scale());
519  const auto result_lv = cgen_state_->ir_builder_.CreateSDiv(
520  covar_result_lv, cgen_state_->llInt(exp_to_scale(arg_ti.get_scale())));
521  return endArgsNullcheck(bbs, result_lv, nullptr, function_oper);
522  } else if (function_oper->getName() == "ROUND" &&
523  function_oper->getArg(0)->get_type_info().is_decimal()) {
524  CHECK_EQ(size_t(2), function_oper->getArity());
525 
526  const auto arg0 = function_oper->getArg(0);
527  const auto& arg0_ti = arg0->get_type_info();
528  const auto arg0_lvs = codegen(arg0, true, co);
529  CHECK_EQ(size_t(1), arg0_lvs.size());
530  const auto arg0_lv = arg0_lvs.front();
531  CHECK(arg0_lv->getType()->isIntegerTy(64));
532 
533  const auto arg1 = function_oper->getArg(1);
534  const auto& arg1_ti = arg1->get_type_info();
535  CHECK(arg1_ti.is_integer());
536  const auto arg1_lvs = codegen(arg1, true, co);
537  auto arg1_lv = arg1_lvs.front();
// The second argument is passed to the call as kINT.
538  if (arg1_ti.get_type() != kINT) {
539  arg1_lv = codegenCast(arg1_lv, arg1_ti, SQLTypeInfo(kINT, true), false, co);
540  }
541 
// The null check is given the second argument as generated (arg1_lvs.front()),
// i.e. before the cast to kINT above.
543  std::tie(bbs0, std::ignore) =
544  beginArgsNullcheck(function_oper, {arg0_lv, arg1_lvs.front()});
545 
546  const std::string func_name = "Round__4";
547  const auto ret_ti = function_oper->get_type_info();
548  CHECK(ret_ti.is_decimal());
549  const auto result_lv = cgen_state_->emitExternalCall(
550  func_name,
// NOTE(review): line 551 (the return type argument) is missing from this listing.
552  {arg0_lv, arg1_lv, cgen_state_->llInt(arg0_ti.get_scale())});
553 
554  return endArgsNullcheck(bbs0, result_lv, nullptr, function_oper);
555  }
556  throw std::runtime_error("Type combination not supported for function " +
557  function_oper->getName());
558  }
559  return codegenFunctionOper(function_oper, co);
560 }
561 
562 // Generates code which returns true iff at least one of the arguments is NULL.
// The result is an i1 value built by OR-ing one null test per nullable argument;
// arguments marked notnull contribute nothing.
// NOTE(review): lines 563 (start of the signature) and 566 are absent from this
// listing; presumably this is CodeGenerator::codegenFunctionOperNullArg.
564  const Analyzer::FunctionOper* function_oper,
565  const std::vector<llvm::Value*>& orig_arg_lvs) {
567  llvm::Value* one_arg_null =
568  llvm::ConstantInt::get(llvm::IntegerType::getInt1Ty(cgen_state_->context_), false);
569  size_t physical_coord_cols = 0;
// i walks the operator's arguments, j walks orig_arg_lvs: after each argument j
// advances by that argument's physical coord column count, or by 1 if that is 0.
570  for (size_t i = 0, j = 0; i < function_oper->getArity();
571  ++i, j += std::max(size_t(1), physical_coord_cols)) {
572  const auto arg = function_oper->getArg(i);
573  const auto& arg_ti = arg->get_type_info();
574  physical_coord_cols = arg_ti.get_physical_coord_cols();
575  if (arg_ti.get_notnull()) {
576  continue;
577  }
578 #ifdef ENABLE_GEOS
579  // If geo arg is coming from geos, skip the null check, assume it's a valid geo
580  if (arg_ti.is_geometry()) {
// NOTE(review): this indexes orig_arg_lvs with the argument index `i`, whereas the
// rest of the loop uses the flattened index `j` -- confirm which one is intended.
581  auto* coords_load = llvm::dyn_cast<llvm::LoadInst>(orig_arg_lvs[i]);
582  if (coords_load) {
583  continue;
584  }
585  }
586 #endif
587  if (arg_ti.is_buffer() || arg_ti.is_geometry()) {
588  // POINT [un]compressed coord check requires custom checker and chunk iterator
589  // Non-POINT NULL geographies will have a normally encoded null coord array
590  auto fname =
591  (arg_ti.get_type() == kPOINT) ? "point_coord_array_is_null" : "array_is_null";
592  auto is_null_lv = cgen_state_->emitExternalCall(
593  fname, get_int_type(1, cgen_state_->context_), {orig_arg_lvs[j], posArg(arg)});
594  one_arg_null = cgen_state_->ir_builder_.CreateOr(one_arg_null, is_null_lv);
595  continue;
596  }
// Anything left must be a number or boolean and is tested with codegenIsNullNumber.
597  CHECK(arg_ti.is_number() or arg_ti.is_boolean());
598  one_arg_null = cgen_state_->ir_builder_.CreateOr(
599  one_arg_null, codegenIsNullNumber(orig_arg_lvs[j], arg_ti));
600  }
601  return one_arg_null;
602 }
603 
// Returns an integer constant describing the compression of `type_info`:
// 1 when the compression is kENCODING_GEOINT with comp param 32, 0 otherwise.
// NOTE(review): the embedded numbering skips 605, so one line at the top of the body
// appears to be missing from this listing.
604 llvm::Value* CodeGenerator::codegenCompression(const SQLTypeInfo& type_info) {
606  int32_t compression = (type_info.get_compression() == kENCODING_GEOINT &&
607  type_info.get_comp_param() == 32)
608  ? 1
609  : 0;
610 
611  return cgen_state_->llInt(compression);
612 }
613 
// Emits calls to the external functions "array_buff" and "array_size" for the array
// at (`chunk`, `row_pos`) and returns the resulting {buffer pointer, length} pair.
// `array_type` is the element type of the array. With `cast_and_extend` set, the
// buffer pointer is passed through castArrayPointer for that element type and the
// 32-bit length is zero-extended to 64 bits.
// NOTE(review): the embedded numbering skips 619, so one line at the top of the body
// appears to be missing from this listing.
614 std::pair<llvm::Value*, llvm::Value*> CodeGenerator::codegenArrayBuff(
615  llvm::Value* chunk,
616  llvm::Value* row_pos,
617  SQLTypes array_type,
618  bool cast_and_extend) {
// Build a throwaway kARRAY type with `array_type` as subtype to obtain the element type info.
620  const auto elem_ti =
621  SQLTypeInfo(
622  SQLTypes::kARRAY, 0, 0, false, EncodingType::kENCODING_NONE, 0, array_type)
623  .get_elem_type();
624 
625  auto buff = cgen_state_->emitExternalCall(
626  "array_buff", llvm::Type::getInt32PtrTy(cgen_state_->context_), {chunk, row_pos});
627 
// The third argument is log2 of the element's logical size in bytes.
628  auto len = cgen_state_->emitExternalCall(
629  "array_size",
630  get_int_type(32, cgen_state_->context_),
631  {chunk, row_pos, cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
632 
633  if (cast_and_extend) {
634  buff = castArrayPointer(buff, elem_ti);
635  len =
636  cgen_state_->ir_builder_.CreateZExt(len, get_int_type(64, cgen_state_->context_));
637  }
638 
639  return std::make_pair(buff, len);
640 }
641 
// Packs a buffer argument into a struct for parameter `param_num` of `ext_func_name`:
// allocates the struct with alloca, stores `buffer_buf` in member 0 and `buffer_size`
// in member 1, and -- only when `buffer_null` is non-null -- stores it zero-extended
// to i8 in member 2. The pointer to the struct is appended to `output_args`.
// NOTE(review): the embedded numbering skips 648, so one line at the top of the body
// appears to be missing from this listing.
642 void CodeGenerator::codegenBufferArgs(const std::string& ext_func_name,
643  size_t param_num,
644  llvm::Value* buffer_buf,
645  llvm::Value* buffer_size,
646  llvm::Value* buffer_null,
647  std::vector<llvm::Value*>& output_args) {
649  CHECK(buffer_buf);
650  CHECK(buffer_size);
651 
// The struct has the is-null member exactly when a null flag value was supplied.
652  auto buffer_abstraction = get_buffer_struct_type(
653  cgen_state_, ext_func_name, param_num, buffer_buf->getType(), !!(buffer_null));
654  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(buffer_abstraction);
655 
656  auto buffer_buf_ptr =
657  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 0);
658  cgen_state_->ir_builder_.CreateStore(buffer_buf, buffer_buf_ptr);
659 
660  auto buffer_size_ptr =
661  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 1);
662  cgen_state_->ir_builder_.CreateStore(buffer_size, buffer_size_ptr);
663 
664  if (buffer_null) {
665  auto bool_extended_type = llvm::Type::getInt8Ty(cgen_state_->context_);
666  auto buffer_null_extended =
667  cgen_state_->ir_builder_.CreateZExt(buffer_null, bool_extended_type);
668  auto buffer_is_null_ptr =
669  cgen_state_->ir_builder_.CreateStructGEP(buffer_abstraction, alloc_mem, 2);
670  cgen_state_->ir_builder_.CreateStore(buffer_null_extended, buffer_is_null_ptr);
671  }
672  output_args.push_back(alloc_mem);
673 }
674 
675 llvm::StructType* CodeGenerator::createPointStructType(const std::string& udf_func_name,
676  size_t param_num) {
677  llvm::Module* module_for_lookup = cgen_state_->module_;
678  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
679 
680  llvm::StructType* generated_struct_type =
681  llvm::StructType::get(cgen_state_->context_,
682  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
683  llvm::Type::getInt64Ty(cgen_state_->context_),
684  llvm::Type::getInt32Ty(cgen_state_->context_),
685  llvm::Type::getInt32Ty(cgen_state_->context_),
686  llvm::Type::getInt32Ty(cgen_state_->context_)},
687  false);
688 
689  if (udf_func) {
690  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
691  CHECK(param_num < udf_func_type->getNumParams());
692  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
693  CHECK(param_pointer_type->isPointerTy());
694  llvm::Type* param_type = param_pointer_type->getPointerElementType();
695  CHECK(param_type->isStructTy());
696  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
697  CHECK(struct_type->getStructNumElements() == 5) << serialize_llvm_object(struct_type);
698  const auto expected_elems = generated_struct_type->elements();
699  const auto current_elems = struct_type->elements();
700  for (size_t i = 0; i < expected_elems.size(); i++) {
701  CHECK_EQ(expected_elems[i], current_elems[i]);
702  }
703  if (struct_type->isLiteral()) {
704  return struct_type;
705  }
706 
707  llvm::StringRef struct_name = struct_type->getStructName();
708  llvm::StructType* point_type = module_for_lookup->getTypeByName(struct_name);
709  CHECK(point_type);
710 
711  return (point_type);
712  }
713  return generated_struct_type;
714 }
715 
// Packs a geo point argument into the struct type from createPointStructType for
// parameter `param_num` of `udf_func_name`: allocates the struct with alloca, stores
// the five values in members 0-4 in the order buffer, size, compression, input SRID,
// output SRID, and appends the pointer to the struct to `output_args`.
// All five values must be non-null.
// NOTE(review): the embedded numbering skips 724, so one line at the top of the body
// appears to be missing from this listing.
716 void CodeGenerator::codegenGeoPointArgs(const std::string& udf_func_name,
717  size_t param_num,
718  llvm::Value* point_buf,
719  llvm::Value* point_size,
720  llvm::Value* compression,
721  llvm::Value* input_srid,
722  llvm::Value* output_srid,
723  std::vector<llvm::Value*>& output_args) {
725  CHECK(point_buf);
726  CHECK(point_size);
727  CHECK(compression);
728  CHECK(input_srid);
729  CHECK(output_srid);
730 
731  auto point_abstraction = createPointStructType(udf_func_name, param_num);
732  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(point_abstraction, nullptr);
733 
734  auto point_buf_ptr =
735  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 0);
736  cgen_state_->ir_builder_.CreateStore(point_buf, point_buf_ptr);
737 
738  auto point_size_ptr =
739  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 1);
740  cgen_state_->ir_builder_.CreateStore(point_size, point_size_ptr);
741 
742  auto point_compression_ptr =
743  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 2);
744  cgen_state_->ir_builder_.CreateStore(compression, point_compression_ptr);
745 
746  auto input_srid_ptr =
747  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 3);
748  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
749 
750  auto output_srid_ptr =
751  cgen_state_->ir_builder_.CreateStructGEP(point_abstraction, alloc_mem, 4);
752  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
753 
754  output_args.push_back(alloc_mem);
755 }
756 
// NOTE(review): the first line of this definition (embedded number 757: return type
// and name) is absent from this listing. Presumably this is
// CodeGenerator::createLineStringStructType, called further down; confirm against
// the original file.
// Visible behaviour: returns the struct type used to pass a line string to
// `udf_func_name`. The expected layout is {i8*, i64, i32, i32, i32}. When the module
// has a function of that name, the struct behind its pointer parameter `param_num`
// must have exactly that layout and is returned instead -- directly if literal,
// otherwise via a lookup of its name in the module.
758  const std::string& udf_func_name,
759  size_t param_num) {
760  llvm::Module* module_for_lookup = cgen_state_->module_;
761  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
762 
763  llvm::StructType* generated_struct_type =
764  llvm::StructType::get(cgen_state_->context_,
765  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
766  llvm::Type::getInt64Ty(cgen_state_->context_),
767  llvm::Type::getInt32Ty(cgen_state_->context_),
768  llvm::Type::getInt32Ty(cgen_state_->context_),
769  llvm::Type::getInt32Ty(cgen_state_->context_)},
770  false);
771 
772  if (udf_func) {
773  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
774  CHECK(param_num < udf_func_type->getNumParams());
775  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
776  CHECK(param_pointer_type->isPointerTy());
777  llvm::Type* param_type = param_pointer_type->getPointerElementType();
778  CHECK(param_type->isStructTy());
779  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
// This repeats the isStructTy() check already made on param_type above.
780  CHECK(struct_type->isStructTy());
781  CHECK(struct_type->getStructNumElements() == 5);
782 
// Member-by-member comparison against the generated layout.
783  const auto expected_elems = generated_struct_type->elements();
784  const auto current_elems = struct_type->elements();
785  for (size_t i = 0; i < expected_elems.size(); i++) {
786  CHECK_EQ(expected_elems[i], current_elems[i]);
787  }
788  if (struct_type->isLiteral()) {
789  return struct_type;
790  }
791 
792  llvm::StringRef struct_name = struct_type->getStructName();
793  llvm::StructType* line_string_type = module_for_lookup->getTypeByName(struct_name);
794  CHECK(line_string_type);
795 
796  return (line_string_type);
797  }
798  return generated_struct_type;
799 }
800 
// Packs a line string argument into the struct type from createLineStringStructType
// for parameter `param_num` of `udf_func_name`: allocates the struct with alloca,
// stores the five values in members 0-4 in the order buffer, size, compression,
// input SRID, output SRID, and appends the pointer to the struct to `output_args`.
// All five values must be non-null.
// NOTE(review): the embedded numbering skips 809, so one line at the top of the body
// appears to be missing from this listing.
801 void CodeGenerator::codegenGeoLineStringArgs(const std::string& udf_func_name,
802  size_t param_num,
803  llvm::Value* line_string_buf,
804  llvm::Value* line_string_size,
805  llvm::Value* compression,
806  llvm::Value* input_srid,
807  llvm::Value* output_srid,
808  std::vector<llvm::Value*>& output_args) {
810  CHECK(line_string_buf);
811  CHECK(line_string_size);
812  CHECK(compression);
813  CHECK(input_srid);
814  CHECK(output_srid);
815 
816  auto line_string_abstraction = createLineStringStructType(udf_func_name, param_num);
817  auto alloc_mem =
818  cgen_state_->ir_builder_.CreateAlloca(line_string_abstraction, nullptr);
819 
820  auto line_string_buf_ptr =
821  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 0);
822  cgen_state_->ir_builder_.CreateStore(line_string_buf, line_string_buf_ptr);
823 
824  auto line_string_size_ptr =
825  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 1);
826  cgen_state_->ir_builder_.CreateStore(line_string_size, line_string_size_ptr);
827 
828  auto line_string_compression_ptr =
829  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 2);
830  cgen_state_->ir_builder_.CreateStore(compression, line_string_compression_ptr);
831 
832  auto input_srid_ptr =
833  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 3);
834  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
835 
836  auto output_srid_ptr =
837  cgen_state_->ir_builder_.CreateStructGEP(line_string_abstraction, alloc_mem, 4);
838  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
839 
840  output_args.push_back(alloc_mem);
841 }
842 
843 llvm::StructType* CodeGenerator::createPolygonStructType(const std::string& udf_func_name,
844  size_t param_num) {
845  llvm::Module* module_for_lookup = cgen_state_->module_;
846  llvm::Function* udf_func = module_for_lookup->getFunction(udf_func_name);
847 
848  llvm::StructType* generated_struct_type =
849  llvm::StructType::get(cgen_state_->context_,
850  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
851  llvm::Type::getInt64Ty(cgen_state_->context_),
852  llvm::Type::getInt32PtrTy(cgen_state_->context_),
853  llvm::Type::getInt64Ty(cgen_state_->context_),
854  llvm::Type::getInt32Ty(cgen_state_->context_),
855  llvm::Type::getInt32Ty(cgen_state_->context_),
856  llvm::Type::getInt32Ty(cgen_state_->context_)},
857  false);
858 
859  if (udf_func) {
860  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
861  CHECK(param_num < udf_func_type->getNumParams());
862  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
863  CHECK(param_pointer_type->isPointerTy());
864  llvm::Type* param_type = param_pointer_type->getPointerElementType();
865  CHECK(param_type->isStructTy());
866  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
867 
868  CHECK(struct_type->isStructTy());
869  CHECK(struct_type->getStructNumElements() == 7);
870 
871  const auto expected_elems = generated_struct_type->elements();
872  const auto current_elems = struct_type->elements();
873  for (size_t i = 0; i < expected_elems.size(); i++) {
874  CHECK_EQ(expected_elems[i], current_elems[i]);
875  }
876  if (struct_type->isLiteral()) {
877  return struct_type;
878  }
879 
880  llvm::StringRef struct_name = struct_type->getStructName();
881 
882  llvm::StructType* polygon_type = module_for_lookup->getTypeByName(struct_name);
883  CHECK(polygon_type);
884 
885  return (polygon_type);
886  }
887  return generated_struct_type;
888 }
889 
890 void CodeGenerator::codegenGeoPolygonArgs(const std::string& udf_func_name,
891  size_t param_num,
892  llvm::Value* polygon_buf,
893  llvm::Value* polygon_size,
894  llvm::Value* ring_sizes_buf,
895  llvm::Value* num_rings,
896  llvm::Value* compression,
897  llvm::Value* input_srid,
898  llvm::Value* output_srid,
899  std::vector<llvm::Value*>& output_args) {
901  CHECK(polygon_buf);
902  CHECK(polygon_size);
903  CHECK(ring_sizes_buf);
904  CHECK(num_rings);
905  CHECK(compression);
906  CHECK(input_srid);
907  CHECK(output_srid);
908 
909  auto polygon_abstraction = createPolygonStructType(udf_func_name, param_num);
910  auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(polygon_abstraction, nullptr);
911 
912  auto polygon_buf_ptr =
913  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 0);
914  cgen_state_->ir_builder_.CreateStore(polygon_buf, polygon_buf_ptr);
915 
916  auto polygon_size_ptr =
917  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 1);
918  cgen_state_->ir_builder_.CreateStore(polygon_size, polygon_size_ptr);
919 
920  auto ring_sizes_buf_ptr =
921  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 2);
922  cgen_state_->ir_builder_.CreateStore(ring_sizes_buf, ring_sizes_buf_ptr);
923 
924  auto ring_size_ptr =
925  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 3);
926  cgen_state_->ir_builder_.CreateStore(num_rings, ring_size_ptr);
927 
928  auto polygon_compression_ptr =
929  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 4);
930  cgen_state_->ir_builder_.CreateStore(compression, polygon_compression_ptr);
931 
932  auto input_srid_ptr =
933  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 5);
934  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
935 
936  auto output_srid_ptr =
937  cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 6);
938  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
939 
940  output_args.push_back(alloc_mem);
941 }
942 
944  const std::string& udf_func_name,
945  size_t param_num) {
946  llvm::Function* udf_func = cgen_state_->module_->getFunction(udf_func_name);
947  llvm::Module* module_for_lookup = cgen_state_->module_;
948 
949  llvm::StructType* generated_struct_type =
950  llvm::StructType::get(cgen_state_->context_,
951  {llvm::Type::getInt8PtrTy(cgen_state_->context_),
952  llvm::Type::getInt64Ty(cgen_state_->context_),
953  llvm::Type::getInt32PtrTy(cgen_state_->context_),
954  llvm::Type::getInt64Ty(cgen_state_->context_),
955  llvm::Type::getInt32PtrTy(cgen_state_->context_),
956  llvm::Type::getInt64Ty(cgen_state_->context_),
957  llvm::Type::getInt32Ty(cgen_state_->context_),
958  llvm::Type::getInt32Ty(cgen_state_->context_),
959  llvm::Type::getInt32Ty(cgen_state_->context_)},
960  false);
961 
962  if (udf_func) {
963  llvm::FunctionType* udf_func_type = udf_func->getFunctionType();
964  CHECK(param_num < udf_func_type->getNumParams());
965  llvm::Type* param_pointer_type = udf_func_type->getParamType(param_num);
966  CHECK(param_pointer_type->isPointerTy());
967  llvm::Type* param_type = param_pointer_type->getPointerElementType();
968  CHECK(param_type->isStructTy());
969  llvm::StructType* struct_type = llvm::cast<llvm::StructType>(param_type);
970  CHECK(struct_type->isStructTy());
971  CHECK(struct_type->getStructNumElements() == 9);
972  const auto expected_elems = generated_struct_type->elements();
973  const auto current_elems = struct_type->elements();
974  for (size_t i = 0; i < expected_elems.size(); i++) {
975  CHECK_EQ(expected_elems[i], current_elems[i]);
976  }
977  if (struct_type->isLiteral()) {
978  return struct_type;
979  }
980  llvm::StringRef struct_name = struct_type->getStructName();
981 
982  llvm::StructType* polygon_type = module_for_lookup->getTypeByName(struct_name);
983  CHECK(polygon_type);
984 
985  return (polygon_type);
986  }
987  return generated_struct_type;
988 }
989 
990 void CodeGenerator::codegenGeoMultiPolygonArgs(const std::string& udf_func_name,
991  size_t param_num,
992  llvm::Value* polygon_coords,
993  llvm::Value* polygon_coords_size,
994  llvm::Value* ring_sizes_buf,
995  llvm::Value* ring_sizes,
996  llvm::Value* polygon_bounds,
997  llvm::Value* polygon_bounds_sizes,
998  llvm::Value* compression,
999  llvm::Value* input_srid,
1000  llvm::Value* output_srid,
1001  std::vector<llvm::Value*>& output_args) {
1003  CHECK(polygon_coords);
1004  CHECK(polygon_coords_size);
1005  CHECK(ring_sizes_buf);
1006  CHECK(ring_sizes);
1007  CHECK(polygon_bounds);
1008  CHECK(polygon_bounds_sizes);
1009  CHECK(compression);
1010  CHECK(input_srid);
1011  CHECK(output_srid);
1012 
1013  auto multi_polygon_abstraction = createMultiPolygonStructType(udf_func_name, param_num);
1014  auto alloc_mem =
1015  cgen_state_->ir_builder_.CreateAlloca(multi_polygon_abstraction, nullptr);
1016 
1017  auto polygon_coords_ptr =
1018  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 0);
1019  cgen_state_->ir_builder_.CreateStore(polygon_coords, polygon_coords_ptr);
1020 
1021  auto polygon_coords_size_ptr =
1022  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 1);
1023  cgen_state_->ir_builder_.CreateStore(polygon_coords_size, polygon_coords_size_ptr);
1024 
1025  auto ring_sizes_buf_ptr =
1026  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 2);
1027  cgen_state_->ir_builder_.CreateStore(ring_sizes_buf, ring_sizes_buf_ptr);
1028 
1029  auto ring_sizes_ptr =
1030  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 3);
1031  cgen_state_->ir_builder_.CreateStore(ring_sizes, ring_sizes_ptr);
1032 
1033  auto polygon_bounds_buf_ptr =
1034  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 4);
1035  cgen_state_->ir_builder_.CreateStore(polygon_bounds, polygon_bounds_buf_ptr);
1036 
1037  auto polygon_bounds_sizes_ptr =
1038  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 5);
1039  cgen_state_->ir_builder_.CreateStore(polygon_bounds_sizes, polygon_bounds_sizes_ptr);
1040 
1041  auto polygon_compression_ptr =
1042  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 6);
1043  cgen_state_->ir_builder_.CreateStore(compression, polygon_compression_ptr);
1044 
1045  auto input_srid_ptr =
1046  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 7);
1047  cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr);
1048 
1049  auto output_srid_ptr =
1050  cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 8);
1051  cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr);
1052 
1053  output_args.push_back(alloc_mem);
1054 }
1055 
1056 // Generate CAST operations for arguments in `orig_arg_lvs` to the types required by
1057 // `ext_func_sig`.
1059  const Analyzer::FunctionOper* function_oper,
1060  const ExtensionFunction* ext_func_sig,
1061  const std::vector<llvm::Value*>& orig_arg_lvs,
1062  const std::vector<size_t>& orig_arg_lvs_index,
1063  const std::unordered_map<llvm::Value*, llvm::Value*>& const_arr_size,
1064  const CompilationOptions& co) {
1066  CHECK(ext_func_sig);
1067  const auto& ext_func_args = ext_func_sig->getArgs();
1068  CHECK_LE(function_oper->getArity(), ext_func_args.size());
1069  const auto func_ti = function_oper->get_type_info();
1070  std::vector<llvm::Value*> args;
1071  /*
1072  i: argument in RA for the function operand
1073  j: extra offset in ext_func_args
1074  k: origin_arg_lvs counter, equal to orig_arg_lvs_index[i]
1075  ij: ext_func_args counter, equal to i + j
1076  dj: offset when UDF implementation first argument corresponds to return value
1077  */
1078  for (size_t i = 0, j = 0, dj = (func_ti.is_buffer() ? 1 : 0);
1079  i < function_oper->getArity();
1080  ++i) {
1081  size_t k = orig_arg_lvs_index[i];
1082  size_t ij = i + j;
1083  const auto arg = function_oper->getArg(i);
1084  const auto ext_func_arg = ext_func_args[ij];
1085  const auto& arg_ti = arg->get_type_info();
1086  llvm::Value* arg_lv{nullptr};
1087  if (arg_ti.is_bytes()) {
1088  CHECK(ext_func_arg == ExtArgumentType::TextEncodingNone)
1089  << ::toString(ext_func_arg);
1090  const auto ptr_lv = orig_arg_lvs[k + 1];
1091  const auto len_lv = orig_arg_lvs[k + 2];
1092  auto& builder = cgen_state_->ir_builder_;
1093  auto string_buf_arg = builder.CreatePointerCast(
1094  ptr_lv, llvm::Type::getInt8PtrTy(cgen_state_->context_));
1095  auto string_size_arg =
1096  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1097  codegenBufferArgs(ext_func_sig->getName(),
1098  ij + dj,
1099  string_buf_arg,
1100  string_size_arg,
1101  nullptr,
1102  args);
1103  } else if (arg_ti.is_array()) {
1104  bool const_arr = (const_arr_size.count(orig_arg_lvs[k]) > 0);
1105  const auto elem_ti = arg_ti.get_elem_type();
1106  // TODO: switch to fast fixlen variants
1107  const auto ptr_lv = (const_arr)
1108  ? orig_arg_lvs[k]
1110  "array_buff",
1111  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1112  {orig_arg_lvs[k], posArg(arg)});
1113  const auto len_lv =
1114  (const_arr) ? const_arr_size.at(orig_arg_lvs[k])
1116  "array_size",
1118  {orig_arg_lvs[k],
1119  posArg(arg),
1120  cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
1121 
1122  if (is_ext_arg_type_pointer(ext_func_arg)) {
1123  args.push_back(castArrayPointer(ptr_lv, elem_ti));
1124  args.push_back(cgen_state_->ir_builder_.CreateZExt(
1125  len_lv, get_int_type(64, cgen_state_->context_)));
1126  j++;
1127  } else if (is_ext_arg_type_array(ext_func_arg)) {
1128  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1129  auto& builder = cgen_state_->ir_builder_;
1130  auto array_size_arg =
1131  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1132  auto array_null_arg =
1133  cgen_state_->emitExternalCall("array_is_null",
1135  {orig_arg_lvs[k], posArg(arg)});
1136  codegenBufferArgs(ext_func_sig->getName(),
1137  ij + dj,
1138  array_buf_arg,
1139  array_size_arg,
1140  array_null_arg,
1141  args);
1142  } else {
1143  UNREACHABLE();
1144  }
1145 
1146  } else if (arg_ti.is_geometry()) {
1147  // Coords
1148  bool const_arr = (const_arr_size.count(orig_arg_lvs[k]) > 0);
1149  // NOTE(adb): We're generating code to handle the TINYINT array only -- the actual
1150  // geo encoding (or lack thereof) does not matter here
1151  const auto elem_ti = SQLTypeInfo(SQLTypes::kARRAY,
1152  0,
1153  0,
1154  false,
1156  0,
1158  .get_elem_type();
1159  llvm::Value* ptr_lv;
1160  llvm::Value* len_lv;
1161  int32_t fixlen = -1;
1162  if (arg_ti.get_type() == kPOINT) {
1163  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(arg);
1164  if (col_var) {
1165  const auto coords_cd = executor()->getPhysicalColumnDescriptor(col_var, 1);
1166  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1167  fixlen = coords_cd->columnType.get_size();
1168  }
1169  }
1170  }
1171  if (fixlen > 0) {
1172  ptr_lv =
1173  cgen_state_->emitExternalCall("fast_fixlen_array_buff",
1174  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1175  {orig_arg_lvs[k], posArg(arg)});
1176  len_lv = cgen_state_->llInt(int64_t(fixlen));
1177  } else {
1178  // TODO: remove const_arr and related code if it's not needed
1179  ptr_lv = (const_arr) ? orig_arg_lvs[k]
1181  "array_buff",
1182  llvm::Type::getInt8PtrTy(cgen_state_->context_),
1183  {orig_arg_lvs[k], posArg(arg)});
1184  len_lv = (const_arr)
1185  ? const_arr_size.at(orig_arg_lvs[k])
1187  "array_size",
1189  {orig_arg_lvs[k],
1190  posArg(arg),
1191  cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))});
1192  }
1193 
1194  if (is_ext_arg_type_geo(ext_func_arg)) {
1195  if (arg_ti.get_type() == kPOINT || arg_ti.get_type() == kLINESTRING) {
1196  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1197  auto& builder = cgen_state_->ir_builder_;
1198  auto array_size_arg =
1199  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1200  auto compression_val = codegenCompression(arg_ti);
1201  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1202  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1203 
1204  if (arg_ti.get_type() == kPOINT) {
1205  CHECK_EQ(k, ij);
1206  codegenGeoPointArgs(ext_func_sig->getName(),
1207  ij + dj,
1208  array_buf_arg,
1209  array_size_arg,
1210  compression_val,
1211  input_srid_val,
1212  output_srid_val,
1213  args);
1214  } else {
1215  CHECK_EQ(k, ij);
1216  codegenGeoLineStringArgs(ext_func_sig->getName(),
1217  ij + dj,
1218  array_buf_arg,
1219  array_size_arg,
1220  compression_val,
1221  input_srid_val,
1222  output_srid_val,
1223  args);
1224  }
1225  }
1226  } else {
1227  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1228  args.push_back(castArrayPointer(ptr_lv, elem_ti));
1229  args.push_back(cgen_state_->ir_builder_.CreateZExt(
1230  len_lv, get_int_type(64, cgen_state_->context_)));
1231  j++;
1232  }
1233 
1234  switch (arg_ti.get_type()) {
1235  case kPOINT:
1236  case kLINESTRING:
1237  break;
1238  case kPOLYGON: {
1239  if (ext_func_arg == ExtArgumentType::GeoPolygon) {
1240  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1241  auto& builder = cgen_state_->ir_builder_;
1242  auto array_size_arg =
1243  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1244  auto compression_val = codegenCompression(arg_ti);
1245  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1246  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1247 
1248  auto [ring_size_buff, ring_size] =
1249  codegenArrayBuff(orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true);
1250  CHECK_EQ(k, ij);
1251  codegenGeoPolygonArgs(ext_func_sig->getName(),
1252  ij + dj,
1253  array_buf_arg,
1254  array_size_arg,
1255  ring_size_buff,
1256  ring_size,
1257  compression_val,
1258  input_srid_val,
1259  output_srid_val,
1260  args);
1261  } else {
1262  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1263  // Ring Sizes
1264  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 1]) > 0;
1265  auto [ring_size_buff, ring_size] =
1266  (const_arr) ? std::make_pair(orig_arg_lvs[k + 1],
1267  const_arr_size.at(orig_arg_lvs[k + 1]))
1268  : codegenArrayBuff(
1269  orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true);
1270  args.push_back(ring_size_buff);
1271  args.push_back(ring_size);
1272  j += 2;
1273  }
1274  break;
1275  }
1276  case kMULTIPOLYGON: {
1277  if (ext_func_arg == ExtArgumentType::GeoMultiPolygon) {
1278  auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti);
1279  auto& builder = cgen_state_->ir_builder_;
1280  auto array_size_arg =
1281  builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_));
1282  auto compression_val = codegenCompression(arg_ti);
1283  auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid());
1284  auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid());
1285 
1286  auto [ring_size_buff, ring_size] =
1287  codegenArrayBuff(orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true);
1288 
1289  auto [poly_bounds_buff, poly_bounds_size] =
1290  codegenArrayBuff(orig_arg_lvs[k + 2], posArg(arg), SQLTypes::kINT, true);
1291  CHECK_EQ(k, ij);
1292  codegenGeoMultiPolygonArgs(ext_func_sig->getName(),
1293  ij + dj,
1294  array_buf_arg,
1295  array_size_arg,
1296  ring_size_buff,
1297  ring_size,
1298  poly_bounds_buff,
1299  poly_bounds_size,
1300  compression_val,
1301  input_srid_val,
1302  output_srid_val,
1303  args);
1304  } else {
1305  CHECK(ext_func_arg == ExtArgumentType::PInt8);
1306  // Ring Sizes
1307  {
1308  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 1]) > 0;
1309  auto [ring_size_buff, ring_size] =
1310  (const_arr)
1311  ? std::make_pair(orig_arg_lvs[k + 1],
1312  const_arr_size.at(orig_arg_lvs[k + 1]))
1313  : codegenArrayBuff(
1314  orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true);
1315 
1316  args.push_back(ring_size_buff);
1317  args.push_back(ring_size);
1318  }
1319  // Poly Rings
1320  {
1321  auto const_arr = const_arr_size.count(orig_arg_lvs[k + 2]) > 0;
1322  auto [poly_bounds_buff, poly_bounds_size] =
1323  (const_arr)
1324  ? std::make_pair(orig_arg_lvs[k + 2],
1325  const_arr_size.at(orig_arg_lvs[k + 2]))
1326  : codegenArrayBuff(
1327  orig_arg_lvs[k + 2], posArg(arg), SQLTypes::kINT, true);
1328 
1329  args.push_back(poly_bounds_buff);
1330  args.push_back(poly_bounds_size);
1331  }
1332  j += 4;
1333  }
1334  break;
1335  }
1336  default:
1337  CHECK(false);
1338  }
1339  } else {
1340  CHECK(is_ext_arg_type_scalar(ext_func_arg));
1341  const auto arg_target_ti = ext_arg_type_to_type_info(ext_func_arg);
1342  if (arg_ti.get_type() != arg_target_ti.get_type()) {
1343  arg_lv = codegenCast(orig_arg_lvs[k], arg_ti, arg_target_ti, false, co);
1344  } else {
1345  arg_lv = orig_arg_lvs[k];
1346  }
1347  CHECK_EQ(arg_lv->getType(),
1348  ext_arg_type_to_llvm_type(ext_func_arg, cgen_state_->context_));
1349  args.push_back(arg_lv);
1350  }
1351  }
1352  return args;
1353 }
1354 
1355 llvm::Value* CodeGenerator::castArrayPointer(llvm::Value* ptr,
1356  const SQLTypeInfo& elem_ti) {
1358  if (elem_ti.get_type() == kFLOAT) {
1359  return cgen_state_->ir_builder_.CreatePointerCast(
1360  ptr, llvm::Type::getFloatPtrTy(cgen_state_->context_));
1361  }
1362  if (elem_ti.get_type() == kDOUBLE) {
1363  return cgen_state_->ir_builder_.CreatePointerCast(
1364  ptr, llvm::Type::getDoublePtrTy(cgen_state_->context_));
1365  }
1366  CHECK(elem_ti.is_integer() || elem_ti.is_boolean() ||
1367  (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT));
1368  switch (elem_ti.get_size()) {
1369  case 1:
1370  return cgen_state_->ir_builder_.CreatePointerCast(
1371  ptr, llvm::Type::getInt8PtrTy(cgen_state_->context_));
1372  case 2:
1373  return cgen_state_->ir_builder_.CreatePointerCast(
1374  ptr, llvm::Type::getInt16PtrTy(cgen_state_->context_));
1375  case 4:
1376  return cgen_state_->ir_builder_.CreatePointerCast(
1377  ptr, llvm::Type::getInt32PtrTy(cgen_state_->context_));
1378  case 8:
1379  return cgen_state_->ir_builder_.CreatePointerCast(
1380  ptr, llvm::Type::getInt64PtrTy(cgen_state_->context_));
1381  default:
1382  CHECK(false);
1383  }
1384  return nullptr;
1385 }
llvm::StructType * createLineStringStructType(const std::string &udf_func_name, size_t param_num)
void codegenGeoMultiPolygonArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *polygon_coords, llvm::Value *polygon_coords_size, llvm::Value *ring_sizes_buf, llvm::Value *ring_sizes, llvm::Value *polygon_bounds, llvm::Value *polygon_bounds_sizes, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define CHECK_EQ(x, y)
Definition: Logger.h:205
llvm::StructType * get_buffer_struct_type(CgenState *cgen_state, const std::string &ext_func_name, size_t param_num, llvm::Type *elem_type, bool has_is_null)
HOST DEVICE int get_size() const
Definition: sqltypes.h:321
const std::vector< ExtArgumentType > & getArgs() const
std::string toString(const ExtArgumentType &sig_type)
llvm::BasicBlock * args_notnull_bb
size_t getArity() const
Definition: Analyzer.h:1360
SQLTypes
Definition: sqltypes.h:37
std::unique_ptr< llvm::Module > udf_gpu_module
bool is_ext_arg_type_geo(const ExtArgumentType ext_arg_type)
CgenState * cgen_state_
const ExtArgumentType getRet() const
void codegenGeoPolygonArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *polygon_buf, llvm::Value *polygon_size, llvm::Value *ring_sizes_buf, llvm::Value *num_rings, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
#define LOG(tag)
Definition: Logger.h:188
std::vector< llvm::Value * > codegenFunctionOperCastArgs(const Analyzer::FunctionOper *, const ExtensionFunction *, const std::vector< llvm::Value * > &, const std::vector< size_t > &, const std::unordered_map< llvm::Value *, llvm::Value * > &, const CompilationOptions &)
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Definition: CgenState.h:222
llvm::Value * codegenFunctionOperNullArg(const Analyzer::FunctionOper *, const std::vector< llvm::Value * > &)
llvm::IRBuilder ir_builder_
Definition: CgenState.h:331
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:513
llvm::Value * castArrayPointer(llvm::Value *ptr, const SQLTypeInfo &elem_ti)
#define UNREACHABLE()
Definition: Logger.h:241
#define CHECK_GE(x, y)
Definition: Logger.h:210
Definition: sqldefs.h:49
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
llvm::StructType * createPointStructType(const std::string &udf_func_name, size_t param_num)
bool call_requires_custom_type_handling(const Analyzer::FunctionOper *function_oper)
const std::string getName(bool keep_suffix=true) const
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:311
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
bool ext_func_call_requires_nullcheck(const Analyzer::FunctionOper *function_oper)
SQLTypeInfo get_sql_type_from_llvm_type(const llvm::Type *ll_type)
std::vector< FunctionOperValue > ext_call_cache_
Definition: CgenState.h:337
void codegenBufferArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *buffer_buf, llvm::Value *buffer_size, llvm::Value *buffer_is_null, std::vector< llvm::Value * > &output_args)
std::pair< llvm::Value *, llvm::Value * > codegenArrayBuff(llvm::Value *chunk, llvm::Value *row_pos, SQLTypes array_type, bool cast_and_extend)
llvm::Module * module_
Definition: CgenState.h:320
llvm::LLVMContext & context_
Definition: CgenState.h:329
llvm::Function * current_func_
Definition: CgenState.h:323
std::tuple< ArgNullcheckBBs, llvm::Value * > beginArgsNullcheck(const Analyzer::FunctionOper *function_oper, const std::vector< llvm::Value * > &orig_arg_lvs)
bool is_integer() const
Definition: sqltypes.h:480
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:27
llvm::Value * codegenFunctionOper(const Analyzer::FunctionOper *, const CompilationOptions &)
llvm::Type * get_llvm_type_from_sql_array_type(const SQLTypeInfo ti, llvm::LLVMContext &ctx)
bool is_boolean() const
Definition: sqltypes.h:485
llvm::BasicBlock * args_null_bb
#define AUTOMATIC_IR_METADATA(CGENSTATE)
llvm::Type * ext_arg_type_to_llvm_type(const ExtArgumentType ext_arg_type, llvm::LLVMContext &ctx)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:137
bool is_buffer() const
Definition: sqltypes.h:495
ExecutorDeviceType device_type
void codegenGeoPointArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *point_buf, llvm::Value *point_size, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
bool is_ext_arg_type_array(const ExtArgumentType ext_arg_type)
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:26
#define CHECK_LE(x, y)
Definition: Logger.h:208
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:319
std::string serialize_llvm_object(const T *llvm_obj)
bool is_ext_arg_type_scalar(const ExtArgumentType ext_arg_type)
llvm::StructType * createPolygonStructType(const std::string &udf_func_name, size_t param_num)
const Analyzer::Expr * getArg(const size_t i) const
Definition: Analyzer.h:1362
const Expr * get_operand() const
Definition: Analyzer.h:371
llvm::Value * endArgsNullcheck(const ArgNullcheckBBs &, llvm::Value *, llvm::Value *, const Analyzer::FunctionOper *)
std::unique_ptr< llvm::Module > udf_cpu_module
T bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:320
llvm::Value * codegenFunctionOperWithCustomTypeHandling(const Analyzer::FunctionOperWithCustomTypeHandling *, const CompilationOptions &)
bool is_bytes() const
Definition: sqltypes.h:492
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:300
#define CHECK(condition)
Definition: Logger.h:197
llvm::Value * codegenIsNullNumber(llvm::Value *, const SQLTypeInfo &)
Definition: LogicalIR.cpp:401
uint64_t exp_to_scale(const unsigned exp)
llvm::Value * codegenCompression(const SQLTypeInfo &type_info)
llvm::Value * codegenCast(const Analyzer::UOper *, const CompilationOptions &)
Definition: CastIR.cpp:20
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:172
Definition: sqltypes.h:44
bool is_string() const
Definition: sqltypes.h:478
std::string getName() const
Definition: Analyzer.h:1358
void codegenGeoLineStringArgs(const std::string &udf_func_name, size_t param_num, llvm::Value *line_string_buf, llvm::Value *line_string_size, llvm::Value *compression, llvm::Value *input_srid, llvm::Value *output_srid, std::vector< llvm::Value * > &output_args)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:697
bool is_decimal() const
Definition: sqltypes.h:481
int get_physical_coord_cols() const
Definition: sqltypes.h:347
void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)
Definition: CgenState.cpp:65
bool is_ext_arg_type_pointer(const ExtArgumentType ext_arg_type)
Executor * executor() const
llvm::StructType * createMultiPolygonStructType(const std::string &udf_func_name, size_t param_num)