OmniSciDB  ba1bac9284
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TableFunctionCompilationContext Class Reference

#include <TableFunctionCompilationContext.h>

+ Collaboration diagram for TableFunctionCompilationContext:

Public Types

using FuncPtr = int32_t(*)(const int8_t **input_cols, const int64_t *input_row_count, int64_t **out, int64_t *output_row_count)
 

Public Member Functions

 TableFunctionCompilationContext ()
 
 TableFunctionCompilationContext (const TableFunctionCompilationContext &)=delete
 
TableFunctionCompilationContext & operator= (const TableFunctionCompilationContext &)=delete
 
void compile (const TableFunctionExecutionUnit &exe_unit, const CompilationOptions &co, Executor *executor)
 
TableFunctionCompilationContext::FuncPtr getFuncPtr () const
 
GpuCompilationContext * getGpuCode () const
 

Private Member Functions

void generateEntryPoint (const TableFunctionExecutionUnit &exe_unit, bool is_gpu)
 
void generateGpuKernel ()
 
void finalize (const CompilationOptions &co, Executor *executor)
 

Private Attributes

std::unique_ptr< CgenState > cgen_state_
 
std::unique_ptr< llvm::Module > module_
 
ExecutionEngineWrapper own_execution_engine_
 
std::shared_ptr< GpuCompilationContext > gpu_code_
 
llvm::Function * entry_point_func_
 
llvm::Function * kernel_func_
 
FuncPtr func_ptr
 

Detailed Description

Definition at line 29 of file TableFunctionCompilationContext.h.

Member Typedef Documentation

using TableFunctionCompilationContext::FuncPtr = int32_t (*)(const int8_t** input_cols, const int64_t* input_row_count, int64_t** out, int64_t* output_row_count)

Definition at line 45 of file TableFunctionCompilationContext.h.

Constructor & Destructor Documentation

TableFunctionCompilationContext::TableFunctionCompilationContext ( )

Definition at line 225 of file TableFunctionCompilationContext.cpp.

References cgen_state_, CHECK, entry_point_func_, anonymous_namespace{TableFunctionCompilationContext.cpp}::generate_entry_point(), module_, and runtime_module_shallow_copy().

226  : cgen_state_(std::make_unique<CgenState>(/*num_query_infos=*/0,
227  /*contains_left_deep_outer_join=*/false)) {
228  auto cgen_state = cgen_state_.get();
229  CHECK(cgen_state);
230 
231  std::unique_ptr<llvm::Module> module(runtime_module_shallow_copy(cgen_state));
232  cgen_state->module_ = module.get();
233 
234  entry_point_func_ = generate_entry_point(cgen_state);
235  module_ = std::move(module);
236 }
std::unique_ptr< llvm::Module > runtime_module_shallow_copy(CgenState *cgen_state)
llvm::Function * generate_entry_point(const CgenState *cgen_state)
std::unique_ptr< llvm::Module > module_
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the call graph for this function:

TableFunctionCompilationContext::TableFunctionCompilationContext ( const TableFunctionCompilationContext & )
delete

Member Function Documentation

void TableFunctionCompilationContext::compile ( const TableFunctionExecutionUnit & exe_unit,
const CompilationOptions & co,
Executor * executor 
)

Definition at line 238 of file TableFunctionCompilationContext.cpp.

References CompilationOptions::device_type, finalize(), generateEntryPoint(), generateGpuKernel(), and GPU.

Referenced by Executor::executeTableFunction().

240  {
241  generateEntryPoint(exe_unit, /*is_gpu=*/co.device_type == ExecutorDeviceType::GPU);
242  if (co.device_type == ExecutorDeviceType::GPU) {
243  generateGpuKernel();
244  }
245  finalize(co, executor);
246 }
void generateEntryPoint(const TableFunctionExecutionUnit &exe_unit, bool is_gpu)
ExecutorDeviceType device_type
void finalize(const CompilationOptions &co, Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void TableFunctionCompilationContext::finalize ( const CompilationOptions & co,
Executor * executor 
)
private

Definition at line 457 of file TableFunctionCompilationContext.cpp.

References cgen_state_, CHECK, CPU, CompilationOptions::device_type, entry_point_func_, func_ptr, CodeGenerator::generateNativeCPUCode(), CodeGenerator::generateNativeGPUCode(), GPU, gpu_code_, logger::IR, kernel_func_, CodeGenerator::link_udf_module(), LOG, module_, own_execution_engine_, rt_udf_cpu_module, rt_udf_gpu_module, and serialize_llvm_object().

Referenced by compile().

458  {
459  /*
460  TODO 1: eliminate need for OverrideFromSrc
461  TODO 2: detect and link only the udf's that are needed
462  */
463  if (co.device_type == ExecutorDeviceType::GPU && rt_udf_gpu_module != nullptr) {
464  CodeGenerator::link_udf_module(rt_udf_gpu_module,
465  *module_,
466  cgen_state_.get(),
467  llvm::Linker::Flags::OverrideFromSrc);
468  }
469  if (co.device_type == ExecutorDeviceType::CPU && rt_udf_cpu_module != nullptr) {
470  CodeGenerator::link_udf_module(rt_udf_cpu_module,
471  *module_,
472  cgen_state_.get(),
473  llvm::Linker::Flags::OverrideFromSrc);
474  }
475 
476  module_.release();
477  // Add code to cache?
478 
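479  LOG(IR) << "Table Function Entry Point IR\n"
480  << serialize_llvm_object(entry_point_func_);
481 
482  if (co.device_type == ExecutorDeviceType::GPU) {
483  LOG(IR) << "Table Function Kernel IR\n" << serialize_llvm_object(kernel_func_);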
484 
485  CHECK(executor);
486  executor->initializeNVPTXBackend();
487  const auto cuda_mgr = executor->catalog_->getDataMgr().getCudaMgr();
488  CHECK(cuda_mgr);
489 
490  CodeGenerator::GPUTarget gpu_target{executor->nvptx_target_machine_.get(),
491  cuda_mgr,
492  executor->blockSize(),
493  cgen_state_.get(),
494  false};
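495  gpu_code_ = CodeGenerator::generateNativeGPUCode(entry_point_func_,
496  kernel_func_,
497  {entry_point_func_, kernel_func_},
498  co,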
499  gpu_target);
500  } else {
501  auto ee =
502  CodeGenerator::generateNativeCPUCode(entry_point_func_, {entry_point_func_}, co);
503  func_ptr = reinterpret_cast<FuncPtr>(ee->getPointerToFunction(entry_point_func_));
504  own_execution_engine_ = std::move(ee);
505  }
506 
507  LOG(IR) << "End of IR";
508 }
std::unique_ptr< llvm::Module > rt_udf_cpu_module
#define LOG(tag)
Definition: Logger.h:200
std::unique_ptr< llvm::Module > rt_udf_gpu_module
std::shared_ptr< GpuCompilationContext > gpu_code_
int32_t(*)(const int8_t **input_cols, const int64_t *input_row_count, int64_t **out, int64_t *output_row_count) FuncPtr
std::unique_ptr< llvm::Module > module_
static ExecutionEngineWrapper generateNativeCPUCode(llvm::Function *func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
static std::shared_ptr< GpuCompilationContext > generateNativeGPUCode(llvm::Function *func, llvm::Function *wrapper_func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co, const GPUTarget &gpu_target)
static void link_udf_module(const std::unique_ptr< llvm::Module > &udf_module, llvm::Module &module, CgenState *cgen_state, llvm::Linker::Flags flags=llvm::Linker::Flags::None)
ExecutorDeviceType device_type
std::string serialize_llvm_object(const T *llvm_obj)
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void TableFunctionCompilationContext::generateEntryPoint ( const TableFunctionExecutionUnit & exe_unit,
bool  is_gpu 
)
private

Definition at line 248 of file TableFunctionCompilationContext.cpp.

References anonymous_namespace{TableFunctionCompilationContext.cpp}::alloc_column(), anonymous_namespace{TableFunctionCompilationContext.cpp}::alloc_column_list(), cgen_state_, CHECK, CHECK_EQ, entry_point_func_, generate_column_heads_load(), get_bit_width(), get_fp_type(), get_int_type(), table_functions::TableFunction::getName(), table_functions::TableFunction::hasTableFunctionSpecifiedParameter(), i, TableFunctionExecutionUnit::input_exprs, table_functions::TableFunction::isRuntime(), test_fsi::r, TableFunctionExecutionUnit::table_func, TableFunctionExecutionUnit::target_exprs, to_lower(), to_string(), and verify_function_ir().

Referenced by compile().

250  {
252  auto arg_it = entry_point_func_->arg_begin();
253  const auto input_cols_arg = &*arg_it;
254  const auto input_row_counts_arg = &*(++arg_it);
255  const auto output_buffers_arg = &*(++arg_it);
256  const auto output_row_count_ptr = &*(++arg_it);
257 
258  auto cgen_state = cgen_state_.get();
259  CHECK(cgen_state);
260  auto& ctx = cgen_state->context_;
261 
262  const auto bb_entry = llvm::BasicBlock::Create(ctx, ".entry", entry_point_func_, 0);
263  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
264 
265  const auto bb_exit = llvm::BasicBlock::Create(ctx, ".exit", entry_point_func_);
266 
267  const auto func_body_bb = llvm::BasicBlock::Create(
268  ctx, ".func_body", cgen_state->ir_builder_.GetInsertBlock()->getParent());
269  cgen_state->ir_builder_.SetInsertPoint(func_body_bb);
270 
271  auto col_heads = generate_column_heads_load(
272  exe_unit.input_exprs.size(), input_cols_arg, cgen_state->ir_builder_, ctx);
273  CHECK_EQ(exe_unit.input_exprs.size(), col_heads.size());
274 
275  auto row_count_heads = generate_column_heads_load(
276  exe_unit.input_exprs.size(), input_row_counts_arg, cgen_state->ir_builder_, ctx);
277 
278  // The column arguments of C++ UDTFs processed by clang must be
279  // passed by reference, see rbc issues 200 and 289.
280  auto pass_column_by_value = exe_unit.table_func.isRuntime();
281  std::vector<llvm::Value*> func_args;
282  size_t func_arg_index = 0;
283  int col_index = -1;
284  for (size_t i = 0; i < exe_unit.input_exprs.size(); i++) {
285  const auto& expr = exe_unit.input_exprs[i];
286  const auto& ti = expr->get_type_info();
287  if (col_index == -1) {
288  func_arg_index += 1;
289  }
290  if (ti.is_fp()) {
291  auto r = cgen_state->ir_builder_.CreateBitCast(
292  col_heads[i], llvm::PointerType::get(get_fp_type(get_bit_width(ti), ctx), 0));
293  func_args.push_back(cgen_state->ir_builder_.CreateLoad(r));
294  CHECK_EQ(col_index, -1);
295  } else if (ti.is_integer()) {
296  auto r = cgen_state->ir_builder_.CreateBitCast(
297  col_heads[i], llvm::PointerType::get(get_int_type(get_bit_width(ti), ctx), 0));
298  func_args.push_back(cgen_state->ir_builder_.CreateLoad(r));
299  CHECK_EQ(col_index, -1);
300  } else if (ti.is_column()) {
301  auto [col, col_ptr] =
302  alloc_column(std::string("input_col.") + std::to_string(func_arg_index),
303  i,
304  ti.get_elem_type(),
305  col_heads[i],
306  row_count_heads[i],
307  ctx,
308  cgen_state_->ir_builder_);
309  func_args.push_back(
310  (pass_column_by_value ? cgen_state_->ir_builder_.CreateLoad(col) : col_ptr));
311  CHECK_EQ(col_index, -1);
312  } else if (ti.is_column_list()) {
313  if (col_index == -1) {
314  auto col_list = alloc_column_list(
315  std::string("input_col_list.") + std::to_string(func_arg_index),
316  ti.get_elem_type(),
317  col_heads[i],
318  ti.get_dimension(),
319  row_count_heads[i],
320  ctx,
321  cgen_state_->ir_builder_);
322  func_args.push_back(col_list);
323  }
324  col_index++;
325  if (col_index + 1 == ti.get_dimension()) {
326  col_index = -1;
327  }
328  } else {
329  throw std::runtime_error(
330  "Only integer and floating point columns or scalars are supported as inputs to "
331  "table "
332  "functions, got " +
333  ti.get_type_name());
334  }
335  }
336  std::vector<llvm::Value*> output_col_args;
337  for (size_t i = 0; i < exe_unit.target_exprs.size(); i++) {
338  auto output_load = cgen_state->ir_builder_.CreateLoad(
339  cgen_state->ir_builder_.CreateGEP(output_buffers_arg, cgen_state_->llInt(i)));
340  const auto& expr = exe_unit.target_exprs[i];
341  const auto& ti = expr->get_type_info();
342  CHECK(!ti.is_column()); // UDTF output column type is its data type
343  CHECK(!ti.is_column_list()); // TODO: when UDTF outputs column_list, convert it to
344  // output columns
345  auto [col, col_ptr] = alloc_column(
346  std::string("output_col.") + std::to_string(i),
347  i,
348  ti,
349  (is_gpu ? output_load : nullptr), // CPU: set_output_row_size will set the output
350  // Column ptr member
351  output_row_count_ptr,
352  ctx,
353  cgen_state_->ir_builder_);
354  if (!is_gpu) {
355  cgen_state->emitExternalCall(
356  "register_output_column",
357  llvm::Type::getVoidTy(ctx),
358  {llvm::ConstantInt::get(get_int_type(32, ctx), i, true), col_ptr});
359  }
360  if (pass_column_by_value) {
361  output_col_args.push_back(col);
362  } else {
363  func_args.push_back(col_ptr);
364  }
365  }
366 
367  // output column members must be set before loading column when
368  // column instances are passed by value
369  if (!exe_unit.table_func.hasTableFunctionSpecifiedParameter() && !is_gpu) {
370  cgen_state->emitExternalCall(
371  "set_output_row_size",
372  llvm::Type::getVoidTy(ctx),
373  {cgen_state_->ir_builder_.CreateLoad(output_row_count_ptr)});
374  }
375  if (pass_column_by_value) {
376  for (auto& col : output_col_args) {
377  func_args.push_back(cgen_state_->ir_builder_.CreateLoad(col));
378  }
379  }
380 
381  auto func_name = exe_unit.table_func.getName();
382  boost::algorithm::to_lower(func_name);
383  const auto table_func_return =
384  cgen_state->emitExternalCall(func_name, get_int_type(32, ctx), func_args);
385  table_func_return->setName("table_func_ret");
386 
387  // If table_func_return is non-negative then store the value in
388  // output_row_count and return zero. Otherwise, return
389  // table_func_return that negative value contains the error code.
390  const auto bb_exit_0 = llvm::BasicBlock::Create(ctx, ".exit0", entry_point_func_);
391 
392  auto const_zero = llvm::ConstantInt::get(table_func_return->getType(), 0, true);
393  auto is_ok = cgen_state_->ir_builder_.CreateICmpSGE(table_func_return, const_zero);
394  cgen_state_->ir_builder_.CreateCondBr(is_ok, bb_exit_0, bb_exit);
395 
396  cgen_state_->ir_builder_.SetInsertPoint(bb_exit_0);
397  auto r = cgen_state->ir_builder_.CreateIntCast(
398  table_func_return, get_int_type(64, ctx), true);
399  cgen_state->ir_builder_.CreateStore(r, output_row_count_ptr);
400  cgen_state->ir_builder_.CreateRet(const_zero);
401 
402  cgen_state->ir_builder_.SetInsertPoint(bb_exit);
403  cgen_state->ir_builder_.CreateRet(table_func_return);
404 
405  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
406  cgen_state->ir_builder_.CreateBr(func_body_bb);
407 
408  /*
409  std::cout << "=================================" << std::endl;
410  entry_point_func_->print(llvm::outs());
411  std::cout << "=================================" << std::endl;
412  */
413 
414  verify_function_ir(entry_point_func_);
415 }
std::string to_lower(const std::string &str)
#define CHECK_EQ(x, y)
Definition: Logger.h:214
std::tuple< llvm::Value *, llvm::Value * > alloc_column(std::string col_name, const size_t index, const SQLTypeInfo &data_target_info, llvm::Value *data_ptr, llvm::Value *data_size, llvm::LLVMContext &ctx, llvm::IRBuilder<> &ir_builder)
std::vector< Analyzer::Expr * > input_exprs
const table_functions::TableFunction table_func
tuple r
Definition: test_fsi.py:16
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
void verify_function_ir(const llvm::Function *func)
size_t get_bit_width(const SQLTypeInfo &ti)
std::string getName(const bool drop_suffix=false, const bool lower=false) const
std::vector< llvm::Value * > generate_column_heads_load(const int num_columns, llvm::Value *byte_stream_arg, llvm::IRBuilder<> &ir_builder, llvm::LLVMContext &ctx)
llvm::Value * alloc_column_list(std::string col_list_name, const SQLTypeInfo &data_target_info, llvm::Value *data_ptrs, int length, llvm::Value *data_size, llvm::LLVMContext &ctx, llvm::IRBuilder<> &ir_builder)
#define CHECK(condition)
Definition: Logger.h:206
std::vector< Analyzer::Expr * > target_exprs

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void TableFunctionCompilationContext::generateGpuKernel ( )
private

Definition at line 417 of file TableFunctionCompilationContext.cpp.

References cgen_state_, CHECK, CHECK_EQ, entry_point_func_, get_int_type(), i, and kernel_func_.

Referenced by compile().

417  {
419  std::vector<llvm::Type*> arg_types;
420  arg_types.reserve(entry_point_func_->arg_size());
421  std::for_each(entry_point_func_->arg_begin(),
422  entry_point_func_->arg_end(),
423  [&arg_types](const auto& arg) { arg_types.push_back(arg.getType()); });
424  CHECK_EQ(arg_types.size(), entry_point_func_->arg_size());
425 
426  auto cgen_state = cgen_state_.get();
427  CHECK(cgen_state);
428  auto& ctx = cgen_state->context_;
429 
430  std::vector<llvm::Type*> wrapper_arg_types(arg_types.size() + 1);
431  wrapper_arg_types[0] = llvm::PointerType::get(get_int_type(32, ctx), 0);
432  wrapper_arg_types[1] = arg_types[0];
433 
434  for (size_t i = 1; i < arg_types.size(); ++i) {
435  wrapper_arg_types[i + 1] = arg_types[i];
436  }
437 
438  auto wrapper_ft =
439  llvm::FunctionType::get(llvm::Type::getVoidTy(ctx), wrapper_arg_types, false);
440  kernel_func_ = llvm::Function::Create(wrapper_ft,
441  llvm::Function::ExternalLinkage,
442  "table_func_kernel",
443  cgen_state->module_);
444 
445  auto wrapper_bb_entry = llvm::BasicBlock::Create(ctx, ".entry", kernel_func_, 0);
446  llvm::IRBuilder<> b(ctx);
447  b.SetInsertPoint(wrapper_bb_entry);
448  std::vector<llvm::Value*> loaded_args = {kernel_func_->arg_begin() + 1};
449  for (size_t i = 2; i < wrapper_arg_types.size(); ++i) {
450  loaded_args.push_back(kernel_func_->arg_begin() + i);
451  }
452  auto error_lv = b.CreateCall(entry_point_func_, loaded_args);
453  b.CreateStore(error_lv, kernel_func_->arg_begin());
454  b.CreateRetVoid();
455 }
#define CHECK_EQ(x, y)
Definition: Logger.h:214
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

TableFunctionCompilationContext::FuncPtr TableFunctionCompilationContext::getFuncPtr ( ) const
inline

Definition at line 46 of file TableFunctionCompilationContext.h.

References func_ptr.

Referenced by TableFunctionExecutionContext::launchCpuCode().

+ Here is the caller graph for this function:

GpuCompilationContext* TableFunctionCompilationContext::getGpuCode ( ) const
inline

Definition at line 48 of file TableFunctionCompilationContext.h.

References gpu_code_.

Referenced by TableFunctionExecutionContext::launchGpuCode().

48 { return gpu_code_.get(); }
std::shared_ptr< GpuCompilationContext > gpu_code_

+ Here is the caller graph for this function:

TableFunctionCompilationContext& TableFunctionCompilationContext::operator= ( const TableFunctionCompilationContext & )
delete

Member Data Documentation

std::unique_ptr<CgenState> TableFunctionCompilationContext::cgen_state_
private
llvm::Function* TableFunctionCompilationContext::entry_point_func_
private
FuncPtr TableFunctionCompilationContext::func_ptr
private

Definition at line 61 of file TableFunctionCompilationContext.h.

Referenced by finalize(), and getFuncPtr().

std::shared_ptr<GpuCompilationContext> TableFunctionCompilationContext::gpu_code_
private

Definition at line 58 of file TableFunctionCompilationContext.h.

Referenced by finalize(), and getGpuCode().

llvm::Function* TableFunctionCompilationContext::kernel_func_
private

Definition at line 60 of file TableFunctionCompilationContext.h.

Referenced by finalize(), and generateGpuKernel().

std::unique_ptr<llvm::Module> TableFunctionCompilationContext::module_
private

Definition at line 56 of file TableFunctionCompilationContext.h.

Referenced by finalize(), and TableFunctionCompilationContext().

ExecutionEngineWrapper TableFunctionCompilationContext::own_execution_engine_
private

Definition at line 57 of file TableFunctionCompilationContext.h.

Referenced by finalize().


The documentation for this class was generated from the following files: