OmniSciDB  b24e664e58
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TableFunctionCompilationContext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include <llvm/IR/Verifier.h>
20 #include <llvm/Support/raw_os_ostream.h>
21 #include <algorithm>
22 #include <boost/algorithm/string.hpp>
23 
25 
// LLVM modules declared here with `extern`; their definitions are not part of
// this listing. rt_udf_cpu_module is compared against nullptr later in this
// file; g_rt_module is not referenced by any of the code visible here.
26 extern std::unique_ptr<llvm::Module> g_rt_module;
27 extern std::unique_ptr<llvm::Module> rt_udf_cpu_module;
28 
29 namespace {
30 
31 llvm::Function* generate_entry_point(const CgenState* cgen_state) {
32  auto& ctx = cgen_state->context_;
33  const auto pi8_type = llvm::PointerType::get(get_int_type(8, ctx), 0);
34  const auto ppi8_type = llvm::PointerType::get(pi8_type, 0);
35  const auto pi64_type = llvm::PointerType::get(get_int_type(64, ctx), 0);
36  const auto ppi64_type = llvm::PointerType::get(pi64_type, 0);
37  const auto i32_type = get_int_type(32, ctx);
38 
39  const auto func_type = llvm::FunctionType::get(
40  i32_type, {ppi8_type, pi64_type, ppi64_type, pi64_type}, false);
41 
42  auto func = llvm::Function::Create(func_type,
43  llvm::Function::ExternalLinkage,
44  "call_table_function",
45  cgen_state->module_);
46  auto arg_it = func->arg_begin();
47  const auto input_cols_arg = &*arg_it;
48  input_cols_arg->setName("input_col_buffers");
49  const auto input_row_count = &*(++arg_it);
50  input_row_count->setName("input_row_count");
51  const auto output_buffers = &*(++arg_it);
52  output_buffers->setName("output_buffers");
53  const auto output_row_count = &*(++arg_it);
54  output_row_count->setName("output_row_count");
55  return func;
56 }
57 
58 } // namespace
59 
61  : cgen_state_(std::make_unique<CgenState>(std::vector<InputTableInfo>{}, false)) {
    // cgen_state_ was just built from an empty InputTableInfo list and `false`.
    // NOTE(review): the signature of this constructor and the numbered lines
    // 63 and 68 are absent from this listing; only the visible statements are
    // described here.
62  auto cgen_state = cgen_state_.get();
64 
    // The result of runtime_module_shallow_copy() is held in a unique_ptr while
    // the code generation state keeps a non-owning raw pointer to it.
65  std::unique_ptr<llvm::Module> module(runtime_module_shallow_copy(cgen_state));
66  cgen_state->module_ = module.get();
67 
    // Ownership of the module is moved into the module_ member.
69  module_ = std::move(module);
70 }
71 
73  const CompilationOptions& co,
74  Executor* executor) {
    // First build the entry point IR for the given execution unit.
75  generateEntryPoint(exe_unit);
    // NOTE(review): the closing brace below ends a statement whose opening
    // lines (numbered 76-77) are absent from this listing.
78  }
    // Hand the compilation options and executor on to finalize().
79  finalize(co, executor);
80 }
81 
83  const TableFunctionExecutionUnit& exe_unit) {
    // Fills in the body of entry_point_func_ with three basic blocks:
    //   .entry     -> branches to .func_body
    //   .func_body -> marshals the arguments, emits the external call to the
    //                 table function, then branches to .exit
    //   .exit      -> returns the i32 result of that call
    // Throws std::runtime_error when an input or target expression is neither
    // a floating point nor an integer type.
    // NOTE(review): the numbered lines 84, 92 and 159 are absent from this
    // listing.

    // Pick up the four arguments of entry_point_func_ in positional order.
85  auto arg_it = entry_point_func_->arg_begin();
86  const auto input_cols_arg = &*arg_it;
87  const auto input_row_count = &*(++arg_it);
88  const auto output_buffers_arg = &*(++arg_it);
89  const auto output_row_count_ptr = &*(++arg_it);
90 
91  auto cgen_state = cgen_state_.get();
93  auto& ctx = cgen_state->context_;
94 
95  const auto bb_entry = llvm::BasicBlock::Create(ctx, ".entry", entry_point_func_, 0);
96  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
97 
98  const auto bb_exit = llvm::BasicBlock::Create(ctx, ".exit", entry_point_func_);
99 
     // The builder currently sits in .entry, so GetInsertBlock()->getParent()
     // yields the function that block was created in.
100  const auto func_body_bb = llvm::BasicBlock::Create(
101  ctx, ".func_body", cgen_state->ir_builder_.GetInsertBlock()->getParent());
102  cgen_state->ir_builder_.SetInsertPoint(func_body_bb);
103 
     // One value per input expression is expected back (enforced by CHECK_EQ).
104  auto col_heads = generate_column_heads_load(
105  exe_unit.input_exprs.size(), input_cols_arg, cgen_state->ir_builder_, ctx);
106  CHECK_EQ(exe_unit.input_exprs.size(), col_heads.size());
107 
     // Call arguments, part 1: each column head bit-cast to a pointer to the
     // fp or integer type whose width comes from get_bit_width(ti).
108  std::vector<llvm::Value*> func_args;
109  for (size_t i = 0; i < exe_unit.input_exprs.size(); i++) {
110  const auto& expr = exe_unit.input_exprs[i];
111  const auto& ti = expr->get_type_info();
112  if (ti.is_fp()) {
113  func_args.push_back(cgen_state->ir_builder_.CreateBitCast(
114  col_heads[i], llvm::PointerType::get(get_fp_type(get_bit_width(ti), ctx), 0)));
115  } else if (ti.is_integer()) {
116  func_args.push_back(cgen_state->ir_builder_.CreateBitCast(
117  col_heads[i], llvm::PointerType::get(get_int_type(get_bit_width(ti), ctx), 0)));
118  } else {
119  throw std::runtime_error(
120  "Only integer and floating point columns are supported as inputs to table "
121  "functions.");
122  }
123  }
124 
     // Call arguments, part 2: the row count arguments are passed through
     // unchanged, after the inputs and before the outputs.
125  func_args.push_back(input_row_count);
126  func_args.push_back(output_row_count_ptr);
127 
     // Call arguments, part 3: for each target expression, load the i-th entry
     // of output_buffers_arg and bit-cast it to a typed pointer, using the same
     // fp/integer rules as for the inputs.
128  for (size_t i = 0; i < exe_unit.target_exprs.size(); i++) {
129  auto output_load = cgen_state->ir_builder_.CreateLoad(
130  cgen_state->ir_builder_.CreateGEP(output_buffers_arg, cgen_state_->llInt(i)));
131  const auto& ti = exe_unit.target_exprs[i]->get_type_info();
132  if (ti.is_fp()) {
133  func_args.push_back(cgen_state->ir_builder_.CreateBitCast(
134  output_load, llvm::PointerType::get(get_fp_type(get_bit_width(ti), ctx), 0)));
135  } else if (ti.is_integer()) {
136  func_args.push_back(cgen_state->ir_builder_.CreateBitCast(
137  output_load, llvm::PointerType::get(get_int_type(get_bit_width(ti), ctx), 0)));
138  } else {
139  throw std::runtime_error(
140  "Only integer and floating point columns are supported as outputs to table "
141  "functions.");
142  }
143  }
144 
     // The callee is looked up by the lower-cased table function name (the name
     // is copied first, so exe_unit itself is not modified) and returns i32.
145  auto func_name = exe_unit.table_func_name;
146  boost::algorithm::to_lower(func_name);
147  const auto table_func_return =
148  cgen_state->emitExternalCall(func_name, get_int_type(32, ctx), func_args);
149  table_func_return->setName("table_func_ret");
     // .exit returns the call result.
150  cgen_state->ir_builder_.SetInsertPoint(bb_exit);
151  cgen_state->ir_builder_.CreateRet(table_func_return);
152 
     // Terminate the remaining blocks: .func_body -> .exit, .entry -> .func_body.
153  cgen_state->ir_builder_.SetInsertPoint(func_body_bb);
154  cgen_state->ir_builder_.CreateBr(bb_exit);
155 
156  cgen_state->ir_builder_.SetInsertPoint(bb_entry);
157  cgen_state->ir_builder_.CreateBr(func_body_bb);
158 
160 }
161 
// Builds kernel_func_ ("table_func_kernel"), a void wrapper around
// entry_point_func_. Its parameter list is an i32* followed by the entry
// point's own parameter types; the wrapper calls the entry point and stores
// its result through that leading i32*.
// NOTE(review): the signature of this function (numbered lines 162-163) is
// absent from this listing.
164  std::vector<llvm::Type*> arg_types;
165  arg_types.reserve(entry_point_func_->arg_size());
166  std::for_each(entry_point_func_->arg_begin(),
167  entry_point_func_->arg_end(),
168  [&arg_types](const auto& arg) { arg_types.push_back(arg.getType()); });
169  CHECK_EQ(arg_types.size(), entry_point_func_->arg_size());
170 
171  auto cgen_state = cgen_state_.get();
172  CHECK(cgen_state);
173  auto& ctx = cgen_state->context_;
174 
     // Slot 0 is the i32* that receives the result; slots 1..N mirror the entry
     // point's parameters. Reading arg_types[0] assumes the entry point has at
     // least one parameter.
175  std::vector<llvm::Type*> wrapper_arg_types(arg_types.size() + 1);
176  wrapper_arg_types[0] = llvm::PointerType::get(get_int_type(32, ctx), 0);
177  wrapper_arg_types[1] = arg_types[0];
178 
179  for (size_t i = 1; i < arg_types.size(); ++i) {
180  wrapper_arg_types[i + 1] = arg_types[i];
181  }
182 
183  auto wrapper_ft =
184  llvm::FunctionType::get(llvm::Type::getVoidTy(ctx), wrapper_arg_types, false);
185  kernel_func_ = llvm::Function::Create(wrapper_ft,
186  llvm::Function::ExternalLinkage,
187  "table_func_kernel",
188  cgen_state->module_);
189 
     // A separate IRBuilder is used for the wrapper body rather than
     // cgen_state->ir_builder_.
190  auto wrapper_bb_entry = llvm::BasicBlock::Create(ctx, ".entry", kernel_func_, 0);
191  llvm::IRBuilder<> b(ctx);
192  b.SetInsertPoint(wrapper_bb_entry);
     // Forward every wrapper argument except the leading i32* to the entry point.
193  std::vector<llvm::Value*> loaded_args = {kernel_func_->arg_begin() + 1};
194  for (size_t i = 2; i < wrapper_arg_types.size(); ++i) {
195  loaded_args.push_back(kernel_func_->arg_begin() + i);
196  }
     // Store the entry point's return value through the first wrapper argument.
197  auto error_lv = b.CreateCall(entry_point_func_, loaded_args);
198  b.CreateStore(error_lv, kernel_func_->arg_begin());
199  b.CreateRetVoid();
200 }
201 
203  Executor* executor) {
     // NOTE(review): several numbered lines of this function (209, 219, 221,
     // 235, 237, 242) are absent from this listing, including the callee of the
     // UDF link step, the condition that opens the branch closed by `} else {`,
     // and the initializer of `ee`. Comments below cover only what is visible.

     // Only when a UDF module is present: the (not shown) call receives
     // *module_, the code generation state and the OverrideFromSrc linker flag.
204  if (rt_udf_cpu_module != nullptr) {
205  /*
206  TODO 1: eliminate need for OverrideFromSrc
207  TODO 2: detect and link only the udf's that are needed
208  */
210  *module_,
211  cgen_state_.get(),
212  llvm::Linker::Flags::OverrideFromSrc);
213  }
214 
     // release() gives up ownership without destroying the module, and the
     // returned pointer is discarded here.
     // NOTE(review): presumably something else takes ownership of the module
     // afterwards - confirm it is not leaked.
215  module_.release();
216  // Add code to cache?
217 
218  LOG(IR) << "Table Function Entry Point IR\n"
220 
     // First branch: log the kernel wrapper IR, then set up NVPTX code
     // generation. Both the executor and its CUDA manager must be non-null.
222  LOG(IR) << "Table Function Kernel IR\n" << serialize_llvm_object(kernel_func_);
223 
224  CHECK(executor);
225  executor->initializeNVPTXBackend();
226  const auto cuda_mgr = executor->catalog_->getDataMgr().getCudaMgr();
227  CHECK(cuda_mgr);
228 
229  CodeGenerator::GPUTarget gpu_target{executor->nvptx_target_machine_.get(),
230  cuda_mgr,
231  executor->blockSize(),
232  cgen_state_.get(),
233  false};
     // The generated GPU code object is kept in gpu_code_.
234  gpu_code_ = std::make_unique<CodeGenerator::GPUCode>(
236  kernel_func_,
238  co,
239  gpu_target));
240  } else {
     // Other branch: take the native address of the entry point from `ee` and
     // keep `ee` alive by moving it into own_execution_engine_.
241  auto ee =
243  func_ptr = reinterpret_cast<FuncPtr>(ee->getPointerToFunction(entry_point_func_));
244  own_execution_engine_ = std::move(ee);
245  }
246 
247  LOG(IR) << "End of IR";
248 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
std::unique_ptr< llvm::Module > rt_udf_cpu_module
const std::string table_func_name
std::unique_ptr< llvm::Module > module(runtime_module_shallow_copy(cgen_state))
std::unique_ptr< llvm::Module > runtime_module_shallow_copy(CgenState *cgen_state)
std::vector< Analyzer::Expr * > input_exprs
void generateEntryPoint(const TableFunctionExecutionUnit &exe_unit)
#define LOG(tag)
Definition: Logger.h:185
llvm::Function * generate_entry_point(const CgenState *cgen_state)
static GPUCode generateNativeGPUCode(llvm::Function *func, llvm::Function *wrapper_func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co, const GPUTarget &gpu_target)
int32_t(*)(const int8_t **input_cols, const int64_t *input_row_count, int64_t **out, int64_t *output_row_count) FuncPtr
std::unique_ptr< llvm::Module > module_
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
static ExecutionEngineWrapper generateNativeCPUCode(llvm::Function *func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
false auto cgen_state
std::unique_ptr< CodeGenerator::GPUCode > gpu_code_
llvm::Module * module_
Definition: CgenState.h:264
void verify_function_ir(const llvm::Function *func)
size_t get_bit_width(const SQLTypeInfo &ti)
llvm::LLVMContext & context_
Definition: CgenState.h:267
CHECK(cgen_state)
std::unique_ptr< llvm::Module > g_rt_module
static void link_udf_module(const std::unique_ptr< llvm::Module > &udf_module, llvm::Module &module, CgenState *cgen_state, llvm::Linker::Flags flags=llvm::Linker::Flags::None)
ExecutorDeviceType device_type_
void finalize(const CompilationOptions &co, Executor *executor)
std::string serialize_llvm_object(const T *llvm_obj)
std::vector< llvm::Value * > generate_column_heads_load(const int num_columns, llvm::Value *byte_stream_arg, llvm::IRBuilder<> &ir_builder, llvm::LLVMContext &ctx)
std::vector< Analyzer::Expr * > target_exprs
void compile(const TableFunctionExecutionUnit &exe_unit, const CompilationOptions &co, Executor *executor)