21 #include <llvm/IR/InstIterator.h>
22 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
23 #include <llvm/Transforms/Utils/Cloning.h>
26 const bool contains_left_deep_outer_join,
28 : executor_id_(executor->getExecutorId())
31 , filter_func_(nullptr)
32 , current_func_(nullptr)
33 , row_func_bb_(nullptr)
34 , filter_func_bb_(nullptr)
35 , row_func_call_(nullptr)
36 , filter_func_call_(nullptr)
37 , context_(executor->getContext())
38 , ir_builder_(context_)
39 , contains_left_deep_outer_join_(contains_left_deep_outer_join)
40 , outer_join_match_found_per_level_(std::max(num_query_infos, size_t(1)) - 1)
41 , needs_error_check_(
false)
43 , query_func_(nullptr)
44 , query_func_entry_ir_builder_(context_){};
47 const bool contains_left_deep_outer_join)
49 contains_left_deep_outer_join,
50 Executor::getExecutor(Executor::UNITARY_EXECUTOR_ID).get()) {}
53 : executor_id_(Executor::INVALID_EXECUTOR_ID)
57 , ir_builder_(context_)
58 , contains_left_deep_outer_join_(
false)
59 , needs_error_check_(
false)
61 , query_func_(nullptr)
62 , query_func_entry_ir_builder_(context_){};
71 return llInt(int64_t(0));
97 return llInt(int64_t(0));
121 const size_t byte_width,
122 const bool is_signed) {
123 int64_t max_int{0}, min_int{0};
127 uint64_t max_uint{0}, min_uint{0};
129 max_int =
static_cast<int64_t
>(max_uint);
132 switch (byte_width) {
134 return std::make_pair(::
ll_int(static_cast<int8_t>(max_int),
context_),
137 return std::make_pair(::
ll_int(static_cast<int16_t>(max_int),
context_),
140 return std::make_pair(::
ll_int(static_cast<int32_t>(max_int),
context_),
150 auto src_bits = val->getType()->getScalarSizeInBits();
151 if (src_bits == dst_bits) {
154 if (val->getType()->isIntegerTy()) {
159 if (val->getType()->isPointerTy()) {
163 CHECK(val->getType()->isFloatTy() || val->getType()->isDoubleTy());
168 dst_type = llvm::Type::getDoubleTy(
context_);
171 dst_type = llvm::Type::getFloatTy(
context_);
182 if (!fn->isDeclaration()) {
187 auto func_impl =
getExecutor()->get_rt_module()->getFunction(fn->getName());
188 CHECK(func_impl) << fn->getName().str();
190 if (func_impl->isDeclaration()) {
194 auto DestI = fn->arg_begin();
195 for (
auto arg_it = func_impl->arg_begin(); arg_it != func_impl->arg_end(); ++arg_it) {
196 DestI->setName(arg_it->getName());
197 vmap_[&*arg_it] = &*DestI++;
200 llvm::SmallVector<llvm::ReturnInst*, 8> Returns;
201 #if LLVM_VERSION_MAJOR > 12
202 llvm::CloneFunctionInto(
203 fn, func_impl,
vmap_, llvm::CloneFunctionChangeType::DifferentModule, Returns);
205 llvm::CloneFunctionInto(fn, func_impl,
vmap_,
true, Returns);
208 for (
auto it = llvm::inst_begin(fn), e = llvm::inst_end(fn); it != e; ++it) {
209 if (llvm::isa<llvm::CallInst>(*it)) {
210 auto& call = llvm::cast<llvm::CallInst>(*it);
217 const std::vector<llvm::Value*>&
args) {
219 auto func =
module_->getFunction(fname);
220 CHECK(func) << fname;
229 const std::vector<llvm::Value*>&
args) {
231 auto func =
module_->getFunction(fname);
241 llvm::Value* errorCode,
246 ir_builder_.CreateCondBr(condition, check_ok, check_fail);
255 template <
typename T>
256 llvm::Type*
getTy(llvm::LLVMContext& ctx) {
return getTy<std::remove_pointer_t<T>>(ctx)->getPointerTo(); }
// Pure-virtual hook: given a module and its LLVM context, yields the
// llvm::FunctionCallee for this definition. The override visible in this file
// obtains it by calling getOrInsertFunction on llvm_module with name_ and the
// LLVM types produced by getTy for each of its template type arguments.
274 virtual llvm::FunctionCallee getFunction(llvm::Module* llvm_module,
275 llvm::LLVMContext& context)
const = 0;
279 template <
typename... TYPES>
284 llvm::LLVMContext& context)
const {
285 return llvm_module->getOrInsertFunction(name_, getTy<TYPES>(context)...);
289 static const std::unordered_map<std::string, std::shared_ptr<GpuFunctionDefinition>>
291 {
"asin", std::make_shared<GpuFunction<double, double>>(
"Asin")},
292 {
"atanh", std::make_shared<GpuFunction<double, double>>(
"Atanh")},
293 {
"atan", std::make_shared<GpuFunction<double, double>>(
"Atan")},
294 {
"cosh", std::make_shared<GpuFunction<double, double>>(
"Cosh")},
295 {
"cos", std::make_shared<GpuFunction<double, double>>(
"Cos")},
296 {
"exp", std::make_shared<GpuFunction<double, double>>(
"Exp")},
297 {
"log", std::make_shared<GpuFunction<double, double>>(
"ln")},
298 {
"pow", std::make_shared<GpuFunction<double, double, double>>(
"power")},
299 {
"sinh", std::make_shared<GpuFunction<double, double>>(
"Sinh")},
300 {
"sin", std::make_shared<GpuFunction<double, double>>(
"Sin")},
301 {
"sqrt", std::make_shared<GpuFunction<double, double>>(
"Sqrt")},
302 {
"tan", std::make_shared<GpuFunction<double, double>>(
"Tan")}};
306 std::vector<std::string> ret;
309 CHECK(!fn->isDeclaration());
311 for (
auto& basic_block : *fn) {
312 auto& inst_list = basic_block.getInstList();
313 for (
auto inst_itr = inst_list.begin(); inst_itr != inst_list.end(); ++inst_itr) {
314 if (
auto call_inst = llvm::dyn_cast<llvm::CallInst>(inst_itr)) {
315 auto called_fcn = call_inst->getCalledFunction();
320 ret.emplace_back(called_fcn->getName());
329 llvm::Function* fn) {
331 CHECK(!fn->isDeclaration());
335 throw QueryMustRunOnCpu(
"Codegen failed: Could not find replacement functon for " +
337 " to run on gpu. Query step must run in cpu mode.");
339 const auto& gpu_fcn_obj = map_it->second;
341 VLOG(1) <<
"Replacing " << fcn_to_replace <<
" with " << gpu_fcn_obj->name_
342 <<
" for parent function " << fn->getName().str();
344 for (
auto& basic_block : *fn) {
345 auto& inst_list = basic_block.getInstList();
346 for (
auto inst_itr = inst_list.begin(); inst_itr != inst_list.end(); ++inst_itr) {
347 if (
auto call_inst = llvm::dyn_cast<llvm::CallInst>(inst_itr)) {
348 auto called_fcn = call_inst->getCalledFunction();
351 if (called_fcn->getName() == fcn_to_replace) {
352 std::vector<llvm::Value*>
args;
353 std::vector<llvm::Type*> arg_types;
354 for (
auto& arg : call_inst->args()) {
355 arg_types.push_back(arg.get()->getType());
356 args.push_back(arg.get());
360 auto gpu_func_type = gpu_func.getFunctionType();
361 CHECK(gpu_func_type);
362 CHECK_EQ(gpu_func_type->getReturnType(), called_fcn->getReturnType());
363 llvm::ReplaceInstWithInst(call_inst,
364 llvm::CallInst::Create(gpu_func, args,
""));
384 llvm::CloneModule(*llvm_module,
vmap_, [always_clone](
const llvm::GlobalValue* gv) {
385 auto func = llvm::dyn_cast<llvm::Function>(gv);
389 return (func->getLinkage() == llvm::GlobalValue::LinkageTypes::PrivateLinkage ||
390 func->getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage ||
397 const std::string& fname,
399 const std::vector<llvm::Value*>
args,
400 const std::vector<llvm::Attribute::AttrKind>& fnattrs,
401 const bool has_struct_return) {
402 std::vector<llvm::Type*> arg_types;
403 for (
const auto arg : args) {
405 arg_types.push_back(arg->getType());
407 auto func_ty = llvm::FunctionType::get(ret_type, arg_types,
false);
408 llvm::AttributeList attrs;
409 if (!fnattrs.empty()) {
410 std::vector<std::pair<unsigned, llvm::Attribute>> indexedAttrs;
411 indexedAttrs.reserve(fnattrs.size());
412 for (
auto attr : fnattrs) {
413 indexedAttrs.emplace_back(llvm::AttributeList::FunctionIndex,
414 llvm::Attribute::get(
context_, attr));
416 attrs = llvm::AttributeList::get(
context_,
417 {&indexedAttrs.front(), indexedAttrs.size()});
420 auto func_p =
module_->getOrInsertFunction(fname, func_ty, attrs);
422 auto callee = func_p.getCallee();
423 llvm::Function* func{
nullptr};
424 if (
auto callee_cast = llvm::dyn_cast<llvm::ConstantExpr>(callee)) {
427 CHECK(callee_cast->isCast());
428 CHECK_EQ(callee_cast->getNumOperands(), size_t(1));
429 func = llvm::dyn_cast<llvm::Function>(callee_cast->getOperand(0));
431 func = llvm::dyn_cast<llvm::Function>(callee);
434 llvm::FunctionType* func_type = func_p.getFunctionType();
436 if (has_struct_return) {
437 const auto arg_ti = func_type->getParamType(0);
438 CHECK(arg_ti->isPointerTy() && arg_ti->getPointerElementType()->isStructTy());
439 auto attr_list = func->getAttributes();
440 #if 14 <= LLVM_VERSION_MAJOR
441 llvm::AttrBuilder arr_arg_builder(
context_, attr_list.getParamAttrs(0));
443 llvm::AttrBuilder arr_arg_builder(attr_list.getParamAttributes(0));
445 arr_arg_builder.addAttribute(llvm::Attribute::StructRet);
446 func->addParamAttrs(0, arr_arg_builder);
450 CHECK_EQ(result->getType(), ret_type);
llvm::Value * castToTypeIn(llvm::Value *val, const size_t bit_width)
llvm::Value * emitEntryCall(const std::string &fname, const std::vector< llvm::Value * > &args)
llvm::FunctionCallee getFunction(llvm::Module *llvm_module, llvm::LLVMContext &context) const
llvm::LLVMContext & getExecutorContext() const
std::shared_ptr< Executor > getExecutor() const
void maybeCloneFunctionRecursive(llvm::Function *fn)
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
llvm::IRBuilder ir_builder_
llvm::Type * getTy(llvm::LLVMContext &ctx)
HOST DEVICE SQLTypes get_type() const
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
GpuFunctionDefinition(char const *name)
llvm::Type * getTy< double >(llvm::LLVMContext &ctx)
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
static const std::unordered_map< std::string, std::shared_ptr< GpuFunctionDefinition > > gpu_replacement_functions
llvm::LLVMContext & context_
llvm::Function * current_func_
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
void replaceFunctionForGpu(const std::string &fcn_to_replace, llvm::Function *fn)
llvm::ConstantFP * llFp(const float v) const
std::vector< std::string > gpuFunctionsToReplace(llvm::Function *fn)
llvm::IRBuilder query_func_entry_ir_builder_
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
static const ExecutorId INVALID_EXECUTOR_ID
std::pair< uint64_t, uint64_t > inline_uint_max_min(const size_t byte_width)
llvm::Constant * inlineNull(const SQLTypeInfo &)
void set_module_shallow_copy(const std::unique_ptr< llvm::Module > &module, bool always_clone=false)
HOST DEVICE EncodingType get_compression() const
static bool alwaysCloneRuntimeFunction(const llvm::Function *func)
void emitErrorCheck(llvm::Value *condition, llvm::Value *errorCode, std::string label)
llvm::ConstantInt * llInt(const T v) const
bool g_enable_watchdog false
llvm::ValueToValueMapTy vmap_
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::pair< int64_t, int64_t > inline_int_max_min(const size_t byte_width)
CgenState(const size_t num_query_infos, const bool contains_left_deep_outer_join, Executor *executor)
std::pair< llvm::ConstantInt *, llvm::ConstantInt * > inlineIntMaxMin(const size_t byte_width, const bool is_signed)
GpuFunction(char const *name)
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)