21 #include <llvm/IR/InstIterator.h>
22 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
23 #include <llvm/Transforms/Utils/Cloning.h>
26 const bool contains_left_deep_outer_join,
28 : executor_id_(executor->getExecutorId())
31 , filter_func_(nullptr)
32 , current_func_(nullptr)
33 , row_func_bb_(nullptr)
34 , filter_func_bb_(nullptr)
35 , row_func_call_(nullptr)
36 , filter_func_call_(nullptr)
37 , context_(executor->getContext())
38 , ir_builder_(context_)
39 , contains_left_deep_outer_join_(contains_left_deep_outer_join)
40 , outer_join_match_found_per_level_(std::max(num_query_infos, size_t(1)) - 1)
41 , needs_error_check_(
false)
43 , query_func_(nullptr)
44 , query_func_entry_ir_builder_(context_){};
47 const bool contains_left_deep_outer_join)
49 contains_left_deep_outer_join,
50 Executor::getExecutor(Executor::UNITARY_EXECUTOR_ID).get()) {}
53 : executor_id_(Executor::INVALID_EXECUTOR_ID)
57 , ir_builder_(context_)
58 , contains_left_deep_outer_join_(
false)
59 , needs_error_check_(
false)
61 , query_func_(nullptr)
62 , query_func_entry_ir_builder_(context_){};
71 return llInt(int64_t(0));
96 return llInt(int64_t(0));
120 const size_t byte_width,
121 const bool is_signed) {
122 int64_t max_int{0}, min_int{0};
126 uint64_t max_uint{0}, min_uint{0};
128 max_int =
static_cast<int64_t
>(max_uint);
131 switch (byte_width) {
133 return std::make_pair(::
ll_int(static_cast<int8_t>(max_int),
context_),
136 return std::make_pair(::
ll_int(static_cast<int16_t>(max_int),
context_),
139 return std::make_pair(::
ll_int(static_cast<int32_t>(max_int),
context_),
149 auto src_bits = val->getType()->getScalarSizeInBits();
150 if (src_bits == dst_bits) {
153 if (val->getType()->isIntegerTy()) {
158 if (val->getType()->isPointerTy()) {
162 CHECK(val->getType()->isFloatTy() || val->getType()->isDoubleTy());
167 dst_type = llvm::Type::getDoubleTy(
context_);
170 dst_type = llvm::Type::getFloatTy(
context_);
181 if (!fn->isDeclaration()) {
186 auto func_impl =
getExecutor()->get_rt_module()->getFunction(fn->getName());
187 CHECK(func_impl) << fn->getName().str();
189 if (func_impl->isDeclaration()) {
193 auto DestI = fn->arg_begin();
194 for (
auto arg_it = func_impl->arg_begin(); arg_it != func_impl->arg_end(); ++arg_it) {
195 DestI->setName(arg_it->getName());
196 vmap_[&*arg_it] = &*DestI++;
199 llvm::SmallVector<llvm::ReturnInst*, 8> Returns;
200 #if LLVM_VERSION_MAJOR > 12
201 llvm::CloneFunctionInto(
202 fn, func_impl,
vmap_, llvm::CloneFunctionChangeType::DifferentModule, Returns);
204 llvm::CloneFunctionInto(fn, func_impl,
vmap_,
true, Returns);
207 for (
auto it = llvm::inst_begin(fn), e = llvm::inst_end(fn); it != e; ++it) {
208 if (llvm::isa<llvm::CallInst>(*it)) {
209 auto& call = llvm::cast<llvm::CallInst>(*it);
216 const std::vector<llvm::Value*>&
args) {
218 auto func =
module_->getFunction(fname);
228 llvm::Value* errorCode,
233 ir_builder_.CreateCondBr(condition, check_ok, check_fail);
242 template <
typename T>
243 llvm::Type*
getTy(llvm::LLVMContext& ctx) {
return getTy<std::remove_pointer_t<T>>(ctx)->getPointerTo(); }
261 virtual llvm::FunctionCallee getFunction(llvm::Module* llvm_module,
262 llvm::LLVMContext& context)
const = 0;
266 template <
typename... TYPES>
271 llvm::LLVMContext& context)
const {
272 return llvm_module->getOrInsertFunction(name_, getTy<TYPES>(context)...);
276 static const std::unordered_map<std::string, std::shared_ptr<GpuFunctionDefinition>>
278 {
"asin", std::make_shared<GpuFunction<double, double>>(
"Asin")},
279 {
"atanh", std::make_shared<GpuFunction<double, double>>(
"Atanh")},
280 {
"atan", std::make_shared<GpuFunction<double, double>>(
"Atan")},
281 {
"cosh", std::make_shared<GpuFunction<double, double>>(
"Cosh")},
282 {
"cos", std::make_shared<GpuFunction<double, double>>(
"Cos")},
283 {
"exp", std::make_shared<GpuFunction<double, double>>(
"Exp")},
284 {
"log", std::make_shared<GpuFunction<double, double>>(
"ln")},
285 {
"pow", std::make_shared<GpuFunction<double, double, double>>(
"power")},
286 {
"sinh", std::make_shared<GpuFunction<double, double>>(
"Sinh")},
287 {
"sin", std::make_shared<GpuFunction<double, double>>(
"Sin")},
288 {
"sqrt", std::make_shared<GpuFunction<double, double>>(
"Sqrt")},
289 {
"tan", std::make_shared<GpuFunction<double, double>>(
"Tan")}};
293 std::vector<std::string> ret;
296 CHECK(!fn->isDeclaration());
298 for (
auto& basic_block : *fn) {
299 auto& inst_list = basic_block.getInstList();
300 for (
auto inst_itr = inst_list.begin(); inst_itr != inst_list.end(); ++inst_itr) {
301 if (
auto call_inst = llvm::dyn_cast<llvm::CallInst>(inst_itr)) {
302 auto called_fcn = call_inst->getCalledFunction();
307 ret.emplace_back(called_fcn->getName());
316 llvm::Function* fn) {
318 CHECK(!fn->isDeclaration());
322 throw QueryMustRunOnCpu(
"Codegen failed: Could not find replacement functon for " +
324 " to run on gpu. Query step must run in cpu mode.");
326 const auto& gpu_fcn_obj = map_it->second;
328 VLOG(1) <<
"Replacing " << fcn_to_replace <<
" with " << gpu_fcn_obj->name_
329 <<
" for parent function " << fn->getName().str();
331 for (
auto& basic_block : *fn) {
332 auto& inst_list = basic_block.getInstList();
333 for (
auto inst_itr = inst_list.begin(); inst_itr != inst_list.end(); ++inst_itr) {
334 if (
auto call_inst = llvm::dyn_cast<llvm::CallInst>(inst_itr)) {
335 auto called_fcn = call_inst->getCalledFunction();
338 if (called_fcn->getName() == fcn_to_replace) {
339 std::vector<llvm::Value*>
args;
340 std::vector<llvm::Type*> arg_types;
341 for (
auto& arg : call_inst->args()) {
342 arg_types.push_back(arg.get()->getType());
343 args.push_back(arg.get());
347 auto gpu_func_type = gpu_func.getFunctionType();
348 CHECK(gpu_func_type);
349 CHECK_EQ(gpu_func_type->getReturnType(), called_fcn->getReturnType());
350 llvm::ReplaceInstWithInst(call_inst,
351 llvm::CallInst::Create(gpu_func, args,
""));
371 llvm::CloneModule(*llvm_module,
vmap_, [always_clone](
const llvm::GlobalValue* gv) {
372 auto func = llvm::dyn_cast<llvm::Function>(gv);
376 return (func->getLinkage() == llvm::GlobalValue::LinkageTypes::PrivateLinkage ||
377 func->getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage ||
llvm::Value * castToTypeIn(llvm::Value *val, const size_t bit_width)
llvm::FunctionCallee getFunction(llvm::Module *llvm_module, llvm::LLVMContext &context) const
llvm::LLVMContext & getExecutorContext() const
std::shared_ptr< Executor > getExecutor() const
void maybeCloneFunctionRecursive(llvm::Function *fn)
llvm::ConstantInt * ll_int(const T v, llvm::LLVMContext &context)
llvm::IRBuilder ir_builder_
llvm::Type * getTy(llvm::LLVMContext &ctx)
HOST DEVICE SQLTypes get_type() const
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
GpuFunctionDefinition(char const *name)
llvm::Type * getTy< double >(llvm::LLVMContext &ctx)
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
static const std::unordered_map< std::string, std::shared_ptr< GpuFunctionDefinition > > gpu_replacement_functions
llvm::LLVMContext & context_
llvm::Function * current_func_
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
void replaceFunctionForGpu(const std::string &fcn_to_replace, llvm::Function *fn)
llvm::ConstantFP * llFp(const float v) const
std::vector< std::string > gpuFunctionsToReplace(llvm::Function *fn)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
static const ExecutorId INVALID_EXECUTOR_ID
std::pair< uint64_t, uint64_t > inline_uint_max_min(const size_t byte_width)
llvm::Constant * inlineNull(const SQLTypeInfo &)
void set_module_shallow_copy(const std::unique_ptr< llvm::Module > &module, bool always_clone=false)
HOST DEVICE EncodingType get_compression() const
static bool alwaysCloneRuntimeFunction(const llvm::Function *func)
void emitErrorCheck(llvm::Value *condition, llvm::Value *errorCode, std::string label)
llvm::ConstantInt * llInt(const T v) const
bool g_enable_watchdog false
llvm::ValueToValueMapTy vmap_
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::pair< int64_t, int64_t > inline_int_max_min(const size_t byte_width)
CgenState(const size_t num_query_infos, const bool contains_left_deep_outer_join, Executor *executor)
std::pair< llvm::ConstantInt *, llvm::ConstantInt * > inlineIntMaxMin(const size_t byte_width, const bool is_signed)
GpuFunction(char const *name)
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)