30 std::static_pointer_cast<Analyzer::ColumnVar>(column->
deep_copy()));
38 result.insert(next_result.begin(), next_result.end());
60 UsedColumnExpressions visitor;
61 const auto used_columns = visitor.visit(expr);
62 std::list<std::shared_ptr<const InputColDescriptor>> global_col_ids;
63 for (
const auto& used_column : used_columns) {
64 global_col_ids.push_back(std::make_shared<InputColDescriptor>(
65 used_column.first.getColId(),
66 used_column.first.getScanDesc().getTableId(),
67 used_column.first.getScanDesc().getNestLevel()));
75 const bool fetch_columns,
80 const auto used_columns =
prepare(expr);
81 std::vector<llvm::Type*> arg_types(plan_state_->global_to_local_col_ids_.size() + 1);
82 std::vector<std::shared_ptr<Analyzer::ColumnVar>> inputs(arg_types.size() - 1);
83 auto& ctx =
module_->getContext();
84 for (
const auto& kv : plan_state_->global_to_local_col_ids_) {
85 size_t arg_idx = kv.second;
87 const auto it = used_columns.find(kv.first);
88 const auto col_expr = it->second;
89 inputs[arg_idx] = col_expr;
90 const auto& ti = col_expr->get_type_info();
95 auto ft = llvm::FunctionType::get(
get_int_type(32, ctx), arg_types,
false);
96 auto scalar_expr_func = llvm::Function::Create(
97 ft, llvm::Function::ExternalLinkage,
"scalar_expr",
module_.get());
98 auto bb_entry = llvm::BasicBlock::Create(ctx,
".entry", scalar_expr_func, 0);
105 const auto expr_lvs =
codegen(expr, fetch_columns, co);
106 CHECK_EQ(expr_lvs.size(), size_t(1));
111 std::vector<llvm::Type*> wrapper_arg_types(arg_types.size() + 1);
112 wrapper_arg_types[0] = llvm::PointerType::get(
get_int_type(32, ctx), 0);
113 wrapper_arg_types[1] = arg_types[0];
114 for (
size_t i = 1;
i < arg_types.size(); ++
i) {
115 wrapper_arg_types[
i + 1] = llvm::PointerType::get(arg_types[
i], 0);
118 llvm::FunctionType::get(llvm::Type::getVoidTy(ctx), wrapper_arg_types,
false);
119 auto wrapper_scalar_expr_func =
120 llvm::Function::Create(wrapper_ft,
121 llvm::Function::ExternalLinkage,
122 "wrapper_scalar_expr",
124 auto wrapper_bb_entry =
125 llvm::BasicBlock::Create(ctx,
".entry", wrapper_scalar_expr_func, 0);
126 llvm::IRBuilder<> b(ctx);
127 b.SetInsertPoint(wrapper_bb_entry);
128 std::vector<llvm::Value*> loaded_args = {wrapper_scalar_expr_func->arg_begin() + 1};
129 for (
size_t i = 2;
i < wrapper_arg_types.size(); ++
i) {
130 loaded_args.push_back(b.CreateLoad(wrapper_scalar_expr_func->arg_begin() +
i));
132 auto error_lv = b.CreateCall(scalar_expr_func, loaded_args);
133 b.CreateStore(error_lv, wrapper_scalar_expr_func->arg_begin());
135 return {scalar_expr_func, wrapper_scalar_expr_func, inputs};
137 return {scalar_expr_func,
nullptr, inputs};
156 LOG(
FATAL) <<
"Invalid device type";
164 const bool fetch_column,
173 llvm::Function* func,
174 llvm::Function* wrapper_func,
181 cuda_mgr_ = std::make_unique<CudaMgr_Namespace::CudaMgr>(0);
183 const auto& dev_props =
cuda_mgr_->getAllDeviceProperties();
184 int block_size = dev_props.front().maxThreadsPerBlock;
192 func, wrapper_func, {func, wrapper_func}, co, gpu_target);
ScalarCodeGenerator::ColumnMap visitColumnVar(const Analyzer::ColumnVar *column) const override
CompiledExpression compile(const Analyzer::Expr *expr, const bool fetch_columns, const CompilationOptions &co)
std::vector< llvm::Value * > codegenColumn(const Analyzer::ColumnVar *, const bool fetch_column, const CompilationOptions &) override
std::vector< void * > generateNativeCode(const CompiledExpression &compiled_expression, const CompilationOptions &co)
llvm::Type * llvm_type_from_sql(const SQLTypeInfo &ti, llvm::LLVMContext &ctx)
std::unique_ptr< PlanState > own_plan_state_
ExecutionEngineWrapper execution_engine_
llvm::ExecutionEngine * get()
llvm::IRBuilder ir_builder_
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
std::shared_ptr< GpuCompilationContext > gpu_compilation_context_
std::shared_ptr< Analyzer::Expr > deep_copy() const override
static ExecutionEngineWrapper generateNativeCPUCode(llvm::Function *func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co)
HOST DEVICE SQLTypes get_type() const
std::unordered_map< InputColDescriptor, std::shared_ptr< Analyzer::ColumnVar >> ColumnMap
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Function * wrapper_func
const CudaMgr_Namespace::CudaMgr * cuda_mgr
llvm::Function * row_func_
std::unique_ptr< CudaMgr_Namespace::CudaMgr > cuda_mgr_
static std::shared_ptr< GpuCompilationContext > generateNativeGPUCode(llvm::Function *func, llvm::Function *wrapper_func, const std::unordered_set< llvm::Function * > &live_funcs, const CompilationOptions &co, const GPUTarget &gpu_target)
llvm::TargetMachine * nvptx_target_machine
int getLocalColumnId(const Analyzer::ColumnVar *col_var, const bool fetch_column)
std::unordered_map< TableId, const ColumnDescriptor * > DeletedColumnsMap
std::vector< InputTableInfo > g_table_infos
bool row_func_not_inlined
ScalarCodeGenerator::ColumnMap aggregateResult(const ScalarCodeGenerator::ColumnMap &aggregate, const ScalarCodeGenerator::ColumnMap &next_result) const override
void allocateLocalColumnIds(const std::list< std::shared_ptr< const InputColDescriptor >> &global_col_ids)
const SQLTypeInfo & get_type_info() const
ExecutorDeviceType device_type
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
ColumnMap prepare(const Analyzer::Expr *)
std::unique_ptr< CgenState > own_cgen_state_
std::vector< void * > generateNativeGPUCode(llvm::Function *func, llvm::Function *wrapper_func, const CompilationOptions &co)
std::unique_ptr< llvm::Module > module_
int get_column_id() const
static std::unique_ptr< llvm::TargetMachine > initializeNVPTXBackend(const CudaMgr_Namespace::NvidiaDeviceArch arch)