// Shorthand macros for reaching the executor's code-generation state.
// Every macro expands to an expression on a variable literally named `executor`,
// so such a variable must be in scope at each point of use.

// The `context_` member of the codegen state; passed to llvm::Type factory
// functions (e.g. getInt32PtrTy / getInt64Ty) elsewhere in this file.
32 #define LL_CONTEXT executor->cgen_state_->context_
// The `ir_builder_` member of the codegen state; used below to emit instructions
// such as CreateBitCast, CreatePointerCast and CreateAtomicRMW.
33 #define LL_BUILDER executor->cgen_state_->ir_builder_
// Forwards `v` to executor->ll_bool(v).
34 #define LL_BOOL(v) executor->ll_bool(v)
// Forwards `v` to cgen_state_->llInt(v); callers in this file pass explicitly
// sized values (int32_t(...) / int64_t(...)).
35 #define LL_INT(v) executor->cgen_state_->llInt(v)
// Forwards `v` to cgen_state_->llFp(v).
36 #define LL_FP(v) executor->cgen_state_->llFp(v)
// The `row_func_` member of the codegen state.
// NOTE(review): presumably the row function currently being generated — confirm.
37 #define ROW_FUNC executor->cgen_state_->row_func_
44 if (chosen_type.is_geometry()) {
45 return std::vector<std::string>(2 * chosen_type.get_physical_coord_cols(),
48 if (chosen_type.is_varlen()) {
49 return {
"agg_id",
"agg_id"};
55 return {
"agg_sum",
"agg_count"};
57 return {target_info.
is_distinct ?
"agg_count_distinct" :
"agg_count"};
65 return {
"agg_approximate_count_distinct"};
67 return {
"agg_approx_median"};
69 return {
"checked_single_agg_id"};
100 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
101 const std::vector<llvm::Value*>& agg_out_vec,
102 llvm::Value* output_buffer_byte_stream,
103 llvm::Value* out_row_idx,
106 CHECK(group_by_and_agg);
110 auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
118 ? std::vector<llvm::Value*>{executor->codegenWindowFunction(
target_idx, co)}
120 const auto window_row_ptr = window_func
122 window_func, query_mem_desc, co, diamond_codegen)
124 if (window_row_ptr) {
126 std::make_tuple(window_row_ptr, std::get<1>(agg_out_ptr_w_idx_in));
128 out_row_idx = window_row_ptr;
132 llvm::Value* str_target_lv{
nullptr};
137 str_target_lv = target_lvs.front();
138 target_lvs.erase(target_lvs.begin());
144 if (target_lvs.size() < agg_fn_names.size()) {
145 CHECK_EQ(target_lvs.size(), agg_fn_names.size() / 2);
146 std::vector<llvm::Value*> new_target_lvs;
147 new_target_lvs.reserve(agg_fn_names.size());
148 for (
const auto& target_lv : target_lvs) {
149 new_target_lvs.push_back(target_lv);
150 new_target_lvs.push_back(target_lv);
152 target_lvs = new_target_lvs;
155 if (target_lvs.size() < agg_fn_names.size()) {
156 CHECK_EQ(
size_t(1), target_lvs.size());
157 CHECK_EQ(
size_t(2), agg_fn_names.size());
158 for (
size_t i = 1;
i < agg_fn_names.size(); ++
i) {
159 target_lvs.push_back(target_lvs.front());
166 CHECK_EQ(agg_fn_names.size(), target_lvs.size());
169 CHECK(str_target_lv || (agg_fn_names.size() == target_lvs.size()));
170 CHECK(target_lvs.size() == 1 || target_lvs.size() == 2);
181 (!arg_expr || arg_expr->get_type_info().get_notnull())) {
182 CHECK_EQ(
size_t(1), agg_fn_names.size());
184 llvm::Value* agg_col_ptr{
nullptr};
188 CHECK_EQ(
size_t(0), col_off % chosen_bytes);
189 col_off /= chosen_bytes;
190 CHECK(std::get<1>(agg_out_ptr_w_idx));
195 std::get<0>(agg_out_ptr_w_idx),
200 CHECK_EQ(
size_t(0), col_off % chosen_bytes);
201 col_off /= chosen_bytes;
204 std::get<0>(agg_out_ptr_w_idx),
210 if (chosen_bytes !=
sizeof(int32_t)) {
213 const auto acc_i64 =
LL_BUILDER.CreateBitCast(
214 is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
218 "agg_count_shared", std::vector<llvm::Value*>{acc_i64,
LL_INT(int64_t(1))});
220 LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
223 llvm::AtomicOrdering::Monotonic);
227 is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
231 acc_i32, llvm::Type::getInt32PtrTy(
LL_CONTEXT, 3));
233 LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
236 llvm::AtomicOrdering::Monotonic);
239 const auto acc_i32 = (
is_group_by ? agg_col_ptr : agg_out_vec[slot_index]);
242 const auto shared_acc_i32 =
LL_BUILDER.CreatePointerCast(
243 acc_i32, llvm::Type::getInt32PtrTy(
LL_CONTEXT, 3));
244 LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
247 llvm::AtomicOrdering::Monotonic);
249 LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
252 llvm::AtomicOrdering::Monotonic);
265 output_buffer_byte_stream,
275 const std::vector<llvm::Value*>& target_lvs,
276 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
277 const std::vector<llvm::Value*>& agg_out_vec,
278 llvm::Value* output_buffer_byte_stream,
279 llvm::Value* out_row_idx,
280 int32_t slot_index)
const {
282 size_t target_lv_idx = 0;
283 const bool lazy_fetched{executor->plan_state_->isLazyFetchColumn(
target_expr)};
290 for (
const auto& agg_base_name : agg_fn_names) {
296 const auto& elem_ti = arg_expr->get_type_info().get_elem_type();
300 CHECK_EQ(
size_t(0), col_off_in_bytes %
sizeof(int64_t));
301 col_off /=
sizeof(int64_t);
303 executor->cgen_state_->emitExternalCall(
308 : agg_out_vec[slot_index],
309 target_lvs[target_lv_idx],
310 code_generator.
posArg(arg_expr),
312 ?
static_cast<llvm::Value*
>(executor->cgen_state_->inlineFpNull(elem_ti))
313 : static_cast<llvm::Value*>(
314 executor->cgen_state_->inlineIntNull(elem_ti))});
320 llvm::Value* agg_col_ptr{
nullptr};
321 const auto chosen_bytes =
324 const auto& arg_type =
325 ((arg_expr && arg_expr->get_type_info().get_type() !=
kNULLT) &&
329 const bool is_fp_arg =
330 !lazy_fetched && arg_type.
get_type() !=
kNULLT && arg_type.is_fp();
340 agg_col_ptr->setName(
"agg_col_ptr");
346 const auto agg_chosen_bytes =
347 float_argument_input && !is_count_in_avg ?
sizeof(float) : chosen_bytes;
348 if (float_argument_input) {
349 CHECK_GE(chosen_bytes,
sizeof(
float));
352 auto target_lv = target_lvs[target_lv_idx];
356 if (!needs_unnest_double_patch) {
359 }
else if (is_fp_arg) {
362 if (!dynamic_cast<const Analyzer::AggExpr*>(
target_expr) || arg_expr) {
364 executor->cgen_state_->castToTypeIn(target_lv, (agg_chosen_bytes << 3));
369 llvm::Value* str_target_lv{
nullptr};
372 str_target_lv = target_lvs.front();
374 std::vector<llvm::Value*> agg_args{
375 executor->castToIntPtrTyIn((
is_group_by ? agg_col_ptr : agg_out_vec[slot_index]),
376 (agg_chosen_bytes << 3)),
377 (is_simple_count_target && !arg_expr)
378 ? (agg_chosen_bytes ==
sizeof(int32_t) ?
LL_INT(int32_t(0))
380 : (is_simple_count_target && arg_expr && str_target_lv ? str_target_lv
383 if (is_simple_count_target && arg_expr && str_target_lv) {
385 agg_chosen_bytes ==
sizeof(int32_t) ?
LL_INT(int32_t(0)) :
LL_INT(int64_t(0));
388 std::string agg_fname{agg_base_name};
391 if (agg_chosen_bytes ==
sizeof(
float)) {
393 agg_fname +=
"_float";
395 CHECK_EQ(agg_chosen_bytes,
sizeof(
double));
396 agg_fname +=
"_double";
399 }
else if (agg_chosen_bytes ==
sizeof(int32_t)) {
400 agg_fname +=
"_int32";
401 }
else if (agg_chosen_bytes ==
sizeof(int16_t) &&
403 agg_fname +=
"_int16";
404 }
else if (agg_chosen_bytes ==
sizeof(int8_t) && query_mem_desc.
didOutputColumnar()) {
405 agg_fname +=
"_int8";
409 CHECK_EQ(agg_chosen_bytes,
sizeof(int64_t));
410 CHECK(!chosen_type.is_fp());
414 CHECK_EQ(agg_chosen_bytes,
sizeof(int64_t));
419 if (need_skip_null && !arg_ti.is_geometry()) {
420 agg_fname +=
"_skip_val";
424 (need_skip_null && !arg_ti.is_geometry())) {
425 llvm::Value* null_in_lv{
nullptr};
426 if (arg_ti.is_fp()) {
428 static_cast<llvm::Value*
>(executor->cgen_state_->inlineFpNull(arg_ti));
430 null_in_lv =
static_cast<llvm::Value*
>(executor->cgen_state_->inlineIntNull(
437 executor->cgen_state_->castToTypeIn(null_in_lv, (agg_chosen_bytes << 3));
438 agg_args.push_back(null_lv);
443 agg_fname +=
"_shared";
444 if (needs_unnest_double_patch) {
448 auto agg_fname_call_ret_lv = group_by_and_agg->
emitCall(agg_fname, agg_args);
450 if (agg_fname.find(
"checked") != std::string::npos) {
457 const auto window_func_context =
459 const auto pending_outputs =
460 LL_INT(window_func_context->aggregateStatePendingOutputs());
461 executor->cgen_state_->emitExternalCall(
"add_window_pending_output",
463 {agg_args.front(), pending_outputs});
464 const auto& window_func_ti = window_func->get_type_info();
465 std::string apply_window_pending_outputs_name =
"apply_window_pending_outputs";
466 switch (window_func_ti.get_type()) {
468 apply_window_pending_outputs_name +=
"_float";
470 apply_window_pending_outputs_name +=
"_columnar";
475 apply_window_pending_outputs_name +=
"_double";
479 apply_window_pending_outputs_name +=
"_int";
481 apply_window_pending_outputs_name +=
484 apply_window_pending_outputs_name +=
"64";
489 const auto partition_end =
490 LL_INT(reinterpret_cast<int64_t>(window_func_context->partitionEnd()));
491 executor->cgen_state_->emitExternalCall(apply_window_pending_outputs_name,
496 code_generator.
posArg(
nullptr)});
505 const Executor* executor,
509 CHECK(!dynamic_cast<const Analyzer::AggExpr*>(target_expr));
510 ++slot_index_counter;
511 ++target_index_counter;
514 if (dynamic_cast<const Analyzer::UOper*>(target_expr) &&
515 static_cast<const Analyzer::UOper*>(target_expr)->get_optype() ==
kUNNEST) {
516 throw std::runtime_error(
"UNNEST not supported in the projection list yet.");
518 if ((executor->plan_state_->isLazyFetchColumn(target_expr) || !
is_group_by) &&
527 auto arg_expr =
agg_arg(target_expr);
534 !arg_expr->get_type_info().is_varlen()) {
547 sample_exprs_to_codegen.emplace_back(target_expr,
550 target_index_counter++,
553 target_exprs_to_codegen.emplace_back(target_expr,
556 target_index_counter++,
561 slot_index_counter += agg_fn_names.size();
587 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
588 const std::vector<llvm::Value*>& agg_out_vec,
589 llvm::Value* output_buffer_byte_stream,
590 llvm::Value* out_row_idx,
592 CHECK(group_by_and_agg);
596 for (
const auto& target_expr_codegen : target_exprs_to_codegen) {
597 target_expr_codegen.codegen(group_by_and_agg,
604 output_buffer_byte_stream,
608 if (!sample_exprs_to_codegen.empty()) {
609 codegenSampleExpressions(group_by_and_agg,
615 output_buffer_byte_stream,
626 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
627 const std::vector<llvm::Value*>& agg_out_vec,
628 llvm::Value* output_buffer_byte_stream,
629 llvm::Value* out_row_idx,
632 CHECK(!sample_exprs_to_codegen.empty());
634 if (sample_exprs_to_codegen.size() == 1 &&
635 !sample_exprs_to_codegen.front().target_info.sql_type.is_varlen()) {
636 codegenSingleSlotSampleExpression(group_by_and_agg,
642 output_buffer_byte_stream,
646 codegenMultiSlotSampleExpressions(group_by_and_agg,
652 output_buffer_byte_stream,
663 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
664 const std::vector<llvm::Value*>& agg_out_vec,
665 llvm::Value* output_buffer_byte_stream,
666 llvm::Value* out_row_idx,
669 CHECK_EQ(
size_t(1), sample_exprs_to_codegen.size());
670 CHECK(!sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
673 sample_exprs_to_codegen.front().codegen(group_by_and_agg,
680 output_buffer_byte_stream,
690 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
691 const std::vector<llvm::Value*>& agg_out_vec,
692 llvm::Value* output_buffer_byte_stream,
693 llvm::Value* out_row_idx,
696 CHECK(sample_exprs_to_codegen.size() > 1 ||
697 sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
699 const auto& first_sample_expr = sample_exprs_to_codegen.front();
700 auto target_lvs = group_by_and_agg->
codegenAggArg(first_sample_expr.target_expr, co);
701 CHECK_GE(target_lvs.size(), size_t(1));
703 const auto init_val =
706 llvm::Value* agg_col_ptr{
nullptr};
708 const auto agg_column_size_bytes =
710 !first_sample_expr.target_info.sql_type.is_varlen()
711 ? first_sample_expr.target_info.sql_type.get_size()
717 agg_column_size_bytes,
718 first_sample_expr.base_slot_index,
719 first_sample_expr.target_idx);
721 CHECK_LT(static_cast<size_t>(first_sample_expr.base_slot_index), agg_out_vec.size());
723 executor->castToIntPtrTyIn(agg_out_vec[first_sample_expr.base_slot_index], 64);
726 auto sample_cas_lv = codegenSlotEmptyKey(agg_col_ptr, target_lvs, executor, init_val);
729 sample_cas_lv, executor,
false,
"sample_valcheck", &diamond_codegen,
false);
731 for (
const auto& target_expr_codegen : sample_exprs_to_codegen) {
732 target_expr_codegen.codegen(group_by_and_agg,
739 output_buffer_byte_stream,
747 llvm::Value* agg_col_ptr,
748 std::vector<llvm::Value*>& target_lvs,
750 const int64_t init_val)
const {
752 const auto& first_sample_expr = sample_exprs_to_codegen.front();
753 const auto first_sample_slot_bytes =
754 first_sample_expr.target_info.sql_type.is_varlen()
756 : first_sample_expr.target_info.sql_type.get_size();
757 llvm::Value* target_lv_casted{
nullptr};
759 if (first_sample_expr.target_info.sql_type.is_varlen()) {
762 }
else if (first_sample_expr.target_info.sql_type.is_fp()) {
766 target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
768 first_sample_slot_bytes ==
sizeof(float) ? llvm::Type::getInt32Ty(
LL_CONTEXT)
771 target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
772 target_lvs.front(), llvm::Type::getInt64Ty(
LL_CONTEXT));
774 }
else if (first_sample_slot_bytes !=
sizeof(int64_t) &&
777 executor->cgen_state_->ir_builder_.CreateCast(llvm::Instruction::CastOps::SExt,
781 target_lv_casted = target_lvs.front();
784 std::string slot_empty_cas_func_name(
"slotEmptyKeyCAS");
785 llvm::Value* init_val_lv{
LL_INT(init_val)};
787 !first_sample_expr.target_info.sql_type.is_varlen()) {
789 switch (first_sample_slot_bytes) {
791 slot_empty_cas_func_name +=
"_int8";
794 slot_empty_cas_func_name +=
"_int16";
797 slot_empty_cas_func_name +=
"_int32";
802 UNREACHABLE() <<
"Invalid slot size for slotEmptyKeyCAS function.";
805 if (first_sample_slot_bytes !=
sizeof(int64_t)) {
806 init_val_lv = llvm::ConstantInt::get(
811 auto sample_cas_lv = executor->cgen_state_->emitExternalCall(
812 slot_empty_cas_func_name,
813 llvm::Type::getInt1Ty(executor->cgen_state_->context_),
814 {agg_col_ptr, target_lv_casted, init_val_lv});
815 return sample_cas_lv;
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
bool target_has_geo(const TargetInfo &target_info)
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
void codegenAggregate(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::vector< llvm::Value * > &target_lvs, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, int32_t slot_index) const
std::vector< std::string > agg_fn_base_names(const TargetInfo &target_info)
llvm::Value * codegenAggColumnPtr(llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
: returns the pointer to where the aggregation should be stored.
void codegenSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen) const
bool isLogicalSizedColumnsAllowed() const
void codegenApproxMedian(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen, GroupByAndAggregate::DiamondCodegen *sample_cfg=nullptr) const
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen) const
llvm::Value * posArg(const Analyzer::Expr *) const
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
void checkErrorCode(llvm::Value *retCode)
bool takes_float_argument(const TargetInfo &target_info)
HOST DEVICE SQLTypes get_type() const
bool isSharedMemoryUsed() const
bool needsUnnestDoublePatch(llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
llvm::Value * codegenSlotEmptyKey(llvm::Value *agg_col_ptr, std::vector< llvm::Value * > &target_lvs, Executor *executor, const int64_t init_val) const
std::string patch_agg_fname(const std::string &agg_name)
Helpers for codegen of target expressions.
void codegenSingleSlotSampleExpression(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen) const
size_t getColOnlyOffInBytes(const size_t col_idx) const
const SQLTypeInfo get_compact_type(const TargetInfo &target)
size_t getCompactByteWidth() const
llvm::Value * convertNullIfAny(const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
bool is_distinct_target(const TargetInfo &target_info)
void codegenCountDistinct(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
QueryDescriptionType getQueryDescriptionType() const
ExecutorDeviceType device_type
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
HOST DEVICE EncodingType get_compression() const
const Analyzer::Expr * target_expr
std::vector< llvm::Value * > codegenAggArg(const Analyzer::Expr *target_expr, const CompilationOptions &co)
llvm::Value * codegenWindowRowPointer(const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
bool window_function_requires_peer_handling(const Analyzer::WindowFunction *window_func)
void codegenMultiSlotSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, GroupByAndAggregate::DiamondCodegen &diamond_codegen) const
bool is_simple_count(const TargetInfo &target_info)
bool didOutputColumnar() const
bool threadsShareMemory() const
static void resetWindowFunctionContext(Executor *executor)
int64_t get_initial_agg_val(const TargetInfo &target_info, const QueryMemoryDescriptor &query_mem_desc)
std::string numeric_type_name(const SQLTypeInfo &ti)
void operator()(const Analyzer::Expr *target_expr, const Executor *executor, const CompilationOptions &co)
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
int get_physical_coord_cols() const
size_t getColOffInBytes(const size_t col_idx) const
bool is_columnar_projection(const QueryMemoryDescriptor &query_mem_desc)
bool is_agg_domain_range_equivalent(const SQLAgg &agg_kind)