// Shorthand macros for the LLVM codegen state. Every macro expands to an
// expression on a variable named `executor`, so they can only be used where
// such a variable is in scope.
// LLVM context stored on the executor's codegen state.
32 #define LL_CONTEXT executor->cgen_state_->context_
// IR builder stored on the executor's codegen state.
33 #define LL_BUILDER executor->cgen_state_->ir_builder_
// Forwards `v` to executor->ll_bool.
34 #define LL_BOOL(v) executor->ll_bool(v)
// Forwards `v` to cgen_state_->llInt; call sites cast `v` to the desired integer
// width first (e.g. int32_t(0) vs int64_t(0)).
35 #define LL_INT(v) executor->cgen_state_->llInt(v)
// Forwards `v` to cgen_state_->llFp.
36 #define LL_FP(v) executor->cgen_state_->llFp(v)
// The row function stored on the executor's codegen state.
37 #define ROW_FUNC executor->cgen_state_->row_func_
// Fragment: these returns of aggregate-function base names match the helper
// agg_fn_base_names(target_info, is_varlen_projection). The function header and
// the case labels between the numbered lines are not visible in this excerpt.
// Varlen projections must be geometry-typed and map to a single function name.
49 if (is_varlen_projection) {
51 CHECK(chosen_type.is_geometry());
52 return {
"agg_id_varlen"};
// Geometry targets return 2 * get_physical_coord_cols() names (the repeated
// name itself is on a line that is not shown here).
55 if (chosen_type.is_geometry()) {
56 return std::vector<std::string>(2 * chosen_type.get_physical_coord_cols(),
// Other varlen targets: two "agg_id" entries.
59 if (chosen_type.is_varlen()) {
62 return {
"agg_id",
"agg_id"};
// Two-function result: a sum followed by a count.
// NOTE(review): presumably the AVG case; the case label is not visible -- confirm.
68 return {
"agg_sum",
"agg_count"};
// Count uses the distinct variant when target_info.is_distinct is set.
70 return {target_info.
is_distinct ?
"agg_count_distinct" :
"agg_count"};
// The remaining returns are single-function results; their case labels are not
// visible in this excerpt.
78 return {
"agg_approximate_count_distinct"};
80 return {
"agg_approx_quantile"};
82 return {
"checked_single_agg_id"};
// Fragment of a codegen(...) overload for a single target expression: the tail
// of its parameter list followed by parts of the body. Lines between the
// numbered entries are not visible in this excerpt, so the comments below
// describe only what the visible statements do.
115 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
116 const std::vector<llvm::Value*>& agg_out_vec,
117 llvm::Value* output_buffer_byte_stream,
118 llvm::Value* out_row_idx,
119 llvm::Value* varlen_output_buffer,
122 CHECK(group_by_and_agg);
// Local copy of the (pointer, index) tuple; the input itself is a const reference.
126 auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
// One branch of a conditional: the target values come from
// executor->codegenWindowFunction(target_idx, co).
135 ? std::vector<llvm::Value*>{executor->codegenWindowFunction(
target_idx, co)}
137 const auto window_row_ptr = window_func
139 window_func, query_mem_desc, co, diamond_codegen)
// When a window row pointer exists, a new tuple is built with it as the first
// element (second element kept from the input) and out_row_idx is redirected.
// NOTE(review): the tuple is presumably assigned to agg_out_ptr_w_idx; the
// left-hand side is on a line not shown here.
141 if (window_row_ptr) {
143 std::make_tuple(window_row_ptr, std::get<1>(agg_out_ptr_w_idx_in));
145 out_row_idx = window_row_ptr;
149 llvm::Value* str_target_lv{
nullptr};
// The first target value is moved into str_target_lv and removed from the list
// (the guarding condition is not visible here).
154 str_target_lv = target_lvs.front();
155 target_lvs.erase(target_lvs.begin());
// Fewer values than aggregate functions: exactly half as many values are
// expected, and each value is emitted twice so the counts match.
161 if (target_lvs.size() < agg_fn_names.size()) {
162 CHECK_EQ(target_lvs.size(), agg_fn_names.size() / 2);
163 std::vector<llvm::Value*> new_target_lvs;
164 new_target_lvs.reserve(agg_fn_names.size());
165 for (
const auto& target_lv : target_lvs) {
166 new_target_lvs.push_back(target_lv);
167 new_target_lvs.push_back(target_lv);
169 target_lvs = new_target_lvs;
// Second shortfall case: a single value feeding two aggregate functions; the
// front value is appended until the sizes match.
172 if (target_lvs.size() < agg_fn_names.size()) {
173 CHECK_EQ(
size_t(1), target_lvs.size());
174 CHECK_EQ(
size_t(2), agg_fn_names.size());
175 for (
size_t i = 1; i < agg_fn_names.size(); ++i) {
176 target_lvs.push_back(target_lvs.front());
// Invariants asserted on the value list; which branch each one sits in is not
// visible in this excerpt.
183 CHECK_EQ(agg_fn_names.size(), target_lvs.size());
186 CHECK(str_target_lv || (agg_fn_names.size() == target_lvs.size()));
187 CHECK(target_lvs.size() == 1 || target_lvs.size() == 2);
// Tail of a condition: no argument expression, or an argument whose type is
// NOT NULL. Inside, exactly one aggregate function is expected.
198 (!arg_expr || arg_expr->get_type_info().get_notnull())) {
199 CHECK_EQ(
size_t(1), agg_fn_names.size());
201 llvm::Value* agg_col_ptr{
nullptr};
// col_off is converted from bytes to units of chosen_bytes; the CHECK guards
// that it divides evenly. This first variant also requires the second tuple
// element to be non-null.
205 CHECK_EQ(
size_t(0), col_off % chosen_bytes);
206 col_off /= chosen_bytes;
207 CHECK(std::get<1>(agg_out_ptr_w_idx));
211 std::get<0>(agg_out_ptr_w_idx),
214 bit_cast->getType()->getScalarType()->getPointerElementType(),
// Second variant of the same offset computation.
219 CHECK_EQ(
size_t(0), col_off % chosen_bytes);
220 col_off /= chosen_bytes;
222 std::get<0>(agg_out_ptr_w_idx),
225 bit_cast->getType()->getScalarType()->getPointerElementType(),
// Accumulators that are not 4 bytes wide: the accumulator is the group-by
// column pointer when is_group_by, otherwise the slot in agg_out_vec.
231 if (chosen_bytes !=
sizeof(int32_t)) {
234 const auto acc_i64 =
LL_BUILDER.CreateBitCast(
235 is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
// Increment by 1 through the "agg_count_shared" function.
239 "agg_count_shared", std::vector<llvm::Value*>{acc_i64,
LL_INT(int64_t(1))});
// Atomic add with monotonic ordering. The #if passes an extra argument for
// LLVM major versions above 12 (the argument itself is not visible here).
241 LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
244 #
if LLVM_VERSION_MAJOR > 12
247 llvm::AtomicOrdering::Monotonic);
251 is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
// The accumulator is cast to an i32 pointer in address space 3.
// NOTE(review): address space 3 is presumably GPU shared memory -- confirm.
255 acc_i32, llvm::Type::getInt32PtrTy(
LL_CONTEXT, 3));
257 LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
260 #
if LLVM_VERSION_MAJOR > 12
263 llvm::AtomicOrdering::Monotonic);
266 const auto acc_i32 = (
is_group_by ? agg_col_ptr : agg_out_vec[slot_index]);
269 const auto shared_acc_i32 =
LL_BUILDER.CreatePointerCast(
270 acc_i32, llvm::Type::getInt32PtrTy(
LL_CONTEXT, 3));
271 LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
274 #
if LLVM_VERSION_MAJOR > 12
277 llvm::AtomicOrdering::Monotonic);
279 LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
282 #
if LLVM_VERSION_MAJOR > 12
285 llvm::AtomicOrdering::Monotonic);
// Arguments forwarded to a further call whose name is not visible here.
298 output_buffer_byte_stream,
300 varlen_output_buffer,
// Fragment of codegenAggregate(...): the tail of its parameter list followed by
// parts of the body. It walks agg_fn_names and emits one aggregate call per
// name. Lines between the numbered entries are not visible in this excerpt.
309 const std::vector<llvm::Value*>& target_lvs,
310 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
311 const std::vector<llvm::Value*>& agg_out_vec,
312 llvm::Value* output_buffer_byte_stream,
313 llvm::Value* out_row_idx,
314 llvm::Value* varlen_output_buffer,
315 int32_t slot_index)
const {
// Index into target_lvs for the value consumed by the current aggregate function.
317 size_t target_lv_idx = 0;
318 const bool lazy_fetched{executor->plan_state_->isLazyFetchColumn(
target_expr)};
326 for (
const auto& agg_base_name : agg_fn_names) {
// Element-type path: an external call receives the output location, the target
// value, posArg(arg_expr) and a null sentinel chosen by element type (FP vs
// integer). The guarding condition and callee name are not visible here.
332 const auto& elem_ti = arg_expr->get_type_info().get_elem_type();
// The byte offset must be 8-byte aligned; col_off is then expressed in int64_t units.
336 CHECK_EQ(
size_t(0), col_off_in_bytes %
sizeof(int64_t));
337 col_off /=
sizeof(int64_t);
339 executor->cgen_state_->emitExternalCall(
345 ->getPointerElementType(),
346 std::get<0>(agg_out_ptr_w_idx),
348 : agg_out_vec[slot_index],
349 target_lvs[target_lv_idx],
350 code_generator.
posArg(arg_expr),
352 ?
static_cast<llvm::Value*
>(executor->cgen_state_->inlineFpNull(elem_ti))
353 : static_cast<llvm::Value*>(
354 executor->cgen_state_->inlineIntNull(elem_ti))});
360 llvm::Value* agg_col_ptr{
nullptr};
361 const auto chosen_bytes =
364 const auto& arg_type =
365 ((arg_expr && arg_expr->get_type_info().get_type() !=
kNULLT) &&
// The argument counts as floating point only when it is not lazily fetched,
// its type is not kNULLT, and the type is FP.
369 const bool is_fp_arg =
370 !lazy_fetched && arg_type.
get_type() !=
kNULLT && arg_type.is_fp();
380 agg_col_ptr->setName(
"agg_col_ptr");
// Varlen projection path: requires a varlen output buffer and a pointer-typed
// target value.
387 CHECK_LT(target_lv_idx, target_lvs.size());
388 CHECK(varlen_output_buffer);
389 auto target_lv = target_lvs[target_lv_idx];
391 std::string agg_fname_suffix =
"";
// "_shared" suffix is appended under a condition that is not visible here.
394 agg_fname_suffix +=
"_shared";
399 auto& builder = executor->cgen_state_->ir_builder_;
400 auto orig_bb = builder.GetInsertBlock();
401 auto target_ptr_type = llvm::dyn_cast<llvm::PointerType>(target_lv->getType());
402 CHECK(target_ptr_type) <<
"Varlen projections expect a pointer input.";
// Equality comparison against a null pointer of the target's pointee type
// (address space 0); the first operand is on a line not shown here.
404 builder.CreateICmp(llvm::CmpInst::ICMP_EQ,
406 llvm::ConstantPointerNull::get(llvm::PointerType::get(
407 target_ptr_type->getPointerElementType(), 0)));
408 llvm::BasicBlock* true_bb{
nullptr};
// Branch diamond keyed on is_nullptr; the copy is emitted in cond_false_.
411 is_nullptr, executor,
false,
"varlen_null_check",
nullptr,
false);
415 builder.SetInsertPoint(nullcheck_diamond.
cond_false_);
// Cast to an i8 pointer in address space 0.
416 auto arr_ptr_lv = executor->cgen_state_->ir_builder_.CreateBitCast(
418 llvm::PointerType::get(
get_int_type(8, executor->cgen_state_->context_), 0));
419 const int64_t chosen_bytes =
// The output slot is loaded through `arg` and zero-extended.
422 const auto output_buffer_slot =
LL_BUILDER.CreateZExt(
423 LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg),
426 CHECK(varlen_buffer_row_sz);
// Byte offset into the varlen buffer, built from the slot, the per-row size
// (*varlen_buffer_row_sz) and a further constant whose source is not visible here.
427 const auto output_buffer_slot_bytes =
LL_BUILDER.CreateAdd(
429 executor->cgen_state_->llInt(
430 static_cast<int64_t>(*varlen_buffer_row_sz))),
431 executor->cgen_state_->llInt(static_cast<int64_t>(
434 std::vector<llvm::Value*> varlen_agg_args{
435 executor->castToIntPtrTyIn(varlen_output_buffer, 8),
436 output_buffer_slot_bytes,
438 executor->cgen_state_->llInt(chosen_bytes)};
439 auto varlen_offset_ptr =
440 group_by_and_agg->
emitCall(agg_base_name + agg_fname_suffix, varlen_agg_args);
// The returned pointer is converted to a 64-bit integer so it can be stored
// through the agg_id call below.
443 auto varlen_offset_int =
LL_BUILDER.CreatePtrToInt(
444 varlen_offset_ptr, llvm::Type::getInt64Ty(
LL_CONTEXT));
445 builder.CreateBr(nullcheck_diamond.
cond_true_);
// Merge in cond_true_: the PHI takes the computed offset from the non-null
// path and the constant 0 from the other incoming edge.
448 builder.SetInsertPoint(nullcheck_diamond.
cond_true_);
450 builder.CreatePHI(llvm::Type::getInt64Ty(executor->cgen_state_->context_), 2);
451 output_phi->addIncoming(varlen_offset_int, nullcheck_diamond.
cond_false_);
452 output_phi->addIncoming(executor->cgen_state_->llInt(static_cast<int64_t>(0)),
455 std::vector<llvm::Value*> agg_args{agg_col_ptr, output_phi};
456 group_by_and_agg->
emitCall(
"agg_id" + agg_fname_suffix, agg_args);
459 builder.SetInsertPoint(true_bb);
// General path. A float argument is aggregated at sizeof(float) unless this is
// the count half of an average; otherwise the chosen slot width is used.
469 const auto agg_chosen_bytes =
470 float_argument_input && !is_count_in_avg ?
sizeof(float) : chosen_bytes;
471 if (float_argument_input) {
472 CHECK_GE(chosen_bytes,
sizeof(
float));
475 auto target_lv = target_lvs[target_lv_idx];
479 if (!needs_unnest_double_patch) {
482 }
else if (is_fp_arg) {
485 if (!dynamic_cast<const Analyzer::AggExpr*>(
target_expr) || arg_expr) {
// Cast the value to the aggregate width in bits (bytes << 3).
487 executor->cgen_state_->castToTypeIn(target_lv, (agg_chosen_bytes << 3));
492 llvm::Value* str_target_lv{
nullptr};
495 str_target_lv = target_lvs.front();
// Argument list: first the accumulator pointer cast to the aggregate width,
// then the value. A simple count without an argument passes a zero constant of
// matching width.
497 std::vector<llvm::Value*> agg_args{
498 executor->castToIntPtrTyIn((
is_group_by ? agg_col_ptr : agg_out_vec[slot_index]),
499 (agg_chosen_bytes << 3)),
500 (is_simple_count_target && !arg_expr)
501 ? (agg_chosen_bytes ==
sizeof(int32_t) ?
LL_INT(int32_t(0))
503 : (is_simple_count_target && arg_expr && str_target_lv ? str_target_lv
506 if (is_simple_count_target && arg_expr && str_target_lv) {
508 agg_chosen_bytes ==
sizeof(int32_t) ?
LL_INT(int32_t(0)) :
LL_INT(int64_t(0));
// The runtime function name is the base name plus a width/type suffix.
511 std::string agg_fname{agg_base_name};
514 if (agg_chosen_bytes ==
sizeof(
float)) {
516 agg_fname +=
"_float";
518 CHECK_EQ(agg_chosen_bytes,
sizeof(
double));
519 agg_fname +=
"_double";
522 }
else if (agg_chosen_bytes ==
sizeof(int32_t)) {
523 agg_fname +=
"_int32";
524 }
else if (agg_chosen_bytes ==
sizeof(int16_t) &&
526 agg_fname +=
"_int16";
// The 1-byte suffix is only used when the output is columnar.
527 }
else if (agg_chosen_bytes ==
sizeof(int8_t) && query_mem_desc.
didOutputColumnar()) {
528 agg_fname +=
"_int8";
532 CHECK_EQ(agg_chosen_bytes,
sizeof(int64_t));
533 CHECK(!chosen_type.is_fp());
537 CHECK_EQ(agg_chosen_bytes,
sizeof(int64_t));
// Null-skipping variant (never applied to geometry arguments): the function
// gets a "_skip_val" suffix and the null sentinel is appended as an argument.
542 if (need_skip_null && !arg_ti.is_geometry()) {
543 agg_fname +=
"_skip_val";
547 (need_skip_null && !arg_ti.is_geometry())) {
548 llvm::Value* null_in_lv{
nullptr};
549 if (arg_ti.is_fp()) {
551 static_cast<llvm::Value*
>(executor->cgen_state_->inlineFpNull(arg_ti));
553 null_in_lv =
static_cast<llvm::Value*
>(executor->cgen_state_->inlineIntNull(
560 executor->cgen_state_->castToTypeIn(null_in_lv, (agg_chosen_bytes << 3));
561 agg_args.push_back(null_lv);
566 agg_fname +=
"_shared";
567 if (needs_unnest_double_patch) {
571 auto agg_fname_call_ret_lv = group_by_and_agg->
emitCall(agg_fname, agg_args);
// Functions whose name contains "checked" get extra handling of the call result
// (the handling itself is on lines not shown here).
573 if (agg_fname.find(
"checked") != std::string::npos) {
// Window functions without framing (plus a further condition not visible
// here): register the accumulator as a pending output, then apply pending
// outputs through a function whose name is specialized on the result type.
580 if (window_func && !window_func->hasFraming() &&
582 const auto window_func_context =
584 const auto pending_outputs =
585 LL_INT(window_func_context->aggregateStatePendingOutputs());
586 executor->cgen_state_->emitExternalCall(
"add_window_pending_output",
588 {agg_args.front(), pending_outputs});
589 const auto& window_func_ti = window_func->get_type_info();
590 std::string apply_window_pending_outputs_name =
"apply_window_pending_outputs";
591 switch (window_func_ti.get_type()) {
593 apply_window_pending_outputs_name +=
"_float";
595 apply_window_pending_outputs_name +=
"_columnar";
600 apply_window_pending_outputs_name +=
"_double";
604 apply_window_pending_outputs_name +=
"_int";
606 apply_window_pending_outputs_name +=
609 apply_window_pending_outputs_name +=
"64";
// The host pointer returned by partitionEnd() is embedded in the generated
// code as a 64-bit integer constant.
614 const auto partition_end =
615 LL_INT(reinterpret_cast<int64_t>(window_func_context->partitionEnd()));
616 executor->cgen_state_->emitExternalCall(apply_window_pending_outputs_name,
621 code_generator.
posArg(
nullptr)});
// Fragment of the builder's operator()(target_expr, executor, ...): it decides
// how a target expression is queued for code generation and advances the slot
// and target counters. Lines between the numbered entries are not visible.
630 const Executor* executor,
// Path for targets that must not be aggregate expressions: each counter
// advances by one.
635 CHECK(!dynamic_cast<const Analyzer::AggExpr*>(target_expr));
636 ++slot_index_counter;
637 ++target_index_counter;
// UNNEST as a unary operator in the projection list is rejected with an exception.
640 if (dynamic_cast<const Analyzer::UOper*>(target_expr) &&
641 static_cast<const Analyzer::UOper*>(target_expr)->get_optype() ==
kUNNEST) {
642 throw std::runtime_error(
"UNNEST not supported in the projection list yet.");
644 if ((executor->plan_state_->isLazyFetchColumn(target_expr) || !
is_group_by) &&
653 executor->plan_state_->isLazyFetchColumn(target_expr)) {
663 auto arg_expr =
agg_arg(target_expr);
670 !arg_expr->get_type_info().is_varlen()) {
// The target is appended to either the sample list or the regular list; both
// post-increment target_index_counter. The selecting condition is not visible.
683 sample_exprs_to_codegen.emplace_back(target_expr,
686 target_index_counter++,
689 target_exprs_to_codegen.emplace_back(target_expr,
692 target_index_counter++,
// One slot is used per aggregate function name.
698 slot_index_counter += agg_fn_names.size();
// Fragment of the builder-level codegen(...): it runs codegen for every queued
// regular target and then hands the sample targets, if any, to
// codegenSampleExpressions. Lines between the numbered entries are not visible.
724 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
725 const std::vector<llvm::Value*>& agg_out_vec,
726 llvm::Value* output_buffer_byte_stream,
727 llvm::Value* out_row_idx,
728 llvm::Value* varlen_output_buffer,
730 CHECK(group_by_and_agg);
734 for (
const auto& target_expr_codegen : target_exprs_to_codegen) {
735 target_expr_codegen.codegen(group_by_and_agg,
742 output_buffer_byte_stream,
744 varlen_output_buffer,
// Sample expressions are generated separately, and only when at least one exists.
747 if (!sample_exprs_to_codegen.empty()) {
748 codegenSampleExpressions(group_by_and_agg,
754 output_buffer_byte_stream,
// Fragment of codegenSampleExpressions(...): dispatches between the single-slot
// and multi-slot sample code paths. Requires a non-empty sample list. Lines
// between the numbered entries are not visible in this excerpt.
765 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
766 const std::vector<llvm::Value*>& agg_out_vec,
767 llvm::Value* output_buffer_byte_stream,
768 llvm::Value* out_row_idx,
771 CHECK(!sample_exprs_to_codegen.empty());
// Single-slot path: exactly one sample expression whose SQL type is not varlen.
773 if (sample_exprs_to_codegen.size() == 1 &&
774 !sample_exprs_to_codegen.front().target_info.sql_type.is_varlen()) {
775 codegenSingleSlotSampleExpression(group_by_and_agg,
781 output_buffer_byte_stream,
// Otherwise the multi-slot path is used.
785 codegenMultiSlotSampleExpressions(group_by_and_agg,
791 output_buffer_byte_stream,
// Fragment of codegenSingleSlotSampleExpression(...): asserts that there is
// exactly one non-varlen sample expression and runs its codegen. Lines between
// the numbered entries are not visible in this excerpt.
802 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
803 const std::vector<llvm::Value*>& agg_out_vec,
804 llvm::Value* output_buffer_byte_stream,
805 llvm::Value* out_row_idx,
808 CHECK_EQ(
size_t(1), sample_exprs_to_codegen.size());
809 CHECK(!sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
812 sample_exprs_to_codegen.front().codegen(group_by_and_agg,
819 output_buffer_byte_stream,
// Fragment of codegenMultiSlotSampleExpressions(...): uses the first sample
// expression's slot for an empty-key check (codegenSlotEmptyKey) and then runs
// codegen for every sample expression. Lines between the numbered entries are
// not visible in this excerpt.
830 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
831 const std::vector<llvm::Value*>& agg_out_vec,
832 llvm::Value* output_buffer_byte_stream,
833 llvm::Value* out_row_idx,
// Precondition: more than one sample expression, or a varlen first expression.
836 CHECK(sample_exprs_to_codegen.size() > 1 ||
837 sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
839 const auto& first_sample_expr = sample_exprs_to_codegen.front();
840 auto target_lvs = group_by_and_agg->
codegenAggArg(first_sample_expr.target_expr, co);
841 CHECK_GE(target_lvs.size(), size_t(1));
843 const auto init_val =
846 llvm::Value* agg_col_ptr{
nullptr};
// Column width: the SQL type's size for non-varlen targets; the varlen
// alternative is on a line not shown here.
848 const auto agg_column_size_bytes =
850 !first_sample_expr.target_info.sql_type.is_varlen()
851 ? first_sample_expr.target_info.sql_type.get_size()
857 agg_column_size_bytes,
858 first_sample_expr.base_slot_index,
859 first_sample_expr.target_idx);
// Alternative source for the column pointer: the entry of agg_out_vec at the
// base slot index, after a bounds check, cast via castToIntPtrTyIn(..., 64).
861 CHECK_LT(static_cast<size_t>(first_sample_expr.base_slot_index), agg_out_vec.size());
863 executor->castToIntPtrTyIn(agg_out_vec[first_sample_expr.base_slot_index], 64);
867 codegenSlotEmptyKey(agg_col_ptr, target_lvs, executor, query_mem_desc, init_val);
// Branch diamond keyed on the CAS result, chained to the enclosing diamond_codegen.
870 sample_cas_lv, executor,
false,
"sample_valcheck", &diamond_codegen,
false);
872 for (
const auto& target_expr_codegen : sample_exprs_to_codegen) {
873 target_expr_codegen.codegen(group_by_and_agg,
880 output_buffer_byte_stream,
// Fragment of codegenSlotEmptyKey(...): emits an external call to a
// "slotEmptyKeyCAS" function (with a width suffix in some cases) on
// agg_col_ptr, passing the first sample value cast to an integer and the
// initial value. The call yields an i1, which is returned. Lines between the
// numbered entries are not visible in this excerpt.
889 llvm::Value* agg_col_ptr,
890 std::vector<llvm::Value*>& target_lvs,
893 const int64_t init_val)
const {
895 const auto& first_sample_expr = sample_exprs_to_codegen.front();
// Slot width: the SQL type's size for non-varlen targets; the varlen value is
// on a line not shown here.
896 const auto first_sample_slot_bytes =
897 first_sample_expr.target_info.sql_type.is_varlen()
899 : first_sample_expr.target_info.sql_type.get_size();
900 llvm::Value* target_lv_casted{
nullptr};
// The value to store is brought to an integer representation: varlen and FP
// targets have dedicated branches, narrower integers are sign-extended, and
// the remaining case passes the value through unchanged.
902 if (first_sample_expr.target_info.sql_type.is_varlen()) {
905 }
else if (first_sample_expr.target_info.sql_type.is_fp()) {
// FP values are converted with FPToSI. In the first conversion, a
// float-sized slot selects i32; the second conversion targets i64.
909 target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
911 first_sample_slot_bytes ==
sizeof(float) ? llvm::Type::getInt32Ty(
LL_CONTEXT)
914 target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
915 target_lvs.front(), llvm::Type::getInt64Ty(
LL_CONTEXT));
917 }
else if (first_sample_slot_bytes !=
sizeof(int64_t) &&
920 executor->cgen_state_->ir_builder_.CreateCast(llvm::Instruction::CastOps::SExt,
924 target_lv_casted = target_lvs.front();
927 std::string slot_empty_cas_func_name(
"slotEmptyKeyCAS");
928 llvm::Value* init_val_lv{
LL_INT(init_val)};
// For non-varlen targets (plus a further condition not visible here) the
// function name gets a suffix selected by slot width; unsupported widths are
// flagged as unreachable.
930 !first_sample_expr.target_info.sql_type.is_varlen()) {
932 switch (first_sample_slot_bytes) {
934 slot_empty_cas_func_name +=
"_int8";
937 slot_empty_cas_func_name +=
"_int16";
940 slot_empty_cas_func_name +=
"_int32";
945 UNREACHABLE() <<
"Invalid slot size for slotEmptyKeyCAS function.";
// When the slot is not 8 bytes wide, the initial value constant is rebuilt
// (the type used is on a line not shown here).
948 if (first_sample_slot_bytes !=
sizeof(int64_t)) {
949 init_val_lv = llvm::ConstantInt::get(
954 auto sample_cas_lv = executor->cgen_state_->emitExternalCall(
955 slot_empty_cas_func_name,
956 llvm::Type::getInt1Ty(executor->cgen_state_->context_),
957 {agg_col_ptr, target_lv_casted, init_val_lv});
958 return sample_cas_lv;
size_t varlenOutputRowSizeToSlot(const size_t slot_idx) const
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
bool target_has_geo(const TargetInfo &target_info)
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
llvm::BasicBlock * cond_false_
llvm::Value * codegenAggColumnPtr(llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
Returns the pointer to where the aggregation should be stored.
std::vector< std::string > agg_fn_base_names(const TargetInfo &target_info, const bool is_varlen_projection)
bool isLogicalSizedColumnsAllowed() const
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, llvm::Value *varlen_output_buffer, DiamondCodegen &diamond_codegen, DiamondCodegen *sample_cfg=nullptr) const
void codegenMultiSlotSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
llvm::Value * posArg(const Analyzer::Expr *) const
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
void codegenApproxQuantile(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
void checkErrorCode(llvm::Value *retCode)
bool takes_float_argument(const TargetInfo &target_info)
#define LLVM_ALIGN(alignment)
HOST DEVICE SQLTypes get_type() const
bool isSharedMemoryUsed() const
bool needsUnnestDoublePatch(llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
llvm::BasicBlock * cond_true_
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
TargetInfo get_target_info(const Analyzer::Expr *target_expr, const bool bigint_count)
std::string patch_agg_fname(const std::string &agg_name)
Helpers for codegen of target expressions.
size_t getColOnlyOffInBytes(const size_t col_idx) const
const SQLTypeInfo get_compact_type(const TargetInfo &target)
bool is_varlen_projection(const Analyzer::Expr *target_expr, const SQLTypeInfo &ti)
size_t getCompactByteWidth() const
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
void operator()(const Analyzer::Expr *target_expr, const Executor *executor, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
llvm::Value * convertNullIfAny(const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
void codegenSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, llvm::Value *varlen_output_buffer, DiamondCodegen &diamond_codegen) const
bool is_distinct_target(const TargetInfo &target_info)
void codegenCountDistinct(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
QueryDescriptionType getQueryDescriptionType() const
ExecutorDeviceType device_type
std::optional< size_t > varlenOutputBufferElemSize() const
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
llvm::Value * codegenSlotEmptyKey(llvm::Value *agg_col_ptr, std::vector< llvm::Value * > &target_lvs, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const int64_t init_val) const
HOST DEVICE EncodingType get_compression() const
const Analyzer::Expr * target_expr
std::vector< llvm::Value * > codegenAggArg(const Analyzer::Expr *target_expr, const CompilationOptions &co)
llvm::Value * codegenWindowRowPointer(const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
bool window_function_requires_peer_handling(const Analyzer::WindowFunction *window_func)
bool is_simple_count(const TargetInfo &target_info)
bool didOutputColumnar() const
bool threadsShareMemory() const
static void resetWindowFunctionContext(Executor *executor)
void setPaddedSlotWidthBytes(const size_t slot_idx, const int8_t bytes)
int64_t get_initial_agg_val(const TargetInfo &target_info, const QueryMemoryDescriptor &query_mem_desc)
std::string numeric_type_name(const SQLTypeInfo &ti)
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
int get_physical_coord_cols() const
size_t getColOffInBytes(const size_t col_idx) const
void codegenAggregate(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::vector< llvm::Value * > &target_lvs, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, llvm::Value *varlen_output_buffer, int32_t slot_index) const
bool is_columnar_projection(const QueryMemoryDescriptor &query_mem_desc)
void codegenSingleSlotSampleExpression(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
bool is_agg_domain_range_equivalent(const SQLAgg &agg_kind)