OmniSciDB  72c90bc290
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
llvm::Value * codegenVarlenOutputBuffer (const QueryMemoryDescriptor &query_mem_desc)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored. More...
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
void codegenApproxQuantile (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
void codegenMode (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 61 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 391 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, and ra_exe_unit_.

398  : executor_(executor)
399  , ra_exe_unit_(ra_exe_unit)
400  , query_infos_(query_infos)
401  , row_set_mem_owner_(row_set_mem_owner)
402  , device_type_(device_type)
403  , group_cardinality_estimation_(group_cardinality_estimation) {
404  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
405  if (!groupby_expr) {
406  continue;
407  }
408  const auto& groupby_ti = groupby_expr->get_type_info();
409  if (groupby_ti.is_text_encoding_none()) {
410  throw std::runtime_error(
411  "Cannot group by string columns which are not dictionary encoded.");
412  }
413  if (groupby_ti.is_buffer()) {
414  throw std::runtime_error("Group by buffer not supported");
415  }
416  if (groupby_ti.is_geometry()) {
417  throw std::runtime_error("Group by geometry not supported");
418  }
419  }
420 }

Member Function Documentation

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 2209 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

2209  {
2210  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2211  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
2212  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
2213  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
2214 
2215  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
2216 }
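
The emitted IR simply compares the runtime return code against zero and routes non-zero values to the error-handling path. A minimal C++ sketch of the check the generated code performs at runtime (the function and variable names here are illustrative, not part of the generated module):

    #include <cstdint>

    // Roughly what emitErrorCheck(rc == 0, rc, "rc") amounts to at runtime:
    // a non-zero return code aborts further work for the current row and is
    // propagated back to the host side.
    inline bool check_error_code(int32_t rc, int32_t& query_error_code) {
      if (rc != 0) {
        query_error_code = rc;  // remember the first error encountered
        return false;           // stop processing this row
      }
      return true;
    }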

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 1022 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), codegenVarlenOutputBuffer(), DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, LLVM_ALIGN, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

1026  {
1027  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1028  CHECK(filter_result);
1029 
1030  bool can_return_error = false;
1031  llvm::BasicBlock* filter_false{nullptr};
1032 
1033  {
1034  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
1035 
1036  if (executor_->isArchMaxwell(co.device_type)) {
1037  prependForceSync();
1038  }
1039  DiamondCodegen filter_cfg(filter_result,
1040  executor_,
1041  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
1042  "filter", // filter_true and filter_false basic blocks
1043  nullptr,
1044  false);
1045  filter_false = filter_cfg.cond_false_;
1046 
1047  if (is_group_by) {
1048  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1049  !query_mem_desc.useStreamingTopN()) {
1050  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
1051  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
1052  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
1053  llvm::Value* old_total_matched_val{nullptr};
1054  if (co.device_type == ExecutorDeviceType::GPU) {
1055  old_total_matched_val =
1056  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
1057  total_matched_ptr,
1058  LL_INT(int32_t(1)),
1059 #if LLVM_VERSION_MAJOR > 12
1060  LLVM_ALIGN(8),
1061 #endif
1062  llvm::AtomicOrdering::Monotonic);
1063  } else {
1064  old_total_matched_val = LL_BUILDER.CreateLoad(
1065  total_matched_ptr->getType()->getPointerElementType(), total_matched_ptr);
1066  LL_BUILDER.CreateStore(
1067  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
1068  total_matched_ptr);
1069  }
1070  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
1071  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
1072  }
1073 
1074  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
1075  auto varlen_output_buffer = codegenVarlenOutputBuffer(query_mem_desc);
1076  if (query_mem_desc.usesGetGroupValueFast() ||
1077  query_mem_desc.getQueryDescriptionType() ==
1078  QueryDescriptionType::GroupByPerfectHash) {
1079  if (query_mem_desc.getGroupbyColCount() > 1) {
1080  filter_cfg.setChainToNext();
1081  }
1082  // Don't generate null checks if the group slot is guaranteed to be non-null,
1083  // as it's the case for get_group_value_fast* family.
1084  can_return_error = codegenAggCalls(agg_out_ptr_w_idx,
1085  varlen_output_buffer,
1086  {},
1087  query_mem_desc,
1088  co,
1089  gpu_smem_context,
1090  filter_cfg);
1091  } else {
1092  {
1093  llvm::Value* nullcheck_cond{nullptr};
1094  if (query_mem_desc.didOutputColumnar()) {
1095  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
1096  LL_INT(int32_t(0)));
1097  } else {
1098  nullcheck_cond = LL_BUILDER.CreateICmpNE(
1099  std::get<0>(agg_out_ptr_w_idx),
1100  llvm::ConstantPointerNull::get(
1101  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
1102  }
1103  DiamondCodegen nullcheck_cfg(
1104  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
1105  codegenAggCalls(agg_out_ptr_w_idx,
1106  varlen_output_buffer,
1107  {},
1108  query_mem_desc,
1109  co,
1110  gpu_smem_context,
1111  filter_cfg);
1112  }
1113  can_return_error = true;
1114  if (query_mem_desc.getQueryDescriptionType() ==
1115  QueryDescriptionType::Projection &&
1116  query_mem_desc.useStreamingTopN()) {
1117  // Ignore rejection on pushing current row to top-K heap.
1118  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
1119  } else {
1120  CodeGenerator code_generator(executor_);
1121  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
1122  // TODO(alex): remove the trunc once pos is converted to 32 bits
1123  code_generator.posArg(nullptr),
1124  get_int_type(32, LL_CONTEXT))));
1125  }
1126  }
1127  } else {
1128  if (ra_exe_unit_.estimator) {
1129  std::stack<llvm::BasicBlock*> array_loops;
1130  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
1131  } else {
1132  auto arg_it = ROW_FUNC->arg_begin();
1133  std::vector<llvm::Value*> agg_out_vec;
1134  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1135  agg_out_vec.push_back(&*arg_it++);
1136  }
1137  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1138  /*varlen_output_buffer=*/nullptr,
1139  agg_out_vec,
1140  query_mem_desc,
1141  co,
1142  gpu_smem_context,
1143  filter_cfg);
1144  }
1145  }
1146  }
1147 
1148  if (ra_exe_unit_.join_quals.empty()) {
1149  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1150  } else if (sc_false) {
1151  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1152  LL_BUILDER.SetInsertPoint(sc_false);
1153  LL_BUILDER.CreateBr(filter_false);
1154  LL_BUILDER.SetInsertPoint(saved_insert_block);
1155  }
1156 
1157  return can_return_error;
1158 }
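
In the projection path above, the running `total_matched` counter is bumped either with an atomic read-modify-write (when output threads share the buffer, as on GPU) or with a plain load/add/store; the previous value becomes the output slot for the current row. A rough C++ analogue of the two strategies, as a hedged sketch (names are illustrative):

    #include <atomic>
    #include <cstdint>

    // Shared output buffer: concurrent row functions must not lose updates, so
    // the increment is an atomic fetch-add with relaxed ("Monotonic") ordering.
    int32_t bump_total_matched_shared(std::atomic<int32_t>& total_matched) {
      return total_matched.fetch_add(1, std::memory_order_relaxed);
    }

    // Private output buffer: a plain read-modify-write suffices.
    int32_t bump_total_matched_private(int32_t& total_matched) {
      const int32_t old_total_matched = total_matched;
      total_matched = old_total_matched + 1;
      return old_total_matched;
    }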

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 2019 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_EQ, CodeGenerator::codegen(), CgenState::context_, CUR_FUNC, executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

2021  {
2022  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2023  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2024  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
2025  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
2026 
2027  // TODO(alex): handle arrays uniformly?
2028  CodeGenerator code_generator(executor_);
2029  if (target_expr) {
2030  const auto& target_ti = target_expr->get_type_info();
2031  if (target_ti.is_buffer() &&
2032  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
2033  const auto target_lvs =
2034  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2035  : code_generator.codegen(
2036  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
2037  if (!func_expr && !arr_expr) {
2038  // Something with the chunk transport is code that was generated from a source
2039  // other than an ARRAY[] expression
2040  if (target_ti.is_text_encoding_none()) {
2041  CHECK_EQ(size_t(3), target_lvs.size());
2042  return {target_lvs[1], target_lvs[2]};
2043  }
2044  CHECK(target_ti.is_array());
2045  CHECK_EQ(size_t(1), target_lvs.size());
2046  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
2047  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
2048  const auto i8p_ty =
2049  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
2050  const auto& elem_ti = target_ti.get_elem_type();
2051  return {
2052  executor_->cgen_state_->emitExternalCall(
2053  "array_buff",
2054  i8p_ty,
2055  {target_lvs.front(), code_generator.posArg(target_expr)}),
2056  executor_->cgen_state_->emitExternalCall(
2057  "array_size",
2058  i32_ty,
2059  {target_lvs.front(),
2060  code_generator.posArg(target_expr),
2061  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
2062  } else {
2063  if (agg_expr) {
2064  throw std::runtime_error(
2065  "Using array[] operator as argument to an aggregate operator is not "
2066  "supported");
2067  }
2068  CHECK(func_expr || arr_expr);
2069  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
2070  CHECK_EQ(size_t(1), target_lvs.size());
2071  const auto prefix = target_ti.get_buffer_name();
2072  CHECK(target_ti.is_array() || target_ti.is_text_encoding_none());
2073  const auto target_lv = LL_BUILDER.CreateLoad(
2074  target_lvs[0]->getType()->getPointerElementType(), target_lvs[0]);
2075  // const auto target_lv_type = target_lvs[0]->getType();
2076  // CHECK(target_lv_type->isStructTy());
2077  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
2078  const auto i8p_ty = llvm::PointerType::get(
2079  get_int_type(8, executor_->cgen_state_->context_), 0);
2080  const auto ptr = LL_BUILDER.CreatePointerCast(
2081  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
2082  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
2083  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
2084  const auto nullcheck_ok_bb =
2085  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_nullcheck_ok_bb", CUR_FUNC);
2086  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
2087  LL_CONTEXT, prefix + "_nullcheck_fail_bb", CUR_FUNC);
2088 
2089  // TODO(adb): probably better to zext the bool
2090  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
2091  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
2092  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
2093 
2094  const auto ret_bb =
2095  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_return", CUR_FUNC);
2096  LL_BUILDER.SetInsertPoint(ret_bb);
2097  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
2098  result_phi->addIncoming(ptr, nullcheck_ok_bb);
2099  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
2100  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
2101  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
2102  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
2103  executor_->cgen_state_->emitExternalCall(
2104  "register_buffer_with_executor_rsm",
2105  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
2106  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
2107  LL_BUILDER.CreateBr(ret_bb);
2108  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
2109  LL_BUILDER.CreateBr(ret_bb);
2110 
2111  LL_BUILDER.SetInsertPoint(ret_bb);
2112  return {result_phi, size};
2113  }
2114  CHECK_EQ(size_t(2), target_lvs.size());
2115  return {target_lvs[0], target_lvs[1]};
2116  }
2117  }
2118  if (target_ti.is_geometry() &&
2119  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
2120  auto generate_coord_lvs =
2121  [&](auto* selected_target_expr,
2122  bool const fetch_columns) -> std::vector<llvm::Value*> {
2123  const auto target_lvs =
2124  code_generator.codegen(selected_target_expr, fetch_columns, co);
2125  if (dynamic_cast<const Analyzer::GeoOperator*>(target_expr) &&
2126  target_expr->get_type_info().is_geometry()) {
2127  // return a pointer to the temporary alloca
2128  return target_lvs;
2129  }
2130  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
2131  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
2132  if (geo_uoper || geo_binoper) {
2133  CHECK(target_expr->get_type_info().is_geometry());
2134  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
2135  target_lvs.size());
2136  return target_lvs;
2137  }
2138  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
2139  target_lvs.size());
2140 
2141  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
2142  const auto i8p_ty =
2143  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
2144  std::vector<llvm::Value*> coords;
2145  size_t ctr = 0;
2146  for (const auto& target_lv : target_lvs) {
2147  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
2148  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
2149  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
2150  // coords array (TINYINT). Subsequent arrays are regular INT.
2151 
2152  const size_t elem_sz = ctr == 0 ? 1 : 4;
2153  ctr++;
2154  int32_t fixlen = -1;
2155  if (target_ti.get_type() == kPOINT) {
2156  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
2157  if (col_var) {
2158  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
2159  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
2160  fixlen = coords_cd->columnType.get_size();
2161  }
2162  }
2163  }
2164  if (fixlen > 0) {
2165  coords.push_back(executor_->cgen_state_->emitExternalCall(
2166  "fast_fixlen_array_buff",
2167  i8p_ty,
2168  {target_lv, code_generator.posArg(selected_target_expr)}));
2169  auto fixed_len_lv = executor_->cgen_state_->emitExternalCall(
2170  "determine_fixed_array_len",
2171  llvm::IntegerType::get(code_generator.cgen_state_->context_, 64),
2172  {target_lv, executor_->cgen_state_->llInt(int64_t(fixlen))});
2173  coords.push_back(fixed_len_lv);
2174  continue;
2175  }
2176  coords.push_back(executor_->cgen_state_->emitExternalCall(
2177  "array_buff",
2178  i8p_ty,
2179  {target_lv, code_generator.posArg(selected_target_expr)}));
2180  coords.push_back(executor_->cgen_state_->emitExternalCall(
2181  "array_size",
2182  i32_ty,
2183  {target_lv,
2184  code_generator.posArg(selected_target_expr),
2185  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
2186  }
2187  return coords;
2188  };
2189 
2190  if (agg_expr) {
2191  return generate_coord_lvs(agg_expr->get_arg(), true);
2192  } else {
2193  return generate_coord_lvs(target_expr,
2194  !executor_->plan_state_->allow_lazy_fetch_);
2195  }
2196  }
2197  }
2198  bool fetch_column = !executor_->plan_state_->allow_lazy_fetch_;
2199  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2200  : code_generator.codegen(target_expr, fetch_column, co);
2201 }
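
For array and none-encoded string targets the argument is lowered into a (buffer pointer, element count) pair via the `array_buff` and `array_size` runtime helpers, with the element width passed as its base-2 logarithm. An illustrative reimplementation of the `log2_bytes` helper used above (a sketch, not the project's header):

    #include <cstdint>
    #include <stdexcept>

    // Width of an array element in bytes -> log2 of that width, as expected by
    // the array_size runtime helper (1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3).
    inline uint32_t log2_bytes_sketch(const uint32_t bytes) {
      switch (bytes) {
        case 1: return 0;
        case 2: return 1;
        case 4: return 2;
        case 8: return 3;
        default: throw std::runtime_error("unexpected element width");
      }
    }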

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
llvm::Value *  varlen_output_buffer,
const std::vector< llvm::Value * > &  agg_out_vec,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1656 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1663  {
1664  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1665  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1666  // TODO(alex): unify the two cases, the output for non-group by queries
1667  // should be a contiguous buffer
1668  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1669  bool can_return_error = false;
1670  if (is_group_by) {
1671  CHECK(agg_out_vec.empty());
1672  } else {
1673  CHECK(!agg_out_vec.empty());
1674  }
1675 
1676  // output buffer is casted into a byte stream to be able to handle data elements of
1677  // different sizes (only used when actual column width sizes are used)
1678  llvm::Value* output_buffer_byte_stream{nullptr};
1679  llvm::Value* out_row_idx{nullptr};
1680  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1681  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1682  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1683  std::get<0>(agg_out_ptr_w_idx),
1684  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1685  output_buffer_byte_stream->setName("out_buff_b_stream");
1686  CHECK(std::get<1>(agg_out_ptr_w_idx));
1687  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1688  llvm::Type::getInt64Ty(LL_CONTEXT));
1689  out_row_idx->setName("out_row_idx");
1690  }
1691 
1692  TargetExprCodegenBuilder target_builder(ra_exe_unit_, is_group_by);
1693  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1694  ++target_idx) {
1695  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1696  CHECK(target_expr);
1697 
1698  target_builder(target_expr, executor_, query_mem_desc, co);
1699  }
1700 
1701  target_builder.codegen(this,
1702  executor_,
1703  query_mem_desc,
1704  co,
1705  gpu_smem_context,
1706  agg_out_ptr_w_idx,
1707  agg_out_vec,
1708  output_buffer_byte_stream,
1709  out_row_idx,
1710  varlen_output_buffer,
1711  diamond_codegen);
1712 
1713  return can_return_error;
1714 }

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1719 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, shared::bit_cast(), CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1726  {
1727  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1728  llvm::Value* agg_col_ptr{nullptr};
1729  if (query_mem_desc.didOutputColumnar()) {
1730  // TODO(Saman): remove the second columnar branch, and support all query description
1731  // types through the first branch. Then, input arguments should also be cleaned up
1732  if (!g_cluster &&
1733  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1734  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1735  chosen_bytes == 8);
1736  CHECK(output_buffer_byte_stream);
1737  CHECK(out_row_idx);
1738  size_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1739  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1740  auto out_per_col_byte_idx =
1741 #ifdef _WIN32
1742  LL_BUILDER.CreateShl(out_row_idx, __lzcnt(chosen_bytes) - 1);
1743 #else
1744  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1745 #endif
1746  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1747  LL_INT(static_cast<int64_t>(col_off)));
1748  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1749  auto output_ptr = LL_BUILDER.CreateGEP(
1750  output_buffer_byte_stream->getType()->getScalarType()->getPointerElementType(),
1751  output_buffer_byte_stream,
1752  byte_offset);
1753  agg_col_ptr = LL_BUILDER.CreateBitCast(
1754  output_ptr,
1755  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1756  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1757  } else {
1758  auto const col_off_in_bytes = query_mem_desc.getColOffInBytes(agg_out_off);
1759  auto const col_off = col_off_in_bytes / chosen_bytes;
1760  auto const col_rem = col_off_in_bytes % chosen_bytes;
1761  CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
1762  CHECK(std::get<1>(agg_out_ptr_w_idx));
1763  auto* agg_out_idx = LL_BUILDER.CreateZExt(
1764  std::get<1>(agg_out_ptr_w_idx),
1765  get_int_type(8 * sizeof(col_off), executor_->cgen_state_->context_));
1766  auto* offset = LL_BUILDER.CreateAdd(agg_out_idx, LL_INT(col_off));
1767  auto* bit_cast = LL_BUILDER.CreateBitCast(
1768  std::get<0>(agg_out_ptr_w_idx),
1769  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1770  agg_col_ptr = LL_BUILDER.CreateGEP(
1771  bit_cast->getType()->getScalarType()->getPointerElementType(),
1772  bit_cast,
1773  offset);
1774  }
1775  } else {
1776  auto const col_off_in_bytes = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1777  auto const col_off = col_off_in_bytes / chosen_bytes;
1778  auto const col_rem = col_off_in_bytes % chosen_bytes;
1779  CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
1780  auto* bit_cast = LL_BUILDER.CreateBitCast(
1781  std::get<0>(agg_out_ptr_w_idx),
1782  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1783  agg_col_ptr = LL_BUILDER.CreateGEP(
1784  bit_cast->getType()->getScalarType()->getPointerElementType(),
1785  bit_cast,
1786  LL_INT(col_off));
1787  }
1788  CHECK(agg_col_ptr);
1789  return agg_col_ptr;
1790 }
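
In the columnar projection case the slot address is `base + col_off_in_bytes + out_row_idx * chosen_bytes`, with the multiplication emitted as a left shift by log2(chosen_bytes), which is what `__builtin_ffs(chosen_bytes) - 1` (or the `__lzcnt` variant on Windows) yields for the power-of-two slot widths. In the row-wise case the group pointer already addresses the matching row, so only the within-row column offset is applied. A plain C++ sketch of the same address arithmetic (buffer layout and names are illustrative):

    #include <cstddef>
    #include <cstdint>

    // Columnar layout: one contiguous run of chosen_bytes-wide slots per column.
    int8_t* columnar_agg_col_ptr(int8_t* output_buffer_byte_stream,
                                 int64_t out_row_idx,
                                 size_t col_off_in_bytes,
                                 size_t chosen_bytes) {  // 1, 2, 4 or 8
      const int64_t byte_offset =
          (out_row_idx << (__builtin_ffs(static_cast<int>(chosen_bytes)) - 1)) +
          static_cast<int64_t>(col_off_in_bytes);
      return output_buffer_byte_stream + byte_offset;
    }

    // Row-wise layout: col_off_in_bytes is checked to be a multiple of
    // chosen_bytes, so it is expressed in slot units off the row's base pointer.
    int8_t* row_wise_agg_col_ptr(int8_t* row_base,
                                 size_t col_off_in_bytes,
                                 size_t chosen_bytes) {
      const size_t slot_off = col_off_in_bytes / chosen_bytes;
      return row_base + slot_off * chosen_bytes;
    }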

void GroupByAndAggregate::codegenApproxQuantile ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1926 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, g_bigint_count, SQLTypeInfo::get_notnull(), get_target_info(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1931  {
1932  if (device_type == ExecutorDeviceType::GPU) {
1933  throw QueryMustRunOnCpu();
1934  }
1935  llvm::BasicBlock *calc, *skip{nullptr};
1936  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1937  auto const arg_ti =
1938  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1939  bool const nullable = !arg_ti.get_notnull();
1940 
1941  auto* cs = executor_->cgen_state_.get();
1942  auto& irb = cs->ir_builder_;
1943  if (nullable) {
1944  auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1945  auto* const skip_cond = arg_ti.is_fp()
1946  ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1947  : irb.CreateICmpEQ(agg_args.back(), null_value);
1948  calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_quantile");
1949  skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_quantile");
1950  irb.CreateCondBr(skip_cond, skip, calc);
1951  cs->current_func_->getBasicBlockList().push_back(calc);
1952  irb.SetInsertPoint(calc);
1953  }
1954  if (!arg_ti.is_fp()) {
1955  auto const agg_info = get_target_info(target_expr, g_bigint_count);
1956  agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
1957  }
1958  cs->emitExternalCall(
1959  "agg_approx_quantile", llvm::Type::getVoidTy(cs->context_), agg_args);
1960  if (nullable) {
1961  irb.CreateBr(skip);
1962  cs->current_func_->getBasicBlockList().push_back(skip);
1963  irb.SetInsertPoint(skip);
1964  }
1965 }
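
The generated calc/skip blocks simply bypass the quantile update when the nullable argument equals its null sentinel, and non-floating-point arguments are converted to double before the `agg_approx_quantile` call. Roughly, at runtime (a hedged sketch; `quantile_sketch_add` stands in for the real runtime entry point):

    #include <cstdint>

    void quantile_sketch_add(void* sketch, double value);  // stand-in

    // Behaviour of the emitted code for a nullable integer argument.
    void approx_quantile_update(void* sketch, int64_t arg, int64_t null_sentinel) {
      if (arg == null_sentinel) {
        return;  // skip_approx_quantile: nulls do not contribute
      }
      // calc_approx_quantile: integer/decimal arguments are cast to fp first
      quantile_sketch_add(sketch, static_cast<double>(arg));
    }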

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1856 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1861  {
1862  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1863  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1864  const auto& arg_ti =
1865  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1866  if (arg_ti.is_fp()) {
1867  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1868  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1869  }
1870  const auto& count_distinct_descriptor =
1871  query_mem_desc.getCountDistinctDescriptor(target_idx);
1872  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1873  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1874  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1875  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1876  if (device_type == ExecutorDeviceType::GPU) {
1877  const auto base_dev_addr = getAdditionalLiteral(-1);
1878  const auto base_host_addr = getAdditionalLiteral(-2);
1879  agg_args.push_back(base_dev_addr);
1880  agg_args.push_back(base_host_addr);
1881  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1882  } else {
1883  emitCall("agg_approximate_count_distinct", agg_args);
1884  }
1885  return;
1886  }
1887  std::string agg_fname{"agg_count_distinct"};
1888  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1889  agg_fname += "_bitmap";
1890  agg_args.push_back(LL_INT(count_distinct_descriptor.min_val));
1891  agg_args.push_back(LL_INT(count_distinct_descriptor.bucket_size));
1892  }
1893  if (agg_info.skip_null_val) {
1894  auto null_lv = executor_->cgen_state_->castToTypeIn(
1895  (arg_ti.is_fp()
1896  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1897  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1898  64);
1899  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1900  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1901  agg_fname += "_skip_val";
1902  agg_args.push_back(null_lv);
1903  }
1904  if (device_type == ExecutorDeviceType::GPU) {
1905  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1906  agg_fname += "_gpu";
1907  const auto base_dev_addr = getAdditionalLiteral(-1);
1908  const auto base_host_addr = getAdditionalLiteral(-2);
1909  agg_args.push_back(base_dev_addr);
1910  agg_args.push_back(base_host_addr);
1911  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1912  CHECK_EQ(size_t(0),
1913  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1914  count_distinct_descriptor.sub_bitmap_count);
1915  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1916  count_distinct_descriptor.sub_bitmap_count)));
1917  }
1918  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1919  emitCall(agg_fname, agg_args);
1920  } else {
1921  executor_->cgen_state_->emitExternalCall(
1922  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1923  }
1924 }
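
The runtime function name is assembled from the aggregate's properties: `agg_count_distinct` gains a `_bitmap` suffix for the bitmap implementation (with the min value and bucket size appended to the arguments), `_skip_val` for a nullable argument, and `_gpu` on GPU, where the bitmap implementation is required. A small sketch of that selection (the boolean flags are stand-ins for the checks above):

    #include <string>

    // APPROX_COUNT_DISTINCT is handled separately via
    // agg_approximate_count_distinct[_gpu]; this covers exact COUNT(DISTINCT).
    std::string count_distinct_runtime_function(bool bitmap_impl,
                                                bool skip_null_val,
                                                bool is_gpu) {
      std::string agg_fname{"agg_count_distinct"};
      if (bitmap_impl) {
        agg_fname += "_bitmap";
      }
      if (skip_null_val) {
        agg_fname += "_skip_val";
      }
      if (is_gpu) {
        agg_fname += "_gpu";
      }
      return agg_fname;
    }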

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1792 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1795  {
1796  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1797  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1798  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1799  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1800  estimator_comp_count_lv);
1801  int32_t subkey_idx = 0;
1802  for (const auto& estimator_arg_comp : estimator_arg) {
1803  const auto estimator_arg_comp_lvs =
1804  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1805  query_mem_desc.getEffectiveKeyWidth(),
1806  co,
1807  false,
1808  0,
1809  diamond_codegen,
1810  array_loops,
1811  true);
1812  CHECK(!estimator_arg_comp_lvs.original_value);
1813  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1814  // store the sub-key to the buffer
1815  LL_BUILDER.CreateStore(
1816  estimator_arg_comp_lv,
1817  LL_BUILDER.CreateGEP(
1818  estimator_key_lv->getType()->getScalarType()->getPointerElementType(),
1819  estimator_key_lv,
1820  LL_INT(subkey_idx++)));
1821  }
1822  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1823  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1824  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1825  const auto estimator_comp_bytes_lv =
1826  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1827  const auto bitmap_size_lv =
1828  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1829  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1830  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1831 }
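
Each component of the estimator argument is materialized into a stack-allocated buffer of 64-bit sub-keys, which is then handed to the estimator's runtime function as raw bytes alongside the estimator bitmap. A rough C++ picture of the call the IR sets up (the runtime entry point comes from `getRuntimeFunctionName()` and is left abstract here):

    #include <cstdint>
    #include <vector>

    // Stand-in for the estimator runtime function (e.g. an NDV sketch update).
    void estimator_runtime_fn(int8_t* bitmap,
                              uint32_t bitmap_size_bytes,
                              const int8_t* key_bytes,
                              int32_t key_byte_count);

    void update_estimator(int8_t* bitmap,
                          uint32_t bitmap_size_bytes,
                          const std::vector<int64_t>& estimator_key) {
      // The generated code stores each sub-key into an alloca'd i64 array ...
      const auto* key_bytes = reinterpret_cast<const int8_t*>(estimator_key.data());
      const auto key_byte_count =
          static_cast<int32_t>(estimator_key.size() * sizeof(int64_t));
      // ... and passes it, byte-addressed, together with the bitmap.
      estimator_runtime_fn(bitmap, bitmap_size_bytes, key_bytes, key_byte_count);
    }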

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1248 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, query_infos_, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1251  {
1252  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1253  auto arg_it = ROW_FUNC->arg_begin();
1254  auto groups_buffer = arg_it++;
1255 
1256  std::stack<llvm::BasicBlock*> array_loops;
1257 
1258  // TODO(Saman): move this logic outside of this function.
1259  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1260  if (query_mem_desc.didOutputColumnar()) {
1261  return std::make_tuple(
1262  &*groups_buffer,
1263  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1264  } else {
1265  return std::make_tuple(
1266  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1267  nullptr);
1268  }
1269  }
1270 
1271  CHECK(query_mem_desc.getQueryDescriptionType() ==
1272  QueryDescriptionType::GroupByBaselineHash ||
1273  query_mem_desc.getQueryDescriptionType() ==
1274  QueryDescriptionType::GroupByPerfectHash);
1275 
1276  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1277  ? 0
1278  : query_mem_desc.getRowSize() / sizeof(int64_t);
1279 
1280  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1281  ? sizeof(int64_t)
1282  : query_mem_desc.getEffectiveKeyWidth();
1283  // for multi-column group by
1284  llvm::Value* group_key = nullptr;
1285  llvm::Value* key_size_lv = nullptr;
1286 
1287  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1288  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1289  if (query_mem_desc.getQueryDescriptionType() ==
1290  QueryDescriptionType::GroupByPerfectHash) {
1291  group_key =
1292  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1293  } else if (query_mem_desc.getQueryDescriptionType() ==
1294  QueryDescriptionType::GroupByBaselineHash) {
1295  group_key =
1296  col_width_size == sizeof(int32_t)
1297  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1298  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1299  }
1300  CHECK(group_key);
1301  CHECK(key_size_lv);
1302  }
1303 
1304  int32_t subkey_idx = 0;
1305  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1306  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1307  const auto col_range_info =
1308  get_expr_range_info(ra_exe_unit_, query_infos_, group_expr.get(), executor_);
1309  const auto translated_null_value = static_cast<int64_t>(
1310  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1311  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1312  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1313  : checked_int64_t(col_range_info.max) +
1314  (col_range_info.bucket ? col_range_info.bucket : 1));
1315 
1316  const bool col_has_nulls =
1317  query_mem_desc.getQueryDescriptionType() ==
1318  QueryDescriptionType::GroupByPerfectHash
1319  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1320  ? query_mem_desc.hasNulls()
1321  : col_range_info.has_nulls)
1322  : false;
1323 
1324  const auto group_expr_lvs =
1325  executor_->groupByColumnCodegen(group_expr.get(),
1326  col_width_size,
1327  co,
1328  col_has_nulls,
1329  translated_null_value,
1330  diamond_codegen,
1331  array_loops,
1332  query_mem_desc.threadsShareMemory());
1333  const auto group_expr_lv = group_expr_lvs.translated_value;
1334  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1335  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1336  return codegenSingleColumnPerfectHash(query_mem_desc,
1337  co,
1338  &*groups_buffer,
1339  group_expr_lv,
1340  group_expr_lvs.original_value,
1341  row_size_quad);
1342  } else {
1343  // store the sub-key to the buffer
1344  LL_BUILDER.CreateStore(
1345  group_expr_lv,
1346  LL_BUILDER.CreateGEP(
1347  group_key->getType()->getScalarType()->getPointerElementType(),
1348  group_key,
1349  LL_INT(subkey_idx++)));
1350  }
1351  }
1352  if (query_mem_desc.getQueryDescriptionType() ==
1353  QueryDescriptionType::GroupByPerfectHash) {
1354  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1355  return codegenMultiColumnPerfectHash(
1356  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1357  } else if (query_mem_desc.getQueryDescriptionType() ==
1358  QueryDescriptionType::GroupByBaselineHash) {
1359  return codegenMultiColumnBaselineHash(co,
1360  &*groups_buffer,
1361  group_key,
1362  key_size_lv,
1363  query_mem_desc,
1364  col_width_size,
1365  row_size_quad);
1366  }
1367  CHECK(false);
1368  return std::make_tuple(nullptr, nullptr);
1369 }
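
NULL group-by keys are translated to a value one bucket past the top of the column's range, `max + (bucket ? bucket : 1)`, so they occupy a dedicated slot instead of colliding with a real key. A tiny worked example of that computation (the numbers are made up):

    #include <cstdint>

    // translated_null_value as computed above: one bucket beyond the maximum.
    constexpr int64_t translated_null_value(int64_t max_val, int64_t bucket) {
      return max_val + (bucket ? bucket : 1);
    }

    static_assert(translated_null_value(/*max_val=*/100, /*bucket=*/0) == 101, "");
    static_assert(translated_null_value(/*max_val=*/100, /*bucket=*/10) == 110, "");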

void GroupByAndAggregate::codegenMode ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1967 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, get_int_type(), SQLTypeInfo::get_notnull(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1971  {
1972  if (device_type == ExecutorDeviceType::GPU) {
1973  throw QueryMustRunOnCpu();
1974  }
1975  llvm::BasicBlock *calc, *skip{nullptr};
1976  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1977  auto const arg_ti =
1978  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1979  bool const nullable = !arg_ti.get_notnull();
1980  bool const is_fp = arg_ti.is_fp();
1981  auto* cs = executor_->cgen_state_.get();
1982  auto& irb = cs->ir_builder_;
1983  if (nullable) {
1984  auto* const null_value =
1985  is_fp ? cs->inlineNull(arg_ti) : cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1986  auto* const skip_cond = is_fp ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1987  : irb.CreateICmpEQ(agg_args.back(), null_value);
1988  calc = llvm::BasicBlock::Create(cs->context_, "calc_mode");
1989  skip = llvm::BasicBlock::Create(cs->context_, "skip_mode");
1990  irb.CreateCondBr(skip_cond, skip, calc);
1991  cs->current_func_->getBasicBlockList().push_back(calc);
1992  irb.SetInsertPoint(calc);
1993  }
1994  if (is_fp) {
1995  auto* const int_type = get_int_type(8 * arg_ti.get_size(), cs->context_);
1996  agg_args.back() = irb.CreateBitCast(agg_args.back(), int_type);
1997  }
1998  // "agg_mode" collides with existing names, so non-standard suffix "_func" is added.
1999  cs->emitExternalCall("agg_mode_func", llvm::Type::getVoidTy(cs->context_), agg_args);
2000  if (nullable) {
2001  irb.CreateBr(skip);
2002  cs->current_func_->getBasicBlockList().push_back(skip);
2003  irb.SetInsertPoint(skip);
2004  }
2005 }
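
Floating-point arguments are reinterpreted as same-width integers before the `agg_mode_func` call, so the mode hash table can key on the raw bit pattern. A C++ equivalent of that bit cast (a sketch; C++20's std::bit_cast would do the same):

    #include <cstdint>
    #include <cstring>

    // Matches the CreateBitCast emitted above for a double argument.
    inline int64_t fp_bits_for_mode(double value) {
      static_assert(sizeof(int64_t) == sizeof(double), "widths must match");
      int64_t bits;
      std::memcpy(&bits, &value, sizeof(bits));
      return bits;
    }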

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1480 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1487  {
1488  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1489  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1490  CHECK(key_width == sizeof(int32_t));
1491  group_key =
1492  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1493  }
1494  std::vector<llvm::Value*> func_args{
1495  groups_buffer,
1496  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1497  &*group_key,
1498  &*key_size_lv,
1499  LL_INT(static_cast<int32_t>(key_width))};
1500  std::string func_name{"get_group_value"};
1501  if (query_mem_desc.didOutputColumnar()) {
1502  func_name += "_columnar_slot";
1503  } else {
1504  func_args.push_back(LL_INT(row_size_quad));
1505  }
1506  if (co.with_dynamic_watchdog) {
1507  func_name += "_with_watchdog";
1508  }
1509  if (query_mem_desc.didOutputColumnar()) {
1510  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1511  } else {
1512  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1513  }
1514 }

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1436 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1441  {
1442  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1443  CHECK(query_mem_desc.getQueryDescriptionType() ==
1444  QueryDescriptionType::GroupByPerfectHash);
1445  // compute the index (perfect hash)
1446  auto perfect_hash_func = codegenPerfectHashFunction();
1447  auto hash_lv =
1448  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1449 
1450  if (query_mem_desc.didOutputColumnar()) {
1451  if (!query_mem_desc.hasKeylessHash()) {
1452  const std::string set_matching_func_name{
1453  "set_matching_group_value_perfect_hash_columnar"};
1454  const std::vector<llvm::Value*> set_matching_func_arg{
1455  groups_buffer,
1456  hash_lv,
1457  group_key,
1458  key_size_lv,
1459  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1460  query_mem_desc.getEntryCount())};
1461  emitCall(set_matching_func_name, set_matching_func_arg);
1462  }
1463  return std::make_tuple(groups_buffer, hash_lv);
1464  } else {
1465  if (query_mem_desc.hasKeylessHash()) {
1466  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1467  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1468  nullptr);
1469  } else {
1470  return std::make_tuple(
1471  emitCall(
1472  "get_matching_group_value_perfect_hash",
1473  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1474  nullptr);
1475  }
1476  }
1477 }

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1160 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, anonymous_namespace{Utm.h}::n, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1164  {
1165  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1166  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1167  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1168  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1169  CHECK(!group_expr);
1170  if (!query_mem_desc.didOutputColumnar()) {
1171  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1172  }
1173  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1174  ? 0
1175  : query_mem_desc.getRowSize() / sizeof(int64_t);
1176  CodeGenerator code_generator(executor_);
1177  if (query_mem_desc.useStreamingTopN()) {
1178  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1179  CHECK_GE(only_order_entry.tle_no, int(1));
1180  const size_t target_idx = only_order_entry.tle_no - 1;
1181  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1182  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1183  const auto chosen_bytes =
1184  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1185  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1186  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1187  const uint32_t n =
1189  std::string fname = "get_bin_from_k_heap";
1190  const auto& oe_ti = order_entry_expr->get_type_info();
1191  llvm::Value* null_key_lv = nullptr;
1192  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1193  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1194  switch (bit_width) {
1195  case 32:
1196  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1197  break;
1198  case 64:
1199  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1200  break;
1201  default:
1202  CHECK(false);
1203  }
1204  fname += "_int" + std::to_string(bit_width) + "_t";
1205  } else {
1206  CHECK(oe_ti.is_fp());
1207  if (order_entry_lv->getType()->isDoubleTy()) {
1208  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1209  } else {
1210  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1211  }
1212  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1213  }
1214  const auto key_slot_idx =
1215  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1216  return emitCall(
1217  fname,
1218  {groups_buffer,
1219  LL_INT(n),
1220  LL_INT(row_size_quad),
1221  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1222  LL_BOOL(only_order_entry.is_desc),
1223  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1224  LL_BOOL(only_order_entry.nulls_first),
1225  null_key_lv,
1226  order_entry_lv});
1227  } else {
1228  auto* arg = get_arg_by_name(ROW_FUNC, "max_matched");
1229  const auto output_buffer_entry_count_lv =
1230  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1231  arg = get_arg_by_name(ROW_FUNC, "old_total_matched");
1232  const auto group_expr_lv =
1233  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1234  std::vector<llvm::Value*> args{groups_buffer,
1235  output_buffer_entry_count_lv,
1236  group_expr_lv,
1237  code_generator.posArg(nullptr)};
1238  if (query_mem_desc.didOutputColumnar()) {
1239  const auto columnar_output_offset =
1240  emitCall("get_columnar_scan_output_offset", args);
1241  return columnar_output_offset;
1242  }
1243  args.push_back(LL_INT(row_size_quad));
1244  return emitCall("get_scan_output_slot", args);
1245  }
1246 }
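For the non-top-N projection path, the generated code claims the next free output slot from the shared match counter and addresses it either columnar or row-wise. A rough host-side model of that slot assignment (illustrative only, not the get_scan_output_slot runtime function):

#include <cstdint>

// Rough model of projection output-slot assignment: each matched row takes the
// next slot; row-wise entries are row_size_quad 64-bit words apart.
int64_t* claim_output_slot(int64_t* output_buffer,
                           const uint32_t max_matched,
                           uint32_t& total_matched,
                           const int32_t row_size_quad) {
  const uint32_t slot = total_matched++;
  if (slot >= max_matched) {
    return nullptr;  // output buffer is full
  }
  return output_buffer + static_cast<int64_t>(slot) * row_size_quad;
}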

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1516 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), get_int_type(), getBucketedCardinality(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), query_infos_, and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1516  {
1517  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1518  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1519  auto ft = llvm::FunctionType::get(
1520  get_int_type(32, LL_CONTEXT),
1521  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1522  false);
1523  auto key_hash_func = llvm::Function::Create(ft,
1524  llvm::Function::ExternalLinkage,
1525  "perfect_key_hash",
1526  executor_->cgen_state_->module_);
1527  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1528  mark_function_always_inline(key_hash_func);
1529  auto& key_buff_arg = *key_hash_func->args().begin();
1530  llvm::Value* key_buff_lv = &key_buff_arg;
1531  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1532  llvm::IRBuilder<> key_hash_func_builder(bb);
1533  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1534  std::vector<int64_t> cardinalities;
1535  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1536  auto col_range_info =
1537  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1538  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1539  cardinalities.push_back(getBucketedCardinality(col_range_info));
1540  }
1541  size_t dim_idx = 0;
1542  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1543  auto* gep = key_hash_func_builder.CreateGEP(
1544  key_buff_lv->getType()->getScalarType()->getPointerElementType(),
1545  key_buff_lv,
1546  LL_INT(dim_idx));
1547  auto key_comp_lv =
1548  key_hash_func_builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
1549  auto col_range_info =
1550  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1551  auto crt_term_lv =
1552  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1553  if (col_range_info.bucket) {
1554  crt_term_lv =
1555  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1556  }
1557  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1558  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1559  LL_INT(cardinalities[prev_dim_idx]));
1560  }
1561  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1562  ++dim_idx;
1563  }
1564  key_hash_func_builder.CreateRet(
1565  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1566  return key_hash_func;
1567 }
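The generated perfect_key_hash is a mixed-radix index over the group-by key: each component is shifted by its column minimum, divided by its bucket size if bucketed, and scaled by the product of the cardinalities of the preceding dimensions. The same arithmetic, written as ordinary host code purely for illustration:

#include <cstdint>
#include <vector>

struct DimInfo {
  int64_t min;          // column minimum
  int64_t bucket;       // bucket size, 0 if unbucketed
  int64_t cardinality;  // bucketed cardinality of the column
};

// Host-side model of the emitted perfect_key_hash: a mixed-radix index over the keys.
int32_t perfect_key_hash_model(const int64_t* key, const std::vector<DimInfo>& dims) {
  int64_t hash = 0;
  for (size_t dim_idx = 0; dim_idx < dims.size(); ++dim_idx) {
    int64_t term = key[dim_idx] - dims[dim_idx].min;
    if (dims[dim_idx].bucket) {
      term /= dims[dim_idx].bucket;
    }
    for (size_t prev = 0; prev < dim_idx; ++prev) {
      term *= dims[prev].cardinality;
    }
    hash += term;
  }
  return static_cast<int32_t>(hash);
}

For example, with two key columns of cardinality 10 and 100, the key (min0 + 3, min1 + 7) hashes to 3 + 7 * 10 = 73.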

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1386 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1392  {
1393  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1394  CHECK(query_mem_desc.usesGetGroupValueFast());
1395  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1396  ? "get_columnar_group_bin_offset"
1397  : "get_group_value_fast"};
1398  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1399  get_group_fn_name += "_keyless";
1400  }
1401  if (query_mem_desc.interleavedBins(co.device_type)) {
1402  CHECK(!query_mem_desc.didOutputColumnar());
1403  CHECK(query_mem_desc.hasKeylessHash());
1404  get_group_fn_name += "_semiprivate";
1405  }
1406  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1407  &*group_expr_lv_translated};
1408  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1409  query_mem_desc.mustUseBaselineSort()) {
1410  get_group_fn_name += "_with_original_key";
1411  get_group_fn_args.push_back(group_expr_lv_original);
1412  }
1413  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1414  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1415  if (!query_mem_desc.hasKeylessHash()) {
1416  if (!query_mem_desc.didOutputColumnar()) {
1417  get_group_fn_args.push_back(LL_INT(row_size_quad));
1418  }
1419  } else {
1420  if (!query_mem_desc.didOutputColumnar()) {
1421  get_group_fn_args.push_back(LL_INT(row_size_quad));
1422  }
1423  if (query_mem_desc.interleavedBins(co.device_type)) {
1424  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1425  get_group_fn_args.push_back(warp_idx);
1426  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1427  }
1428  }
1429  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1430  return std::make_tuple(&*groups_buffer,
1431  emitCall(get_group_fn_name, get_group_fn_args));
1432  }
1433  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1434 }
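The fast single-column path reduces to simple bin arithmetic: the translated key minus the column minimum, divided by the bucket size when one is set, gives the entry index. A sketch of that addressing (an illustrative model, not the get_group_value_fast family of runtime functions):

#include <cstdint>

// Illustrative model of single-column perfect-hash addressing.
int64_t* group_value_fast_model(int64_t* groups_buffer,
                                int64_t key,
                                int64_t min_val,
                                int64_t bucket,
                                int32_t row_size_quad) {
  int64_t bin = key - min_val;
  if (bucket) {
    bin /= bucket;
  }
  return groups_buffer + bin * row_size_quad;  // row-wise entry for this group
}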

llvm::Value * GroupByAndAggregate::codegenVarlenOutputBuffer ( const QueryMemoryDescriptor &  query_mem_desc)
private

Definition at line 1371 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, executor_, QueryMemoryDescriptor::hasVarlenOutput(), LL_CONTEXT, and ROW_FUNC.

Referenced by codegen().

1372  {
1373  if (!query_mem_desc.hasVarlenOutput()) {
1374  return nullptr;
1375  }
1376 
1377  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1378  auto arg_it = ROW_FUNC->arg_begin();
1379  arg_it++; /* groups_buffer */
1380  auto varlen_output_buffer = arg_it++;
1381  CHECK(varlen_output_buffer->getType() == llvm::Type::getInt64PtrTy(LL_CONTEXT));
1382  return varlen_output_buffer;
1383 }

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction *  window_func,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1620 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, codegenOutputSlot(), CodeGenerator::codegenWindowPosition(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1624  {
1625  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1626  const auto window_func_context =
1627  WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1628  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1629  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1630  ? 0
1631  : query_mem_desc.getRowSize() / sizeof(int64_t);
1632  auto arg_it = ROW_FUNC->arg_begin();
1633  auto groups_buffer = arg_it++;
1634  CodeGenerator code_generator(executor_);
1635  auto window_pos_lv = code_generator.codegenWindowPosition(
1636  window_func_context, code_generator.posArg(nullptr));
1637  const auto pos_in_window =
1638  LL_BUILDER.CreateTrunc(window_pos_lv, get_int_type(32, LL_CONTEXT));
1639  llvm::Value* entry_count_lv =
1640  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1641  std::vector<llvm::Value*> args{
1642  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1643  if (query_mem_desc.didOutputColumnar()) {
1644  const auto columnar_output_offset =
1645  emitCall("get_columnar_scan_output_offset", args);
1646  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1647  }
1648  args.push_back(LL_INT(row_size_quad));
1649  return emitCall("get_scan_output_slot", args);
1650  }
1651  auto arg_it = ROW_FUNC->arg_begin();
1652  auto groups_buffer = arg_it++;
1653  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1654 }

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo &  arg_type,
const TargetInfo &  agg_info,
llvm::Value *  target 
)
private

Definition at line 1569 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1571  {
1572  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1573  const auto& agg_type = agg_info.sql_type;
1574  const size_t chosen_bytes = agg_type.get_size();
1575 
1576  bool need_conversion{false};
1577  llvm::Value* arg_null{nullptr};
1578  llvm::Value* agg_null{nullptr};
1579  llvm::Value* target_to_cast{target};
1580  if (arg_type.is_fp()) {
1581  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1582  if (agg_type.is_fp()) {
1583  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1584  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1585  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1586  need_conversion = true;
1587  }
1588  } else {
1589  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1590  return target;
1591  }
1592  } else {
1593  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1594  if (agg_type.is_fp()) {
1595  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1596  need_conversion = true;
1597  target_to_cast = executor_->castToFP(target, arg_type, agg_type);
1598  } else {
1599  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1600  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1601  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1602  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1603  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1604  need_conversion = true;
1605  }
1606  }
1607  }
1608  if (need_conversion) {
1609  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1610  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1611  return LL_BUILDER.CreateSelect(
1612  cmp,
1613  agg_null,
1614  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1615  } else {
1616  return target;
1617  }
1618 }
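The conversion matters when the argument and the aggregate slot use different null sentinels, for example an INT argument accumulated into a BIGINT slot. A host-side sketch of the idea, using hypothetical sentinel values in place of inline_int_null_val()/inline_fp_null_val():

#include <cstdint>
#include <limits>

// Hypothetical host-side analogue of the null-sentinel conversion above:
// map a 32-bit null sentinel to the 64-bit sentinel before aggregation,
// otherwise just widen the value.
int64_t convert_null_if_any_model(int32_t value) {
  constexpr int32_t arg_null = std::numeric_limits<int32_t>::min();  // assumed 32-bit sentinel
  constexpr int64_t agg_null = std::numeric_limits<int64_t>::min();  // assumed 64-bit sentinel
  return value == arg_null ? agg_null : static_cast<int64_t>(value);
}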

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 2203 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

2204  {
2205  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2206  return executor_->cgen_state_->emitCall(fname, args);
2207 }

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 2007 of file GroupByAndAggregate.cpp.

References shared::bit_cast(), CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

2007  {
2008  CHECK_LT(off, 0);
2009  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
2010  auto* bit_cast = LL_BUILDER.CreateBitCast(
2011  lit_buff_lv, llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0));
2012  auto* gep =
2013  LL_BUILDER.CreateGEP(bit_cast->getType()->getScalarType()->getPointerElementType(),
2014  bit_cast,
2015  LL_INT(off));
2016  return LL_BUILDER.CreateLoad(gep->getType()->getPointerElementType(), gep);
2017 }
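Additional literals live in the 64-bit words immediately before the position the "literals" argument points at, which is why the offset is required to be negative. A minimal sketch of the addressing the emitted GEP performs (hypothetical host-side buffer, not the generated IR):

#include <cstdint>

// Hypothetical host-side analogue of the emitted load: lit_buff is assumed to
// point just past the additional literals, so a negative word offset reaches them.
int64_t read_additional_literal(const int8_t* lit_buff, const int32_t off /* < 0 */) {
  const int64_t* words = reinterpret_cast<const int64_t*>(lit_buff);
  return words[off];
}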

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo &  col_range_info)
static private

Definition at line 353 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

353  {
354  checked_int64_t crt_col_cardinality =
355  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
356  if (col_range_info.bucket) {
357  crt_col_cardinality /= col_range_info.bucket;
358  }
359  return static_cast<int64_t>(crt_col_cardinality +
360  (1 + (col_range_info.has_nulls ? 1 : 0)));
361 }
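In other words, the bucketed cardinality is (max - min) / bucket, plus one slot to make the range inclusive and one more if the column has nulls; the checked 64-bit arithmetic throws on overflow instead of wrapping. For example, min = 0, max = 99, bucket = 10 and nulls present gives (99 - 0) / 10 + 1 + 1 = 11 entries.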

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 215 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Executor::getBaselineThreshold(), getBucketedCardinality(), group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, MAX_BUFFER_SIZE, SortInfo::order_entries, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

215  {
216  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
217  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
218  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
219  // can expect this to be true anyway for grouped queries since the precise version
220  // uses significantly more memory.
221  const int64_t baseline_threshold =
222  Executor::getBaselineThreshold(has_count_distinct(ra_exe_unit_), device_type_);
223  // `group_cardinality_estimation_` is set as the result of (NDV) cardinality estimator
224  auto group_cardinality_estimation = group_cardinality_estimation_.value_or(0);
225  if (ra_exe_unit_.groupby_exprs.size() != 1) {
226  try {
227  checked_int64_t cardinality{1};
228  bool has_nulls{false};
229  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
230  auto col_range_info = get_expr_range_info(
231  ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
232  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
233  // going through baseline hash if a non-integer type is encountered
234  return {QueryDescriptionType::GroupByBaselineHash,
235  0,
236  group_cardinality_estimation,
237  0,
238  false};
239  }
240  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
241  CHECK_GE(crt_col_cardinality, 0);
242  cardinality *= crt_col_cardinality;
243  if (col_range_info.has_nulls) {
244  has_nulls = true;
245  }
246  }
247  // For zero or high cardinalities, use baseline layout.
248  if (!cardinality || cardinality > baseline_threshold) {
249  return {QueryDescriptionType::GroupByBaselineHash,
250  0,
251  group_cardinality_estimation,
252  0,
253  false};
254  }
255  // todo (yoonmin) : should we consider min(group_cardinality_estimation,
256  // cardinality) if we have `group_cardinality_estimation` value?
257  return {QueryDescriptionType::GroupByPerfectHash,
258  0,
259  int64_t(cardinality),
260  0,
261  has_nulls};
262  } catch (...) { // overflow when computing cardinality
263  return {QueryDescriptionType::GroupByBaselineHash,
264  0,
265  group_cardinality_estimation,
266  0,
267  false};
268  }
269  }
270  // For single column groupby on high timestamps, force baseline hash due to wide ranges
271  // we are likely to encounter when applying quals to the expression range
272  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
273  // the range is small enough
274  if (ra_exe_unit_.groupby_exprs.front() &&
275  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
276  ra_exe_unit_.simple_quals.size() > 0) {
277  return {QueryDescriptionType::GroupByBaselineHash,
278  0,
279  group_cardinality_estimation,
280  0,
281  false};
282  }
283  const auto col_range_info = get_expr_range_info(
284  ra_exe_unit_, query_infos_, ra_exe_unit_.groupby_exprs.front().get(), executor_);
285  if (!ra_exe_unit_.groupby_exprs.front()) {
286  return col_range_info;
287  }
288  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
289  const int64_t col_count =
290  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
291  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
292  if (has_count_distinct(ra_exe_unit_)) {
293  max_entry_count = std::min(max_entry_count, baseline_threshold);
294  }
295  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
296  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
297  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
298 
299  const bool has_filters =
300  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
301  if (has_filters &&
302  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
303  // if filters are present, we can use the filter to narrow the cardinality of the
304  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
305  // off attempting perfect hash (since we know the range will be made of
306  // monotonically increasing numbers from min to max for dictionary encoded strings)
307  // and failing later due to excessive memory use.
308  // Check the conditions where baseline hash can provide a performance increase and
309  // return baseline hash (potentially forcing an estimator query) as the range type.
310  // Otherwise, return col_range_info which will likely be perfect hash, though could
311  // be baseline from a previous call of this function prior to the estimator query.
312  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
313  // TODO(adb): allow some sorts to pass through this block by centralizing sort
314  // algorithm decision making
315  if (has_count_distinct(ra_exe_unit_)) {
316  // always use baseline hash for column range too big for perfect hash with count
317  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
318  // hash group by in this case.
319  return {QueryDescriptionType::GroupByBaselineHash,
320  col_range_info.min,
321  col_range_info.max,
322  0,
323  col_range_info.has_nulls};
324  } else {
325  // use original col range for sort
326  return col_range_info;
327  }
328  }
329  // if filters are present and the filtered range is less than the cardinality of
330  // the column, consider baseline hash
333  col_range_info)) {
334  return {QueryDescriptionType::GroupByBaselineHash,
335  col_range_info.min,
336  col_range_info.max,
337  0,
338  col_range_info.has_nulls};
339  }
340  }
341  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get())) &&
342  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
343  !col_range_info.bucket) {
344  return {QueryDescriptionType::GroupByBaselineHash,
345  col_range_info.min,
346  col_range_info.max,
347  0,
348  col_range_info.has_nulls};
349  }
350  return col_range_info;
351 }
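For the multi-column case above, the choice between perfect and baseline hash boils down to whether the product of the per-column bucketed cardinalities stays within the baseline threshold. A compact host-side sketch of that decision (baseline_threshold stands in for Executor::getBaselineThreshold(); the real code uses checked_int64_t and catches overflow rather than the early-out below):

#include <cstdint>
#include <vector>

// Illustrative sketch of the multi-column perfect-vs-baseline decision above.
bool use_perfect_hash(const std::vector<int64_t>& col_cardinalities,
                      const int64_t baseline_threshold) {
  int64_t cardinality = 1;
  for (const auto c : col_cardinalities) {
    if (c == 0 || cardinality > baseline_threshold / c) {
      return false;  // zero, overflowing, or too-high cardinality: baseline hash
    }
    cardinality *= c;
  }
  return cardinality <= baseline_threshold;  // small, dense key space: perfect hash
}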

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo &  col_range_info,
const size_t  shard_count 
) const
private

Definition at line 422 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

423  {
424  size_t device_count{0};
425  if (device_type_ == ExecutorDeviceType::GPU) {
426  device_count = executor_->cudaMgr()->getDeviceCount();
427  CHECK_GT(device_count, 0u);
428  }
429 
430  int64_t bucket{col_range_info.bucket};
431 
432  if (shard_count) {
433  CHECK(!col_range_info.bucket);
434  /*
435  when a node has fewer devices than shard count,
436  a) In a distributed setup, the minimum distance between two keys would be
437  device_count because shards are stored consecutively across the physical tables,
438  i.e if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
439  would have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf
440  node has only 1 device, in this case, all the keys from each node are loaded on
441  the device each.
442 
443  b) In a single node setup, the distance would be minimum of device_count or
444  difference of device_count - shard_count. For example: If a single node server
445  running on 3 devices a shard column has values 0 to 9 in a table with 4 shards,
446  device to fragment keys mapping would be: device 1 - 4,8,3,7 device 2 - 1,5,9
447  device 3 - 2, 6 The bucket value would be 4(shards) - 3(devices) = 1 i.e. minimum
448  of device_count or difference.
449 
450  When a node has device count equal to or more than shard count then the
451  minimum distance is always at least shard_count * no of leaf nodes.
452  */
453  if (device_count < shard_count) {
454  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
455  : std::min(device_count, shard_count - device_count);
456  } else {
457  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
458  }
459  }
460 
461  return bucket;
462 }
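Worked examples of the branch above: a single node (g_leaf_count == 0) with 3 devices and 4 shards takes the device_count < shard_count branch and yields bucket = min(3, 4 - 3) = 1, matching the comment; a 2-leaf cluster whose nodes each have 8 devices against 4 shards takes the other branch and yields bucket = 4 * max(2, 1) = 8.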

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 979 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, executor_, Analyzer::AggExpr::get_arg(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Analyzer::Expr::get_type_info(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

980  {
981  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
982  return false;
983  }
984  for (const auto& order_entry : order_entries) {
985  CHECK_GE(order_entry.tle_no, 1);
986  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
987  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
988  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
989  return false;
990  }
991  // TODO(alex): relax the restrictions
992  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
993  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
994  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
995  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
996  return false;
997  }
998  if (agg_expr->get_arg()) {
999  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
1000  if (arg_ti.is_fp()) {
1001  return false;
1002  }
1003  auto expr_range_info =
1004  get_expr_range_info(ra_exe_unit_, query_infos_, agg_expr->get_arg(), executor_);
1005  // TODO(adb): QMD not actually initialized here?
1006  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
1007  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
1008  expr_range_info.has_nulls) &&
1009  order_entry.is_desc == order_entry.nulls_first) {
1010  return false;
1011  }
1012  }
1013  const auto& target_ti = target_expr->get_type_info();
1014  CHECK(!target_ti.is_buffer());
1015  if (!target_ti.is_integer()) {
1016  return false;
1017  }
1018  }
1019  return true;
1020 }

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo *  render_info,
const bool  output_columnar_hint 
)
private

Definition at line 850 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

855  {
856  const auto shard_count = device_type_ == ExecutorDeviceType::GPU
857  ? shard_count_for_top_groups(ra_exe_unit_)
858  : 0;
859  bool sort_on_gpu_hint =
860  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
861  !ra_exe_unit_.sort_info.order_entries.empty() &&
862  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries);
863  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
864  // but the total output buffer size would be too big or it's a sharded top query.
865  // For the sake of managing risk, use the new result set way very selectively for
866  // this case only (alongside the baseline layout we've enabled for a while now).
867  bool must_use_baseline_sort = shard_count;
868  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
869  while (true) {
870  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
871  max_groups_buffer_entry_count,
872  crt_min_byte_width,
873  sort_on_gpu_hint,
874  render_info,
875  must_use_baseline_sort,
876  output_columnar_hint);
877  CHECK(query_mem_desc);
878  if (query_mem_desc->sortOnGpu() &&
879  (query_mem_desc->getBufferSizeBytes(device_type_) +
880  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
881  2 * 1024 * 1024 * 1024LL) {
882  must_use_baseline_sort = true;
883  sort_on_gpu_hint = false;
884  } else {
885  break;
886  }
887  }
888  return query_mem_desc;
889 }
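The retry loop above only repeats when a GPU sort is requested but would not fit: the descriptor's buffer plus one 32-bit index per entry (padded to an 8-byte boundary) must stay under 2 GiB, otherwise the flags flip to must_use_baseline_sort and the descriptor is rebuilt. The size check, as a small standalone sketch:

#include <cstddef>
#include <cstdint>

// Illustrative version of the 2 GiB GPU-sort budget test in the loop above.
bool gpu_sort_fits(const size_t buffer_bytes, const size_t entry_count) {
  const size_t index_bytes = (entry_count * sizeof(int32_t) + 7) & ~size_t{7};  // align_to_int64
  return buffer_bytes + index_bytes <= 2ULL * 1024 * 1024 * 1024;
}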

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo *  render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 891 of file GroupByAndAggregate.cpp.

References CPU, device_type_, executor_, g_enable_watchdog, g_watchdog_baseline_max_groups, anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info(), getColRangeInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

898  {
899  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
900 
901  const bool threads_can_reuse_group_by_buffers =
902  device_type_ == ExecutorDeviceType::CPU && is_group_by &&
903  ra_exe_unit_.groupby_exprs.front();
904 
905  auto col_range_info_nosharding = getColRangeInfo();
906 
907  const auto shard_count = device_type_ == ExecutorDeviceType::GPU
908  ? shard_count_for_top_groups(ra_exe_unit_)
909  : 0;
910 
911  const auto col_range_info =
912  ColRangeInfo{col_range_info_nosharding.hash_type_,
913  col_range_info_nosharding.min,
914  col_range_info_nosharding.max,
915  getShardedTopBucket(col_range_info_nosharding, shard_count),
916  col_range_info_nosharding.has_nulls};
917 
918  // Non-grouped aggregates do not support accessing aggregated ranges
919  // Keyless hash is currently only supported with single-column perfect hash
920  const auto keyless_info =
921  !(is_group_by &&
922  col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash)
923  ? KeylessInfo{false, -1}
924  : get_keyless_info(ra_exe_unit_, query_infos_, is_group_by, executor_);
925 
926  if (g_enable_watchdog &&
927  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
928  max_groups_buffer_entry_count > g_watchdog_baseline_max_groups) ||
929  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
930  ra_exe_unit_.groupby_exprs.size() == 1 &&
931  (col_range_info.max - col_range_info.min) /
932  std::max(col_range_info.bucket, int64_t(1)) >
933  130000000))) {
934  throw WatchdogException("Query would use too much memory");
935  }
936 
937  const auto count_distinct_descriptors = init_count_distinct_descriptors(
938  ra_exe_unit_, query_infos_, col_range_info, device_type_, executor_);
939  try {
940  return QueryMemoryDescriptor::init(executor_,
941  ra_exe_unit_,
942  query_infos_,
943  col_range_info,
944  keyless_info,
945  allow_multifrag,
946  device_type_,
947  crt_min_byte_width,
948  sort_on_gpu_hint,
949  shard_count,
950  max_groups_buffer_entry_count,
951  render_info,
952  count_distinct_descriptors,
953  must_use_baseline_sort,
954  output_columnar_hint,
955  /*streaming_top_n_hint=*/true,
956  threads_can_reuse_group_by_buffers);
957  } catch (const StreamingTopNOOM& e) {
958  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
959  return QueryMemoryDescriptor::init(executor_,
960  ra_exe_unit_,
961  query_infos_,
962  col_range_info,
963  keyless_info,
964  allow_multifrag,
965  device_type_,
966  crt_min_byte_width,
967  sort_on_gpu_hint,
968  shard_count,
969  max_groups_buffer_entry_count,
970  render_info,
971  count_distinct_descriptors,
972  must_use_baseline_sort,
973  output_columnar_hint,
974  /*streaming_top_n_hint=*/false,
975  threads_can_reuse_group_by_buffers);
976  }
977 }

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value const *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions &  co 
) const
private

Definition at line 29 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

32  {
33  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
34  llvm::isa<llvm::AllocaInst>(val_ptr) &&
35  val_ptr->getType() ==
36  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
37  "agg_id" == agg_base_name);
38 }

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 40 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

40  {
41  executor_->cgen_state_->ir_builder_.CreateCall(
42  executor_->cgen_state_->module_->getFunction("force_sync"));
43 }


size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit &  ra_exe_unit)
static

Definition at line 2226 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::get_metadata_for_table(), Analyzer::ColumnVar::getColumnKey(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

2227  {
2228  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
2229  return 0;
2230  }
2231  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2232  const auto grouped_col_expr =
2233  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
2234  if (!grouped_col_expr) {
2235  continue;
2236  }
2237  const auto& column_key = grouped_col_expr->getColumnKey();
2238  if (column_key.table_id <= 0) {
2239  return 0;
2240  }
2241  const auto td = Catalog_Namespace::get_metadata_for_table(
2242  {column_key.db_id, column_key.table_id});
2243  if (td->shardedColumnId == column_key.column_id) {
2244  return td->nShards;
2245  }
2246  }
2247  return 0;
2248 }

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 219 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 220 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 217 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 218 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 221 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 222 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private
const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 215 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

bool GroupByAndAggregate::output_columnar_
private

Definition at line 212 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private
std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 211 of file GroupByAndAggregate.h.


The documentation for this class was generated from the following files: