OmniSciDB  a987f07e93
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
llvm::Value * codegenVarlenOutputBuffer (const QueryMemoryDescriptor &query_mem_desc)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation result should be stored.
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
void codegenApproxQuantile (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
void codegenMode (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 61 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 374 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, and ra_exe_unit_.

381  : executor_(executor)
382  , ra_exe_unit_(ra_exe_unit)
383  , query_infos_(query_infos)
384  , row_set_mem_owner_(row_set_mem_owner)
385  , device_type_(device_type)
386  , group_cardinality_estimation_(group_cardinality_estimation) {
387  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
388  if (!groupby_expr) {
389  continue;
390  }
391  const auto& groupby_ti = groupby_expr->get_type_info();
392  if (groupby_ti.is_bytes()) {
393  throw std::runtime_error(
394  "Cannot group by string columns which are not dictionary encoded.");
395  }
396  if (groupby_ti.is_buffer()) {
397  throw std::runtime_error("Group by buffer not supported");
398  }
399  if (groupby_ti.is_geometry()) {
400  throw std::runtime_error("Group by geometry not supported");
401  }
402  }
403 }
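
The constructor only stores its arguments and rejects group-by expressions whose types cannot be grouped (non-dictionary-encoded strings, buffers, geometry). A minimal call-site sketch, assuming the surrounding executor state (executor, co, ra_exe_unit, query_infos, row_set_mem_owner) already exists in the caller:

// Hypothetical call site; every local below is assumed to be provided by the caller.
GroupByAndAggregate group_by_and_agg(executor,
                                     co.device_type,
                                     ra_exe_unit,
                                     query_infos,
                                     row_set_mem_owner,
                                     /*group_cardinality_estimation=*/std::nullopt);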

Member Function Documentation

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 2185 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

2185  {
2186  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2187  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
2188  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
2189  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
2190 
2191  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
2192 }

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 999 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), codegenVarlenOutputBuffer(), DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, LLVM_ALIGN, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

1003  {
1004  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1005  CHECK(filter_result);
1006 
1007  bool can_return_error = false;
1008  llvm::BasicBlock* filter_false{nullptr};
1009 
1010  {
1011  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
1012 
1013  if (executor_->isArchMaxwell(co.device_type)) {
1014  prependForceSync();
1015  }
1016  DiamondCodegen filter_cfg(filter_result,
1017  executor_,
1018  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
1019  "filter", // filter_true and filter_false basic blocks
1020  nullptr,
1021  false);
1022  filter_false = filter_cfg.cond_false_;
1023 
1024  if (is_group_by) {
1025  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1026  !query_mem_desc.useStreamingTopN()) {
1027  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
1028  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
1029  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
1030  llvm::Value* old_total_matched_val{nullptr};
1031  if (co.device_type == ExecutorDeviceType::GPU) {
1032  old_total_matched_val =
1033  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
1034  total_matched_ptr,
1035  LL_INT(int32_t(1)),
1036 #if LLVM_VERSION_MAJOR > 12
1037  LLVM_ALIGN(8),
1038 #endif
1039  llvm::AtomicOrdering::Monotonic);
1040  } else {
1041  old_total_matched_val = LL_BUILDER.CreateLoad(
1042  total_matched_ptr->getType()->getPointerElementType(), total_matched_ptr);
1043  LL_BUILDER.CreateStore(
1044  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
1045  total_matched_ptr);
1046  }
1047  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
1048  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
1049  }
1050 
1051  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
1052  auto varlen_output_buffer = codegenVarlenOutputBuffer(query_mem_desc);
1053  if (query_mem_desc.usesGetGroupValueFast() ||
1054  query_mem_desc.getQueryDescriptionType() ==
1055  QueryDescriptionType::GroupByPerfectHash) {
1056  if (query_mem_desc.getGroupbyColCount() > 1) {
1057  filter_cfg.setChainToNext();
1058  }
1059  // Don't generate null checks if the group slot is guaranteed to be non-null,
1060  // as it's the case for get_group_value_fast* family.
1061  can_return_error = codegenAggCalls(agg_out_ptr_w_idx,
1062  varlen_output_buffer,
1063  {},
1064  query_mem_desc,
1065  co,
1066  gpu_smem_context,
1067  filter_cfg);
1068  } else {
1069  {
1070  llvm::Value* nullcheck_cond{nullptr};
1071  if (query_mem_desc.didOutputColumnar()) {
1072  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
1073  LL_INT(int32_t(0)));
1074  } else {
1075  nullcheck_cond = LL_BUILDER.CreateICmpNE(
1076  std::get<0>(agg_out_ptr_w_idx),
1077  llvm::ConstantPointerNull::get(
1078  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
1079  }
1080  DiamondCodegen nullcheck_cfg(
1081  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
1082  codegenAggCalls(agg_out_ptr_w_idx,
1083  varlen_output_buffer,
1084  {},
1085  query_mem_desc,
1086  co,
1087  gpu_smem_context,
1088  filter_cfg);
1089  }
1090  can_return_error = true;
1091  if (query_mem_desc.getQueryDescriptionType() ==
1092  QueryDescriptionType::Projection &&
1093  query_mem_desc.useStreamingTopN()) {
1094  // Ignore rejection on pushing current row to top-K heap.
1095  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
1096  } else {
1097  CodeGenerator code_generator(executor_);
1098  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
1099  // TODO(alex): remove the trunc once pos is converted to 32 bits
1100  code_generator.posArg(nullptr),
1101  get_int_type(32, LL_CONTEXT))));
1102  }
1103  }
1104  } else {
1105  if (ra_exe_unit_.estimator) {
1106  std::stack<llvm::BasicBlock*> array_loops;
1107  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
1108  } else {
1109  auto arg_it = ROW_FUNC->arg_begin();
1110  std::vector<llvm::Value*> agg_out_vec;
1111  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1112  agg_out_vec.push_back(&*arg_it++);
1113  }
1114  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1115  /*varlen_output_buffer=*/nullptr,
1116  agg_out_vec,
1117  query_mem_desc,
1118  co,
1119  gpu_smem_context,
1120  filter_cfg);
1121  }
1122  }
1123  }
1124 
1125  if (ra_exe_unit_.join_quals.empty()) {
1126  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1127  } else if (sc_false) {
1128  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1129  LL_BUILDER.SetInsertPoint(sc_false);
1130  LL_BUILDER.CreateBr(filter_false);
1131  LL_BUILDER.SetInsertPoint(saved_insert_block);
1132  }
1133 
1134  return can_return_error;
1135 }
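
Taken as a whole, the IR emitted by codegen() follows a simple shape: a filter diamond, group-by slot lookup, a null check on the slot unless get_group_value_fast* guarantees it, then the aggregate updates. The sketch below is a hypothetical C++ rendering of that control flow; the helper functions are illustrative stubs, not code from the repository.

#include <cstdint>

static int64_t* lookup_group_slot() { return nullptr; }  // stands in for codegenGroupBy()
static void update_aggregates(int64_t*) {}               // stands in for codegenAggCalls()

int32_t row_func_sketch(bool filter_result, int32_t pos) {
  if (filter_result) {                 // DiamondCodegen "filter" diamond
    int64_t* slot = lookup_group_slot();
    if (slot) {                        // null check skipped for get_group_value_fast*
      update_aggregates(slot);
    } else {
      return -pos;                     // out of group-by slots: report the failing row
    }
  }
  return 0;                            // no error for this input row
}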

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1999 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), CUR_FUNC, executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

2001  {
2002  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2003  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2004  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
2005  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
2006 
2007  // TODO(alex): handle arrays uniformly?
2008  CodeGenerator code_generator(executor_);
2009  if (target_expr) {
2010  const auto& target_ti = target_expr->get_type_info();
2011  if (target_ti.is_buffer() &&
2012  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
2013  const auto target_lvs =
2014  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2015  : code_generator.codegen(
2016  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
2017  if (!func_expr && !arr_expr) {
2018  // Something with the chunk transport is code that was generated from a source
2019  // other than an ARRAY[] expression
2020  if (target_ti.is_bytes()) {
2021  CHECK_EQ(size_t(3), target_lvs.size());
2022  return {target_lvs[1], target_lvs[2]};
2023  }
2024  CHECK(target_ti.is_array());
2025  CHECK_EQ(size_t(1), target_lvs.size());
2026  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
2027  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
2028  const auto i8p_ty =
2029  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
2030  const auto& elem_ti = target_ti.get_elem_type();
2031  return {
2032  executor_->cgen_state_->emitExternalCall(
2033  "array_buff",
2034  i8p_ty,
2035  {target_lvs.front(), code_generator.posArg(target_expr)}),
2036  executor_->cgen_state_->emitExternalCall(
2037  "array_size",
2038  i32_ty,
2039  {target_lvs.front(),
2040  code_generator.posArg(target_expr),
2041  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
2042  } else {
2043  if (agg_expr) {
2044  throw std::runtime_error(
2045  "Using array[] operator as argument to an aggregate operator is not "
2046  "supported");
2047  }
2048  CHECK(func_expr || arr_expr);
2049  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
2050  CHECK_EQ(size_t(1), target_lvs.size());
2051  const auto prefix = target_ti.get_buffer_name();
2052  CHECK(target_ti.is_array() || target_ti.is_bytes());
2053  const auto target_lv = LL_BUILDER.CreateLoad(
2054  target_lvs[0]->getType()->getPointerElementType(), target_lvs[0]);
2055  // const auto target_lv_type = target_lvs[0]->getType();
2056  // CHECK(target_lv_type->isStructTy());
2057  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
2058  const auto i8p_ty = llvm::PointerType::get(
2059  get_int_type(8, executor_->cgen_state_->context_), 0);
2060  const auto ptr = LL_BUILDER.CreatePointerCast(
2061  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
2062  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
2063  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
2064  const auto nullcheck_ok_bb =
2065  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_nullcheck_ok_bb", CUR_FUNC);
2066  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
2067  LL_CONTEXT, prefix + "_nullcheck_fail_bb", CUR_FUNC);
2068 
2069  // TODO(adb): probably better to zext the bool
2070  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
2071  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
2072  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
2073 
2074  const auto ret_bb =
2075  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_return", CUR_FUNC);
2076  LL_BUILDER.SetInsertPoint(ret_bb);
2077  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
2078  result_phi->addIncoming(ptr, nullcheck_ok_bb);
2079  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
2080  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
2081  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
2082  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
2083  executor_->cgen_state_->emitExternalCall(
2084  "register_buffer_with_executor_rsm",
2085  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
2086  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
2087  LL_BUILDER.CreateBr(ret_bb);
2088  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
2089  LL_BUILDER.CreateBr(ret_bb);
2090 
2091  LL_BUILDER.SetInsertPoint(ret_bb);
2092  return {result_phi, size};
2093  }
2094  CHECK_EQ(size_t(2), target_lvs.size());
2095  return {target_lvs[0], target_lvs[1]};
2096  }
2097  }
2098  if (target_ti.is_geometry() &&
2099  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
2100  auto generate_coord_lvs =
2101  [&](auto* selected_target_expr,
2102  bool const fetch_columns) -> std::vector<llvm::Value*> {
2103  const auto target_lvs =
2104  code_generator.codegen(selected_target_expr, fetch_columns, co);
2105  if (dynamic_cast<const Analyzer::GeoOperator*>(target_expr) &&
2106  target_expr->get_type_info().is_geometry()) {
2107  // return a pointer to the temporary alloca
2108  return target_lvs;
2109  }
2110  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
2111  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
2112  if (geo_uoper || geo_binoper) {
2113  CHECK(target_expr->get_type_info().is_geometry());
2114  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
2115  target_lvs.size());
2116  return target_lvs;
2117  }
2118  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
2119  target_lvs.size());
2120 
2121  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
2122  const auto i8p_ty =
2123  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
2124  std::vector<llvm::Value*> coords;
2125  size_t ctr = 0;
2126  for (const auto& target_lv : target_lvs) {
2127  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
2128  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
2129  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
2130  // coords array (TINYINT). Subsequent arrays are regular INT.
2131 
2132  const size_t elem_sz = ctr == 0 ? 1 : 4;
2133  ctr++;
2134  int32_t fixlen = -1;
2135  if (target_ti.get_type() == kPOINT) {
2136  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
2137  if (col_var) {
2138  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
2139  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
2140  fixlen = coords_cd->columnType.get_size();
2141  }
2142  }
2143  }
2144  if (fixlen > 0) {
2145  coords.push_back(executor_->cgen_state_->emitExternalCall(
2146  "fast_fixlen_array_buff",
2147  i8p_ty,
2148  {target_lv, code_generator.posArg(selected_target_expr)}));
2149  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
2150  continue;
2151  }
2152  coords.push_back(executor_->cgen_state_->emitExternalCall(
2153  "array_buff",
2154  i8p_ty,
2155  {target_lv, code_generator.posArg(selected_target_expr)}));
2156  coords.push_back(executor_->cgen_state_->emitExternalCall(
2157  "array_size",
2158  i32_ty,
2159  {target_lv,
2160  code_generator.posArg(selected_target_expr),
2161  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
2162  }
2163  return coords;
2164  };
2165 
2166  if (agg_expr) {
2167  return generate_coord_lvs(agg_expr->get_arg(), true);
2168  } else {
2169  return generate_coord_lvs(target_expr,
2170  !executor_->plan_state_->allow_lazy_fetch_);
2171  }
2172  }
2173  }
2174  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2175  : code_generator.codegen(
2176  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
2177 }
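
For buffer-typed targets the function returns a {pointer, length} pair obtained through the array_buff/array_size runtime helpers, passing log2 of the element size as the last argument. A minimal stand-in for that log2_bytes computation, assuming power-of-two element sizes, is:

#include <cstdint>

// Illustrative equivalent of log2_bytes(elem_size) as passed to the array_size call above.
uint32_t log2_bytes_sketch(uint32_t bytes) {
  uint32_t log2 = 0;
  while (bytes >>= 1) {
    ++log2;
  }
  return log2;
}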

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
llvm::Value *  varlen_output_buffer,
const std::vector< llvm::Value * > &  agg_out_vec,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1632 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1639  {
1640  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1641  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1642  // TODO(alex): unify the two cases, the output for non-group by queries
1643  // should be a contiguous buffer
1644  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1645  bool can_return_error = false;
1646  if (is_group_by) {
1647  CHECK(agg_out_vec.empty());
1648  } else {
1649  CHECK(!agg_out_vec.empty());
1650  }
1651 
1652  // output buffer is casted into a byte stream to be able to handle data elements of
1653  // different sizes (only used when actual column width sizes are used)
1654  llvm::Value* output_buffer_byte_stream{nullptr};
1655  llvm::Value* out_row_idx{nullptr};
1656  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1657  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1658  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1659  std::get<0>(agg_out_ptr_w_idx),
1660  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1661  output_buffer_byte_stream->setName("out_buff_b_stream");
1662  CHECK(std::get<1>(agg_out_ptr_w_idx));
1663  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1664  llvm::Type::getInt64Ty(LL_CONTEXT));
1665  out_row_idx->setName("out_row_idx");
1666  }
1667 
1668  TargetExprCodegenBuilder target_builder(ra_exe_unit_, is_group_by);
1669  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1670  ++target_idx) {
1671  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1672  CHECK(target_expr);
1673 
1674  target_builder(target_expr, executor_, query_mem_desc, co);
1675  }
1676 
1677  target_builder.codegen(this,
1678  executor_,
1679  query_mem_desc,
1680  co,
1681  gpu_smem_context,
1682  agg_out_ptr_w_idx,
1683  agg_out_vec,
1684  output_buffer_byte_stream,
1685  out_row_idx,
1686  varlen_output_buffer,
1687  diamond_codegen);
1688 
1689  for (auto target_expr : ra_exe_unit_.target_exprs) {
1690  CHECK(target_expr);
1691  executor_->plan_state_->isLazyFetchColumn(target_expr);
1692  }
1693 
1694  return can_return_error;
1695 }

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation result should be stored.

Definition at line 1700 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, shared::bit_cast(), CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1707  {
1708  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1709  llvm::Value* agg_col_ptr{nullptr};
1710  if (query_mem_desc.didOutputColumnar()) {
1711  // TODO(Saman): remove the second columnar branch, and support all query description
1712  // types through the first branch. Then, input arguments should also be cleaned up
1713  if (!g_cluster &&
1714  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1715  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1716  chosen_bytes == 8);
1717  CHECK(output_buffer_byte_stream);
1718  CHECK(out_row_idx);
1719  size_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1720  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1721  auto out_per_col_byte_idx =
1722 #ifdef _WIN32
1723  LL_BUILDER.CreateShl(out_row_idx, __lzcnt(chosen_bytes) - 1);
1724 #else
1725  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1726 #endif
1727  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1728  LL_INT(static_cast<int64_t>(col_off)));
1729  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1730  auto output_ptr = LL_BUILDER.CreateGEP(
1731  output_buffer_byte_stream->getType()->getScalarType()->getPointerElementType(),
1732  output_buffer_byte_stream,
1733  byte_offset);
1734  agg_col_ptr = LL_BUILDER.CreateBitCast(
1735  output_ptr,
1736  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1737  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1738  } else {
1739  auto const col_off_in_bytes = query_mem_desc.getColOffInBytes(agg_out_off);
1740  auto const col_off = col_off_in_bytes / chosen_bytes;
1741  auto const col_rem = col_off_in_bytes % chosen_bytes;
1742  CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
1743  CHECK(std::get<1>(agg_out_ptr_w_idx));
1744  auto* agg_out_idx = LL_BUILDER.CreateZExt(
1745  std::get<1>(agg_out_ptr_w_idx),
1746  get_int_type(8 * sizeof(col_off), executor_->cgen_state_->context_));
1747  auto* offset = LL_BUILDER.CreateAdd(agg_out_idx, LL_INT(col_off));
1748  auto* bit_cast = LL_BUILDER.CreateBitCast(
1749  std::get<0>(agg_out_ptr_w_idx),
1750  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1751  agg_col_ptr = LL_BUILDER.CreateGEP(
1752  bit_cast->getType()->getScalarType()->getPointerElementType(),
1753  bit_cast,
1754  offset);
1755  }
1756  } else {
1757  auto const col_off_in_bytes = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1758  auto const col_off = col_off_in_bytes / chosen_bytes;
1759  auto const col_rem = col_off_in_bytes % chosen_bytes;
1760  CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
1761  auto* bit_cast = LL_BUILDER.CreateBitCast(
1762  std::get<0>(agg_out_ptr_w_idx),
1763  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1764  agg_col_ptr = LL_BUILDER.CreateGEP(
1765  bit_cast->getType()->getScalarType()->getPointerElementType(),
1766  bit_cast,
1767  LL_INT(col_off));
1768  }
1769  CHECK(agg_col_ptr);
1770  return agg_col_ptr;
1771 }
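
In the columnar projection branch the slot address is the column offset plus the row index scaled by the target's byte width (the multiplication is emitted as a left shift). A plain C++ sketch of that arithmetic, with illustrative parameter names:

#include <cstddef>

// Illustrative only: byte offset of a columnar output slot as computed above.
// col_off comes from QueryMemoryDescriptor::getColOffInBytes(), chosen_bytes is the
// padded width of the target, out_row_idx is the output row index.
size_t columnar_slot_byte_offset(size_t col_off, size_t chosen_bytes, size_t out_row_idx) {
  return col_off + out_row_idx * chosen_bytes;  // emitted as a shift by log2(chosen_bytes)
}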

void GroupByAndAggregate::codegenApproxQuantile ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1906 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, g_bigint_count, SQLTypeInfo::get_notnull(), get_target_info(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1911  {
1912  if (device_type == ExecutorDeviceType::GPU) {
1913  throw QueryMustRunOnCpu();
1914  }
1915  llvm::BasicBlock *calc, *skip{nullptr};
1916  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1917  auto const arg_ti =
1918  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1919  bool const nullable = !arg_ti.get_notnull();
1920 
1921  auto* cs = executor_->cgen_state_.get();
1922  auto& irb = cs->ir_builder_;
1923  if (nullable) {
1924  auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1925  auto* const skip_cond = arg_ti.is_fp()
1926  ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1927  : irb.CreateICmpEQ(agg_args.back(), null_value);
1928  calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_quantile");
1929  skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_quantile");
1930  irb.CreateCondBr(skip_cond, skip, calc);
1931  cs->current_func_->getBasicBlockList().push_back(calc);
1932  irb.SetInsertPoint(calc);
1933  }
1934  if (!arg_ti.is_fp()) {
1935  auto const agg_info = get_target_info(target_expr, g_bigint_count);
1936  agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
1937  }
1938  cs->emitExternalCall(
1939  "agg_approx_quantile", llvm::Type::getVoidTy(cs->context_), agg_args);
1940  if (nullable) {
1941  irb.CreateBr(skip);
1942  cs->current_func_->getBasicBlockList().push_back(skip);
1943  irb.SetInsertPoint(skip);
1944  }
1945 }
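
For nullable arguments the generated IR wraps the external agg_approx_quantile call in a skip/calc diamond and converts integer arguments to floating point first. A plain C++ analogue of that guard (the update body is a placeholder, not the executor's real quantile accumulator):

#include <cstdint>
#include <vector>

// Illustrative guard only; the real update feeds the executor's quantile accumulator.
void approx_quantile_update_sketch(std::vector<double>& acc,
                                   int64_t arg,
                                   int64_t null_sentinel) {
  if (arg == null_sentinel) {
    return;                                 // "skip_approx_quantile" branch
  }
  acc.push_back(static_cast<double>(arg));  // "calc_approx_quantile" branch
}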

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1837 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1842  {
1843  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1844  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1845  const auto& arg_ti =
1846  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1847  if (arg_ti.is_fp()) {
1848  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1849  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1850  }
1851  const auto& count_distinct_descriptor =
1852  query_mem_desc.getCountDistinctDescriptor(target_idx);
1853  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1854  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1855  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1856  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1857  if (device_type == ExecutorDeviceType::GPU) {
1858  const auto base_dev_addr = getAdditionalLiteral(-1);
1859  const auto base_host_addr = getAdditionalLiteral(-2);
1860  agg_args.push_back(base_dev_addr);
1861  agg_args.push_back(base_host_addr);
1862  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1863  } else {
1864  emitCall("agg_approximate_count_distinct", agg_args);
1865  }
1866  return;
1867  }
1868  std::string agg_fname{"agg_count_distinct"};
1869  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1870  agg_fname += "_bitmap";
1871  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1872  }
1873  if (agg_info.skip_null_val) {
1874  auto null_lv = executor_->cgen_state_->castToTypeIn(
1875  (arg_ti.is_fp()
1876  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1877  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1878  64);
1879  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1880  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1881  agg_fname += "_skip_val";
1882  agg_args.push_back(null_lv);
1883  }
1884  if (device_type == ExecutorDeviceType::GPU) {
1885  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1886  agg_fname += "_gpu";
1887  const auto base_dev_addr = getAdditionalLiteral(-1);
1888  const auto base_host_addr = getAdditionalLiteral(-2);
1889  agg_args.push_back(base_dev_addr);
1890  agg_args.push_back(base_host_addr);
1891  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1892  CHECK_EQ(size_t(0),
1893  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1894  count_distinct_descriptor.sub_bitmap_count);
1895  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1896  count_distinct_descriptor.sub_bitmap_count)));
1897  }
1898  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1899  emitCall(agg_fname, agg_args);
1900  } else {
1901  executor_->cgen_state_->emitExternalCall(
1902  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1903  }
1904 }
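
For the Bitmap implementation the emitted call records a value by setting the bit at (val - min_val); kAPPROX_COUNT_DISTINCT additionally passes the bitmap size in bits, plus device and host base addresses on GPU. A plain C++ sketch of the exact-bitmap update (illustrative, not the runtime function itself):

#include <cstdint>
#include <vector>

// Illustrative bitmap update for exact COUNT(DISTINCT): one bit per distinct value.
void count_distinct_bitmap_set(std::vector<uint8_t>& bitmap, int64_t val, int64_t min_val) {
  const uint64_t bit = static_cast<uint64_t>(val - min_val);
  bitmap[bit >> 3] |= static_cast<uint8_t>(1u << (bit & 7));
}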

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1773 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1776  {
1777  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1778  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1779  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1780  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1781  estimator_comp_count_lv);
1782  int32_t subkey_idx = 0;
1783  for (const auto& estimator_arg_comp : estimator_arg) {
1784  const auto estimator_arg_comp_lvs =
1785  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1786  query_mem_desc.getEffectiveKeyWidth(),
1787  co,
1788  false,
1789  0,
1790  diamond_codegen,
1791  array_loops,
1792  true);
1793  CHECK(!estimator_arg_comp_lvs.original_value);
1794  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1795  // store the sub-key to the buffer
1796  LL_BUILDER.CreateStore(
1797  estimator_arg_comp_lv,
1798  LL_BUILDER.CreateGEP(
1799  estimator_key_lv->getType()->getScalarType()->getPointerElementType(),
1800  estimator_key_lv,
1801  LL_INT(subkey_idx++)));
1802  }
1803  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1804  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1805  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1806  const auto estimator_comp_bytes_lv =
1807  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1808  const auto bitmap_size_lv =
1809  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1810  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1811  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1812 }
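
The generated code materializes the estimator's key components into a contiguous int64 buffer and then hands that buffer, as raw bytes, to the estimator's runtime function along with its bitmap. A small host-side analogue of the key packing (illustrative only):

#include <cstdint>
#include <cstring>
#include <vector>

// Illustrative: pack group-by components into the byte buffer handed to the estimator.
std::vector<int8_t> pack_estimator_key(const std::vector<int64_t>& components) {
  std::vector<int8_t> key_bytes(components.size() * sizeof(int64_t));
  std::memcpy(key_bytes.data(), components.data(), key_bytes.size());
  return key_bytes;
}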

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1224 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, query_infos_, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1227  {
1228  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1229  auto arg_it = ROW_FUNC->arg_begin();
1230  auto groups_buffer = arg_it++;
1231 
1232  std::stack<llvm::BasicBlock*> array_loops;
1233 
1234  // TODO(Saman): move this logic outside of this function.
1235  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1236  if (query_mem_desc.didOutputColumnar()) {
1237  return std::make_tuple(
1238  &*groups_buffer,
1239  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1240  } else {
1241  return std::make_tuple(
1242  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1243  nullptr);
1244  }
1245  }
1246 
1247  CHECK(query_mem_desc.getQueryDescriptionType() ==
1248  QueryDescriptionType::GroupByBaselineHash ||
1249  query_mem_desc.getQueryDescriptionType() ==
1250  QueryDescriptionType::GroupByPerfectHash);
1251 
1252  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1253  ? 0
1254  : query_mem_desc.getRowSize() / sizeof(int64_t);
1255 
1256  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1257  ? sizeof(int64_t)
1258  : query_mem_desc.getEffectiveKeyWidth();
1259  // for multi-column group by
1260  llvm::Value* group_key = nullptr;
1261  llvm::Value* key_size_lv = nullptr;
1262 
1263  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1264  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1265  if (query_mem_desc.getQueryDescriptionType() ==
1266  QueryDescriptionType::GroupByPerfectHash) {
1267  group_key =
1268  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1269  } else if (query_mem_desc.getQueryDescriptionType() ==
1270  QueryDescriptionType::GroupByBaselineHash) {
1271  group_key =
1272  col_width_size == sizeof(int32_t)
1273  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1274  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1275  }
1276  CHECK(group_key);
1277  CHECK(key_size_lv);
1278  }
1279 
1280  int32_t subkey_idx = 0;
1281  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1282  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1283  const auto col_range_info =
1284  get_expr_range_info(ra_exe_unit_, query_infos_, group_expr.get(), executor_);
1285  const auto translated_null_value = static_cast<int64_t>(
1286  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1287  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1288  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1289  : checked_int64_t(col_range_info.max) +
1290  (col_range_info.bucket ? col_range_info.bucket : 1));
1291 
1292  const bool col_has_nulls =
1293  query_mem_desc.getQueryDescriptionType() ==
1294  QueryDescriptionType::GroupByPerfectHash
1295  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1296  ? query_mem_desc.hasNulls()
1297  : col_range_info.has_nulls)
1298  : false;
1299 
1300  const auto group_expr_lvs =
1301  executor_->groupByColumnCodegen(group_expr.get(),
1302  col_width_size,
1303  co,
1304  col_has_nulls,
1305  translated_null_value,
1306  diamond_codegen,
1307  array_loops,
1308  query_mem_desc.threadsShareMemory());
1309  const auto group_expr_lv = group_expr_lvs.translated_value;
1310  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1311  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1312  return codegenSingleColumnPerfectHash(query_mem_desc,
1313  co,
1314  &*groups_buffer,
1315  group_expr_lv,
1316  group_expr_lvs.original_value,
1317  row_size_quad);
1318  } else {
1319  // store the sub-key to the buffer
1320  LL_BUILDER.CreateStore(
1321  group_expr_lv,
1322  LL_BUILDER.CreateGEP(
1323  group_key->getType()->getScalarType()->getPointerElementType(),
1324  group_key,
1325  LL_INT(subkey_idx++)));
1326  }
1327  }
1328  if (query_mem_desc.getQueryDescriptionType() ==
1329  QueryDescriptionType::GroupByPerfectHash) {
1330  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1331  return codegenMultiColumnPerfectHash(
1332  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1333  } else if (query_mem_desc.getQueryDescriptionType() ==
1334  QueryDescriptionType::GroupByBaselineHash) {
1335  return codegenMultiColumnBaselineHash(co,
1336  &*groups_buffer,
1337  group_key,
1338  key_size_lv,
1339  query_mem_desc,
1340  col_width_size,
1341  row_size_quad);
1342  }
1343  CHECK(false);
1344  return std::make_tuple(nullptr, nullptr);
1345 }
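
NULL group keys are translated to one slot past the column's maximum value, bucketed when a bucket size is set; the same expression appears above both for the single-column perfect-hash case (using the descriptor's max and bucket) and for the general case (using the column range info). As a standalone sketch:

#include <cstdint>

// Illustrative: the "translated null" key used for a group-by column.
int64_t translated_null_value(int64_t max_val, int64_t bucket) {
  return max_val + (bucket ? bucket : 1);
}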

void GroupByAndAggregate::codegenMode ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1947 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, get_int_type(), SQLTypeInfo::get_notnull(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1951  {
1952  if (device_type == ExecutorDeviceType::GPU) {
1953  throw QueryMustRunOnCpu();
1954  }
1955  llvm::BasicBlock *calc, *skip{nullptr};
1956  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1957  auto const arg_ti =
1958  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1959  bool const nullable = !arg_ti.get_notnull();
1960  bool const is_fp = arg_ti.is_fp();
1961  auto* cs = executor_->cgen_state_.get();
1962  auto& irb = cs->ir_builder_;
1963  if (nullable) {
1964  auto* const null_value =
1965  is_fp ? cs->inlineNull(arg_ti) : cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1966  auto* const skip_cond = is_fp ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1967  : irb.CreateICmpEQ(agg_args.back(), null_value);
1968  calc = llvm::BasicBlock::Create(cs->context_, "calc_mode");
1969  skip = llvm::BasicBlock::Create(cs->context_, "skip_mode");
1970  irb.CreateCondBr(skip_cond, skip, calc);
1971  cs->current_func_->getBasicBlockList().push_back(calc);
1972  irb.SetInsertPoint(calc);
1973  }
1974  if (is_fp) {
1975  auto* const int_type = get_int_type(8 * arg_ti.get_size(), cs->context_);
1976  agg_args.back() = irb.CreateBitCast(agg_args.back(), int_type);
1977  }
1978  // "agg_mode" collides with existing names, so non-standard suffix "_func" is added.
1979  cs->emitExternalCall("agg_mode_func", llvm::Type::getVoidTy(cs->context_), agg_args);
1980  if (nullable) {
1981  irb.CreateBr(skip);
1982  cs->current_func_->getBasicBlockList().push_back(skip);
1983  irb.SetInsertPoint(skip);
1984  }
1985 }
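
Floating-point arguments are reinterpreted as same-width integers before being handed to the external agg_mode_func call (an LLVM bitcast, not a numeric conversion). A plain C++ analogue of that reinterpretation:

#include <cstdint>
#include <cstring>

// Illustrative equivalent of the CreateBitCast above for a double argument.
int64_t fp_bits_as_int64(double v) {
  int64_t out;
  std::memcpy(&out, &v, sizeof(out));
  return out;
}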

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1456 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1463  {
1464  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1465  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1466  CHECK(key_width == sizeof(int32_t));
1467  group_key =
1468  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1469  }
1470  std::vector<llvm::Value*> func_args{
1471  groups_buffer,
1472  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1473  &*group_key,
1474  &*key_size_lv,
1475  LL_INT(static_cast<int32_t>(key_width))};
1476  std::string func_name{"get_group_value"};
1477  if (query_mem_desc.didOutputColumnar()) {
1478  func_name += "_columnar_slot";
1479  } else {
1480  func_args.push_back(LL_INT(row_size_quad));
1481  }
1482  if (co.with_dynamic_watchdog) {
1483  func_name += "_with_watchdog";
1484  }
1485  if (query_mem_desc.didOutputColumnar()) {
1486  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1487  } else {
1488  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1489  }
1490 }
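
The runtime entry point is chosen by appending suffixes to get_group_value, mirroring the string concatenation above; a standalone rendering of that selection:

#include <string>

// Mirrors the name assembly above: base runtime function plus optional suffixes.
std::string baseline_hash_func_name(bool output_columnar, bool with_dynamic_watchdog) {
  std::string name{"get_group_value"};
  if (output_columnar) {
    name += "_columnar_slot";
  }
  if (with_dynamic_watchdog) {
    name += "_with_watchdog";
  }
  return name;
}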

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1412 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1417  {
1418  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1419  CHECK(query_mem_desc.getQueryDescriptionType() ==
1420  QueryDescriptionType::GroupByPerfectHash);
1421  // compute the index (perfect hash)
1422  auto perfect_hash_func = codegenPerfectHashFunction();
1423  auto hash_lv =
1424  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1425 
1426  if (query_mem_desc.didOutputColumnar()) {
1427  if (!query_mem_desc.hasKeylessHash()) {
1428  const std::string set_matching_func_name{
1429  "set_matching_group_value_perfect_hash_columnar"};
1430  const std::vector<llvm::Value*> set_matching_func_arg{
1431  groups_buffer,
1432  hash_lv,
1433  group_key,
1434  key_size_lv,
1435  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1436  query_mem_desc.getEntryCount())};
1437  emitCall(set_matching_func_name, set_matching_func_arg);
1438  }
1439  return std::make_tuple(groups_buffer, hash_lv);
1440  } else {
1441  if (query_mem_desc.hasKeylessHash()) {
1442  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1443  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1444  nullptr);
1445  } else {
1446  return std::make_tuple(
1447  emitCall(
1448  "get_matching_group_value_perfect_hash",
1449  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1450  nullptr);
1451  }
1452  }
1453 }
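
codegenPerfectHashFunction() (not listed on this page) emits the hash value that feeds the get_matching_group_value_perfect_hash* calls above. A reasonable mental model, under the assumption that it forms a mixed-radix index over the per-column value ranges, is sketched below with hypothetical types:

#include <cstdint>
#include <vector>

struct KeyRangeSketch {
  int64_t min_val;
  int64_t bucket;       // 0 means no bucketing
  int64_t cardinality;  // number of distinct buckets for this key column
};

// Assumed semantics only: combine per-column bucket indices into one slot index.
int64_t perfect_hash_index_sketch(const std::vector<int64_t>& key,
                                  const std::vector<KeyRangeSketch>& ranges) {
  int64_t idx = 0;
  for (size_t i = 0; i < key.size(); ++i) {
    const int64_t bucket = ranges[i].bucket ? ranges[i].bucket : 1;
    idx = idx * ranges[i].cardinality + (key[i] - ranges[i].min_val) / bucket;
  }
  return idx;
}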

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1137 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, anonymous_namespace{Utm.h}::n, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1141  {
1142  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1143  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1144  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1145  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1146  CHECK(!group_expr);
1147  if (!query_mem_desc.didOutputColumnar()) {
1148  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1149  }
1150  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1151  ? 0
1152  : query_mem_desc.getRowSize() / sizeof(int64_t);
1153  CodeGenerator code_generator(executor_);
1154  if (query_mem_desc.useStreamingTopN()) {
1155  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1156  CHECK_GE(only_order_entry.tle_no, int(1));
1157  const size_t target_idx = only_order_entry.tle_no - 1;
1158  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1159  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1160  const auto chosen_bytes =
1161  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1162  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1163  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1164  const uint32_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1165  std::string fname = "get_bin_from_k_heap";
1166  const auto& oe_ti = order_entry_expr->get_type_info();
1167  llvm::Value* null_key_lv = nullptr;
1168  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1169  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1170  switch (bit_width) {
1171  case 32:
1172  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1173  break;
1174  case 64:
1175  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1176  break;
1177  default:
1178  CHECK(false);
1179  }
1180  fname += "_int" + std::to_string(bit_width) + "_t";
1181  } else {
1182  CHECK(oe_ti.is_fp());
1183  if (order_entry_lv->getType()->isDoubleTy()) {
1184  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1185  } else {
1186  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1187  }
1188  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1189  }
1190  const auto key_slot_idx =
1191      get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1192  return emitCall(
1193  fname,
1194  {groups_buffer,
1195  LL_INT(n),
1196  LL_INT(row_size_quad),
1197  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1198  LL_BOOL(only_order_entry.is_desc),
1199  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1200  LL_BOOL(only_order_entry.nulls_first),
1201  null_key_lv,
1202  order_entry_lv});
1203  } else {
1204  auto* arg = get_arg_by_name(ROW_FUNC, "max_matched");
1205  const auto output_buffer_entry_count_lv =
1206  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1207  arg = get_arg_by_name(ROW_FUNC, "old_total_matched");
1208  const auto group_expr_lv =
1209  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1210  std::vector<llvm::Value*> args{groups_buffer,
1211  output_buffer_entry_count_lv,
1212  group_expr_lv,
1213  code_generator.posArg(nullptr)};
1214  if (query_mem_desc.didOutputColumnar()) {
1215  const auto columnar_output_offset =
1216  emitCall("get_columnar_scan_output_offset", args);
1217  return columnar_output_offset;
1218  }
1219  args.push_back(LL_INT(row_size_quad));
1220  return emitCall("get_scan_output_slot", args);
1221  }
1222 }
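
For the non-top-N projection path, the emitted calls amount to claiming the next free output row: the row function tracks how many rows have matched so far (the old_total_matched argument) and bounds the slot by max_matched, appending row_size_quad only for row-wise output. A simplified CPU-side sketch of that bookkeeping (hypothetical names; the real work happens in the generated IR and the runtime get_scan_output_slot / get_columnar_scan_output_offset functions):

  #include <cstdint>

  // Claim the next row-wise output slot for a projected row, or return nullptr
  // once the output buffer is full (overflow is handled by the caller).
  int64_t* claim_scan_output_slot(int64_t* output_buffer,
                                  const uint32_t max_matched,
                                  uint32_t& total_matched,
                                  const int32_t row_size_quad) {
    if (total_matched >= max_matched) {
      return nullptr;
    }
    const auto slot = total_matched++;
    return output_buffer + static_cast<int64_t>(slot) * row_size_quad;
  }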

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1492 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), get_int_type(), getBucketedCardinality(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), query_infos_, and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1492  {
1493  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1494  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1495  auto ft = llvm::FunctionType::get(
1496  get_int_type(32, LL_CONTEXT),
1497  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1498  false);
1499  auto key_hash_func = llvm::Function::Create(ft,
1500  llvm::Function::ExternalLinkage,
1501  "perfect_key_hash",
1502  executor_->cgen_state_->module_);
1503  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1504  mark_function_always_inline(key_hash_func);
1505  auto& key_buff_arg = *key_hash_func->args().begin();
1506  llvm::Value* key_buff_lv = &key_buff_arg;
1507  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1508  llvm::IRBuilder<> key_hash_func_builder(bb);
1509  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1510  std::vector<int64_t> cardinalities;
1511  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1512  auto col_range_info =
1513  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1514  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1515  cardinalities.push_back(getBucketedCardinality(col_range_info));
1516  }
1517  size_t dim_idx = 0;
1518  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1519  auto* gep = key_hash_func_builder.CreateGEP(
1520  key_buff_lv->getType()->getScalarType()->getPointerElementType(),
1521  key_buff_lv,
1522  LL_INT(dim_idx));
1523  auto key_comp_lv =
1524  key_hash_func_builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
1525  auto col_range_info =
1526  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1527  auto crt_term_lv =
1528  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1529  if (col_range_info.bucket) {
1530  crt_term_lv =
1531  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1532  }
1533  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1534  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1535  LL_INT(cardinalities[prev_dim_idx]));
1536  }
1537  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1538  ++dim_idx;
1539  }
1540  key_hash_func_builder.CreateRet(
1541  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1542  return key_hash_func;
1543 }
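
The generated perfect_key_hash helper linearizes a multi-column group key into a single bucket index: each key component is shifted to a zero-based bucket and scaled by the product of the cardinalities of the preceding dimensions. A standalone C++ sketch of the same arithmetic (the ColInfo struct and column order are illustrative, not part of the generated IR):

  #include <cstdint>
  #include <vector>

  struct ColInfo {
    int64_t min;          // lower bound of the column's value range
    int64_t bucket;       // bucket (stride) size, 0 if unbucketed
    int64_t cardinality;  // bucketed cardinality, as in getBucketedCardinality()
  };

  // Row-major linearization of a multi-column group key, mirroring the IR
  // emitted by codegenPerfectHashFunction().
  int32_t perfect_key_hash(const std::vector<int64_t>& key,
                           const std::vector<ColInfo>& cols) {
    int64_t hash = 0;
    for (size_t dim = 0; dim < key.size(); ++dim) {
      int64_t term = key[dim] - cols[dim].min;
      if (cols[dim].bucket) {
        term /= cols[dim].bucket;
      }
      for (size_t prev = 0; prev < dim; ++prev) {
        term *= cols[prev].cardinality;
      }
      hash += term;
    }
    return static_cast<int32_t>(hash);  // the generated function truncates to i32
  }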

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1362 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1368  {
1369  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1370  CHECK(query_mem_desc.usesGetGroupValueFast());
1371  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1372  ? "get_columnar_group_bin_offset"
1373  : "get_group_value_fast"};
1374  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1375  get_group_fn_name += "_keyless";
1376  }
1377  if (query_mem_desc.interleavedBins(co.device_type)) {
1378  CHECK(!query_mem_desc.didOutputColumnar());
1379  CHECK(query_mem_desc.hasKeylessHash());
1380  get_group_fn_name += "_semiprivate";
1381  }
1382  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1383  &*group_expr_lv_translated};
1384  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1385  query_mem_desc.mustUseBaselineSort()) {
1386  get_group_fn_name += "_with_original_key";
1387  get_group_fn_args.push_back(group_expr_lv_original);
1388  }
1389  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1390  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1391  if (!query_mem_desc.hasKeylessHash()) {
1392  if (!query_mem_desc.didOutputColumnar()) {
1393  get_group_fn_args.push_back(LL_INT(row_size_quad));
1394  }
1395  } else {
1396  if (!query_mem_desc.didOutputColumnar()) {
1397  get_group_fn_args.push_back(LL_INT(row_size_quad));
1398  }
1399  if (query_mem_desc.interleavedBins(co.device_type)) {
1400  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1401  get_group_fn_args.push_back(warp_idx);
1402  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1403  }
1404  }
1405  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1406  return std::make_tuple(&*groups_buffer,
1407  emitCall(get_group_fn_name, get_group_fn_args));
1408  }
1409  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1410 }
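
The fast path works because a single perfect-hash key maps to its bin by simple range arithmetic. A minimal sketch of what the get_group_value_fast / get_columnar_group_bin_offset calls compute conceptually (hedged; the real runtime functions also check for and initialize an empty-key sentinel on first use):

  #include <cstdint>

  // Bin index for a single-column perfect hash: shift by the range minimum and
  // divide by the bucket size, if any.
  inline int64_t group_bin(const int64_t key, const int64_t min_val, const int64_t bucket) {
    const auto off = key - min_val;
    return bucket ? off / bucket : off;
  }

  // Row-wise layout: the key is stored in the first quadword of the bin and the
  // aggregate slots follow it.
  inline int64_t* group_value_fast(int64_t* groups_buffer,
                                   const int64_t key,
                                   const int64_t min_val,
                                   const int64_t bucket,
                                   const int32_t row_size_quad) {
    const auto bin = group_bin(key, min_val, bucket);
    int64_t* row = groups_buffer + bin * row_size_quad;
    row[0] = key;
    return row + 1;
  }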

llvm::Value * GroupByAndAggregate::codegenVarlenOutputBuffer ( const QueryMemoryDescriptor &  query_mem_desc)
private

Definition at line 1347 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, executor_, QueryMemoryDescriptor::hasVarlenOutput(), LL_CONTEXT, and ROW_FUNC.

Referenced by codegen().

1348  {
1349  if (!query_mem_desc.hasVarlenOutput()) {
1350  return nullptr;
1351  }
1352 
1353  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1354  auto arg_it = ROW_FUNC->arg_begin();
1355  arg_it++; /* groups_buffer */
1356  auto varlen_output_buffer = arg_it++;
1357  CHECK(varlen_output_buffer->getType() == llvm::Type::getInt64PtrTy(LL_CONTEXT));
1358  return varlen_output_buffer;
1359 }

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction *  window_func,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1596 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, codegenOutputSlot(), CodeGenerator::codegenWindowPosition(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1600  {
1601  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1602  const auto window_func_context =
1603      WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1604  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1605  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1606  ? 0
1607  : query_mem_desc.getRowSize() / sizeof(int64_t);
1608  auto arg_it = ROW_FUNC->arg_begin();
1609  auto groups_buffer = arg_it++;
1610  CodeGenerator code_generator(executor_);
1611  auto window_pos_lv = code_generator.codegenWindowPosition(
1612  window_func_context, code_generator.posArg(nullptr));
1613  const auto pos_in_window =
1614  LL_BUILDER.CreateTrunc(window_pos_lv, get_int_type(32, LL_CONTEXT));
1615  llvm::Value* entry_count_lv =
1616  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1617  std::vector<llvm::Value*> args{
1618  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1619  if (query_mem_desc.didOutputColumnar()) {
1620  const auto columnar_output_offset =
1621  emitCall("get_columnar_scan_output_offset", args);
1622  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1623  }
1624  args.push_back(LL_INT(row_size_quad));
1625  return emitCall("get_scan_output_slot", args);
1626  }
1627  auto arg_it = ROW_FUNC->arg_begin();
1628  auto groups_buffer = arg_it++;
1629  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1630 }

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo &  arg_type,
const TargetInfo &  agg_info,
llvm::Value *  target 
)
private

Definition at line 1545 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1547  {
1548  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1549  const auto& agg_type = agg_info.sql_type;
1550  const size_t chosen_bytes = agg_type.get_size();
1551 
1552  bool need_conversion{false};
1553  llvm::Value* arg_null{nullptr};
1554  llvm::Value* agg_null{nullptr};
1555  llvm::Value* target_to_cast{target};
1556  if (arg_type.is_fp()) {
1557  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1558  if (agg_type.is_fp()) {
1559  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1560  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1561  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1562  need_conversion = true;
1563  }
1564  } else {
1565  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1566  return target;
1567  }
1568  } else {
1569  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1570  if (agg_type.is_fp()) {
1571  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1572  need_conversion = true;
1573  target_to_cast = executor_->castToFP(target, arg_type, agg_type);
1574  } else {
1575  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1576  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1577  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1578  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1579  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1580  need_conversion = true;
1581  }
1582  }
1583  }
1584  if (need_conversion) {
1585  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1586  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1587  return LL_BUILDER.CreateSelect(
1588  cmp,
1589  agg_null,
1590  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1591  } else {
1592  return target;
1593  }
1594 }
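
The conversion matters when the argument and the aggregate accumulate at different widths or types, so their NULL sentinels differ (for example, SUM over a SMALLINT column accumulating into a 64-bit slot). A scalar sketch of the effect of the generated compare-and-select (hedged; the sentinels shown are the type minimums used by inline_int_null_val for signed integer types):

  #include <cstdint>
  #include <limits>

  // If the incoming value is the argument type's NULL sentinel, substitute the
  // aggregate type's NULL sentinel; otherwise widen the value to the aggregate width.
  int64_t convert_null_if_any_i16_to_i64(const int16_t arg) {
    constexpr int16_t arg_null = std::numeric_limits<int16_t>::min();
    constexpr int64_t agg_null = std::numeric_limits<int64_t>::min();
    return arg == arg_null ? agg_null : static_cast<int64_t>(arg);
  }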

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 2179 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

2180  {
2181  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2182  return executor_->cgen_state_->emitCall(fname, args);
2183 }

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1987 of file GroupByAndAggregate.cpp.

References shared::bit_cast(), CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1987  {
1988  CHECK_LT(off, 0);
1989  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1990  auto* bit_cast = LL_BUILDER.CreateBitCast(
1991  lit_buff_lv, llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0));
1992  auto* gep =
1993  LL_BUILDER.CreateGEP(bit_cast->getType()->getScalarType()->getPointerElementType(),
1994  bit_cast,
1995  LL_INT(off));
1996  return LL_BUILDER.CreateLoad(gep->getType()->getPointerElementType(), gep);
1997 }

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo &  col_range_info)
staticprivate

Definition at line 336 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

336  {
337  checked_int64_t crt_col_cardinality =
338  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
339  if (col_range_info.bucket) {
340  crt_col_cardinality /= col_range_info.bucket;
341  }
342  return static_cast<int64_t>(crt_col_cardinality +
343  (1 + (col_range_info.has_nulls ? 1 : 0)));
344 }
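
In plain terms: the bucketed cardinality is (max - min) / bucket, plus one slot for the inclusive upper bound and one more if NULLs are possible, computed with overflow-checked arithmetic so that pathological ranges throw instead of wrapping. A small sketch of the same formula, with __int128 standing in for boost's checked_int64_t:

  #include <cstdint>
  #include <limits>
  #include <stdexcept>

  int64_t bucketed_cardinality(const int64_t min,
                               const int64_t max,
                               const int64_t bucket,
                               const bool has_nulls) {
    __int128 card = static_cast<__int128>(max) - static_cast<__int128>(min);
    if (bucket) {
      card /= bucket;
    }
    card += 1 + (has_nulls ? 1 : 0);
    if (card > std::numeric_limits<int64_t>::max()) {
      throw std::overflow_error("cardinality overflow");
    }
    return static_cast<int64_t>(card);
  }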

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 216 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Executor::getBaselineThreshold(), getBucketedCardinality(), group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, SortInfo::order_entries, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

216  {
217  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
218  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
219  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
220  // can expect this to be true anyway for grouped queries since the precise version
221  // uses significantly more memory.
222  const int64_t baseline_threshold =
223      Executor::getBaselineThreshold(has_count_distinct(ra_exe_unit_), device_type_);
224  if (ra_exe_unit_.groupby_exprs.size() != 1) {
225  try {
226  checked_int64_t cardinality{1};
227  bool has_nulls{false};
228  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
229  auto col_range_info = get_expr_range_info(
230  ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
231  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
232  // going through baseline hash if a non-integer type is encountered
233  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
234  }
235  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
236  CHECK_GE(crt_col_cardinality, 0);
237  cardinality *= crt_col_cardinality;
238  if (col_range_info.has_nulls) {
239  has_nulls = true;
240  }
241  }
242  // For zero or high cardinalities, use baseline layout.
243  if (!cardinality || cardinality > baseline_threshold) {
244  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
245  }
246  return {QueryDescriptionType::GroupByPerfectHash,
247  0,
248  int64_t(cardinality),
249  0,
250  has_nulls};
251  } catch (...) { // overflow when computing cardinality
252  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
253  }
254  }
255  // For single column groupby on high timestamps, force baseline hash due to wide ranges
256  // we are likely to encounter when applying quals to the expression range
257  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
258  // the range is small enough
259  if (ra_exe_unit_.groupby_exprs.front() &&
260  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
261  ra_exe_unit_.simple_quals.size() > 0) {
262  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
263  }
264  const auto col_range_info = get_expr_range_info(
265      ra_exe_unit_, query_infos_, ra_exe_unit_.groupby_exprs.front().get(), executor_);
266  if (!ra_exe_unit_.groupby_exprs.front()) {
267  return col_range_info;
268  }
269  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
270  const int64_t col_count =
271      ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
272  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
273  if (has_count_distinct(ra_exe_unit_)) {
274  max_entry_count = std::min(max_entry_count, baseline_threshold);
275  }
276  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
277  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
278  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
279 
280  const bool has_filters =
281  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
282  if (has_filters &&
283  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
284  // if filters are present, we can use the filter to narrow the cardinality of the
285  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
286  // off attempting perfect hash (since we know the range will be made of
287  // monotonically increasing numbers from min to max for dictionary encoded strings)
288  // and failing later due to excessive memory use.
289  // Check the conditions where baseline hash can provide a performance increase and
290  // return baseline hash (potentially forcing an estimator query) as the range type.
291  // Otherwise, return col_range_info which will likely be perfect hash, though could
292  // be baseline from a previous call of this function prior to the estimator query.
293  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
294  // TODO(adb): allow some sorts to pass through this block by centralizing sort
295  // algorithm decision making
296  if (has_count_distinct(ra_exe_unit_) &&
297  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
298  // always use baseline hash for column range too big for perfect hash with count
299  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
300  // hash group by in this case.
301  return {QueryDescriptionType::GroupByBaselineHash,
302  col_range_info.min,
303  col_range_info.max,
304  0,
305  col_range_info.has_nulls};
306  } else {
307  // use original col range for sort
308  return col_range_info;
309  }
310  }
311  // if filters are present and the filtered range is less than the cardinality of
312  // the column, consider baseline hash
313  if (group_cardinality_estimation_ &&
314      cardinality_estimate_less_than_column_range(*group_cardinality_estimation_,
315  col_range_info)) {
316  return {QueryDescriptionType::GroupByBaselineHash,
317  col_range_info.min,
318  col_range_info.max,
319  0,
320  col_range_info.has_nulls};
321  }
322  }
323  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(),
324  *executor_->catalog_)) &&
325  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
326  !col_range_info.bucket) {
327  return {QueryDescriptionType::GroupByBaselineHash,
328  col_range_info.min,
329  col_range_info.max,
330  0,
331  col_range_info.has_nulls};
332  }
333  return col_range_info;
334 }
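
For the multi-column case above, the perfect-versus-baseline decision reduces to whether the product of the per-column bucketed cardinalities stays positive and below the baseline threshold without overflowing. A rough sketch of that decision with hypothetical inputs standing in for the expression-range machinery:

  #include <cstdint>
  #include <optional>
  #include <vector>

  // Returns the perfect-hash entry count when every column has a usable range and
  // the combined cardinality is small enough; std::nullopt means "use baseline hash".
  std::optional<int64_t> perfect_hash_entry_count(
      const std::vector<int64_t>& per_col_cardinalities,
      const int64_t baseline_threshold) {
    __int128 cardinality = 1;
    for (const auto card : per_col_cardinalities) {
      if (card <= 0) {
        return std::nullopt;  // zero/unusable range, e.g. a non-integer group key
      }
      cardinality *= card;
      if (cardinality > baseline_threshold) {
        return std::nullopt;  // too many buckets (or overflow) for perfect hash
      }
    }
    return static_cast<int64_t>(cardinality);
  }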

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo &  col_range_info,
const size_t  shard_count 
) const
private

Definition at line 405 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

406  {
407  size_t device_count{0};
408  if (device_type_ == ExecutorDeviceType::GPU) {
409  device_count = executor_->cudaMgr()->getDeviceCount();
410  CHECK_GT(device_count, 0u);
411  }
412 
413  int64_t bucket{col_range_info.bucket};
414 
415  if (shard_count) {
416  CHECK(!col_range_info.bucket);
417  /*
418  when a node has fewer devices than shard count,
419  a) In a distributed setup, the minimum distance between two keys would be
420  device_count because shards are stored consecutively across the physical tables,
421  i.e if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
422  would have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf
423  node has only 1 device, in this case, all the keys from each node are loaded on
424  the device each.
425 
426  b) In a single node setup, the distance would be minimum of device_count or
427  difference of device_count - shard_count. For example: If a single node server
428  running on 3 devices a shard column has values 0 to 9 in a table with 4 shards,
429  device to fragment keys mapping would be: device 1 - 4,8,3,7 device 2 - 1,5,9
430  device 3 - 2, 6 The bucket value would be 4(shards) - 3(devices) = 1 i.e. minimum
431  of device_count or difference.
432 
433  When a node has device count equal to or more than shard count then the
434  minimum distance is always at least shard_count * no of leaf nodes.
435  */
436  if (device_count < shard_count) {
437  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
438  : std::min(device_count, shard_count - device_count);
439  } else {
440  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
441  }
442  }
443 
444  return bucket;
445 }
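
A compact restatement of the bucket selection described in the comment above, for the sharded top-k case (device, shard, and leaf counts are passed in; in the real code they come from the CUDA manager, the catalog, and g_leaf_count):

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>

  // The bucket is the minimum distance between two group-by keys resident on one device.
  int64_t sharded_top_bucket(const size_t device_count,
                             const size_t shard_count,
                             const size_t leaf_count) {
    if (device_count < shard_count) {
      // Distributed: keys on one leaf are at least device_count apart.
      // Single node: at least min(device_count, shard_count - device_count) apart.
      return leaf_count ? std::max(device_count, size_t(1))
                        : std::min(device_count, shard_count - device_count);
    }
    // Enough devices per node: keys are at least shard_count * number-of-leaves apart.
    return shard_count * std::max(leaf_count, size_t(1));
  }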

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 956 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, executor_, Analyzer::AggExpr::get_arg(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Analyzer::Expr::get_type_info(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

957  {
958  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
959  return false;
960  }
961  for (const auto& order_entry : order_entries) {
962  CHECK_GE(order_entry.tle_no, 1);
963  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
964  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
965  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
966  return false;
967  }
968  // TODO(alex): relax the restrictions
969  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
970  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
971  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
972  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
973  return false;
974  }
975  if (agg_expr->get_arg()) {
976  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
977  if (arg_ti.is_fp()) {
978  return false;
979  }
980  auto expr_range_info =
981  get_expr_range_info(ra_exe_unit_, query_infos_, agg_expr->get_arg(), executor_);
982  // TODO(adb): QMD not actually initialized here?
983  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
984  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
985  expr_range_info.has_nulls) &&
986  order_entry.is_desc == order_entry.nulls_first) {
987  return false;
988  }
989  }
990  const auto& target_ti = target_expr->get_type_info();
991  CHECK(!target_ti.is_buffer());
992  if (!target_ti.is_integer()) {
993  return false;
994  }
995  }
996  return true;
997 }

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo *  render_info,
const bool  output_columnar_hint 
)
private

Definition at line 831 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

836  {
837  const auto shard_count =
838      device_type_ == ExecutorDeviceType::GPU
839          ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
840  : 0;
841  bool sort_on_gpu_hint =
842  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
843      !ra_exe_unit_.sort_info.order_entries.empty() &&
844      gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
845  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
846  // but the total output buffer size would be too big or it's a sharded top query.
847  // For the sake of managing risk, use the new result set way very selectively for
848  // this case only (alongside the baseline layout we've enabled for a while now).
849  bool must_use_baseline_sort = shard_count;
850  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
851  while (true) {
852  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
853  max_groups_buffer_entry_count,
854  crt_min_byte_width,
855  sort_on_gpu_hint,
856  render_info,
857  must_use_baseline_sort,
858  output_columnar_hint);
859  CHECK(query_mem_desc);
860  if (query_mem_desc->sortOnGpu() &&
861  (query_mem_desc->getBufferSizeBytes(device_type_) +
862  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
863  2 * 1024 * 1024 * 1024LL) {
864  must_use_baseline_sort = true;
865  sort_on_gpu_hint = false;
866  } else {
867  break;
868  }
869  }
870  return query_mem_desc;
871 }
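
The loop above is effectively a one-shot fallback: if the first descriptor would sort on GPU but its output buffer plus the per-entry int32 index array exceeds 2 GB, the GPU-sort hint is dropped, must_use_baseline_sort is set, and the descriptor is rebuilt. A condensed sketch of that control flow (the Descriptor struct and build callback are hypothetical stand-ins for QueryMemoryDescriptor and initQueryMemoryDescriptorImpl):

  #include <cstddef>
  #include <cstdint>
  #include <functional>
  #include <memory>

  struct Descriptor {
    bool sort_on_gpu;
    size_t buffer_bytes;
    size_t entry_count;
  };

  using BuildFn = std::function<std::unique_ptr<Descriptor>(bool sort_on_gpu_hint,
                                                            bool must_use_baseline_sort)>;

  std::unique_ptr<Descriptor> init_descriptor_with_fallback(const BuildFn& build,
                                                            bool sort_on_gpu_hint,
                                                            bool must_use_baseline_sort) {
    constexpr size_t kGpuSortLimit = 2ull * 1024 * 1024 * 1024;  // 2 GB
    while (true) {
      auto desc = build(sort_on_gpu_hint, must_use_baseline_sort);
      const auto total = desc->buffer_bytes + desc->entry_count * sizeof(int32_t);
      if (desc->sort_on_gpu && total > kGpuSortLimit) {
        must_use_baseline_sort = true;  // fall back to the baseline-sort result set path
        sort_on_gpu_hint = false;       // and rebuild without GPU sort
        continue;
      }
      return desc;
    }
  }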

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo *  render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 873 of file GroupByAndAggregate.cpp.

References device_type_, executor_, g_enable_watchdog, g_watchdog_baseline_max_groups, anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info(), getColRangeInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

880  {
881  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
882 
883  auto col_range_info_nosharding = getColRangeInfo();
884 
885  const auto shard_count =
886      device_type_ == ExecutorDeviceType::GPU
887          ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
888  : 0;
889 
890  const auto col_range_info =
891  ColRangeInfo{col_range_info_nosharding.hash_type_,
892  col_range_info_nosharding.min,
893  col_range_info_nosharding.max,
894  getShardedTopBucket(col_range_info_nosharding, shard_count),
895  col_range_info_nosharding.has_nulls};
896 
897  // Non-grouped aggregates do not support accessing aggregated ranges
898  // Keyless hash is currently only supported with single-column perfect hash
899  const auto keyless_info =
900  !(is_group_by &&
901  col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash)
902  ? KeylessInfo{false, -1}
903  : get_keyless_info(ra_exe_unit_, query_infos_, is_group_by, executor_);
904 
905  if (g_enable_watchdog &&
906  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
907  max_groups_buffer_entry_count > g_watchdog_baseline_max_groups) ||
908  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
909  ra_exe_unit_.groupby_exprs.size() == 1 &&
910  (col_range_info.max - col_range_info.min) /
911  std::max(col_range_info.bucket, int64_t(1)) >
912  130000000))) {
913  throw WatchdogException("Query would use too much memory");
914  }
915 
916  const auto count_distinct_descriptors = init_count_distinct_descriptors(
917  ra_exe_unit_, query_infos_, col_range_info, device_type_, executor_);
918  try {
919  return QueryMemoryDescriptor::init(executor_,
920  ra_exe_unit_,
921  query_infos_,
922  col_range_info,
923  keyless_info,
924  allow_multifrag,
925  device_type_,
926  crt_min_byte_width,
927  sort_on_gpu_hint,
928  shard_count,
929  max_groups_buffer_entry_count,
930  render_info,
931  count_distinct_descriptors,
932  must_use_baseline_sort,
933  output_columnar_hint,
934  /*streaming_top_n_hint=*/true);
935  } catch (const StreamingTopNOOM& e) {
936  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
937  return QueryMemoryDescriptor::init(executor_,
938  ra_exe_unit_,
939  query_infos_,
940  col_range_info,
941  keyless_info,
942  allow_multifrag,
943  device_type_,
944  crt_min_byte_width,
945  sort_on_gpu_hint,
946  shard_count,
947  max_groups_buffer_entry_count,
948  render_info,
949  count_distinct_descriptors,
950  must_use_baseline_sort,
951  output_columnar_hint,
952  /*streaming_top_n_hint=*/false);
953  }
954 }
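
When the watchdog is enabled, the guard before QueryMemoryDescriptor::init is simple arithmetic. A self-contained restatement of the two limits visible in the listing (hedged; std::runtime_error stands in for the WatchdogException thrown by the real code, and g_watchdog_baseline_max_groups is passed in as a parameter):

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>
  #include <stdexcept>

  // Baseline hash caps the requested entry count; single-column perfect hash caps
  // the bucketed key range.
  void check_group_by_watchdog(const bool baseline_hash,
                               const size_t max_groups_buffer_entry_count,
                               const size_t watchdog_baseline_max_groups,
                               const bool single_column_perfect_hash,
                               const int64_t range_min,
                               const int64_t range_max,
                               const int64_t bucket) {
    const bool baseline_too_big =
        baseline_hash && max_groups_buffer_entry_count > watchdog_baseline_max_groups;
    const bool perfect_too_big =
        single_column_perfect_hash &&
        (range_max - range_min) / std::max(bucket, int64_t(1)) > 130000000;
    if (baseline_too_big || perfect_too_big) {
      throw std::runtime_error("Query would use too much memory");
    }
  }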

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value const *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions &  co 
) const
private

Definition at line 29 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

32  {
33  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
34  llvm::isa<llvm::AllocaInst>(val_ptr) &&
35  val_ptr->getType() ==
36  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
37  "agg_id" == agg_base_name);
38 }

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 40 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

40  {
41  executor_->cgen_state_->ir_builder_.CreateCall(
42  executor_->cgen_state_->module_->getFunction("force_sync"));
43 }


size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog &  catalog 
)
static

Definition at line 2202 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

2204  {
2205  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
2206  return 0;
2207  }
2208  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2209  const auto grouped_col_expr =
2210  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
2211  if (!grouped_col_expr) {
2212  continue;
2213  }
2214  if (grouped_col_expr->get_table_id() <= 0) {
2215  return 0;
2216  }
2217  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
2218  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
2219  return td->nShards;
2220  }
2221  }
2222  return 0;
2223 }

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 220 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 221 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 218 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 219 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 222 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 223 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private

const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 216 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

bool GroupByAndAggregate::output_columnar_
private

Definition at line 213 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 212 of file GroupByAndAggregate.h.


The documentation for this class was generated from the following files:

GroupByAndAggregate.h
GroupByAndAggregate.cpp
MaxwellCodegenPatch.cpp