OmniSciDB  c1a53651b2
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
llvm::Value * codegenVarlenOutputBuffer (const QueryMemoryDescriptor &query_mem_desc)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored.
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
void codegenApproxQuantile (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
void codegenMode (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 61 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 372 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, and ra_exe_unit_.

379  : executor_(executor)
380  , ra_exe_unit_(ra_exe_unit)
381  , query_infos_(query_infos)
382  , row_set_mem_owner_(row_set_mem_owner)
383  , device_type_(device_type)
384  , group_cardinality_estimation_(group_cardinality_estimation) {
385  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
386  if (!groupby_expr) {
387  continue;
388  }
389  const auto& groupby_ti = groupby_expr->get_type_info();
390  if (groupby_ti.is_bytes()) {
391  throw std::runtime_error(
392  "Cannot group by string columns which are not dictionary encoded.");
393  }
394  if (groupby_ti.is_buffer()) {
395  throw std::runtime_error("Group by buffer not supported");
396  }
397  if (groupby_ti.is_geometry()) {
398  throw std::runtime_error("Group by geometry not supported");
399  }
400  }
401 }
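
The constructor only stores its arguments and validates the group-by expression types, rejecting non-dictionary-encoded strings, buffers and geometry. A minimal standalone sketch of that validation (not engine code; TypeInfo is a hypothetical stand-in for SQLTypeInfo):

#include <stdexcept>
#include <vector>

struct TypeInfo {
  bool is_bytes;     // string column that is not dictionary encoded
  bool is_buffer;    // array / varlen buffer
  bool is_geometry;  // geospatial type
};

// Mirrors the loop over ra_exe_unit_.groupby_exprs in the constructor above.
void validate_groupby_types(const std::vector<TypeInfo>& groupby_types) {
  for (const auto& ti : groupby_types) {
    if (ti.is_bytes) {
      throw std::runtime_error(
          "Cannot group by string columns which are not dictionary encoded.");
    }
    if (ti.is_buffer) {
      throw std::runtime_error("Group by buffer not supported");
    }
    if (ti.is_geometry) {
      throw std::runtime_error("Group by geometry not supported");
    }
  }
}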

Member Function Documentation

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 2181 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

2181  {
2182  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2183  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
2184  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
2185  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
2186 
2187  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
2188 }
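
The emitted IR compares the return code against zero and routes non-zero values to the error path. A standalone sketch of the equivalent runtime behavior (not engine code):

#include <cstdint>
#include <stdexcept>
#include <string>

// Equivalent of the emitted check: rc_check_condition is "retCode == 0";
// any non-zero return code is surfaced as an error.
void check_error_code(int32_t ret_code) {
  if (ret_code != 0) {
    throw std::runtime_error("row function returned error code " +
                             std::to_string(ret_code));
  }
}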

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 995 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), codegenVarlenOutputBuffer(), DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, LLVM_ALIGN, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

999  {
1000  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1001  CHECK(filter_result);
1002 
1003  bool can_return_error = false;
1004  llvm::BasicBlock* filter_false{nullptr};
1005 
1006  {
1007  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
1008 
1009  if (executor_->isArchMaxwell(co.device_type)) {
1010  prependForceSync();
1011  }
1012  DiamondCodegen filter_cfg(filter_result,
1013  executor_,
1014  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
1015  "filter", // filter_true and filter_false basic blocks
1016  nullptr,
1017  false);
1018  filter_false = filter_cfg.cond_false_;
1019 
1020  if (is_group_by) {
1021  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1022  !query_mem_desc.useStreamingTopN()) {
1023  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
1024  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
1025  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
1026  llvm::Value* old_total_matched_val{nullptr};
1027  if (co.device_type == ExecutorDeviceType::GPU) {
1028  old_total_matched_val =
1029  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
1030  total_matched_ptr,
1031  LL_INT(int32_t(1)),
1032 #if LLVM_VERSION_MAJOR > 12
1033  LLVM_ALIGN(8),
1034 #endif
1035  llvm::AtomicOrdering::Monotonic);
1036  } else {
1037  old_total_matched_val = LL_BUILDER.CreateLoad(
1038  total_matched_ptr->getType()->getPointerElementType(), total_matched_ptr);
1039  LL_BUILDER.CreateStore(
1040  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
1041  total_matched_ptr);
1042  }
1043  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
1044  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
1045  }
1046 
1047  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
1048  auto varlen_output_buffer = codegenVarlenOutputBuffer(query_mem_desc);
1049  if (query_mem_desc.usesGetGroupValueFast() ||
1050  query_mem_desc.getQueryDescriptionType() ==
1051  QueryDescriptionType::GroupByPerfectHash) {
1052  if (query_mem_desc.getGroupbyColCount() > 1) {
1053  filter_cfg.setChainToNext();
1054  }
1055  // Don't generate null checks if the group slot is guaranteed to be non-null,
1056  // as it's the case for get_group_value_fast* family.
1057  can_return_error = codegenAggCalls(agg_out_ptr_w_idx,
1058  varlen_output_buffer,
1059  {},
1060  query_mem_desc,
1061  co,
1062  gpu_smem_context,
1063  filter_cfg);
1064  } else {
1065  {
1066  llvm::Value* nullcheck_cond{nullptr};
1067  if (query_mem_desc.didOutputColumnar()) {
1068  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
1069  LL_INT(int32_t(0)));
1070  } else {
1071  nullcheck_cond = LL_BUILDER.CreateICmpNE(
1072  std::get<0>(agg_out_ptr_w_idx),
1073  llvm::ConstantPointerNull::get(
1074  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
1075  }
1076  DiamondCodegen nullcheck_cfg(
1077  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
1078  codegenAggCalls(agg_out_ptr_w_idx,
1079  varlen_output_buffer,
1080  {},
1081  query_mem_desc,
1082  co,
1083  gpu_smem_context,
1084  filter_cfg);
1085  }
1086  can_return_error = true;
1087  if (query_mem_desc.getQueryDescriptionType() ==
1088  QueryDescriptionType::Projection &&
1089  query_mem_desc.useStreamingTopN()) {
1090  // Ignore rejection on pushing current row to top-K heap.
1091  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
1092  } else {
1093  CodeGenerator code_generator(executor_);
1094  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
1095  // TODO(alex): remove the trunc once pos is converted to 32 bits
1096  code_generator.posArg(nullptr),
1097  get_int_type(32, LL_CONTEXT))));
1098  }
1099  }
1100  } else {
1101  if (ra_exe_unit_.estimator) {
1102  std::stack<llvm::BasicBlock*> array_loops;
1103  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
1104  } else {
1105  auto arg_it = ROW_FUNC->arg_begin();
1106  std::vector<llvm::Value*> agg_out_vec;
1107  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1108  agg_out_vec.push_back(&*arg_it++);
1109  }
1110  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1111  /*varlen_output_buffer=*/nullptr,
1112  agg_out_vec,
1113  query_mem_desc,
1114  co,
1115  gpu_smem_context,
1116  filter_cfg);
1117  }
1118  }
1119  }
1120 
1121  if (ra_exe_unit_.join_quals.empty()) {
1122  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1123  } else if (sc_false) {
1124  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1125  LL_BUILDER.SetInsertPoint(sc_false);
1126  LL_BUILDER.CreateBr(filter_false);
1127  LL_BUILDER.SetInsertPoint(saved_insert_block);
1128  }
1129 
1130  return can_return_error;
1131 }
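
For projection queries without streaming top-N, the generated code marks the row as matched and bumps the shared total_matched counter, using an atomic add on the GPU path (the CreateAtomicRMW branch above). A standalone sketch of that bookkeeping (not engine code; names are illustrative):

#include <atomic>
#include <cstdint>

struct MatchCounters {
  int32_t crt_matched{0};                 // per-row "matched" flag
  std::atomic<int32_t> total_matched{0};  // shared across threads
  int32_t old_total_matched{0};           // value observed before the bump
};

void on_row_matched(MatchCounters& c, bool threads_share_counter) {
  c.crt_matched = 1;
  if (threads_share_counter) {
    // GPU path: atomic fetch-add with monotonic (relaxed) ordering
    c.old_total_matched = c.total_matched.fetch_add(1, std::memory_order_relaxed);
  } else {
    // CPU path: plain load, add, store
    c.old_total_matched = c.total_matched.load(std::memory_order_relaxed);
    c.total_matched.store(c.old_total_matched + 1, std::memory_order_relaxed);
  }
}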

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1995 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), CUR_FUNC, executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1997  {
1998  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1999  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2000  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
2001  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
2002 
2003  // TODO(alex): handle arrays uniformly?
2004  CodeGenerator code_generator(executor_);
2005  if (target_expr) {
2006  const auto& target_ti = target_expr->get_type_info();
2007  if (target_ti.is_buffer() &&
2008  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
2009  const auto target_lvs =
2010  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2011  : code_generator.codegen(
2012  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
2013  if (!func_expr && !arr_expr) {
2014  // Something with the chunk transport is code that was generated from a source
2015  // other than an ARRAY[] expression
2016  if (target_ti.is_bytes()) {
2017  CHECK_EQ(size_t(3), target_lvs.size());
2018  return {target_lvs[1], target_lvs[2]};
2019  }
2020  CHECK(target_ti.is_array());
2021  CHECK_EQ(size_t(1), target_lvs.size());
2022  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
2023  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
2024  const auto i8p_ty =
2025  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
2026  const auto& elem_ti = target_ti.get_elem_type();
2027  return {
2028  executor_->cgen_state_->emitExternalCall(
2029  "array_buff",
2030  i8p_ty,
2031  {target_lvs.front(), code_generator.posArg(target_expr)}),
2032  executor_->cgen_state_->emitExternalCall(
2033  "array_size",
2034  i32_ty,
2035  {target_lvs.front(),
2036  code_generator.posArg(target_expr),
2037  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
2038  } else {
2039  if (agg_expr) {
2040  throw std::runtime_error(
2041  "Using array[] operator as argument to an aggregate operator is not "
2042  "supported");
2043  }
2044  CHECK(func_expr || arr_expr);
2045  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
2046  CHECK_EQ(size_t(1), target_lvs.size());
2047  const auto prefix = target_ti.get_buffer_name();
2048  CHECK(target_ti.is_array() || target_ti.is_bytes());
2049  const auto target_lv = LL_BUILDER.CreateLoad(
2050  target_lvs[0]->getType()->getPointerElementType(), target_lvs[0]);
2051  // const auto target_lv_type = target_lvs[0]->getType();
2052  // CHECK(target_lv_type->isStructTy());
2053  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
2054  const auto i8p_ty = llvm::PointerType::get(
2055  get_int_type(8, executor_->cgen_state_->context_), 0);
2056  const auto ptr = LL_BUILDER.CreatePointerCast(
2057  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
2058  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
2059  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
2060  const auto nullcheck_ok_bb =
2061  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_nullcheck_ok_bb", CUR_FUNC);
2062  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
2063  LL_CONTEXT, prefix + "_nullcheck_fail_bb", CUR_FUNC);
2064 
2065  // TODO(adb): probably better to zext the bool
2066  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
2067  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
2068  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
2069 
2070  const auto ret_bb =
2071  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_return", CUR_FUNC);
2072  LL_BUILDER.SetInsertPoint(ret_bb);
2073  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
2074  result_phi->addIncoming(ptr, nullcheck_ok_bb);
2075  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
2076  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
2077  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
2078  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
2079  executor_->cgen_state_->emitExternalCall(
2080  "register_buffer_with_executor_rsm",
2081  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
2082  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
2083  LL_BUILDER.CreateBr(ret_bb);
2084  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
2085  LL_BUILDER.CreateBr(ret_bb);
2086 
2087  LL_BUILDER.SetInsertPoint(ret_bb);
2088  return {result_phi, size};
2089  }
2090  CHECK_EQ(size_t(2), target_lvs.size());
2091  return {target_lvs[0], target_lvs[1]};
2092  }
2093  }
2094  if (target_ti.is_geometry() &&
2095  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
2096  auto generate_coord_lvs =
2097  [&](auto* selected_target_expr,
2098  bool const fetch_columns) -> std::vector<llvm::Value*> {
2099  const auto target_lvs =
2100  code_generator.codegen(selected_target_expr, fetch_columns, co);
2101  if (dynamic_cast<const Analyzer::GeoOperator*>(target_expr) &&
2102  target_expr->get_type_info().is_geometry()) {
2103  // return a pointer to the temporary alloca
2104  return target_lvs;
2105  }
2106  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
2107  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
2108  if (geo_uoper || geo_binoper) {
2109  CHECK(target_expr->get_type_info().is_geometry());
2110  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
2111  target_lvs.size());
2112  return target_lvs;
2113  }
2114  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
2115  target_lvs.size());
2116 
2117  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
2118  const auto i8p_ty =
2119  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
2120  std::vector<llvm::Value*> coords;
2121  size_t ctr = 0;
2122  for (const auto& target_lv : target_lvs) {
2123  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
2124  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
2125  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
2126  // coords array (TINYINT). Subsequent arrays are regular INT.
2127 
2128  const size_t elem_sz = ctr == 0 ? 1 : 4;
2129  ctr++;
2130  int32_t fixlen = -1;
2131  if (target_ti.get_type() == kPOINT) {
2132  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
2133  if (col_var) {
2134  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
2135  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
2136  fixlen = coords_cd->columnType.get_size();
2137  }
2138  }
2139  }
2140  if (fixlen > 0) {
2141  coords.push_back(executor_->cgen_state_->emitExternalCall(
2142  "fast_fixlen_array_buff",
2143  i8p_ty,
2144  {target_lv, code_generator.posArg(selected_target_expr)}));
2145  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
2146  continue;
2147  }
2148  coords.push_back(executor_->cgen_state_->emitExternalCall(
2149  "array_buff",
2150  i8p_ty,
2151  {target_lv, code_generator.posArg(selected_target_expr)}));
2152  coords.push_back(executor_->cgen_state_->emitExternalCall(
2153  "array_size",
2154  i32_ty,
2155  {target_lv,
2156  code_generator.posArg(selected_target_expr),
2157  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
2158  }
2159  return coords;
2160  };
2161 
2162  if (agg_expr) {
2163  return generate_coord_lvs(agg_expr->get_arg(), true);
2164  } else {
2165  return generate_coord_lvs(target_expr,
2166  !executor_->plan_state_->allow_lazy_fetch_);
2167  }
2168  }
2169  }
2170  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2171  : code_generator.codegen(
2172  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
2173 }

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
llvm::Value *  varlen_output_buffer,
const std::vector< llvm::Value * > &  agg_out_vec,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1628 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1635  {
1636  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1637  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1638  // TODO(alex): unify the two cases, the output for non-group by queries
1639  // should be a contiguous buffer
1640  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1641  bool can_return_error = false;
1642  if (is_group_by) {
1643  CHECK(agg_out_vec.empty());
1644  } else {
1645  CHECK(!agg_out_vec.empty());
1646  }
1647 
1648  // output buffer is casted into a byte stream to be able to handle data elements of
1649  // different sizes (only used when actual column width sizes are used)
1650  llvm::Value* output_buffer_byte_stream{nullptr};
1651  llvm::Value* out_row_idx{nullptr};
1652  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1653  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1654  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1655  std::get<0>(agg_out_ptr_w_idx),
1656  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1657  output_buffer_byte_stream->setName("out_buff_b_stream");
1658  CHECK(std::get<1>(agg_out_ptr_w_idx));
1659  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1660  llvm::Type::getInt64Ty(LL_CONTEXT));
1661  out_row_idx->setName("out_row_idx");
1662  }
1663 
1664  TargetExprCodegenBuilder target_builder(ra_exe_unit_, is_group_by);
1665  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1666  ++target_idx) {
1667  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1668  CHECK(target_expr);
1669 
1670  target_builder(target_expr, executor_, query_mem_desc, co);
1671  }
1672 
1673  target_builder.codegen(this,
1674  executor_,
1675  query_mem_desc,
1676  co,
1677  gpu_smem_context,
1678  agg_out_ptr_w_idx,
1679  agg_out_vec,
1680  output_buffer_byte_stream,
1681  out_row_idx,
1682  varlen_output_buffer,
1683  diamond_codegen);
1684 
1685  for (auto target_expr : ra_exe_unit_.target_exprs) {
1686  CHECK(target_expr);
1687  executor_->plan_state_->isLazyFetchColumn(target_expr);
1688  }
1689 
1690  return can_return_error;
1691 }

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1696 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, shared::bit_cast(), CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1703  {
1704  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1705  llvm::Value* agg_col_ptr{nullptr};
1706  if (query_mem_desc.didOutputColumnar()) {
1707  // TODO(Saman): remove the second columnar branch, and support all query description
1708  // types through the first branch. Then, input arguments should also be cleaned up
1709  if (!g_cluster &&
1710  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1711  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1712  chosen_bytes == 8);
1713  CHECK(output_buffer_byte_stream);
1714  CHECK(out_row_idx);
1715  size_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1716  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1717  auto out_per_col_byte_idx =
1718 #ifdef _WIN32
1719  LL_BUILDER.CreateShl(out_row_idx, __lzcnt(chosen_bytes) - 1);
1720 #else
1721  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1722 #endif
1723  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1724  LL_INT(static_cast<int64_t>(col_off)));
1725  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1726  auto output_ptr = LL_BUILDER.CreateGEP(
1727  output_buffer_byte_stream->getType()->getScalarType()->getPointerElementType(),
1728  output_buffer_byte_stream,
1729  byte_offset);
1730  agg_col_ptr = LL_BUILDER.CreateBitCast(
1731  output_ptr,
1732  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1733  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1734  } else {
1735  auto const col_off_in_bytes = query_mem_desc.getColOffInBytes(agg_out_off);
1736  auto const col_off = col_off_in_bytes / chosen_bytes;
1737  auto const col_rem = col_off_in_bytes % chosen_bytes;
1738  CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
1739  CHECK(std::get<1>(agg_out_ptr_w_idx));
1740  auto* agg_out_idx = LL_BUILDER.CreateZExt(
1741  std::get<1>(agg_out_ptr_w_idx),
1742  get_int_type(8 * sizeof(col_off), executor_->cgen_state_->context_));
1743  auto* offset = LL_BUILDER.CreateAdd(agg_out_idx, LL_INT(col_off));
1744  auto* bit_cast = LL_BUILDER.CreateBitCast(
1745  std::get<0>(agg_out_ptr_w_idx),
1746  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1747  agg_col_ptr = LL_BUILDER.CreateGEP(
1748  bit_cast->getType()->getScalarType()->getPointerElementType(),
1749  bit_cast,
1750  offset);
1751  }
1752  } else {
1753  auto const col_off_in_bytes = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1754  auto const col_off = col_off_in_bytes / chosen_bytes;
1755  auto const col_rem = col_off_in_bytes % chosen_bytes;
1756  CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
1757  auto* bit_cast = LL_BUILDER.CreateBitCast(
1758  std::get<0>(agg_out_ptr_w_idx),
1759  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1760  agg_col_ptr = LL_BUILDER.CreateGEP(
1761  bit_cast->getType()->getScalarType()->getPointerElementType(),
1762  bit_cast,
1763  LL_INT(col_off));
1764  }
1765  CHECK(agg_col_ptr);
1766  return agg_col_ptr;
1767 }
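
In the columnar projection branch the emitted address arithmetic is byte_offset = col_off + row_idx * chosen_bytes (the shift by log2(chosen_bytes) above); in the row-wise branch the column offset is applied to the group slot and must be a multiple of the slot width. A standalone sketch of the same arithmetic (not engine code):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Columnar case: index into the raw byte stream, then read/write at slot width.
inline int8_t* agg_col_ptr_columnar(int8_t* output_buffer_byte_stream,
                                    size_t col_off,
                                    size_t row_idx,
                                    size_t chosen_bytes) {
  assert(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
         chosen_bytes == 8);
  // row_idx << log2(chosen_bytes) is row_idx * chosen_bytes
  return output_buffer_byte_stream + col_off + row_idx * chosen_bytes;
}

// Row-wise case: the slot base plus the column-only byte offset, which must be
// a multiple of the slot width (CHECK_EQ(col_rem, 0u) above).
inline int8_t* agg_col_ptr_row_wise(int8_t* group_slot_base,
                                    size_t col_off_in_bytes,
                                    size_t chosen_bytes) {
  assert(col_off_in_bytes % chosen_bytes == 0);
  return group_slot_base + col_off_in_bytes;
}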

void GroupByAndAggregate::codegenApproxQuantile ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1902 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, g_bigint_count, SQLTypeInfo::get_notnull(), get_target_info(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1907  {
1908  if (device_type == ExecutorDeviceType::GPU) {
1909  throw QueryMustRunOnCpu();
1910  }
1911  llvm::BasicBlock *calc, *skip{nullptr};
1912  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1913  auto const arg_ti =
1914  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1915  bool const nullable = !arg_ti.get_notnull();
1916 
1917  auto* cs = executor_->cgen_state_.get();
1918  auto& irb = cs->ir_builder_;
1919  if (nullable) {
1920  auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1921  auto* const skip_cond = arg_ti.is_fp()
1922  ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1923  : irb.CreateICmpEQ(agg_args.back(), null_value);
1924  calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_quantile");
1925  skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_quantile");
1926  irb.CreateCondBr(skip_cond, skip, calc);
1927  cs->current_func_->getBasicBlockList().push_back(calc);
1928  irb.SetInsertPoint(calc);
1929  }
1930  if (!arg_ti.is_fp()) {
1931  auto const agg_info = get_target_info(target_expr, g_bigint_count);
1932  agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
1933  }
1934  cs->emitExternalCall(
1935  "agg_approx_quantile", llvm::Type::getVoidTy(cs->context_), agg_args);
1936  if (nullable) {
1937  irb.CreateBr(skip);
1938  cs->current_func_->getBasicBlockList().push_back(skip);
1939  irb.SetInsertPoint(skip);
1940  }
1941 }
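
For nullable arguments the emitted code branches around the quantile update (the calc_approx_quantile / skip_approx_quantile blocks above), and non-floating-point arguments are first converted to floating point. A standalone per-row sketch (not engine code; the digest buffer is a stand-in for the quantile sketch updated by agg_approx_quantile):

#include <cstdint>
#include <optional>
#include <vector>

void approx_quantile_row(std::vector<double>& digest_buffer,
                         std::optional<int64_t> value) {
  if (!value.has_value()) {
    return;  // "skip" block: NULL inputs do not touch the digest
  }
  const double as_fp = static_cast<double>(*value);  // castToFP equivalent
  digest_buffer.push_back(as_fp);  // stand-in for the digest update call
}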

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1833 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1838  {
1839  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1840  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1841  const auto& arg_ti =
1842  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1843  if (arg_ti.is_fp()) {
1844  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1845  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1846  }
1847  const auto& count_distinct_descriptor =
1848  query_mem_desc.getCountDistinctDescriptor(target_idx);
1849  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1850  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1851  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1852  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1853  if (device_type == ExecutorDeviceType::GPU) {
1854  const auto base_dev_addr = getAdditionalLiteral(-1);
1855  const auto base_host_addr = getAdditionalLiteral(-2);
1856  agg_args.push_back(base_dev_addr);
1857  agg_args.push_back(base_host_addr);
1858  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1859  } else {
1860  emitCall("agg_approximate_count_distinct", agg_args);
1861  }
1862  return;
1863  }
1864  std::string agg_fname{"agg_count_distinct"};
1865  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1866  agg_fname += "_bitmap";
1867  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1868  }
1869  if (agg_info.skip_null_val) {
1870  auto null_lv = executor_->cgen_state_->castToTypeIn(
1871  (arg_ti.is_fp()
1872  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1873  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1874  64);
1875  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1876  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1877  agg_fname += "_skip_val";
1878  agg_args.push_back(null_lv);
1879  }
1880  if (device_type == ExecutorDeviceType::GPU) {
1881  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1882  agg_fname += "_gpu";
1883  const auto base_dev_addr = getAdditionalLiteral(-1);
1884  const auto base_host_addr = getAdditionalLiteral(-2);
1885  agg_args.push_back(base_dev_addr);
1886  agg_args.push_back(base_host_addr);
1887  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1888  CHECK_EQ(size_t(0),
1889  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1890  count_distinct_descriptor.sub_bitmap_count);
1891  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1892  count_distinct_descriptor.sub_bitmap_count)));
1893  }
1894  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1895  emitCall(agg_fname, agg_args);
1896  } else {
1897  executor_->cgen_state_->emitExternalCall(
1898  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1899  }
1900 }
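
The runtime function name is assembled from a base name plus suffixes chosen from the count-distinct descriptor and the target info, and APPROX_COUNT_DISTINCT always takes the bitmap-based path. A standalone sketch of that selection (not engine code):

#include <string>

std::string count_distinct_runtime_function(bool is_approx,
                                            bool bitmap_impl,
                                            bool skip_null_val,
                                            bool is_gpu) {
  if (is_approx) {
    return is_gpu ? "agg_approximate_count_distinct_gpu"
                  : "agg_approximate_count_distinct";
  }
  std::string fname{"agg_count_distinct"};
  if (bitmap_impl) {
    fname += "_bitmap";    // bitmap implementation takes the min value argument
  }
  if (skip_null_val) {
    fname += "_skip_val";  // adds the null sentinel argument
  }
  if (is_gpu) {
    fname += "_gpu";       // GPU requires the bitmap implementation
  }
  return fname;
}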

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1769 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1772  {
1773  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1774  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1775  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1776  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1777  estimator_comp_count_lv);
1778  int32_t subkey_idx = 0;
1779  for (const auto& estimator_arg_comp : estimator_arg) {
1780  const auto estimator_arg_comp_lvs =
1781  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1782  query_mem_desc.getEffectiveKeyWidth(),
1783  co,
1784  false,
1785  0,
1786  diamond_codegen,
1787  array_loops,
1788  true);
1789  CHECK(!estimator_arg_comp_lvs.original_value);
1790  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1791  // store the sub-key to the buffer
1792  LL_BUILDER.CreateStore(
1793  estimator_arg_comp_lv,
1794  LL_BUILDER.CreateGEP(
1795  estimator_key_lv->getType()->getScalarType()->getPointerElementType(),
1796  estimator_key_lv,
1797  LL_INT(subkey_idx++)));
1798  }
1799  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1800  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1801  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1802  const auto estimator_comp_bytes_lv =
1803  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1804  const auto bitmap_size_lv =
1805  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1806  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1807  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1808 }

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1220 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, query_infos_, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1223  {
1224  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1225  auto arg_it = ROW_FUNC->arg_begin();
1226  auto groups_buffer = arg_it++;
1227 
1228  std::stack<llvm::BasicBlock*> array_loops;
1229 
1230  // TODO(Saman): move this logic outside of this function.
1231  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1232  if (query_mem_desc.didOutputColumnar()) {
1233  return std::make_tuple(
1234  &*groups_buffer,
1235  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1236  } else {
1237  return std::make_tuple(
1238  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1239  nullptr);
1240  }
1241  }
1242 
1243  CHECK(query_mem_desc.getQueryDescriptionType() ==
1244  QueryDescriptionType::GroupByBaselineHash ||
1245  query_mem_desc.getQueryDescriptionType() ==
1246  QueryDescriptionType::GroupByPerfectHash);
1247 
1248  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1249  ? 0
1250  : query_mem_desc.getRowSize() / sizeof(int64_t);
1251 
1252  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1253  ? sizeof(int64_t)
1254  : query_mem_desc.getEffectiveKeyWidth();
1255  // for multi-column group by
1256  llvm::Value* group_key = nullptr;
1257  llvm::Value* key_size_lv = nullptr;
1258 
1259  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1260  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1261  if (query_mem_desc.getQueryDescriptionType() ==
1262  QueryDescriptionType::GroupByPerfectHash) {
1263  group_key =
1264  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1265  } else if (query_mem_desc.getQueryDescriptionType() ==
1266  QueryDescriptionType::GroupByBaselineHash) {
1267  group_key =
1268  col_width_size == sizeof(int32_t)
1269  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1270  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1271  }
1272  CHECK(group_key);
1273  CHECK(key_size_lv);
1274  }
1275 
1276  int32_t subkey_idx = 0;
1277  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1278  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1279  const auto col_range_info =
1280  get_expr_range_info(ra_exe_unit_, query_infos_, group_expr.get(), executor_);
1281  const auto translated_null_value = static_cast<int64_t>(
1282  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1283  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1284  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1285  : checked_int64_t(col_range_info.max) +
1286  (col_range_info.bucket ? col_range_info.bucket : 1));
1287 
1288  const bool col_has_nulls =
1289  query_mem_desc.getQueryDescriptionType() ==
1290  QueryDescriptionType::GroupByPerfectHash
1291  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1292  ? query_mem_desc.hasNulls()
1293  : col_range_info.has_nulls)
1294  : false;
1295 
1296  const auto group_expr_lvs =
1297  executor_->groupByColumnCodegen(group_expr.get(),
1298  col_width_size,
1299  co,
1300  col_has_nulls,
1301  translated_null_value,
1302  diamond_codegen,
1303  array_loops,
1304  query_mem_desc.threadsShareMemory());
1305  const auto group_expr_lv = group_expr_lvs.translated_value;
1306  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1307  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1308  return codegenSingleColumnPerfectHash(query_mem_desc,
1309  co,
1310  &*groups_buffer,
1311  group_expr_lv,
1312  group_expr_lvs.original_value,
1313  row_size_quad);
1314  } else {
1315  // store the sub-key to the buffer
1316  LL_BUILDER.CreateStore(
1317  group_expr_lv,
1318  LL_BUILDER.CreateGEP(
1319  group_key->getType()->getScalarType()->getPointerElementType(),
1320  group_key,
1321  LL_INT(subkey_idx++)));
1322  }
1323  }
1324  if (query_mem_desc.getQueryDescriptionType() ==
1325  QueryDescriptionType::GroupByPerfectHash) {
1326  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1327  return codegenMultiColumnPerfectHash(
1328  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1329  } else if (query_mem_desc.getQueryDescriptionType() ==
1330  QueryDescriptionType::GroupByBaselineHash) {
1331  return codegenMultiColumnBaselineHash(co,
1332  &*groups_buffer,
1333  group_key,
1334  key_size_lv,
1335  query_mem_desc,
1336  col_width_size,
1337  row_size_quad);
1338  }
1339  CHECK(false);
1340  return std::make_tuple(nullptr, nullptr);
1341 }
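
The method picks one of four code generation paths based on the query description type and whether the group key is a single column with a perfect hash. A standalone sketch of that dispatch (not engine code; QueryKind is a stand-in for QueryDescriptionType):

#include <stdexcept>
#include <string>

enum class QueryKind { Projection, GroupByPerfectHash, GroupByBaselineHash };

std::string groupby_codegen_strategy(QueryKind kind,
                                     bool single_column_perfect_hash) {
  if (kind == QueryKind::Projection) {
    return "codegenOutputSlot";               // scan output slot, no hashing
  }
  if (single_column_perfect_hash) {
    return "codegenSingleColumnPerfectHash";  // get_group_value_fast family
  }
  if (kind == QueryKind::GroupByPerfectHash) {
    return "codegenMultiColumnPerfectHash";   // perfect hash over the packed key
  }
  if (kind == QueryKind::GroupByBaselineHash) {
    return "codegenMultiColumnBaselineHash";  // baseline (open-addressing) hash
  }
  throw std::logic_error("unexpected query description type");
}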

void GroupByAndAggregate::codegenMode ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1943 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, get_int_type(), SQLTypeInfo::get_notnull(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1947  {
1948  if (device_type == ExecutorDeviceType::GPU) {
1949  throw QueryMustRunOnCpu();
1950  }
1951  llvm::BasicBlock *calc, *skip{nullptr};
1952  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1953  auto const arg_ti =
1954  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1955  bool const nullable = !arg_ti.get_notnull();
1956  bool const is_fp = arg_ti.is_fp();
1957  auto* cs = executor_->cgen_state_.get();
1958  auto& irb = cs->ir_builder_;
1959  if (nullable) {
1960  auto* const null_value =
1961  is_fp ? cs->inlineNull(arg_ti) : cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1962  auto* const skip_cond = is_fp ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1963  : irb.CreateICmpEQ(agg_args.back(), null_value);
1964  calc = llvm::BasicBlock::Create(cs->context_, "calc_mode");
1965  skip = llvm::BasicBlock::Create(cs->context_, "skip_mode");
1966  irb.CreateCondBr(skip_cond, skip, calc);
1967  cs->current_func_->getBasicBlockList().push_back(calc);
1968  irb.SetInsertPoint(calc);
1969  }
1970  if (is_fp) {
1971  auto* const int_type = get_int_type(8 * arg_ti.get_size(), cs->context_);
1972  agg_args.back() = irb.CreateBitCast(agg_args.back(), int_type);
1973  }
1974  // "agg_mode" collides with existing names, so non-standard suffix "_func" is added.
1975  cs->emitExternalCall("agg_mode_func", llvm::Type::getVoidTy(cs->context_), agg_args);
1976  if (nullable) {
1977  irb.CreateBr(skip);
1978  cs->current_func_->getBasicBlockList().push_back(skip);
1979  irb.SetInsertPoint(skip);
1980  }
1981 }

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1452 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1459  {
1460  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1461  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1462  CHECK(key_width == sizeof(int32_t));
1463  group_key =
1464  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1465  }
1466  std::vector<llvm::Value*> func_args{
1467  groups_buffer,
1468  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1469  &*group_key,
1470  &*key_size_lv,
1471  LL_INT(static_cast<int32_t>(key_width))};
1472  std::string func_name{"get_group_value"};
1473  if (query_mem_desc.didOutputColumnar()) {
1474  func_name += "_columnar_slot";
1475  } else {
1476  func_args.push_back(LL_INT(row_size_quad));
1477  }
1478  if (co.with_dynamic_watchdog) {
1479  func_name += "_with_watchdog";
1480  }
1481  if (query_mem_desc.didOutputColumnar()) {
1482  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1483  } else {
1484  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1485  }
1486 }
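
The runtime entry point is named by appending suffixes to get_group_value for columnar output and for the dynamic watchdog. A standalone sketch of that name selection (not engine code):

#include <string>

std::string baseline_hash_runtime_function(bool output_columnar,
                                           bool with_dynamic_watchdog) {
  std::string func_name{"get_group_value"};
  if (output_columnar) {
    func_name += "_columnar_slot";
  }
  if (with_dynamic_watchdog) {
    func_name += "_with_watchdog";
  }
  return func_name;
}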

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1408 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1413  {
1414  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1415  CHECK(query_mem_desc.getQueryDescriptionType() ==
1416  QueryDescriptionType::GroupByPerfectHash);
1417  // compute the index (perfect hash)
1418  auto perfect_hash_func = codegenPerfectHashFunction();
1419  auto hash_lv =
1420  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1421 
1422  if (query_mem_desc.didOutputColumnar()) {
1423  if (!query_mem_desc.hasKeylessHash()) {
1424  const std::string set_matching_func_name{
1425  "set_matching_group_value_perfect_hash_columnar"};
1426  const std::vector<llvm::Value*> set_matching_func_arg{
1427  groups_buffer,
1428  hash_lv,
1429  group_key,
1430  key_size_lv,
1431  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1432  query_mem_desc.getEntryCount())};
1433  emitCall(set_matching_func_name, set_matching_func_arg);
1434  }
1435  return std::make_tuple(groups_buffer, hash_lv);
1436  } else {
1437  if (query_mem_desc.hasKeylessHash()) {
1438  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1439  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1440  nullptr);
1441  } else {
1442  return std::make_tuple(
1443  emitCall(
1444  "get_matching_group_value_perfect_hash",
1445  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1446  nullptr);
1447  }
1448  }
1449 }

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1133 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, anonymous_namespace{Utm.h}::n, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1137  {
1138  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1139  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1140  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1141  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1142  CHECK(!group_expr);
1143  if (!query_mem_desc.didOutputColumnar()) {
1144  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1145  }
1146  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1147  ? 0
1148  : query_mem_desc.getRowSize() / sizeof(int64_t);
1149  CodeGenerator code_generator(executor_);
1150  if (query_mem_desc.useStreamingTopN()) {
1151  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1152  CHECK_GE(only_order_entry.tle_no, int(1));
1153  const size_t target_idx = only_order_entry.tle_no - 1;
1154  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1155  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1156  const auto chosen_bytes =
1157  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1158  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1159  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1160  const uint32_t n = ra_exe_unit_.sort_info.limit + ra_exe_unit_.sort_info.offset;
1161  std::string fname = "get_bin_from_k_heap";
1162  const auto& oe_ti = order_entry_expr->get_type_info();
1163  llvm::Value* null_key_lv = nullptr;
1164  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1165  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1166  switch (bit_width) {
1167  case 32:
1168  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1169  break;
1170  case 64:
1171  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1172  break;
1173  default:
1174  CHECK(false);
1175  }
1176  fname += "_int" + std::to_string(bit_width) + "_t";
1177  } else {
1178  CHECK(oe_ti.is_fp());
1179  if (order_entry_lv->getType()->isDoubleTy()) {
1180  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1181  } else {
1182  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1183  }
1184  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1185  }
1186  const auto key_slot_idx =
1187      get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1188  return emitCall(
1189  fname,
1190  {groups_buffer,
1191  LL_INT(n),
1192  LL_INT(row_size_quad),
1193  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1194  LL_BOOL(only_order_entry.is_desc),
1195  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1196  LL_BOOL(only_order_entry.nulls_first),
1197  null_key_lv,
1198  order_entry_lv});
1199  } else {
1200  auto* arg = get_arg_by_name(ROW_FUNC, "max_matched");
1201  const auto output_buffer_entry_count_lv =
1202  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1203  arg = get_arg_by_name(ROW_FUNC, "old_total_matched");
1204  const auto group_expr_lv =
1205  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1206  std::vector<llvm::Value*> args{groups_buffer,
1207  output_buffer_entry_count_lv,
1208  group_expr_lv,
1209  code_generator.posArg(nullptr)};
1210  if (query_mem_desc.didOutputColumnar()) {
1211  const auto columnar_output_offset =
1212  emitCall("get_columnar_scan_output_offset", args);
1213  return columnar_output_offset;
1214  }
1215  args.push_back(LL_INT(row_size_quad));
1216  return emitCall("get_scan_output_slot", args);
1217  }
1218 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define ROW_FUNC
#define LL_BUILDER
const std::list< Analyzer::OrderEntry > order_entries
#define LL_INT(v)
#define CHECK_GE(x, y)
Definition: Logger.h:306
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
size_t get_heap_key_slot_index(const std::vector< Analyzer::Expr * > &target_exprs, const size_t target_idx)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::string to_string(char const *&&v)
#define LL_BOOL(v)
const size_t limit
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
#define LL_FP(v)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
QueryDescriptionType getQueryDescriptionType() const
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
constexpr double n
Definition: Utm.h:38
const RelAlgExecutionUnit & ra_exe_unit_
const size_t offset
size_t getColOffInBytes(const size_t col_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
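
The streaming top-N branch above assembles the runtime heap helper's name by suffixing get_bin_from_k_heap with the order-by expression's type. A minimal sketch of that name assembly, where is_int_like, bit_width and is_double stand in for the type checks in the listing:

#include <cstddef>
#include <string>

std::string k_heap_helper_name(const bool is_int_like,
                               const std::size_t bit_width,  // 32 or 64
                               const bool is_double) {
  std::string fname = "get_bin_from_k_heap";
  if (is_int_like) {
    fname += "_int" + std::to_string(bit_width) + "_t";  // integer, decimal, time
  } else {
    fname += is_double ? "_double" : "_float";            // floating point
  }
  return fname;
}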

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1488 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), get_int_type(), getBucketedCardinality(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), query_infos_, and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1488  {
1489  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1490  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1491  auto ft = llvm::FunctionType::get(
1492  get_int_type(32, LL_CONTEXT),
1493  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1494  false);
1495  auto key_hash_func = llvm::Function::Create(ft,
1496  llvm::Function::ExternalLinkage,
1497  "perfect_key_hash",
1498  executor_->cgen_state_->module_);
1499  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1500  mark_function_always_inline(key_hash_func);
1501  auto& key_buff_arg = *key_hash_func->args().begin();
1502  llvm::Value* key_buff_lv = &key_buff_arg;
1503  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1504  llvm::IRBuilder<> key_hash_func_builder(bb);
1505  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1506  std::vector<int64_t> cardinalities;
1507  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1508  auto col_range_info =
1509  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1510  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1511  cardinalities.push_back(getBucketedCardinality(col_range_info));
1512  }
1513  size_t dim_idx = 0;
1514  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1515  auto* gep = key_hash_func_builder.CreateGEP(
1516  key_buff_lv->getType()->getScalarType()->getPointerElementType(),
1517  key_buff_lv,
1518  LL_INT(dim_idx));
1519  auto key_comp_lv =
1520  key_hash_func_builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
1521  auto col_range_info =
1522  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1523  auto crt_term_lv =
1524  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1525  if (col_range_info.bucket) {
1526  crt_term_lv =
1527  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1528  }
1529  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1530  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1531  LL_INT(cardinalities[prev_dim_idx]));
1532  }
1533  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1534  ++dim_idx;
1535  }
1536  key_hash_func_builder.CreateRet(
1537  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1538  return key_hash_func;
1539 }
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
ColRangeInfo get_expr_range_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
#define LL_CONTEXT
void mark_function_always_inline(llvm::Function *func)
#define LL_INT(v)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define CHECK_GT(x, y)
Definition: Logger.h:305
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const std::vector< InputTableInfo > & query_infos_
#define CHECK(condition)
Definition: Logger.h:291
const RelAlgExecutionUnit & ra_exe_unit_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
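
The generated perfect_key_hash computes a mixed-radix index: each key component is shifted by its column's minimum, divided by its bucket size if bucketed, and scaled by the product of the cardinalities of the preceding dimensions. A host-side sketch of the same computation (illustrative names, not part of the codebase):

#include <cstddef>
#include <cstdint>
#include <vector>

int64_t perfect_hash_index(const std::vector<int64_t>& key,
                           const std::vector<int64_t>& mins,
                           const std::vector<int64_t>& buckets,        // 0 means unbucketed
                           const std::vector<int64_t>& cardinalities) {
  int64_t hash = 0;
  for (std::size_t dim = 0; dim < key.size(); ++dim) {
    int64_t term = key[dim] - mins[dim];
    if (buckets[dim]) {
      term /= buckets[dim];
    }
    for (std::size_t prev = 0; prev < dim; ++prev) {
      term *= cardinalities[prev];
    }
    hash += term;
  }
  return hash;  // the generated function truncates this sum to 32 bits
}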

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1358 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1364  {
1365  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1366  CHECK(query_mem_desc.usesGetGroupValueFast());
1367  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1368  ? "get_columnar_group_bin_offset"
1369  : "get_group_value_fast"};
1370  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1371  get_group_fn_name += "_keyless";
1372  }
1373  if (query_mem_desc.interleavedBins(co.device_type)) {
1374  CHECK(!query_mem_desc.didOutputColumnar());
1375  CHECK(query_mem_desc.hasKeylessHash());
1376  get_group_fn_name += "_semiprivate";
1377  }
1378  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1379  &*group_expr_lv_translated};
1380  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1381  query_mem_desc.mustUseBaselineSort()) {
1382  get_group_fn_name += "_with_original_key";
1383  get_group_fn_args.push_back(group_expr_lv_original);
1384  }
1385  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1386  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1387  if (!query_mem_desc.hasKeylessHash()) {
1388  if (!query_mem_desc.didOutputColumnar()) {
1389  get_group_fn_args.push_back(LL_INT(row_size_quad));
1390  }
1391  } else {
1392  if (!query_mem_desc.didOutputColumnar()) {
1393  get_group_fn_args.push_back(LL_INT(row_size_quad));
1394  }
1395  if (query_mem_desc.interleavedBins(co.device_type)) {
1396  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1397  get_group_fn_args.push_back(warp_idx);
1398  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1399  }
1400  }
1401  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1402  return std::make_tuple(&*groups_buffer,
1403  emitCall(get_group_fn_name, get_group_fn_args));
1404  }
1405  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1406 }
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
bool interleavedBins(const ExecutorDeviceType) const
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::codegenVarlenOutputBuffer ( const QueryMemoryDescriptor &  query_mem_desc)
private

Definition at line 1343 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, executor_, QueryMemoryDescriptor::hasVarlenOutput(), LL_CONTEXT, and ROW_FUNC.

Referenced by codegen().

1344  {
1345  if (!query_mem_desc.hasVarlenOutput()) {
1346  return nullptr;
1347  }
1348 
1349  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1350  auto arg_it = ROW_FUNC->arg_begin();
1351  arg_it++; /* groups_buffer */
1352  auto varlen_output_buffer = arg_it++;
1353  CHECK(varlen_output_buffer->getType() == llvm::Type::getInt64PtrTy(LL_CONTEXT));
1354  return varlen_output_buffer;
1355 }
#define ROW_FUNC
#define LL_CONTEXT
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction *  window_func,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1592 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, codegenOutputSlot(), CodeGenerator::codegenWindowPosition(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1596  {
1597  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1598  const auto window_func_context =
1599      WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1600  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1601  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1602  ? 0
1603  : query_mem_desc.getRowSize() / sizeof(int64_t);
1604  auto arg_it = ROW_FUNC->arg_begin();
1605  auto groups_buffer = arg_it++;
1606  CodeGenerator code_generator(executor_);
1607  auto window_pos_lv = code_generator.codegenWindowPosition(
1608  window_func_context, code_generator.posArg(nullptr));
1609  const auto pos_in_window =
1610  LL_BUILDER.CreateTrunc(window_pos_lv, get_int_type(32, LL_CONTEXT));
1611  llvm::Value* entry_count_lv =
1612  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1613  std::vector<llvm::Value*> args{
1614  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1615  if (query_mem_desc.didOutputColumnar()) {
1616  const auto columnar_output_offset =
1617  emitCall("get_columnar_scan_output_offset", args);
1618  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1619  }
1620  args.push_back(LL_INT(row_size_quad));
1621  return emitCall("get_scan_output_slot", args);
1622  }
1623  auto arg_it = ROW_FUNC->arg_begin();
1624  auto groups_buffer = arg_it++;
1625  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1626 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2576
#define ROW_FUNC
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:43
llvm::Value * codegenOutputSlot(llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo &  arg_type,
const TargetInfo &  agg_info,
llvm::Value *  target 
)
private

Definition at line 1541 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1543  {
1544  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1545  const auto& agg_type = agg_info.sql_type;
1546  const size_t chosen_bytes = agg_type.get_size();
1547 
1548  bool need_conversion{false};
1549  llvm::Value* arg_null{nullptr};
1550  llvm::Value* agg_null{nullptr};
1551  llvm::Value* target_to_cast{target};
1552  if (arg_type.is_fp()) {
1553  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1554  if (agg_type.is_fp()) {
1555  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1556  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1557  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1558  need_conversion = true;
1559  }
1560  } else {
1561  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1562  return target;
1563  }
1564  } else {
1565  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1566  if (agg_type.is_fp()) {
1567  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1568  need_conversion = true;
1569  target_to_cast = executor_->castToFP(target, arg_type, agg_type);
1570  } else {
1571  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1572  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1573  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1574  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1575  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1576  need_conversion = true;
1577  }
1578  }
1579  }
1580  if (need_conversion) {
1581  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1582  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1583  return LL_BUILDER.CreateSelect(
1584  cmp,
1585  agg_null,
1586  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1587  } else {
1588  return target;
1589  }
1590 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:393
#define LL_BUILDER
SQLTypeInfo sql_type
Definition: TargetInfo.h:52
bool is_fp() const
Definition: sqltypes.h:584
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLAgg agg_kind
Definition: TargetInfo.h:51
Definition: sqldefs.h:78
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
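
Conceptually, the select emitted above rewrites values equal to the argument type's null sentinel into the aggregate type's null sentinel (after any cast), so the aggregate sees one consistent null encoding. A hedged scalar analogue with illustrative sentinels rather than the engine's actual encodings:

#include <cstdint>

int64_t convert_null_if_any(const int64_t target,
                            const int64_t arg_null,    // sentinel of the (narrower) input type
                            const int64_t agg_null) {  // sentinel of the aggregate's type
  return target == arg_null ? agg_null : target;       // non-null values pass through (cast)
}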

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 2175 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

2176  {
2177  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2178  return executor_->cgen_state_->emitCall(fname, args);
2179 }
#define AUTOMATIC_IR_METADATA(CGENSTATE)

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1983 of file GroupByAndAggregate.cpp.

References shared::bit_cast(), CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1983  {
1984  CHECK_LT(off, 0);
1985  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1986  auto* bit_cast = LL_BUILDER.CreateBitCast(
1987  lit_buff_lv, llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0));
1988  auto* gep =
1989  LL_BUILDER.CreateGEP(bit_cast->getType()->getScalarType()->getPointerElementType(),
1990  bit_cast,
1991  LL_INT(off));
1992  return LL_BUILDER.CreateLoad(gep->getType()->getPointerElementType(), gep);
1993 }
#define ROW_FUNC
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
#define CHECK_LT(x, y)
Definition: Logger.h:303
TO bit_cast(FROM &&from)
Definition: misc.h:298

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo &  col_range_info)
staticprivate

Definition at line 334 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

334  {
335  checked_int64_t crt_col_cardinality =
336  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
337  if (col_range_info.bucket) {
338  crt_col_cardinality /= col_range_info.bucket;
339  }
340  return static_cast<int64_t>(crt_col_cardinality +
341  (1 + (col_range_info.has_nulls ? 1 : 0)));
342 }
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t

+ Here is the caller graph for this function:
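
In other words, the bucketed cardinality is (max - min) / bucket plus one slot for the inclusive upper bound and, if nulls can occur, one more for the null key; the checked integer type turns overflow into an exception. A small worked sketch without the overflow protection:

#include <cstdint>

int64_t bucketed_cardinality(const int64_t min,
                             const int64_t max,
                             const int64_t bucket,
                             const bool has_nulls) {
  int64_t card = max - min;
  if (bucket) {
    card /= bucket;
  }
  return card + 1 + (has_nulls ? 1 : 0);
  // e.g. min = 10, max = 50, bucket = 5, has_nulls = true  ->  8 + 1 + 1 = 10 entries
}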

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 215 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Executor::getBaselineThreshold(), getBucketedCardinality(), group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, SortInfo::order_entries, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

215  {
216  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
217  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
218  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
219  // can expect this to be true anyway for grouped queries since the precise version
220  // uses significantly more memory.
221  const int64_t baseline_threshold =
222      Executor::getBaselineThreshold(has_count_distinct(ra_exe_unit_), device_type_);
223  if (ra_exe_unit_.groupby_exprs.size() != 1) {
224  try {
225  checked_int64_t cardinality{1};
226  bool has_nulls{false};
227  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
228  auto col_range_info = get_expr_range_info(
229  ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
230  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
231  // going through baseline hash if a non-integer type is encountered
232  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
233  }
234  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
235  CHECK_GE(crt_col_cardinality, 0);
236  cardinality *= crt_col_cardinality;
237  if (col_range_info.has_nulls) {
238  has_nulls = true;
239  }
240  }
241  // For zero or high cardinalities, use baseline layout.
242  if (!cardinality || cardinality > baseline_threshold) {
243  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
244  }
245  return {QueryDescriptionType::GroupByPerfectHash,
246  0,
247  int64_t(cardinality),
248  0,
249  has_nulls};
250  } catch (...) { // overflow when computing cardinality
251  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
252  }
253  }
254  // For single column groupby on high timestamps, force baseline hash due to wide ranges
255  // we are likely to encounter when applying quals to the expression range
256  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
257  // the range is small enough
258  if (ra_exe_unit_.groupby_exprs.front() &&
259  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
260  ra_exe_unit_.simple_quals.size() > 0) {
261  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
262  }
263  const auto col_range_info = get_expr_range_info(
264      ra_exe_unit_, query_infos_, ra_exe_unit_.groupby_exprs.front().get(), executor_);
265  if (!ra_exe_unit_.groupby_exprs.front()) {
266  return col_range_info;
267  }
268  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
269  const int64_t col_count =
270      ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
271  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
272  if (has_count_distinct(ra_exe_unit_)) {
273  max_entry_count = std::min(max_entry_count, baseline_threshold);
274  }
275  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
276  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
277  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
278 
279  const bool has_filters =
280  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
281  if (has_filters &&
282  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
283  // if filters are present, we can use the filter to narrow the cardinality of the
284  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
285  // off attempting perfect hash (since we know the range will be made of
286  // monotonically increasing numbers from min to max for dictionary encoded strings)
287  // and failing later due to excessive memory use.
288  // Check the conditions where baseline hash can provide a performance increase and
289  // return baseline hash (potentially forcing an estimator query) as the range type.
290  // Otherwise, return col_range_info which will likely be perfect hash, though could
291  // be baseline from a previous call of this function prior to the estimator query.
292  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
293  // TODO(adb): allow some sorts to pass through this block by centralizing sort
294  // algorithm decision making
295  if (has_count_distinct(ra_exe_unit_) &&
296  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
297  // always use baseline hash for column range too big for perfect hash with count
298  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
299  // hash group by in this case.
300  return {QueryDescriptionType::GroupByBaselineHash,
301  col_range_info.min,
302  col_range_info.max,
303  0,
304  col_range_info.has_nulls};
305  } else {
306  // use original col range for sort
307  return col_range_info;
308  }
309  }
310  // if filters are present and the filtered range is less than the cardinality of
311  // the column, consider baseline hash
312  if (group_cardinality_estimation_ &&
313      cardinality_estimate_less_than_column_range(*group_cardinality_estimation_,
314  col_range_info)) {
315  return {QueryDescriptionType::GroupByBaselineHash,
316  col_range_info.min,
317  col_range_info.max,
318  0,
319  col_range_info.has_nulls};
320  }
321  }
322  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get())) &&
323  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
324  !col_range_info.bucket) {
325  return {QueryDescriptionType::GroupByBaselineHash,
326  col_range_info.min,
327  col_range_info.max,
328  0,
329  col_range_info.has_nulls};
330  }
331  return col_range_info;
332 }
std::vector< Analyzer::Expr * > target_exprs
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
ColRangeInfo get_expr_range_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t
bool is_column_range_too_big_for_perfect_hash(const ColRangeInfo &col_range_info, const int64_t max_entry_count)
const std::list< Analyzer::OrderEntry > order_entries
#define CHECK_GE(x, y)
Definition: Logger.h:306
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool has_count_distinct(const RelAlgExecutionUnit &ra_exe_unit)
const std::vector< InputTableInfo > & query_infos_
bool expr_is_rowid(const Analyzer::Expr *expr)
const ExecutorDeviceType device_type_
bool cardinality_estimate_less_than_column_range(const int64_t cardinality_estimate, const ColRangeInfo &col_range_info)
const std::optional< int64_t > group_cardinality_estimation_
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:291
static size_t getBaselineThreshold(bool for_count_distinct, ExecutorDeviceType device_type)
Definition: Execute.h:1270
const RelAlgExecutionUnit & ra_exe_unit_
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
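
For the multi-column case at the top of this function, perfect hash is only kept while the product of the per-column bucketed cardinalities stays positive and within the baseline threshold; a zero product, an overflow, or anything larger falls back to baseline hash. A hedged sketch of that decision using a manual overflow guard in place of the checked integer type:

#include <cstdint>
#include <vector>

bool prefer_baseline_hash(const std::vector<int64_t>& per_col_cardinalities,
                          const int64_t baseline_threshold) {
  int64_t cardinality = 1;
  for (const auto card : per_col_cardinalities) {
    if (card <= 0 || cardinality > baseline_threshold / card) {
      return true;  // empty range, would overflow, or already past the threshold
    }
    cardinality *= card;
  }
  return cardinality > baseline_threshold;
}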

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo &  col_range_info,
const size_t  shard_count 
) const
private

Definition at line 403 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

404  {
405  size_t device_count{0};
406  if (device_type_ == ExecutorDeviceType::GPU) {
407  device_count = executor_->cudaMgr()->getDeviceCount();
408  CHECK_GT(device_count, 0u);
409  }
410 
411  int64_t bucket{col_range_info.bucket};
412 
413  if (shard_count) {
414  CHECK(!col_range_info.bucket);
415  /*
416  when a node has fewer devices than shard count,
417  a) In a distributed setup, the minimum distance between two keys would be
418  device_count because shards are stored consecutively across the physical tables,
419  i.e if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
420  would have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf
421  node has only 1 device, in this case, all the keys from each node are loaded on
422  the device each.
423 
424  b) In a single node setup, the distance would be minimum of device_count or
425  difference of device_count - shard_count. For example: If a single node server
426  running on 3 devices a shard column has values 0 to 9 in a table with 4 shards,
427  device to fragment keys mapping would be: device 1 - 4,8,3,7 device 2 - 1,5,9
428  device 3 - 2, 6 The bucket value would be 4(shards) - 3(devices) = 1 i.e. minimum
429  of device_count or difference.
430 
431  When a node has device count equal to or more than shard count then the
432  minimum distance is always at least shard_count * no of leaf nodes.
433  */
434  if (device_count < shard_count) {
435  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
436  : std::min(device_count, shard_count - device_count);
437  } else {
438  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
439  }
440  }
441 
442  return bucket;
443 }
#define CHECK_GT(x, y)
Definition: Logger.h:305
const ExecutorDeviceType device_type_
#define CHECK(condition)
Definition: Logger.h:291
size_t g_leaf_count
Definition: ParserNode.cpp:77

+ Here is the caller graph for this function:
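
A worked example of the sharded bucket rule above, assuming a single-node setup (g_leaf_count == 0): with 3 GPUs and 4 shards, device_count < shard_count, so the bucket becomes std::min(device_count, shard_count - device_count) = min(3, 1) = 1; with 4 GPUs and 4 shards, device_count is no longer smaller, so the bucket becomes shard_count * std::max(g_leaf_count, size_t(1)) = 4.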

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 952 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, executor_, Analyzer::AggExpr::get_arg(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Analyzer::Expr::get_type_info(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

953  {
954  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
955  return false;
956  }
957  for (const auto& order_entry : order_entries) {
958  CHECK_GE(order_entry.tle_no, 1);
959  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
960  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
961  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
962  return false;
963  }
964  // TODO(alex): relax the restrictions
965  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
966  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
967  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
968  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
969  return false;
970  }
971  if (agg_expr->get_arg()) {
972  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
973  if (arg_ti.is_fp()) {
974  return false;
975  }
976  auto expr_range_info =
977  get_expr_range_info(ra_exe_unit_, query_infos_, agg_expr->get_arg(), executor_);
978  // TODO(adb): QMD not actually initialized here?
979  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
980  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
981  expr_range_info.has_nulls) &&
982  order_entry.is_desc == order_entry.nulls_first) {
983  return false;
984  }
985  }
986  const auto& target_ti = target_expr->get_type_info();
987  CHECK(!target_ti.is_buffer());
988  if (!target_ti.is_integer()) {
989  return false;
990  }
991  }
992  return true;
993 }
std::vector< Analyzer::Expr * > target_exprs
ColRangeInfo get_expr_range_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
#define CHECK_GE(x, y)
Definition: Logger.h:306
Expr * get_arg() const
Definition: Analyzer.h:1208
Definition: sqldefs.h:75
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
const std::vector< InputTableInfo > & query_infos_
#define CHECK_LE(x, y)
Definition: Logger.h:304
#define CHECK(condition)
Definition: Logger.h:291
const RelAlgExecutionUnit & ra_exe_unit_
Definition: sqldefs.h:76
Definition: sqldefs.h:74

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo *  render_info,
const bool  output_columnar_hint 
)
private

Definition at line 829 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

834  {
835  const auto shard_count = device_type_ == ExecutorDeviceType::GPU
836      ? shard_count_for_top_groups(ra_exe_unit_)
837  : 0;
838  bool sort_on_gpu_hint =
839  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
840      !ra_exe_unit_.sort_info.order_entries.empty() &&
841      gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
842  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
843  // but the total output buffer size would be too big or it's a sharded top query.
844  // For the sake of managing risk, use the new result set way very selectively for
845  // this case only (alongside the baseline layout we've enabled for a while now).
846  bool must_use_baseline_sort = shard_count;
847  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
848  while (true) {
849  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
850  max_groups_buffer_entry_count,
851  crt_min_byte_width,
852  sort_on_gpu_hint,
853  render_info,
854  must_use_baseline_sort,
855  output_columnar_hint);
856  CHECK(query_mem_desc);
857  if (query_mem_desc->sortOnGpu() &&
858  (query_mem_desc->getBufferSizeBytes(device_type_) +
859  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
860  2 * 1024 * 1024 * 1024LL) {
861  must_use_baseline_sort = true;
862  sort_on_gpu_hint = false;
863  } else {
864  break;
865  }
866  }
867  return query_mem_desc;
868 }
bool gpuCanHandleOrderEntries(const std::list< Analyzer::OrderEntry > &order_entries)
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
const std::list< Analyzer::OrderEntry > order_entries
const ExecutorDeviceType device_type_
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit)
#define CHECK(condition)
Definition: Logger.h:291
const RelAlgExecutionUnit & ra_exe_unit_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:
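
The retry loop above abandons the GPU-sort layout when the result buffer plus the int32 index buffer (aligned to 8 bytes) would exceed 2 GiB, and rebuilds the descriptor with must_use_baseline_sort set. A minimal sketch of that size check, with illustrative parameter names:

#include <cstddef>
#include <cstdint>

bool too_big_for_gpu_sort(const std::size_t buffer_size_bytes, const std::size_t entry_count) {
  const std::size_t index_bytes =
      (entry_count * sizeof(int32_t) + 7) & ~static_cast<std::size_t>(7);  // align_to_int64
  return buffer_size_bytes + index_bytes > 2ULL * 1024 * 1024 * 1024;      // 2 GiB cap
}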

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo *  render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 870 of file GroupByAndAggregate.cpp.

References device_type_, executor_, g_enable_watchdog, g_watchdog_baseline_max_groups, anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info(), getColRangeInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

877  {
878  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
879 
880  auto col_range_info_nosharding = getColRangeInfo();
881 
882  const auto shard_count = device_type_ == ExecutorDeviceType::GPU
883      ? shard_count_for_top_groups(ra_exe_unit_)
884  : 0;
885 
886  const auto col_range_info =
887  ColRangeInfo{col_range_info_nosharding.hash_type_,
888  col_range_info_nosharding.min,
889  col_range_info_nosharding.max,
890  getShardedTopBucket(col_range_info_nosharding, shard_count),
891  col_range_info_nosharding.has_nulls};
892 
893  // Non-grouped aggregates do not support accessing aggregated ranges
894  // Keyless hash is currently only supported with single-column perfect hash
895  const auto keyless_info =
896  !(is_group_by &&
897  col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash)
898  ? KeylessInfo{false, -1}
899      : get_keyless_info(ra_exe_unit_, query_infos_, is_group_by, executor_);
900 
901  if (g_enable_watchdog &&
902  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
903  max_groups_buffer_entry_count > g_watchdog_baseline_max_groups) ||
904  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
905  ra_exe_unit_.groupby_exprs.size() == 1 &&
906  (col_range_info.max - col_range_info.min) /
907  std::max(col_range_info.bucket, int64_t(1)) >
908  130000000))) {
909  throw WatchdogException("Query would use too much memory");
910  }
911 
912  const auto count_distinct_descriptors = init_count_distinct_descriptors(
913  ra_exe_unit_, query_infos_, col_range_info, device_type_, executor_);
914  try {
915      return QueryMemoryDescriptor::init(executor_,
916  ra_exe_unit_,
917  query_infos_,
918  col_range_info,
919  keyless_info,
920  allow_multifrag,
921  device_type_,
922  crt_min_byte_width,
923  sort_on_gpu_hint,
924  shard_count,
925  max_groups_buffer_entry_count,
926  render_info,
927  count_distinct_descriptors,
928  must_use_baseline_sort,
929  output_columnar_hint,
930  /*streaming_top_n_hint=*/true);
931  } catch (const StreamingTopNOOM& e) {
932  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
933      return QueryMemoryDescriptor::init(executor_,
934  ra_exe_unit_,
935  query_infos_,
936  col_range_info,
937  keyless_info,
938  allow_multifrag,
939  device_type_,
940  crt_min_byte_width,
941  sort_on_gpu_hint,
942  shard_count,
943  max_groups_buffer_entry_count,
944  render_info,
945  count_distinct_descriptors,
946  must_use_baseline_sort,
947  output_columnar_hint,
948  /*streaming_top_n_hint=*/false);
949  }
950 }
size_t g_watchdog_baseline_max_groups
KeylessInfo get_keyless_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool is_group_by, Executor *executor)
#define LOG(tag)
Definition: Logger.h:285
ColRangeInfo getColRangeInfo()
QueryDescriptionType hash_type_
static std::unique_ptr< QueryMemoryDescriptor > init(const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const CountDistinctDescriptors count_distinct_descriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint)
CountDistinctDescriptors init_count_distinct_descriptors(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &group_by_range_info, const ExecutorDeviceType device_type, Executor *executor)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool g_enable_watchdog
int64_t getShardedTopBucket(const ColRangeInfo &col_range_info, const size_t shard_count) const
const std::vector< InputTableInfo > & query_infos_
const ExecutorDeviceType device_type_
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit)
const RelAlgExecutionUnit & ra_exe_unit_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
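
As a concrete reading of the watchdog clause above: with the watchdog enabled, a single-column perfect-hash group by whose bucketed key range (max - min) / max(bucket, 1) exceeds 130,000,000 throws "Query would use too much memory". For example, a column spanning 0 to 1,300,000,005 with bucket = 1 trips the check, while the same range with bucket = 10 (exactly 130,000,000 entries) does not.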

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value const *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions &  co 
) const
private

Definition at line 29 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

32  {
33  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
34  llvm::isa<llvm::AllocaInst>(val_ptr) &&
35  val_ptr->getType() ==
36  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
37  "agg_id" == agg_base_name);
38 }
ExecutorDeviceType device_type

+ Here is the caller graph for this function:

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 40 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

40  {
41  executor_->cgen_state_->ir_builder_.CreateCall(
42  executor_->cgen_state_->module_->getFunction("force_sync"));
43 }

+ Here is the caller graph for this function:

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit &  ra_exe_unit)
static

Definition at line 2198 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::get_metadata_for_table(), Analyzer::ColumnVar::getColumnKey(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

2199  {
2200  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
2201  return 0;
2202  }
2203  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2204  const auto grouped_col_expr =
2205  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
2206  if (!grouped_col_expr) {
2207  continue;
2208  }
2209  const auto& column_key = grouped_col_expr->getColumnKey();
2210  if (column_key.table_id <= 0) {
2211  return 0;
2212  }
2213  const auto td = Catalog_Namespace::get_metadata_for_table(
2214  {column_key.db_id, column_key.table_id});
2215  if (td->shardedColumnId == column_key.column_id) {
2216  return td->nShards;
2217  }
2218  }
2219  return 0;
2220 }
const std::list< Analyzer::OrderEntry > order_entries
const TableDescriptor * get_metadata_for_table(const ::shared::TableKey &table_key, bool populate_fragmenter)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
const size_t limit
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
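
A hypothetical query shape that would yield a non-zero result here, assuming table t is sharded on column c: SELECT c, COUNT(*) FROM t GROUP BY c ORDER BY 2 DESC LIMIT 10 — a single order entry plus a limit, grouped on the shard key, so the function returns the table's nShards rather than 0.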

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 219 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 220 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 217 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 218 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 221 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 222 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private
const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 215 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

bool GroupByAndAggregate::output_columnar_
private

Definition at line 212 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private
std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 211 of file GroupByAndAggregate.h.


The documentation for this class was generated from the following files: