OmniSciDB  dfae7c3b14
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Classes

struct  DiamondCodegen
 

Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
void addTransientStringLiterals ()
 
CountDistinctDescriptors initCountDistinctDescriptors ()
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
ColRangeInfo getExprRangeInfo (const Analyzer::Expr *expr) const
 
KeylessInfo getKeylessInfo (const std::vector< Analyzer::Expr *> &target_expr_list, const bool is_group_by) const
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const std::vector< llvm::Value *> &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation result should be stored. More...
 
void codegenEstimator (std::stack< llvm::BasicBlock *> &array_loops, GroupByAndAggregate::DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value *> &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value *> &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 125 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

◆ GroupByAndAggregate()

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 308 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, and ra_exe_unit_.

315  : executor_(executor)
316  , ra_exe_unit_(ra_exe_unit)
317  , query_infos_(query_infos)
318  , row_set_mem_owner_(row_set_mem_owner)
319  , device_type_(device_type)
320  , group_cardinality_estimation_(group_cardinality_estimation) {
321  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
322  if (!groupby_expr) {
323  continue;
324  }
325  const auto& groupby_ti = groupby_expr->get_type_info();
326  if (groupby_ti.is_string() && groupby_ti.get_compression() != kENCODING_DICT) {
327  throw std::runtime_error(
328  "Cannot group by string columns which are not dictionary encoded.");
329  }
330  if (groupby_ti.is_array()) {
331  throw std::runtime_error("Group by array not supported");
332  }
333  if (groupby_ti.is_geometry()) {
334  throw std::runtime_error("Group by geometry not supported");
335  }
336  }
337 }
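A minimal construction-and-codegen sketch, loosely following the caller noted under codegen() (Executor::compileBody()); the locals filter_lv, sc_false_bb, query_mem_desc, co and gpu_smem_context are illustrative assumptions, not values taken from this page:

    // Hypothetical driver sketch; the real wiring lives in Executor::compileBody().
    GroupByAndAggregate group_by_and_agg(executor,
                                         co.device_type,
                                         ra_exe_unit,
                                         query_infos,
                                         row_set_mem_owner,
                                         std::nullopt /* no group cardinality estimate */);
    const bool can_return_error =
        group_by_and_agg.codegen(filter_lv, sc_false_bb, query_mem_desc, co, gpu_smem_context);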

Member Function Documentation

◆ addTransientStringLiterals() [1/2]

void GroupByAndAggregate::addTransientStringLiterals ( const RelAlgExecutionUnit &  ra_exe_unit,
Executor *  executor,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)
static

Definition at line 576 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::add_transient_string_literals_for_expression(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, kSINGLE_VALUE, and RelAlgExecutionUnit::target_exprs.

579  {
580  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
581  add_transient_string_literals_for_expression(
582  group_expr.get(), executor, row_set_mem_owner);
583  }
584  for (const auto target_expr : ra_exe_unit.target_exprs) {
585  const auto& target_type = target_expr->get_type_info();
586  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
587  continue;
588  }
589  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
590  if (agg_expr) {
591  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
592  agg_expr->get_aggtype() == kSAMPLE) {
593  add_transient_string_literals_for_expression(
594  agg_expr->get_arg(), executor, row_set_mem_owner);
595  }
596  } else {
597  add_transient_string_literals_for_expression(
598  target_expr, executor, row_set_mem_owner);
599  }
600  }
601  row_set_mem_owner->addLiteralStringDictProxy(executor->lit_str_dict_proxy_);
602 }

◆ addTransientStringLiterals() [2/2]

void GroupByAndAggregate::addTransientStringLiterals ( )
private

Definition at line 510 of file GroupByAndAggregate.cpp.

References executor_, ra_exe_unit_, and row_set_mem_owner_.

Referenced by RelAlgExecutor::executeSort(), RelAlgExecutor::executeWorkUnit(), and initQueryMemoryDescriptorImpl().

510  {
511  addTransientStringLiterals(ra_exe_unit_, executor_, row_set_mem_owner_);
512 }

◆ checkErrorCode()

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 1989 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and GroupByAndAggregate::DiamondCodegen::executor_.

Referenced by TargetExprCodegen::codegenAggregate().

1989  {
1990  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1991  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1992  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1993  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1994 
1995  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1996 }
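Conceptually, the generated check aborts the row function whenever the runtime call reported a non-zero code; a host-side sketch of the same behavior (illustrative only, the real check is emitted as LLVM IR through emitErrorCheck):

    // Sketch: semantic equivalent of the generated "rc" error check.
    if (ret_code != 0) {
      return ret_code;  // propagate the runtime error code out of the row function
    }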

◆ codegen()

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 949 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), GroupByAndAggregate::DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, GroupByAndAggregate::DiamondCodegen::executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), prependForceSync(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by Executor::compileBody().

953  {
954  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
955  CHECK(filter_result);
956 
957  bool can_return_error = false;
958  llvm::BasicBlock* filter_false{nullptr};
959 
960  {
961  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
962 
963  if (executor_->isArchMaxwell(co.device_type)) {
964  prependForceSync();
965  }
966  DiamondCodegen filter_cfg(filter_result,
967  executor_,
968  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
969  "filter", // filter_true and filter_false basic blocks
970  nullptr,
971  false);
972  filter_false = filter_cfg.cond_false_;
973 
974  if (is_group_by) {
975  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
976  !query_mem_desc.useStreamingTopN()) {
977  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
978  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
979  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
980  llvm::Value* old_total_matched_val{nullptr};
981  if (co.device_type == ExecutorDeviceType::GPU) {
982  old_total_matched_val =
983  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
984  total_matched_ptr,
985  LL_INT(int32_t(1)),
986  llvm::AtomicOrdering::Monotonic);
987  } else {
988  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
989  LL_BUILDER.CreateStore(
990  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
991  total_matched_ptr);
992  }
993  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
994  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
995  }
996 
997  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
998  if (query_mem_desc.usesGetGroupValueFast() ||
999  query_mem_desc.getQueryDescriptionType() ==
1000  QueryDescriptionType::GroupByPerfectHash) {
1001  if (query_mem_desc.getGroupbyColCount() > 1) {
1002  filter_cfg.setChainToNext();
1003  }
1004  // Don't generate null checks if the group slot is guaranteed to be non-null,
1005  // as it's the case for get_group_value_fast* family.
1006  can_return_error = codegenAggCalls(
1007  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
1008  } else {
1009  {
1010  llvm::Value* nullcheck_cond{nullptr};
1011  if (query_mem_desc.didOutputColumnar()) {
1012  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
1013  LL_INT(int32_t(0)));
1014  } else {
1015  nullcheck_cond = LL_BUILDER.CreateICmpNE(
1016  std::get<0>(agg_out_ptr_w_idx),
1017  llvm::ConstantPointerNull::get(
1018  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
1019  }
1020  DiamondCodegen nullcheck_cfg(
1021  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
1022  codegenAggCalls(
1023  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
1024  }
1025  can_return_error = true;
1026  if (query_mem_desc.getQueryDescriptionType() ==
1027  QueryDescriptionType::Projection &&
1028  query_mem_desc.useStreamingTopN()) {
1029  // Ignore rejection on pushing current row to top-K heap.
1030  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
1031  } else {
1032  CodeGenerator code_generator(executor_);
1033  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
1034  // TODO(alex): remove the trunc once pos is converted to 32 bits
1035  code_generator.posArg(nullptr),
1036  get_int_type(32, LL_CONTEXT))));
1037  }
1038  }
1039  } else {
1040  if (ra_exe_unit_.estimator) {
1041  std::stack<llvm::BasicBlock*> array_loops;
1042  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
1043  } else {
1044  auto arg_it = ROW_FUNC->arg_begin();
1045  std::vector<llvm::Value*> agg_out_vec;
1046  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1047  agg_out_vec.push_back(&*arg_it++);
1048  }
1049  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1050  agg_out_vec,
1051  query_mem_desc,
1052  co,
1053  gpu_smem_context,
1054  filter_cfg);
1055  }
1056  }
1057  }
1058 
1059  if (ra_exe_unit_.join_quals.empty()) {
1060  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1061  } else if (sc_false) {
1062  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1063  LL_BUILDER.SetInsertPoint(sc_false);
1064  LL_BUILDER.CreateBr(filter_false);
1065  LL_BUILDER.SetInsertPoint(saved_insert_block);
1066  }
1067 
1068  return can_return_error;
1069 }

◆ codegenAggArg()

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1810 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), CUR_FUNC, GroupByAndAggregate::DiamondCodegen::executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1812  {
1813  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1814  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1815  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1816  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1817 
1818  // TODO(alex): handle arrays uniformly?
1819  CodeGenerator code_generator(executor_);
1820  if (target_expr) {
1821  const auto& target_ti = target_expr->get_type_info();
1822  if (target_ti.is_array() && !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1823  const auto target_lvs =
1824  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1825  : code_generator.codegen(
1826  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1827  if (!func_expr && !arr_expr) {
1828  // Something with the chunk transport is code that was generated from a source
1829  // other than an ARRAY[] expression
1830  CHECK_EQ(size_t(1), target_lvs.size());
1831  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1832  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1833  const auto i8p_ty =
1834  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1835  const auto& elem_ti = target_ti.get_elem_type();
1836  return {
1837  executor_->cgen_state_->emitExternalCall(
1838  "array_buff",
1839  i8p_ty,
1840  {target_lvs.front(), code_generator.posArg(target_expr)}),
1841  executor_->cgen_state_->emitExternalCall(
1842  "array_size",
1843  i32_ty,
1844  {target_lvs.front(),
1845  code_generator.posArg(target_expr),
1846  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1847  } else {
1848  if (agg_expr) {
1849  throw std::runtime_error(
1850  "Using array[] operator as argument to an aggregate operator is not "
1851  "supported");
1852  }
1853  CHECK(func_expr || arr_expr);
1854  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1855  CHECK_EQ(size_t(1), target_lvs.size());
1856 
1857  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1858 
1859  // const auto target_lv_type = target_lvs[0]->getType();
1860  // CHECK(target_lv_type->isStructTy());
1861  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1862  const auto i8p_ty = llvm::PointerType::get(
1863  get_int_type(8, executor_->cgen_state_->context_), 0);
1864  const auto ptr = LL_BUILDER.CreatePointerCast(
1865  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1866  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1867  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1868 
1869  const auto nullcheck_ok_bb =
1870  llvm::BasicBlock::Create(LL_CONTEXT, "arr_nullcheck_ok_bb", CUR_FUNC);
1871  const auto nullcheck_fail_bb =
1872  llvm::BasicBlock::Create(LL_CONTEXT, "arr_nullcheck_fail_bb", CUR_FUNC);
1873 
1874  // TODO(adb): probably better to zext the bool
1875  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1876  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1877  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1878 
1879  const auto ret_bb =
1880  llvm::BasicBlock::Create(LL_CONTEXT, "arr_return", CUR_FUNC);
1881  LL_BUILDER.SetInsertPoint(ret_bb);
1882  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, "array_ptr_return");
1883  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1884 
1885  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1886  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1887  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1888 
1889  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1890  executor_->cgen_state_->emitExternalCall(
1891  "register_buffer_with_executor_rsm",
1892  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1893  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1894  LL_BUILDER.CreateBr(ret_bb);
1895 
1896  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1897  LL_BUILDER.CreateBr(ret_bb);
1898 
1899  LL_BUILDER.SetInsertPoint(ret_bb);
1900 
1901  return {result_phi, size};
1902  }
1903  CHECK_EQ(size_t(2), target_lvs.size());
1904  return {target_lvs[0], target_lvs[1]};
1905  }
1906  }
1907  if (target_ti.is_geometry() &&
1908  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1909  auto generate_coord_lvs =
1910  [&](auto* selected_target_expr,
1911  bool const fetch_columns) -> std::vector<llvm::Value*> {
1912  const auto target_lvs =
1913  code_generator.codegen(selected_target_expr, fetch_columns, co);
1914  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
1915  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
1916  if (geo_uoper || geo_binoper) {
1917  CHECK(target_expr->get_type_info().is_geometry());
1918  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1919  target_lvs.size());
1920  return target_lvs;
1921  }
1922  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1923  target_lvs.size());
1924 
1925  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1926  const auto i8p_ty =
1927  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1928  std::vector<llvm::Value*> coords;
1929  size_t ctr = 0;
1930  for (const auto& target_lv : target_lvs) {
1931  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1932  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1933  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1934  // coords array (TINYINT). Subsequent arrays are regular INT.
1935 
1936  const size_t elem_sz = ctr == 0 ? 1 : 4;
1937  ctr++;
1938  int32_t fixlen = -1;
1939  if (target_ti.get_type() == kPOINT) {
1940  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1941  if (col_var) {
1942  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1943  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1944  fixlen = coords_cd->columnType.get_size();
1945  }
1946  }
1947  }
1948  if (fixlen > 0) {
1949  coords.push_back(executor_->cgen_state_->emitExternalCall(
1950  "fast_fixlen_array_buff",
1951  i8p_ty,
1952  {target_lv, code_generator.posArg(selected_target_expr)}));
1953  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1954  continue;
1955  }
1956  coords.push_back(executor_->cgen_state_->emitExternalCall(
1957  "array_buff",
1958  i8p_ty,
1959  {target_lv, code_generator.posArg(selected_target_expr)}));
1960  coords.push_back(executor_->cgen_state_->emitExternalCall(
1961  "array_size",
1962  i32_ty,
1963  {target_lv,
1964  code_generator.posArg(selected_target_expr),
1965  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1966  }
1967  return coords;
1968  };
1969 
1970  if (agg_expr) {
1971  return generate_coord_lvs(agg_expr->get_arg(), true);
1972  } else {
1973  return generate_coord_lvs(target_expr,
1974  !executor_->plan_state_->allow_lazy_fetch_);
1975  }
1976  }
1977  }
1978  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1979  : code_generator.codegen(
1980  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1981 }

◆ codegenAggCalls()

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value *> &  agg_out_ptr_w_idx,
const std::vector< llvm::Value *> &  agg_out_vec,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1559 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), GroupByAndAggregate::DiamondCodegen::executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1565  {
1566  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1567  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1568  // TODO(alex): unify the two cases, the output for non-group by queries
1569  // should be a contiguous buffer
1570  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1571  bool can_return_error = false;
1572  if (is_group_by) {
1573  CHECK(agg_out_vec.empty());
1574  } else {
1575  CHECK(!agg_out_vec.empty());
1576  }
1577 
1578  // output buffer is casted into a byte stream to be able to handle data elements of
1579  // different sizes (only used when actual column width sizes are used)
1580  llvm::Value* output_buffer_byte_stream{nullptr};
1581  llvm::Value* out_row_idx{nullptr};
1582  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1583  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1584  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1585  std::get<0>(agg_out_ptr_w_idx),
1586  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1587  output_buffer_byte_stream->setName("out_buff_b_stream");
1588  CHECK(std::get<1>(agg_out_ptr_w_idx));
1589  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1590  llvm::Type::getInt64Ty(LL_CONTEXT));
1591  out_row_idx->setName("out_row_idx");
1592  }
1593 
1594  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1595  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1596  ++target_idx) {
1597  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1598  CHECK(target_expr);
1599 
1600  target_builder(target_expr, executor_, co);
1601  }
1602 
1603  target_builder.codegen(this,
1604  executor_,
1605  query_mem_desc,
1606  co,
1607  gpu_smem_context,
1608  agg_out_ptr_w_idx,
1609  agg_out_vec,
1610  output_buffer_byte_stream,
1611  out_row_idx,
1612  diamond_codegen);
1613 
1614  for (auto target_expr : ra_exe_unit_.target_exprs) {
1615  CHECK(target_expr);
1616  executor_->plan_state_->isLazyFetchColumn(target_expr);
1617  }
1618 
1619  return can_return_error;
1620 }

◆ codegenAggColumnPtr()

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value *> &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation result should be stored.

Definition at line 1625 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), GroupByAndAggregate::DiamondCodegen::executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1632  {
1633  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1634  llvm::Value* agg_col_ptr{nullptr};
1635  if (query_mem_desc.didOutputColumnar()) {
1636  // TODO(Saman): remove the second columnar branch, and support all query description
1637  // types through the first branch. Then, input arguments should also be cleaned up
1638  if (!g_cluster &&
1639  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1640  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1641  chosen_bytes == 8);
1642  CHECK(output_buffer_byte_stream);
1643  CHECK(out_row_idx);
1644  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1645  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1646  auto out_per_col_byte_idx =
1647  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1648  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1649  LL_INT(static_cast<int64_t>(col_off)));
1650  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1651  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1652  agg_col_ptr = LL_BUILDER.CreateBitCast(
1653  output_ptr,
1654  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1655  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1656  } else {
1657  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1658  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1659  col_off /= chosen_bytes;
1660  CHECK(std::get<1>(agg_out_ptr_w_idx));
1661  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1662  agg_col_ptr = LL_BUILDER.CreateGEP(
1663  LL_BUILDER.CreateBitCast(
1664  std::get<0>(agg_out_ptr_w_idx),
1665  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1666  offset);
1667  }
1668  } else {
1669  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1670  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1671  col_off /= chosen_bytes;
1672  agg_col_ptr = LL_BUILDER.CreateGEP(
1673  LL_BUILDER.CreateBitCast(
1674  std::get<0>(agg_out_ptr_w_idx),
1675  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1676  LL_INT(col_off));
1677  }
1678  CHECK(agg_col_ptr);
1679  return agg_col_ptr;
1680 }
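For the columnar projection branch above, the pointer arithmetic reduces to the following plain C++ mirror of the generated IR (names reuse the listing's locals; the shift by log2(chosen_bytes) is the multiplication by chosen_bytes):

    // Byte offset of this target's slot for output row out_row_idx.
    const size_t byte_offset =
        col_off + (out_row_idx << (__builtin_ffs(chosen_bytes) - 1));  // out_row_idx * chosen_bytes
    int8_t* agg_col_ptr = output_buffer_byte_stream + byte_offset;     // then cast to an integer pointer of width chosen_bytes * 8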

◆ codegenCountDistinct()

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value *> &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1732 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1737  {
1738  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1739  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1740  const auto& arg_ti =
1741  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1742  if (arg_ti.is_fp()) {
1743  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1744  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1745  }
1746  const auto& count_distinct_descriptor =
1747  query_mem_desc.getCountDistinctDescriptor(target_idx);
1748  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1749  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1750  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1751  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1752  if (device_type == ExecutorDeviceType::GPU) {
1753  const auto base_dev_addr = getAdditionalLiteral(-1);
1754  const auto base_host_addr = getAdditionalLiteral(-2);
1755  agg_args.push_back(base_dev_addr);
1756  agg_args.push_back(base_host_addr);
1757  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1758  } else {
1759  emitCall("agg_approximate_count_distinct", agg_args);
1760  }
1761  return;
1762  }
1763  std::string agg_fname{"agg_count_distinct"};
1764  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1765  agg_fname += "_bitmap";
1766  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1767  }
1768  if (agg_info.skip_null_val) {
1769  auto null_lv = executor_->cgen_state_->castToTypeIn(
1770  (arg_ti.is_fp()
1771  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1772  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1773  64);
1774  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1775  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1776  agg_fname += "_skip_val";
1777  agg_args.push_back(null_lv);
1778  }
1779  if (device_type == ExecutorDeviceType::GPU) {
1780  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1781  agg_fname += "_gpu";
1782  const auto base_dev_addr = getAdditionalLiteral(-1);
1783  const auto base_host_addr = getAdditionalLiteral(-2);
1784  agg_args.push_back(base_dev_addr);
1785  agg_args.push_back(base_host_addr);
1786  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1787  CHECK_EQ(size_t(0),
1788  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1789  count_distinct_descriptor.sub_bitmap_count);
1790  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1791  count_distinct_descriptor.sub_bitmap_count)));
1792  }
1793  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1794  emitCall(agg_fname, agg_args);
1795  } else {
1796  executor_->cgen_state_->emitExternalCall(
1797  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1798  }
1799 }
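The runtime function name assembled above follows a simple suffix scheme; a sketch of the composition in the listing (for example, a nullable bitmap-based COUNT(DISTINCT) compiled for GPU resolves to "agg_count_distinct_bitmap_skip_val_gpu"):

    std::string agg_fname{"agg_count_distinct"};
    if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
      agg_fname += "_bitmap";    // exact count via a per-group bitmap, plus a min_val argument
    }
    if (agg_info.skip_null_val) {
      agg_fname += "_skip_val";  // an extra argument carries the null sentinel to skip
    }
    if (device_type == ExecutorDeviceType::GPU) {
      agg_fname += "_gpu";       // extra device/host base addresses and sub-bitmap sizing
    }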

◆ codegenEstimator()

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock *> &  array_loops,
GroupByAndAggregate::DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1682 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, GroupByAndAggregate::DiamondCodegen::executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1686  {
1687  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1688  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1689  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1690  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1691  estimator_comp_count_lv);
1692  int32_t subkey_idx = 0;
1693  for (const auto& estimator_arg_comp : estimator_arg) {
1694  const auto estimator_arg_comp_lvs =
1695  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1696  query_mem_desc.getEffectiveKeyWidth(),
1697  co,
1698  false,
1699  0,
1700  diamond_codegen,
1701  array_loops,
1702  true);
1703  CHECK(!estimator_arg_comp_lvs.original_value);
1704  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1705  // store the sub-key to the buffer
1706  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1707  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1708  }
1709  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1710  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1711  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1712  const auto estimator_comp_bytes_lv =
1713  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1714  const auto bitmap_size_lv =
1715  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1716  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1717  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1718 }

◆ codegenGroupBy()

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1163 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), GroupByAndAggregate::DiamondCodegen::executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), getExprRangeInfo(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1166  {
1167  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1168  auto arg_it = ROW_FUNC->arg_begin();
1169  auto groups_buffer = arg_it++;
1170 
1171  std::stack<llvm::BasicBlock*> array_loops;
1172 
1173  // TODO(Saman): move this logic outside of this function.
1174  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1175  if (query_mem_desc.didOutputColumnar()) {
1176  return std::make_tuple(
1177  &*groups_buffer,
1178  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1179  } else {
1180  return std::make_tuple(
1181  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1182  nullptr);
1183  }
1184  }
1185 
1186  CHECK(query_mem_desc.getQueryDescriptionType() ==
1187  QueryDescriptionType::GroupByBaselineHash ||
1188  query_mem_desc.getQueryDescriptionType() ==
1189  QueryDescriptionType::GroupByPerfectHash);
1190
1191  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1192  ? 0
1193  : query_mem_desc.getRowSize() / sizeof(int64_t);
1194 
1195  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1196  ? sizeof(int64_t)
1197  : query_mem_desc.getEffectiveKeyWidth();
1198  // for multi-column group by
1199  llvm::Value* group_key = nullptr;
1200  llvm::Value* key_size_lv = nullptr;
1201 
1202  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1203  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1204  if (query_mem_desc.getQueryDescriptionType() ==
1205  QueryDescriptionType::GroupByPerfectHash) {
1206  group_key =
1207  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1208  } else if (query_mem_desc.getQueryDescriptionType() ==
1209  QueryDescriptionType::GroupByBaselineHash) {
1210  group_key =
1211  col_width_size == sizeof(int32_t)
1212  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1213  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1214  }
1215  CHECK(group_key);
1216  CHECK(key_size_lv);
1217  }
1218 
1219  int32_t subkey_idx = 0;
1220  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1221  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1222  const auto col_range_info = getExprRangeInfo(group_expr.get());
1223  const auto translated_null_value = static_cast<int64_t>(
1224  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1225  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1226  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1227  : checked_int64_t(col_range_info.max) +
1228  (col_range_info.bucket ? col_range_info.bucket : 1));
1229 
1230  const bool col_has_nulls =
1231  query_mem_desc.getQueryDescriptionType() ==
1232  QueryDescriptionType::GroupByPerfectHash
1233  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1234  ? query_mem_desc.hasNulls()
1235  : col_range_info.has_nulls)
1236  : false;
1237 
1238  const auto group_expr_lvs =
1239  executor_->groupByColumnCodegen(group_expr.get(),
1240  col_width_size,
1241  co,
1242  col_has_nulls,
1243  translated_null_value,
1244  diamond_codegen,
1245  array_loops,
1246  query_mem_desc.threadsShareMemory());
1247  const auto group_expr_lv = group_expr_lvs.translated_value;
1248  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1249  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1250  return codegenSingleColumnPerfectHash(query_mem_desc,
1251  co,
1252  &*groups_buffer,
1253  group_expr_lv,
1254  group_expr_lvs.original_value,
1255  row_size_quad);
1256  } else {
1257  // store the sub-key to the buffer
1258  LL_BUILDER.CreateStore(group_expr_lv,
1259  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1260  }
1261  }
1262  if (query_mem_desc.getQueryDescriptionType() ==
1263  QueryDescriptionType::GroupByPerfectHash) {
1264  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1265  return codegenMultiColumnPerfectHash(
1266  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1267  } else if (query_mem_desc.getQueryDescriptionType() ==
1268  QueryDescriptionType::GroupByBaselineHash) {
1269  return codegenMultiColumnBaselineHash(co,
1270  &*groups_buffer,
1271  group_key,
1272  key_size_lv,
1273  query_mem_desc,
1274  col_width_size,
1275  row_size_quad);
1276  }
1277  CHECK(false);
1278  return std::make_tuple(nullptr, nullptr);
1279 }
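The returned pair is interpreted by codegen() according to the output layout; a sketch with illustrative binding names:

    // Columnar output:  <groups_buffer, output slot index>
    // Row-wise output:  <pointer to the matching group's row, nullptr>
    const auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, diamond_codegen);
    llvm::Value* group_ptr_or_buffer = std::get<0>(agg_out_ptr_w_idx);
    llvm::Value* output_index = std::get<1>(agg_out_ptr_w_idx);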

◆ codegenMultiColumnBaselineHash()

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1376 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, ROW_FUNC, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1383  {
1384  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1385  auto arg_it = ROW_FUNC->arg_begin(); // groups_buffer
1386  ++arg_it; // current match count
1387  ++arg_it; // total match count
1388  ++arg_it; // old match count
1389  ++arg_it; // output buffer slots count
1390  ++arg_it; // aggregate init values
1391  CHECK(arg_it->getName() == "agg_init_val");
1392  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1393  CHECK(key_width == sizeof(int32_t));
1394  group_key =
1395  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1396  }
1397  std::vector<llvm::Value*> func_args{
1398  groups_buffer,
1399  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1400  &*group_key,
1401  &*key_size_lv,
1402  LL_INT(static_cast<int32_t>(key_width))};
1403  std::string func_name{"get_group_value"};
1404  if (query_mem_desc.didOutputColumnar()) {
1405  func_name += "_columnar_slot";
1406  } else {
1407  func_args.push_back(LL_INT(row_size_quad));
1408  func_args.push_back(&*arg_it);
1409  }
1410  if (co.with_dynamic_watchdog) {
1411  func_name += "_with_watchdog";
1412  }
1413  if (query_mem_desc.didOutputColumnar()) {
1414  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1415  } else {
1416  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1417  }
1418 }
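The baseline-hash probe resolves to one of four runtime entry points; a sketch of the name composition used above:

    std::string func_name{"get_group_value"};
    if (query_mem_desc.didOutputColumnar()) {
      func_name += "_columnar_slot";   // returns an output slot index rather than a row pointer
    }
    if (co.with_dynamic_watchdog) {
      func_name += "_with_watchdog";   // variant compiled with dynamic watchdog support
    }
    // e.g. "get_group_value_columnar_slot_with_watchdog"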

◆ codegenMultiColumnPerfectHash()

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1332 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1337  {
1338  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1339  CHECK(query_mem_desc.getQueryDescriptionType() ==
1340  QueryDescriptionType::GroupByPerfectHash);
1341  // compute the index (perfect hash)
1342  auto perfect_hash_func = codegenPerfectHashFunction();
1343  auto hash_lv =
1344  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1345 
1346  if (query_mem_desc.didOutputColumnar()) {
1347  if (!query_mem_desc.hasKeylessHash()) {
1348  const std::string set_matching_func_name{
1349  "set_matching_group_value_perfect_hash_columnar"};
1350  const std::vector<llvm::Value*> set_matching_func_arg{
1351  groups_buffer,
1352  hash_lv,
1353  group_key,
1354  key_size_lv,
1355  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1356  query_mem_desc.getEntryCount())};
1357  emitCall(set_matching_func_name, set_matching_func_arg);
1358  }
1359  return std::make_tuple(groups_buffer, hash_lv);
1360  } else {
1361  if (query_mem_desc.hasKeylessHash()) {
1362  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1363  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1364  nullptr);
1365  } else {
1366  return std::make_tuple(
1367  emitCall(
1368  "get_matching_group_value_perfect_hash",
1369  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1370  nullptr);
1371  }
1372  }
1373 }

◆ codegenOutputSlot()

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1071 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), RelAlgExecutionUnit::use_bump_allocator, and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1075  {
1076  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1077  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1078  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1079  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1080  CHECK(!group_expr);
1081  if (!query_mem_desc.didOutputColumnar()) {
1082  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1083  }
1084  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1085  ? 0
1086  : query_mem_desc.getRowSize() / sizeof(int64_t);
1087  CodeGenerator code_generator(executor_);
1088  if (query_mem_desc.useStreamingTopN()) {
1089  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1090  CHECK_GE(only_order_entry.tle_no, int(1));
1091  const size_t target_idx = only_order_entry.tle_no - 1;
1092  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1093  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1094  const auto chosen_bytes =
1095  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1096  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1097  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1098  const uint32_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1099  std::string fname = "get_bin_from_k_heap";
1100  const auto& oe_ti = order_entry_expr->get_type_info();
1101  llvm::Value* null_key_lv = nullptr;
1102  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1103  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1104  switch (bit_width) {
1105  case 32:
1106  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1107  break;
1108  case 64:
1109  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1110  break;
1111  default:
1112  CHECK(false);
1113  }
1114  fname += "_int" + std::to_string(bit_width) + "_t";
1115  } else {
1116  CHECK(oe_ti.is_fp());
1117  if (order_entry_lv->getType()->isDoubleTy()) {
1118  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1119  } else {
1120  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1121  }
1122  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1123  }
1124  const auto key_slot_idx =
1125  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1126  return emitCall(
1127  fname,
1128  {groups_buffer,
1129  LL_INT(n),
1130  LL_INT(row_size_quad),
1131  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1132  LL_BOOL(only_order_entry.is_desc),
1133  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1134  LL_BOOL(only_order_entry.nulls_first),
1135  null_key_lv,
1136  order_entry_lv});
1137  } else {
1138  llvm::Value* output_buffer_entry_count_lv{nullptr};
1139  if (ra_exe_unit_.use_bump_allocator) {
1140  output_buffer_entry_count_lv =
1141  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1142  CHECK(output_buffer_entry_count_lv);
1143  }
1144  const auto group_expr_lv =
1145  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1146  std::vector<llvm::Value*> args{
1147  groups_buffer,
1148  output_buffer_entry_count_lv
1149  ? output_buffer_entry_count_lv
1150  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1151  group_expr_lv,
1152  code_generator.posArg(nullptr)};
1153  if (query_mem_desc.didOutputColumnar()) {
1154  const auto columnar_output_offset =
1155  emitCall("get_columnar_scan_output_offset", args);
1156  return columnar_output_offset;
1157  }
1158  args.push_back(LL_INT(row_size_quad));
1159  return emitCall("get_scan_output_slot", args);
1160  }
1161 }
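For the streaming top-N path, the heap-probe runtime function is chosen from the order-by key type; a sketch of the name selection above (oe_ti, bit_width and order_entry_lv as in the listing):

    std::string fname = "get_bin_from_k_heap";
    if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
      fname += "_int" + std::to_string(bit_width) + "_t";   // "_int32_t" or "_int64_t"
    } else {
      fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
    }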
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define ROW_FUNC
#define LL_BUILDER
const std::list< Analyzer::OrderEntry > order_entries
#define LL_INT(v)
#define CHECK_GE(x, y)
Definition: Logger.h:210
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::string to_string(char const *&&v)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define LL_BOOL(v)
const size_t limit
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:129
const SortInfo sort_info
#define LL_FP(v)
size_t get_heap_key_slot_index(const std::vector< Analyzer::Expr *> &target_exprs, const size_t target_idx)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:207
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value *> &args)
#define CHECK(condition)
Definition: Logger.h:197
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
size_t getColOffInBytes(const size_t col_idx) const
const RelAlgExecutionUnit & ra_exe_unit_
const size_t offset
QueryDescriptionType getQueryDescriptionType() const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
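
As the streaming top-n branch above shows, the runtime helper name is assembled from the order key's type: ordering by a BIGINT (or other 64-bit integer, decimal, or time) expression emits a call to get_bin_from_k_heap_int64_t, a 32-bit integer key emits get_bin_from_k_heap_int32_t, and DOUBLE or FLOAT keys emit get_bin_from_k_heap_double or get_bin_from_k_heap_float respectively.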

◆ codegenPerfectHashFunction()

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1420 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, GroupByAndAggregate::DiamondCodegen::executor_, get_int_type(), getBucketedCardinality(), getExprRangeInfo(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1420  {
1421  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1422  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1423  auto ft = llvm::FunctionType::get(
1424  get_int_type(32, LL_CONTEXT),
1425  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1426  false);
1427  auto key_hash_func = llvm::Function::Create(ft,
1428  llvm::Function::ExternalLinkage,
1429  "perfect_key_hash",
1430  executor_->cgen_state_->module_);
1431  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1432  mark_function_always_inline(key_hash_func);
1433  auto& key_buff_arg = *key_hash_func->args().begin();
1434  llvm::Value* key_buff_lv = &key_buff_arg;
1435  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1436  llvm::IRBuilder<> key_hash_func_builder(bb);
1437  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1438  std::vector<int64_t> cardinalities;
1439  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1440  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1441  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1442  cardinalities.push_back(getBucketedCardinality(col_range_info));
1443  }
1444  size_t dim_idx = 0;
1445  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1446  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1447  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1448  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1449  auto crt_term_lv =
1450  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1451  if (col_range_info.bucket) {
1452  crt_term_lv =
1453  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1454  }
1455  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1456  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1457  LL_INT(cardinalities[prev_dim_idx]));
1458  }
1459  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1460  ++dim_idx;
1461  }
1462  key_hash_func_builder.CreateRet(
1463  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1464  return key_hash_func;
1465 }
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
#define LL_CONTEXT
void mark_function_always_inline(llvm::Function *func)
#define LL_INT(v)
ColRangeInfo getExprRangeInfo(const Analyzer::Expr *expr) const
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define CHECK_GT(x, y)
Definition: Logger.h:209
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:197
const RelAlgExecutionUnit & ra_exe_unit_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
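
The IR above implements a mixed-radix encoding of the bucketed group-by key. A minimal host-side sketch of the same arithmetic (plain C++; the ColRange struct and the reference function name are illustrative, not part of the codebase):

#include <cstdint>
#include <vector>

// Illustrative mirror of the fields the hash needs from ColRangeInfo.
struct ColRange {
  int64_t min;
  int64_t bucket;       // 0 means no bucketing
  int64_t cardinality;  // bucketed cardinality of this dimension
};

// Mirrors the generated perfect_key_hash: normalize each key component by its
// minimum (and bucket size), then weight it by the product of the cardinalities
// of all previous dimensions and sum the terms.
int32_t perfect_key_hash_reference(const std::vector<int64_t>& key,
                                   const std::vector<ColRange>& ranges) {
  int64_t hash = 0;
  int64_t stride = 1;
  for (size_t dim = 0; dim < key.size(); ++dim) {
    int64_t term = key[dim] - ranges[dim].min;
    if (ranges[dim].bucket) {
      term /= ranges[dim].bucket;
    }
    hash += term * stride;
    stride *= ranges[dim].cardinality;
  }
  return static_cast<int32_t>(hash);  // the generated code also truncates to i32
}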

◆ codegenSingleColumnPerfectHash()

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1282 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1288  {
1289  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1290  CHECK(query_mem_desc.usesGetGroupValueFast());
1291  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1292  ? "get_columnar_group_bin_offset"
1293  : "get_group_value_fast"};
1294  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1295  get_group_fn_name += "_keyless";
1296  }
1297  if (query_mem_desc.interleavedBins(co.device_type)) {
1298  CHECK(!query_mem_desc.didOutputColumnar());
1299  CHECK(query_mem_desc.hasKeylessHash());
1300  get_group_fn_name += "_semiprivate";
1301  }
1302  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1303  &*group_expr_lv_translated};
1304  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1305  query_mem_desc.mustUseBaselineSort()) {
1306  get_group_fn_name += "_with_original_key";
1307  get_group_fn_args.push_back(group_expr_lv_original);
1308  }
1309  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1310  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1311  if (!query_mem_desc.hasKeylessHash()) {
1312  if (!query_mem_desc.didOutputColumnar()) {
1313  get_group_fn_args.push_back(LL_INT(row_size_quad));
1314  }
1315  } else {
1316  if (!query_mem_desc.didOutputColumnar()) {
1317  get_group_fn_args.push_back(LL_INT(row_size_quad));
1318  }
1319  if (query_mem_desc.interleavedBins(co.device_type)) {
1320  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1321  get_group_fn_args.push_back(warp_idx);
1322  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1323  }
1324  }
1325  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1326  return std::make_tuple(&*groups_buffer,
1327  emitCall(get_group_fn_name, get_group_fn_args));
1328  }
1329  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1330 }
#define LL_INT(v)
bool interleavedBins(const ExecutorDeviceType) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value *> &args)
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
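
The runtime entry point is picked by composing the base name with the layout: a columnar keyless layout calls get_columnar_group_bin_offset, a row-wise keyless layout calls get_group_value_fast_keyless, keyless with interleaved bins on GPU calls get_group_value_fast_keyless_semiprivate, and the baseline-sort case with an original key appends _with_original_key to get_group_value_fast.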

◆ codegenWindowRowPointer()

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction window_func,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
DiamondCodegen diamond_codegen 
)
private

Definition at line 1518 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, codegenOutputSlot(), COUNT, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1522  {
1523  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1524  const auto window_func_context =
1525  WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1526  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1527  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1528  ? 0
1529  : query_mem_desc.getRowSize() / sizeof(int64_t);
1530  auto arg_it = ROW_FUNC->arg_begin();
1531  auto groups_buffer = arg_it++;
1532  CodeGenerator code_generator(executor_);
1533  if (!window_func_context->getRowNumber()) {
1534  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1535  window_func_context->setRowNumber(emitCall(
1536  "row_number_window_func",
1537  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1538  code_generator.posArg(nullptr)}));
1539  }
1540  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1541  get_int_type(32, LL_CONTEXT));
1542  llvm::Value* entry_count_lv =
1543  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1544  std::vector<llvm::Value*> args{
1545  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1546  if (query_mem_desc.didOutputColumnar()) {
1547  const auto columnar_output_offset =
1548  emitCall("get_columnar_scan_output_offset", args);
1549  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1550  }
1551  args.push_back(LL_INT(row_size_quad));
1552  return emitCall("get_scan_output_slot", args);
1553  }
1554  auto arg_it = ROW_FUNC->arg_begin();
1555  auto groups_buffer = arg_it++;
1556  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1557 }
#define ROW_FUNC
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:42
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value *> &args)
llvm::Value * codegenOutputSlot(llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
#define CHECK(condition)
Definition: Logger.h:197
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:1447
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ convertNullIfAny()

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo arg_type,
const TargetInfo agg_info,
llvm::Value *  target 
)
private

Definition at line 1467 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, GroupByAndAggregate::DiamondCodegen::executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1469  {
1470  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1471  const auto& agg_type = agg_info.sql_type;
1472  const size_t chosen_bytes = agg_type.get_size();
1473 
1474  bool need_conversion{false};
1475  llvm::Value* arg_null{nullptr};
1476  llvm::Value* agg_null{nullptr};
1477  llvm::Value* target_to_cast{target};
1478  if (arg_type.is_fp()) {
1479  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1480  if (agg_type.is_fp()) {
1481  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1482  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1483  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1484  need_conversion = true;
1485  }
1486  } else {
1487  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1488  return target;
1489  }
1490  } else {
1491  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1492  if (agg_type.is_fp()) {
1493  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1494  need_conversion = true;
1495  target_to_cast = executor_->castToFP(target);
1496  } else {
1497  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1498  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1499  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1500  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1501  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1502  need_conversion = true;
1503  }
1504  }
1505  }
1506  if (need_conversion) {
1507  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1508  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1509  return LL_BUILDER.CreateSelect(
1510  cmp,
1511  agg_null,
1512  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1513  } else {
1514  return target;
1515  }
1516 }
#define LL_BUILDER
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLAgg agg_kind
Definition: TargetInfo.h:41
Definition: sqldefs.h:76
#define CHECK(condition)
Definition: Logger.h:197
bool is_fp() const
Definition: sqltypes.h:421
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
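
The select emitted above has simple scalar semantics: if the incoming value equals the argument type's null sentinel, substitute the aggregate type's sentinel, otherwise cast the value to the aggregate width. A hypothetical scalar analogue (not part of the codebase, just an illustration of the conversion):

#include <cstdint>

// Illustrative scalar analogue of convertNullIfAny: arg_null is the null
// sentinel of the argument type, agg_null the sentinel of the aggregate type.
template <typename ArgT, typename AggT>
AggT convert_null_if_any(ArgT target, ArgT arg_null, AggT agg_null) {
  // Matches the generated compare-and-select: preserve "null-ness" across the
  // type change instead of letting the cast mangle the sentinel value.
  return target == arg_null ? agg_null : static_cast<AggT>(target);
}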

◆ emitCall()

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value *> &  args 
)
private

Definition at line 1983 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and GroupByAndAggregate::DiamondCodegen::executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

1984  {
1985  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1986  return executor_->cgen_state_->emitCall(fname, args);
1987 }
#define AUTOMATIC_IR_METADATA(CGENSTATE)
+ Here is the caller graph for this function:

◆ getAdditionalLiteral()

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1801 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1801  {
1802  CHECK_LT(off, 0);
1803  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1804  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1805  LL_BUILDER.CreateBitCast(lit_buff_lv,
1806  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1807  LL_INT(off)));
1808 }
#define ROW_FUNC
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:129
#define CHECK_LT(x, y)
Definition: Logger.h:207
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getBucketedCardinality()

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo col_range_info)
staticprivate

Definition at line 290 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), getColRangeInfo(), and QueryMemoryDescriptor::init().

290  {
291  checked_int64_t crt_col_cardinality =
292  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
293  if (col_range_info.bucket) {
294  crt_col_cardinality /= col_range_info.bucket;
295  }
296  return static_cast<int64_t>(crt_col_cardinality +
297  (1 + (col_range_info.has_nulls ? 1 : 0)));
298 }
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void > > checked_int64_t
+ Here is the caller graph for this function:
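
As a worked example, a column with min 10, max 99, a bucket size of 10, and nulls present yields (99 - 10) / 10 = 8 (integer division), plus 1 for the inclusive endpoint and 1 for the null slot, i.e. a bucketed cardinality of 10.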

◆ getColRangeInfo()

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 133 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), getBucketedCardinality(), getExprRangeInfo(), GPU, group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, SortInfo::order_entries, RelAlgExecutionUnit::quals, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by Executor::compileWorkUnit(), and initQueryMemoryDescriptorImpl().

133  {
134  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
135  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
136  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
137  // can expect this to be true anyway for grouped queries since the precise version
138  // uses significantly more memory.
139  const int64_t baseline_threshold =
144  if (ra_exe_unit_.groupby_exprs.size() != 1) {
145  try {
146  checked_int64_t cardinality{1};
147  bool has_nulls{false};
148  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
149  auto col_range_info = getExprRangeInfo(groupby_expr.get());
150  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
151  // going through baseline hash if a non-integer type is encountered
152  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
153  }
154  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
155  CHECK_GE(crt_col_cardinality, 0);
156  cardinality *= crt_col_cardinality;
157  if (col_range_info.has_nulls) {
158  has_nulls = true;
159  }
160  }
161  // For zero or high cardinalities, use baseline layout.
162  if (!cardinality || cardinality > baseline_threshold) {
163  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
164  }
166  0,
167  int64_t(cardinality),
168  0,
169  has_nulls};
170  } catch (...) { // overflow when computing cardinality
171  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
172  }
173  }
174  // For single column groupby on high timestamps, force baseline hash due to wide ranges
175  // we are likely to encounter when applying quals to the expression range
176  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
177  // the range is small enough
178  if (ra_exe_unit_.groupby_exprs.front() &&
179  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
180  ra_exe_unit_.simple_quals.size() > 0) {
181  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
182  }
183  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
184  if (!ra_exe_unit_.groupby_exprs.front()) {
185  return col_range_info;
186  }
187  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
188  const int64_t col_count =
190  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
192  max_entry_count = std::min(max_entry_count, baseline_threshold);
193  }
194  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
195  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
196  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
197 
198  const bool has_filters =
199  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
200  if (has_filters &&
201  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
202  // if filters are present, we can use the filter to narrow the cardinality of the
203  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
204  // off attempting perfect hash (since we know the range will be made of
205  // monotonically increasing numbers from min to max for dictionary encoded strings)
206  // and failing later due to excessive memory use.
207  // Check the conditions where baseline hash can provide a performance increase and
208  // return baseline hash (potentially forcing an estimator query) as the range type.
209  // Otherwise, return col_range_info which will likely be perfect hash, though could
210  // be baseline from a previous call of this function prior to the estimator query.
211  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
212  // TODO(adb): allow some sorts to pass through this block by centralizing sort
213  // algorithm decision making
215  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
216  // always use baseline hash for column range too big for perfect hash with count
217  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
218  // hash group by in this case.
220  col_range_info.min,
221  col_range_info.max,
222  0,
223  col_range_info.has_nulls};
224  } else {
225  // use original col range for sort
226  return col_range_info;
227  }
228  }
229  // if filters are present and the filtered range is less than the cardinality of
230  // the column, consider baseline hash
233  col_range_info)) {
235  col_range_info.min,
236  col_range_info.max,
237  0,
238  col_range_info.has_nulls};
239  }
240  }
241  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(),
242  *executor_->catalog_)) &&
243  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
244  !col_range_info.bucket) {
246  col_range_info.min,
247  col_range_info.max,
248  0,
249  col_range_info.has_nulls};
250  }
251  return col_range_info;
252 }
std::vector< Analyzer::Expr * > target_exprs
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
bool is_column_range_too_big_for_perfect_hash(const ColRangeInfo &col_range_info, const int64_t max_entry_count)
bool expr_is_rowid(const Analyzer::Expr *expr, const Catalog_Namespace::Catalog &cat)
const std::list< Analyzer::OrderEntry > order_entries
static const size_t baseline_threshold
Definition: Execute.h:933
ColRangeInfo getExprRangeInfo(const Analyzer::Expr *expr) const
#define CHECK_GE(x, y)
Definition: Logger.h:210
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool has_count_distinct(const RelAlgExecutionUnit &ra_exe_unit)
const SortInfo sort_info
const ExecutorDeviceType device_type_
bool cardinality_estimate_less_than_column_range(const int64_t cardinality_estimate, const ColRangeInfo &col_range_info)
const std::optional< int64_t > group_cardinality_estimation_
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:197
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void > > checked_int64_t
const RelAlgExecutionUnit & ra_exe_unit_
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
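
To make the multi-column branch concrete: grouping by two columns with bucketed cardinalities 1,000 and 500 gives a combined cardinality of 500,000, which stays on the GroupByPerfectHash path as long as it does not exceed the baseline threshold; a zero cardinality, a product above the threshold, or an overflow of the checked multiplication all fall back to GroupByBaselineHash.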

◆ getExprRangeInfo()

ColRangeInfo GroupByAndAggregate::getExprRangeInfo ( const Analyzer::Expr expr) const
private

Definition at line 254 of file GroupByAndAggregate.cpp.

References CHECK, Double, executor_, Float, getExpressionRange(), GroupByBaselineHash, GroupByPerfectHash, Integer, Invalid, NonGroupedAggregate, Projection, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::simple_quals.

Referenced by codegenGroupBy(), codegenPerfectHashFunction(), getColRangeInfo(), gpuCanHandleOrderEntries(), and initCountDistinctDescriptors().

254  {
255  if (!expr) {
256  return {QueryDescriptionType::Projection, 0, 0, 0, false};
257  }
258 
259  const auto expr_range = getExpressionRange(
260  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
261  switch (expr_range.getType()) {
263  if (expr_range.getIntMin() > expr_range.getIntMax()) {
264  return {
265  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
266  }
268  expr_range.getIntMin(),
269  expr_range.getIntMax(),
270  expr_range.getBucket(),
271  expr_range.hasNulls()};
272  }
275  if (expr_range.getFpMin() > expr_range.getFpMax()) {
276  return {
277  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
278  }
279  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
280  }
282  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
283  default:
284  CHECK(false);
285  }
286  CHECK(false);
287  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
288 }
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
const std::vector< InputTableInfo > & query_infos_
#define CHECK(condition)
Definition: Logger.h:197
const RelAlgExecutionUnit & ra_exe_unit_
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getKeylessInfo()

KeylessInfo GroupByAndAggregate::getKeylessInfo ( const std::vector< Analyzer::Expr *> &  target_expr_list,
const bool  is_group_by 
) const
private

This function goes through all target expressions and answers two questions:

  1. Is a keyless hash layout possible?
  2. If so, which aggregate expression should be used to represent the key's presence when needed (e.g., to detect empty entries in the result set)?

NOTE: Keyless hash is only valid with single-column group by at the moment.
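
For instance, a query such as SELECT x, COUNT(*) FROM t GROUP BY x can use a keyless layout: the COUNT slot itself doubles as the presence indicator, since it is initialized to zero and becomes strictly positive as soon as a row lands in the bin, so no separate key column has to be materialized.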

Definition at line 700 of file GroupByAndAggregate.cpp.

References agg_arg(), CHECK, constrained_not_null(), Double, executor_, Float, g_bigint_count, get_agg_initial_val(), get_compact_type(), get_target_info(), getExpressionRange(), Integer, Invalid, is_distinct_target(), kAVG, kCOUNT, keyless, kMAX, kMIN, kSUM, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, and takes_float_argument().

Referenced by initQueryMemoryDescriptorImpl().

702  {
703  bool keyless{true}, found{false};
704  int32_t num_agg_expr{0};
705  int32_t index{0};
706  for (const auto target_expr : target_expr_list) {
707  const auto agg_info = get_target_info(target_expr, g_bigint_count);
708  const auto chosen_type = get_compact_type(agg_info);
709  if (agg_info.is_agg) {
710  num_agg_expr++;
711  }
712  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
713  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
714  CHECK(agg_expr);
715  const auto arg_expr = agg_arg(target_expr);
716  const bool float_argument_input = takes_float_argument(agg_info);
717  switch (agg_info.agg_kind) {
718  case kAVG:
719  ++index;
720  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
721  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
722  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
723  expr_range_info.hasNulls()) {
724  break;
725  }
726  }
727  found = true;
728  break;
729  case kCOUNT:
730  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
731  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
732  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
733  expr_range_info.hasNulls()) {
734  break;
735  }
736  }
737  found = true;
738  break;
739  case kSUM: {
740  auto arg_ti = arg_expr->get_type_info();
741  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
742  arg_ti.set_notnull(true);
743  }
744  if (!arg_ti.get_notnull()) {
745  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
746  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
747  !expr_range_info.hasNulls()) {
748  found = true;
749  }
750  } else {
751  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
752  switch (expr_range_info.getType()) {
755  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
756  found = true;
757  }
758  break;
760  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
761  found = true;
762  }
763  break;
764  default:
765  break;
766  }
767  }
768  break;
769  }
770  case kMIN: {
771  CHECK(agg_expr && agg_expr->get_arg());
772  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
773  if (arg_ti.is_string() || arg_ti.is_array()) {
774  break;
775  }
776  auto expr_range_info =
777  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
778  auto init_max = get_agg_initial_val(agg_info.agg_kind,
779  chosen_type,
780  is_group_by || float_argument_input,
781  float_argument_input ? sizeof(float) : 8);
782  switch (expr_range_info.getType()) {
785  auto double_max =
786  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
787  if (expr_range_info.getFpMax() < double_max) {
788  found = true;
789  }
790  break;
791  }
793  if (expr_range_info.getIntMax() < init_max) {
794  found = true;
795  }
796  break;
797  default:
798  break;
799  }
800  break;
801  }
802  case kMAX: {
803  CHECK(agg_expr && agg_expr->get_arg());
804  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
805  if (arg_ti.is_string() || arg_ti.is_array()) {
806  break;
807  }
808  auto expr_range_info =
809  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
810  // NULL sentinel and init value for kMAX are identical, which results in
811  // ambiguity in detecting empty keys in presence of nulls.
812  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
813  expr_range_info.hasNulls()) {
814  break;
815  }
816  auto init_min = get_agg_initial_val(agg_info.agg_kind,
817  chosen_type,
818  is_group_by || float_argument_input,
819  float_argument_input ? sizeof(float) : 8);
820  switch (expr_range_info.getType()) {
823  auto double_min =
824  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
825  if (expr_range_info.getFpMin() > double_min) {
826  found = true;
827  }
828  break;
829  }
831  if (expr_range_info.getIntMin() > init_min) {
832  found = true;
833  }
834  break;
835  default:
836  break;
837  }
838  break;
839  }
840  default:
841  keyless = false;
842  break;
843  }
844  }
845  if (!keyless) {
846  break;
847  }
848  if (!found) {
849  ++index;
850  }
851  }
852 
853  // shouldn't use keyless for projection only
854  return {
855  keyless && found,
856  index,
857  };
858 }
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:78
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:133
bool g_bigint_count
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
Definition: sqldefs.h:75
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:129
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
const std::vector< InputTableInfo > & query_infos_
Definition: sqldefs.h:76
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:197
const RelAlgExecutionUnit & ra_exe_unit_
Definition: sqldefs.h:74
Definition: sqldefs.h:72
const int64_t const uint32_t const uint32_t const uint32_t const bool keyless
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getShardedTopBucket()

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo col_range_info,
const size_t  shard_count 
) const
private

Definition at line 339 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

340  {
341  size_t device_count{0};
343  auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
344  CHECK(cuda_mgr);
345  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
346  CHECK_GT(device_count, 0u);
347  }
348 
349  int64_t bucket{col_range_info.bucket};
350 
351  if (shard_count) {
352  CHECK(!col_range_info.bucket);
353  /*
354  When a node has fewer devices than the shard count:
355  a) In a distributed setup, the minimum distance between two keys would be
356  device_count, because shards are stored consecutively across the physical tables.
357  E.g. if a shard column has values 0 to 9 and there are 3 shards on each leaf,
358  node 1 would hold values 0,1,2,6,7,8 and node 2 would hold values 3,4,5,9. If each
359  leaf node has only 1 device, all of that node's keys end up loaded on its single
360  device.
361 
362  b) In a single-node setup, the distance would be the minimum of device_count and
363  shard_count - device_count. For example, on a single-node server with 3 devices,
364  a shard column with values 0 to 9 in a table with 4 shards maps device to fragment
365  keys as: device 1 - 4,8,3,7; device 2 - 1,5,9; device 3 - 2,6. The bucket value
366  would be 4 (shards) - 3 (devices) = 1, i.e. the minimum of device_count and the
367  difference.
368 
369  When a node has a device count equal to or greater than the shard count, the
370  minimum distance is always at least shard_count * number of leaf nodes.
371  */
372  if (device_count < shard_count) {
373  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
374  : std::min(device_count, shard_count - device_count);
375  } else {
376  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
377  }
378  }
379 
380  return bucket;
381 }
#define CHECK_GT(x, y)
Definition: Logger.h:209
const ExecutorDeviceType device_type_
size_t g_leaf_count
Definition: ParserNode.cpp:68
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:
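
Plugging numbers into the logic above: on a single node (g_leaf_count == 0) with 3 devices and a 4-shard table, the bucket becomes min(3, 4 - 3) = 1; with 2 shards on the same node, the device count is at least the shard count, so the bucket becomes 2 * max(g_leaf_count, 1) = 2.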

◆ gpuCanHandleOrderEntries()

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 860 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, Analyzer::AggExpr::get_arg(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

861  {
862  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
863  return false;
864  }
865  for (const auto& order_entry : order_entries) {
866  CHECK_GE(order_entry.tle_no, 1);
867  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
868  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
869  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
870  return false;
871  }
872  // TODO(alex): relax the restrictions
873  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
874  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
875  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
876  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
877  return false;
878  }
879  if (agg_expr->get_arg()) {
880  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
881  if (arg_ti.is_fp()) {
882  return false;
883  }
884  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
885  // TODO(adb): QMD not actually initialized here?
886  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
887  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
888  expr_range_info.has_nulls) &&
889  order_entry.is_desc == order_entry.nulls_first) {
890  return false;
891  }
892  }
893  const auto& target_ti = target_expr->get_type_info();
894  CHECK(!target_ti.is_array());
895  if (!target_ti.is_integer()) {
896  return false;
897  }
898  }
899  return true;
900 }
std::vector< Analyzer::Expr * > target_exprs
ColRangeInfo getExprRangeInfo(const Analyzer::Expr *expr) const
#define CHECK_GE(x, y)
Definition: Logger.h:210
Expr * get_arg() const
Definition: Analyzer.h:1096
Definition: sqldefs.h:73
#define CHECK_LE(x, y)
Definition: Logger.h:208
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
#define CHECK(condition)
Definition: Logger.h:197
const RelAlgExecutionUnit & ra_exe_unit_
Definition: sqldefs.h:74
Definition: sqldefs.h:72
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
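
For instance, a single ORDER BY COUNT(*) entry over an integer-typed target passes all of these checks, whereas ORDER BY AVG(x), MIN(x), MAX(x), any DISTINCT or APPROX_COUNT_DISTINCT aggregate, an aggregate over a floating-point argument, or a non-integer target type sends the sort down the non-GPU path.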

◆ initCountDistinctDescriptors()

CountDistinctDescriptors GroupByAndAggregate::initCountDistinctDescriptors ( )
private

Definition at line 604 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK, CHECK_GE, device_type_, g_bigint_count, g_enable_watchdog, g_hll_precision_bits, Analyzer::AggExpr::get_arg(), get_count_distinct_sub_bitmap_count(), get_target_info(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, hll_size_for_rate(), Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, kENCODING_DICT, kINT, Projection, ra_exe_unit_, StdSet, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

604  {
605  CountDistinctDescriptors count_distinct_descriptors;
606  for (const auto target_expr : ra_exe_unit_.target_exprs) {
607  auto agg_info = get_target_info(target_expr, g_bigint_count);
608  if (is_distinct_target(agg_info)) {
609  CHECK(agg_info.is_agg);
610  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
611  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
612  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
613  if (arg_ti.is_string() && arg_ti.get_compression() != kENCODING_DICT) {
614  throw std::runtime_error(
615  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
616  }
617  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_array()) {
618  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
619  }
620  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
621  throw std::runtime_error(
622  "APPROX_COUNT_DISTINCT on geometry columns not supported");
623  }
624  if (agg_info.is_distinct && arg_ti.is_geometry()) {
625  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
626  }
627  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
628  auto arg_range_info =
629  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
630  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
631  int64_t bitmap_sz_bits{0};
632  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
633  const auto error_rate = agg_expr->get_error_rate();
634  if (error_rate) {
635  CHECK(error_rate->get_type_info().get_type() == kINT);
636  CHECK_GE(error_rate->get_constval().intval, 1);
637  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
638  } else {
639  bitmap_sz_bits = g_hll_precision_bits;
640  }
641  }
642  if (arg_range_info.isEmpty()) {
643  count_distinct_descriptors.emplace_back(
645  0,
646  64,
647  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
648  device_type_,
649  1});
650  continue;
651  }
652  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
653  !(arg_ti.is_array() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
654  // implementation for arrays
655  count_distinct_impl_type = CountDistinctImplType::Bitmap;
656  if (agg_info.agg_kind == kCOUNT) {
657  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
658  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
659  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
660  count_distinct_impl_type = CountDistinctImplType::StdSet;
661  }
662  }
663  }
664  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
665  count_distinct_impl_type == CountDistinctImplType::StdSet &&
666  !(arg_ti.is_array() || arg_ti.is_geometry())) {
667  count_distinct_impl_type = CountDistinctImplType::Bitmap;
668  }
669 
670  if (g_enable_watchdog && !(arg_range_info.isEmpty()) &&
671  count_distinct_impl_type == CountDistinctImplType::StdSet) {
672  throw WatchdogException("Cannot use a fast path for COUNT distinct");
673  }
674  const auto sub_bitmap_count =
676  count_distinct_descriptors.emplace_back(
677  CountDistinctDescriptor{count_distinct_impl_type,
678  arg_range_info.min,
679  bitmap_sz_bits,
680  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
681  device_type_,
682  sub_bitmap_count});
683  } else {
684  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
685  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
686  }
687  }
688  return count_distinct_descriptors;
689 }
std::vector< Analyzer::Expr * > target_exprs
int hll_size_for_rate(const int err_percent)
Definition: HyperLogLog.h:115
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:78
ColRangeInfo getExprRangeInfo(const Analyzer::Expr *expr) const
#define CHECK_GE(x, y)
Definition: Logger.h:210
int g_hll_precision_bits
bool g_bigint_count
Expr * get_arg() const
Definition: Analyzer.h:1096
size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
std::vector< CountDistinctDescriptor > CountDistinctDescriptors
Definition: CountDistinct.h:35
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:129
const ExecutorDeviceType device_type_
Definition: sqldefs.h:76
CountDistinctImplType
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
#define CHECK(condition)
Definition: Logger.h:197
bool g_enable_watchdog
Definition: Execute.cpp:74
Definition: sqltypes.h:47
const RelAlgExecutionUnit & ra_exe_unit_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
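
As an illustration, COUNT(DISTINCT i) on an integer column whose range is [1, 1000000] gets a Bitmap descriptor of 1,000,000 bits per entry; if the range grew past MAX_BITMAP_BITS (8 billion bits) the descriptor would fall back to StdSet, which the watchdog rejects. APPROX_COUNT_DISTINCT instead sizes its bitmap from g_hll_precision_bits, or from the precision derived via hll_size_for_rate when an explicit error rate is given.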

◆ initQueryMemoryDescriptor()

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo render_info,
const bool  output_columnar_hint 
)
private

Definition at line 383 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

Referenced by Executor::compileWorkUnit(), and ExecutionKernel::runImpl().

388  {
389  const auto shard_count =
392  : 0;
393  bool sort_on_gpu_hint =
394  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
397  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
398  // but the total output buffer size would be too big or it's a sharded top query.
399  // For the sake of managing risk, use the new result set way very selectively for
400  // this case only (alongside the baseline layout we've enabled for a while now).
401  bool must_use_baseline_sort = shard_count;
402  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
403  while (true) {
404  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
405  max_groups_buffer_entry_count,
406  crt_min_byte_width,
407  sort_on_gpu_hint,
408  render_info,
409  must_use_baseline_sort,
410  output_columnar_hint);
411  CHECK(query_mem_desc);
412  if (query_mem_desc->sortOnGpu() &&
413  (query_mem_desc->getBufferSizeBytes(device_type_) +
414  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
415  2 * 1024 * 1024 * 1024L) {
416  must_use_baseline_sort = true;
417  sort_on_gpu_hint = false;
418  } else {
419  break;
420  }
421  }
422  return query_mem_desc;
423 }
bool gpuCanHandleOrderEntries(const std::list< Analyzer::OrderEntry > &order_entries)
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
const std::list< Analyzer::OrderEntry > order_entries
const SortInfo sort_info
const ExecutorDeviceType device_type_
#define CHECK(condition)
Definition: Logger.h:197
const RelAlgExecutionUnit & ra_exe_unit_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
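
The retry loop exists because GPU sort needs the whole output buffer plus an int32 entry index to fit in memory: if that sum exceeds 2 GB (say, roughly 50 million entries at 48 bytes per row), the descriptor is rebuilt with sort_on_gpu_hint off and must_use_baseline_sort on.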

◆ initQueryMemoryDescriptorImpl()

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 425 of file GroupByAndAggregate.cpp.

References addTransientStringLiterals(), device_type_, executor_, g_enable_watchdog, get_col_byte_widths(), getColRangeInfo(), getKeylessInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), initCountDistinctDescriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

432  {
434 
435  const auto count_distinct_descriptors = initCountDistinctDescriptors();
436 
437  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs);
438 
439  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
440 
441  auto col_range_info_nosharding = getColRangeInfo();
442 
443  const auto shard_count =
446  : 0;
447 
448  const auto col_range_info =
449  ColRangeInfo{col_range_info_nosharding.hash_type_,
450  col_range_info_nosharding.min,
451  col_range_info_nosharding.max,
452  getShardedTopBucket(col_range_info_nosharding, shard_count),
453  col_range_info_nosharding.has_nulls};
454 
455  // Non-grouped aggregates do not support accessing aggregated ranges
456  // Keyless hash is currently only supported with single-column perfect hash
457  const auto keyless_info = !(is_group_by && col_range_info.hash_type_ ==
459  ? KeylessInfo{false, -1}
460  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
461 
462  if (g_enable_watchdog &&
463  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
464  max_groups_buffer_entry_count > 120000000) ||
465  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
466  ra_exe_unit_.groupby_exprs.size() == 1 &&
467  (col_range_info.max - col_range_info.min) /
468  std::max(col_range_info.bucket, int64_t(1)) >
469  130000000))) {
470  throw WatchdogException("Query would use too much memory");
471  }
472  try {
474  ra_exe_unit_,
475  query_infos_,
476  col_range_info,
477  keyless_info,
478  allow_multifrag,
479  device_type_,
480  crt_min_byte_width,
481  sort_on_gpu_hint,
482  shard_count,
483  max_groups_buffer_entry_count,
484  render_info,
485  count_distinct_descriptors,
486  must_use_baseline_sort,
487  output_columnar_hint,
488  /*streaming_top_n_hint=*/true);
489  } catch (const StreamingTopNOOM& e) {
490  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
492  ra_exe_unit_,
493  query_infos_,
494  col_range_info,
495  keyless_info,
496  allow_multifrag,
497  device_type_,
498  crt_min_byte_width,
499  sort_on_gpu_hint,
500  shard_count,
501  max_groups_buffer_entry_count,
502  render_info,
503  count_distinct_descriptors,
504  must_use_baseline_sort,
505  output_columnar_hint,
506  /*streaming_top_n_hint=*/false);
507  }
508 }
std::vector< Analyzer::Expr * > target_exprs
#define LOG(tag)
Definition: Logger.h:188
ColRangeInfo getColRangeInfo()
QueryDescriptionType hash_type_
static std::unique_ptr< QueryMemoryDescriptor > init(const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const CountDistinctDescriptors count_distinct_descriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
CountDistinctDescriptors initCountDistinctDescriptors()
const std::vector< InputTableInfo > & query_infos_
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list)
const ExecutorDeviceType device_type_
KeylessInfo getKeylessInfo(const std::vector< Analyzer::Expr *> &target_expr_list, const bool is_group_by) const
int64_t getShardedTopBucket(const ColRangeInfo &col_range_info, const size_t shard_count) const
bool g_enable_watchdog
Definition: Execute.cpp:74
const RelAlgExecutionUnit & ra_exe_unit_
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ needsUnnestDoublePatch()

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions co 
) const
private

Definition at line 30 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

33  {
34  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
35  llvm::isa<llvm::AllocaInst>(val_ptr) &&
36  val_ptr->getType() ==
37  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
38  "agg_id" == agg_base_name);
39 }
ExecutorDeviceType device_type
+ Here is the caller graph for this function:

◆ prependForceSync()

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 41 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

41  {
42  executor_->cgen_state_->ir_builder_.CreateCall(
43  executor_->cgen_state_->module_->getFunction("force_sync"));
44 }
+ Here is the caller graph for this function:

◆ shard_count_for_top_groups()

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog catalog 
)
static

Definition at line 2006 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

2008  {
2009  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
2010  return 0;
2011  }
2012  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2013  const auto grouped_col_expr =
2014  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
2015  if (!grouped_col_expr) {
2016  continue;
2017  }
2018  if (grouped_col_expr->get_table_id() <= 0) {
2019  return 0;
2020  }
2021  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
2022  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
2023  return td->nShards;
2024  }
2025  }
2026  return 0;
2027 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
const std::list< Analyzer::OrderEntry > order_entries
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
const size_t limit
const SortInfo sort_info
+ Here is the call graph for this function:
+ Here is the caller graph for this function:
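
For example, a top-k query such as SELECT shard_key, COUNT(*) FROM t GROUP BY shard_key ORDER BY 2 DESC LIMIT 10 on a table sharded 4 ways returns 4, while the same query without a LIMIT, with more than one ORDER BY entry, or grouped on a non-shard column returns 0.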

Friends And Related Function Documentation

◆ CodeGenerator

friend class CodeGenerator
friend

Definition at line 302 of file GroupByAndAggregate.h.

◆ ExecutionKernel

friend class ExecutionKernel
friend

Definition at line 303 of file GroupByAndAggregate.h.

◆ Executor

friend class Executor
friend

Definition at line 300 of file GroupByAndAggregate.h.

◆ QueryMemoryDescriptor

friend class QueryMemoryDescriptor
friend

Definition at line 301 of file GroupByAndAggregate.h.

◆ TargetExprCodegen

friend struct TargetExprCodegen
friend

Definition at line 304 of file GroupByAndAggregate.h.

◆ TargetExprCodegenBuilder

friend struct TargetExprCodegenBuilder
friend

Definition at line 305 of file GroupByAndAggregate.h.

Member Data Documentation

◆ device_type_

◆ executor_

◆ group_cardinality_estimation_

const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 298 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

◆ output_columnar_

bool GroupByAndAggregate::output_columnar_
private

Definition at line 295 of file GroupByAndAggregate.h.

◆ query_infos_

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private

◆ ra_exe_unit_

◆ row_set_mem_owner_

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 294 of file GroupByAndAggregate.h.

Referenced by addTransientStringLiterals().


The documentation for this class was generated from the following files: