OmniSciDB  94e8789169
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>

+ Collaboration diagram for GroupByAndAggregate:

Classes

struct  DiamondCodegen
 

Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
void addTransientStringLiterals ()
 
CountDistinctDescriptors initCountDistinctDescriptors ()
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
ColRangeInfo getExprRangeInfo (const Analyzer::Expr *expr) const
 
KeylessInfo getKeylessInfo (const std::vector< Analyzer::Expr * > &target_expr_list, const bool is_group_by) const
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored.
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, GroupByAndAggregate::DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
void codegenApproxMedian (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 125 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 310 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, and ra_exe_unit_.

317  : executor_(executor)
318  , ra_exe_unit_(ra_exe_unit)
319  , query_infos_(query_infos)
320  , row_set_mem_owner_(row_set_mem_owner)
321  , device_type_(device_type)
322  , group_cardinality_estimation_(group_cardinality_estimation) {
323  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
324  if (!groupby_expr) {
325  continue;
326  }
327  const auto& groupby_ti = groupby_expr->get_type_info();
328  if (groupby_ti.is_bytes()) {
329  throw std::runtime_error(
330  "Cannot group by string columns which are not dictionary encoded.");
331  }
332  if (groupby_ti.is_buffer()) {
333  throw std::runtime_error("Group by buffer not supported");
334  }
335  if (groupby_ti.is_geometry()) {
336  throw std::runtime_error("Group by geometry not supported");
337  }
338  }
339 }

Member Function Documentation

void GroupByAndAggregate::addTransientStringLiterals ( const RelAlgExecutionUnit &  ra_exe_unit,
Executor *  executor,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)
static

Definition at line 578 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::add_transient_string_literals_for_expression(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, kSINGLE_VALUE, and RelAlgExecutionUnit::target_exprs.

581  {
582  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
583  add_transient_string_literals_for_expression(
584  group_expr.get(), executor, row_set_mem_owner);
585  }
586  for (const auto target_expr : ra_exe_unit.target_exprs) {
587  const auto& target_type = target_expr->get_type_info();
588  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
589  continue;
590  }
591  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
592  if (agg_expr) {
593  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
594  agg_expr->get_aggtype() == kSAMPLE) {
595  add_transient_string_literals_for_expression(
596  agg_expr->get_arg(), executor, row_set_mem_owner);
597  }
598  } else {
599  add_transient_string_literals_for_expression(
600  target_expr, executor, row_set_mem_owner);
601  }
602  }
603 }

+ Here is the call graph for this function:

void GroupByAndAggregate::addTransientStringLiterals ( )
private

Definition at line 512 of file GroupByAndAggregate.cpp.

References executor_, ra_exe_unit_, and row_set_mem_owner_.

Referenced by RelAlgExecutor::executeSort(), RelAlgExecutor::executeWorkUnit(), and initQueryMemoryDescriptorImpl().

512  {
513  addTransientStringLiterals(ra_exe_unit_, executor_, row_set_mem_owner_);
514 }

+ Here is the caller graph for this function:

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 2022 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

2022  {
2023  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2024  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
2025  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
2026  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
2027 
2028  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
2029 }

+ Here is the caller graph for this function:

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 950 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), GroupByAndAggregate::DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, generate_TableFunctionsFactory_init::i, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

954  {
955  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
956  CHECK(filter_result);
957 
958  bool can_return_error = false;
959  llvm::BasicBlock* filter_false{nullptr};
960 
961  {
962  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
963 
964  if (executor_->isArchMaxwell(co.device_type)) {
965  prependForceSync();
966  }
967  DiamondCodegen filter_cfg(filter_result,
968  executor_,
969  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
970  "filter", // filter_true and filter_false basic blocks
971  nullptr,
972  false);
973  filter_false = filter_cfg.cond_false_;
974 
975  if (is_group_by) {
976  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
977  !query_mem_desc.useStreamingTopN()) {
978  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
979  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
980  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
981  llvm::Value* old_total_matched_val{nullptr};
983  old_total_matched_val =
984  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
985  total_matched_ptr,
986  LL_INT(int32_t(1)),
987  llvm::AtomicOrdering::Monotonic);
988  } else {
989  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
990  LL_BUILDER.CreateStore(
991  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
992  total_matched_ptr);
993  }
994  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
995  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
996  }
997 
998  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
999  if (query_mem_desc.usesGetGroupValueFast() ||
1000  query_mem_desc.getQueryDescriptionType() ==
1002  if (query_mem_desc.getGroupbyColCount() > 1) {
1003  filter_cfg.setChainToNext();
1004  }
1005  // Don't generate null checks if the group slot is guaranteed to be non-null,
1006  // as it's the case for get_group_value_fast* family.
1007  can_return_error = codegenAggCalls(
1008  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
1009  } else {
1010  {
1011  llvm::Value* nullcheck_cond{nullptr};
1012  if (query_mem_desc.didOutputColumnar()) {
1013  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
1014  LL_INT(int32_t(0)));
1015  } else {
1016  nullcheck_cond = LL_BUILDER.CreateICmpNE(
1017  std::get<0>(agg_out_ptr_w_idx),
1018  llvm::ConstantPointerNull::get(
1019  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
1020  }
1021  DiamondCodegen nullcheck_cfg(
1022  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
1024  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
1025  }
1026  can_return_error = true;
1027  if (query_mem_desc.getQueryDescriptionType() ==
1028  QueryDescriptionType::Projection &&
1029  query_mem_desc.useStreamingTopN()) {
1030  // Ignore rejection on pushing current row to top-K heap.
1031  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
1032  } else {
1033  CodeGenerator code_generator(executor_);
1034  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
1035  // TODO(alex): remove the trunc once pos is converted to 32 bits
1036  code_generator.posArg(nullptr),
1037  get_int_type(32, LL_CONTEXT))));
1038  }
1039  }
1040  } else {
1041  if (ra_exe_unit_.estimator) {
1042  std::stack<llvm::BasicBlock*> array_loops;
1043  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
1044  } else {
1045  auto arg_it = ROW_FUNC->arg_begin();
1046  std::vector<llvm::Value*> agg_out_vec;
1047  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1048  agg_out_vec.push_back(&*arg_it++);
1049  }
1050  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1051  agg_out_vec,
1052  query_mem_desc,
1053  co,
1054  gpu_smem_context,
1055  filter_cfg);
1056  }
1057  }
1058  }
1059 
1060  if (ra_exe_unit_.join_quals.empty()) {
1061  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1062  } else if (sc_false) {
1063  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1064  LL_BUILDER.SetInsertPoint(sc_false);
1065  LL_BUILDER.CreateBr(filter_false);
1066  LL_BUILDER.SetInsertPoint(saved_insert_block);
1067  }
1068 
1069  return can_return_error;
1070 }

+ Here is the call graph for this function:

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1842 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), CUR_FUNC, executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1844  {
1845  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1846  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1847  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1848  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1849 
1850  // TODO(alex): handle arrays uniformly?
1851  CodeGenerator code_generator(executor_);
1852  if (target_expr) {
1853  const auto& target_ti = target_expr->get_type_info();
1854  if (target_ti.is_buffer() &&
1855  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1856  const auto target_lvs =
1857  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1858  : code_generator.codegen(
1859  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1860  if (!func_expr && !arr_expr) {
1861  // Something with the chunk transport is code that was generated from a source
1862  // other than an ARRAY[] expression
1863  if (target_ti.is_bytes()) {
1864  CHECK_EQ(size_t(3), target_lvs.size());
1865  return {target_lvs[1], target_lvs[2]};
1866  }
1867  CHECK(target_ti.is_array());
1868  CHECK_EQ(size_t(1), target_lvs.size());
1869  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1870  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1871  const auto i8p_ty =
1872  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1873  const auto& elem_ti = target_ti.get_elem_type();
1874  return {
1875  executor_->cgen_state_->emitExternalCall(
1876  "array_buff",
1877  i8p_ty,
1878  {target_lvs.front(), code_generator.posArg(target_expr)}),
1879  executor_->cgen_state_->emitExternalCall(
1880  "array_size",
1881  i32_ty,
1882  {target_lvs.front(),
1883  code_generator.posArg(target_expr),
1884  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1885  } else {
1886  if (agg_expr) {
1887  throw std::runtime_error(
1888  "Using array[] operator as argument to an aggregate operator is not "
1889  "supported");
1890  }
1891  CHECK(func_expr || arr_expr);
1892  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1893  CHECK_EQ(size_t(1), target_lvs.size());
1894  const auto prefix = target_ti.get_buffer_name();
1895  CHECK(target_ti.is_array() || target_ti.is_bytes());
1896  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1897  // const auto target_lv_type = target_lvs[0]->getType();
1898  // CHECK(target_lv_type->isStructTy());
1899  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1900  const auto i8p_ty = llvm::PointerType::get(
1901  get_int_type(8, executor_->cgen_state_->context_), 0);
1902  const auto ptr = LL_BUILDER.CreatePointerCast(
1903  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1904  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1905  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1906  const auto nullcheck_ok_bb =
1907  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_nullcheck_ok_bb", CUR_FUNC);
1908  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
1909  LL_CONTEXT, prefix + "_nullcheck_fail_bb", CUR_FUNC);
1910 
1911  // TODO(adb): probably better to zext the bool
1912  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1913  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1914  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1915 
1916  const auto ret_bb =
1917  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_return", CUR_FUNC);
1918  LL_BUILDER.SetInsertPoint(ret_bb);
1919  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
1920  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1921  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1922  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1923  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1924  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1925  executor_->cgen_state_->emitExternalCall(
1926  "register_buffer_with_executor_rsm",
1927  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1928  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1929  LL_BUILDER.CreateBr(ret_bb);
1930  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1931  LL_BUILDER.CreateBr(ret_bb);
1932 
1933  LL_BUILDER.SetInsertPoint(ret_bb);
1934  return {result_phi, size};
1935  }
1936  CHECK_EQ(size_t(2), target_lvs.size());
1937  return {target_lvs[0], target_lvs[1]};
1938  }
1939  }
1940  if (target_ti.is_geometry() &&
1941  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1942  auto generate_coord_lvs =
1943  [&](auto* selected_target_expr,
1944  bool const fetch_columns) -> std::vector<llvm::Value*> {
1945  const auto target_lvs =
1946  code_generator.codegen(selected_target_expr, fetch_columns, co);
1947  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
1948  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
1949  if (geo_uoper || geo_binoper) {
1950  CHECK(target_expr->get_type_info().is_geometry());
1951  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1952  target_lvs.size());
1953  return target_lvs;
1954  }
1955  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1956  target_lvs.size());
1957 
1958  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1959  const auto i8p_ty =
1960  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1961  std::vector<llvm::Value*> coords;
1962  size_t ctr = 0;
1963  for (const auto& target_lv : target_lvs) {
1964  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1965  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1966  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1967  // coords array (TINYINT). Subsequent arrays are regular INT.
1968 
1969  const size_t elem_sz = ctr == 0 ? 1 : 4;
1970  ctr++;
1971  int32_t fixlen = -1;
1972  if (target_ti.get_type() == kPOINT) {
1973  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1974  if (col_var) {
1975  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1976  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1977  fixlen = coords_cd->columnType.get_size();
1978  }
1979  }
1980  }
1981  if (fixlen > 0) {
1982  coords.push_back(executor_->cgen_state_->emitExternalCall(
1983  "fast_fixlen_array_buff",
1984  i8p_ty,
1985  {target_lv, code_generator.posArg(selected_target_expr)}));
1986  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1987  continue;
1988  }
1989  coords.push_back(executor_->cgen_state_->emitExternalCall(
1990  "array_buff",
1991  i8p_ty,
1992  {target_lv, code_generator.posArg(selected_target_expr)}));
1993  coords.push_back(executor_->cgen_state_->emitExternalCall(
1994  "array_size",
1995  i32_ty,
1996  {target_lv,
1997  code_generator.posArg(selected_target_expr),
1998  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1999  }
2000  return coords;
2001  };
2002 
2003  if (agg_expr) {
2004  return generate_coord_lvs(agg_expr->get_arg(), true);
2005  } else {
2006  return generate_coord_lvs(target_expr,
2007  !executor_->plan_state_->allow_lazy_fetch_);
2008  }
2009  }
2010  }
2011  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2012  : code_generator.codegen(
2013  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
2014 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const std::vector< llvm::Value * > &  agg_out_vec,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1552 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1558  {
1559  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1560  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1561  // TODO(alex): unify the two cases, the output for non-group by queries
1562  // should be a contiguous buffer
1563  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1564  bool can_return_error = false;
1565  if (is_group_by) {
1566  CHECK(agg_out_vec.empty());
1567  } else {
1568  CHECK(!agg_out_vec.empty());
1569  }
1570 
1571  // output buffer is casted into a byte stream to be able to handle data elements of
1572  // different sizes (only used when actual column width sizes are used)
1573  llvm::Value* output_buffer_byte_stream{nullptr};
1574  llvm::Value* out_row_idx{nullptr};
1575  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1577  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1578  std::get<0>(agg_out_ptr_w_idx),
1579  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1580  output_buffer_byte_stream->setName("out_buff_b_stream");
1581  CHECK(std::get<1>(agg_out_ptr_w_idx));
1582  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1583  llvm::Type::getInt64Ty(LL_CONTEXT));
1584  out_row_idx->setName("out_row_idx");
1585  }
1586 
1587  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1588  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1589  ++target_idx) {
1590  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1591  CHECK(target_expr);
1592 
1593  target_builder(target_expr, executor_, co);
1594  }
1595 
1596  target_builder.codegen(this,
1597  executor_,
1598  query_mem_desc,
1599  co,
1600  gpu_smem_context,
1601  agg_out_ptr_w_idx,
1602  agg_out_vec,
1603  output_buffer_byte_stream,
1604  out_row_idx,
1605  diamond_codegen);
1606 
1607  for (auto target_expr : ra_exe_unit_.target_exprs) {
1608  CHECK(target_expr);
1609  executor_->plan_state_->isLazyFetchColumn(target_expr);
1610  }
1611 
1612  return can_return_error;
1613 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1618 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1625  {
1626  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1627  llvm::Value* agg_col_ptr{nullptr};
1628  if (query_mem_desc.didOutputColumnar()) {
1629  // TODO(Saman): remove the second columnar branch, and support all query description
1630  // types through the first branch. Then, input arguments should also be cleaned up
1631  if (!g_cluster &&
1633  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1634  chosen_bytes == 8);
1635  CHECK(output_buffer_byte_stream);
1636  CHECK(out_row_idx);
1637  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1638  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1639  auto out_per_col_byte_idx =
1640  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1641  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1642  LL_INT(static_cast<int64_t>(col_off)));
1643  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1644  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1645  agg_col_ptr = LL_BUILDER.CreateBitCast(
1646  output_ptr,
1647  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1648  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1649  } else {
1650  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1651  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1652  col_off /= chosen_bytes;
1653  CHECK(std::get<1>(agg_out_ptr_w_idx));
1654  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1655  agg_col_ptr = LL_BUILDER.CreateGEP(
1656  LL_BUILDER.CreateBitCast(
1657  std::get<0>(agg_out_ptr_w_idx),
1658  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1659  offset);
1660  }
1661  } else {
1662  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1663  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1664  col_off /= chosen_bytes;
1665  agg_col_ptr = LL_BUILDER.CreateGEP(
1666  LL_BUILDER.CreateBitCast(
1667  std::get<0>(agg_out_ptr_w_idx),
1668  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1669  LL_INT(col_off));
1670  }
1671  CHECK(agg_col_ptr);
1672  return agg_col_ptr;
1673 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
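The columnar branch above computes the slot address with plain shift-and-add arithmetic: the row index is shifted left by log2(chosen_bytes) (equivalent to multiplying by the slot width) and the column's byte offset within the output buffer is added, before the resulting byte pointer is cast to the slot's integer width. A minimal host-side sketch of that arithmetic, with made-up values for chosen_bytes, col_off and out_row_idx (the generated IR performs the same computation via CreateShl/CreateAdd/CreateGEP):

#include <cstdint>
#include <iostream>

int main() {
  const uint32_t chosen_bytes = 4;  // width of the aggregate slot: 1, 2, 4 or 8 bytes
  const uint64_t col_off = 1024;    // byte offset of this column in the output buffer
  const uint64_t out_row_idx = 37;  // row index produced by codegenGroupBy

  // multiplying by chosen_bytes, i.e. << log2(chosen_bytes)
  const uint64_t out_per_col_byte_idx = out_row_idx << (__builtin_ffs(chosen_bytes) - 1);
  const uint64_t byte_offset = out_per_col_byte_idx + col_off;

  std::cout << "aggregate slot for this row starts at byte " << byte_offset
            << " of the output buffer\n";  // prints 1172 for these values
  return 0;
}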

void GroupByAndAggregate::codegenApproxMedian ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1794 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, emitCall(), executor_, g_bigint_count, SQLTypeInfo::get_notnull(), get_target_info(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1798  {
1799  if (device_type == ExecutorDeviceType::GPU) {
1800  throw QueryMustRunOnCpu();
1801  }
1802  llvm::BasicBlock *calc, *skip;
1803  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1804  auto const arg_ti =
1805  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1806  bool const nullable = !arg_ti.get_notnull();
1807 
1808  auto* cs = executor_->cgen_state_.get();
1809  auto& irb = cs->ir_builder_;
1810  if (nullable) {
1811  auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1812  auto* const skip_cond = arg_ti.is_fp()
1813  ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1814  : irb.CreateICmpEQ(agg_args.back(), null_value);
1815  calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_median");
1816  skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_median");
1817  irb.CreateCondBr(skip_cond, skip, calc);
1818  cs->current_func_->getBasicBlockList().push_back(calc);
1819  irb.SetInsertPoint(calc);
1820  }
1821  if (!arg_ti.is_fp()) {
1822  auto const agg_info = get_target_info(target_expr, g_bigint_count);
1823  agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
1824  }
1825  emitCall("agg_approx_median", agg_args);
1826  if (nullable) {
1827  irb.CreateBr(skip);
1828  cs->current_func_->getBasicBlockList().push_back(skip);
1829  irb.SetInsertPoint(skip);
1830  }
1831 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1725 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1730  {
1731  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1732  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1733  const auto& arg_ti =
1734  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1735  if (arg_ti.is_fp()) {
1736  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1737  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1738  }
1739  const auto& count_distinct_descriptor =
1740  query_mem_desc.getCountDistinctDescriptor(target_idx);
1741  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1742  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1743  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1744  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1745  if (device_type == ExecutorDeviceType::GPU) {
1746  const auto base_dev_addr = getAdditionalLiteral(-1);
1747  const auto base_host_addr = getAdditionalLiteral(-2);
1748  agg_args.push_back(base_dev_addr);
1749  agg_args.push_back(base_host_addr);
1750  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1751  } else {
1752  emitCall("agg_approximate_count_distinct", agg_args);
1753  }
1754  return;
1755  }
1756  std::string agg_fname{"agg_count_distinct"};
1757  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1758  agg_fname += "_bitmap";
1759  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1760  }
1761  if (agg_info.skip_null_val) {
1762  auto null_lv = executor_->cgen_state_->castToTypeIn(
1763  (arg_ti.is_fp()
1764  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1765  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1766  64);
1767  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1768  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1769  agg_fname += "_skip_val";
1770  agg_args.push_back(null_lv);
1771  }
1772  if (device_type == ExecutorDeviceType::GPU) {
1773  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1774  agg_fname += "_gpu";
1775  const auto base_dev_addr = getAdditionalLiteral(-1);
1776  const auto base_host_addr = getAdditionalLiteral(-2);
1777  agg_args.push_back(base_dev_addr);
1778  agg_args.push_back(base_host_addr);
1779  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1780  CHECK_EQ(size_t(0),
1781  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1782  count_distinct_descriptor.sub_bitmap_count);
1783  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1784  count_distinct_descriptor.sub_bitmap_count)));
1785  }
1786  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1787  emitCall(agg_fname, agg_args);
1788  } else {
1789  executor_->cgen_state_->emitExternalCall(
1790  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1791  }
1792 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
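For the exact (non-approximate) path the runtime call name is built above from the base agg_count_distinct with optional _bitmap, _skip_val and _gpu suffixes, and for the bitmap implementation the descriptor's min_val is passed along, which suggests a bitmap indexed by (value - min_val). The runtime functions themselves are not part of this page; the following standalone sketch only illustrates that general bitmap idea with made-up range values and is not the actual implementation:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t min_val = 100;  // lower bound of the column range (made up)
  const int64_t max_val = 163;  // upper bound of the column range (made up)
  std::vector<uint8_t> bitmap((max_val - min_val) / 8 + 1, 0);

  // mark a value as seen: bit index is (value - min_val)
  auto mark = [&](int64_t val) {
    const uint64_t bit = static_cast<uint64_t>(val - min_val);
    bitmap[bit >> 3] |= static_cast<uint8_t>(1u << (bit & 7));
  };

  for (int64_t v : {101, 105, 101, 160, 105}) {
    mark(v);
  }

  size_t distinct = 0;
  for (uint8_t byte : bitmap) {
    distinct += __builtin_popcount(byte);  // count set bits
  }
  std::cout << "distinct values: " << distinct << "\n";  // prints 3
  return 0;
}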

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
GroupByAndAggregate::DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1675 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1679  {
1680  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1681  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1682  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1683  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1684  estimator_comp_count_lv);
1685  int32_t subkey_idx = 0;
1686  for (const auto& estimator_arg_comp : estimator_arg) {
1687  const auto estimator_arg_comp_lvs =
1688  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1689  query_mem_desc.getEffectiveKeyWidth(),
1690  co,
1691  false,
1692  0,
1693  diamond_codegen,
1694  array_loops,
1695  true);
1696  CHECK(!estimator_arg_comp_lvs.original_value);
1697  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1698  // store the sub-key to the buffer
1699  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1700  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1701  }
1702  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1703  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1704  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1705  const auto estimator_comp_bytes_lv =
1706  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1707  const auto bitmap_size_lv =
1708  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1709  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1710  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1711 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
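The body above materializes each estimator component as a 64-bit sub-key in a contiguous buffer and then hands that buffer to the estimator's runtime function as raw bytes plus a byte count. A host-side illustration of just the packing step, with made-up component values (the estimator runtime function and its bitmap are outside the scope of this page):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // made-up component values; in the real code each one comes from groupByColumnCodegen
  const std::vector<int64_t> components = {42, -7, 1000000};

  std::vector<int64_t> estimator_key(components.size());
  int32_t subkey_idx = 0;
  for (const auto comp : components) {
    estimator_key[subkey_idx++] = comp;  // mirrors the CreateStore/CreateGEP loop above
  }

  // the packed key is passed to the estimator as raw bytes plus its size
  const auto* key_bytes = reinterpret_cast<const int8_t*>(estimator_key.data());
  const auto key_byte_count =
      static_cast<int32_t>(estimator_key.size() * sizeof(int64_t));

  std::cout << "estimator key occupies " << key_byte_count << " bytes\n";  // prints 24
  (void)key_bytes;
  return 0;
}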

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1164 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), getExprRangeInfo(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1167  {
1168  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1169  auto arg_it = ROW_FUNC->arg_begin();
1170  auto groups_buffer = arg_it++;
1171 
1172  std::stack<llvm::BasicBlock*> array_loops;
1173 
1174  // TODO(Saman): move this logic outside of this function.
1176  if (query_mem_desc.didOutputColumnar()) {
1177  return std::make_tuple(
1178  &*groups_buffer,
1179  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1180  } else {
1181  return std::make_tuple(
1182  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1183  nullptr);
1184  }
1185  }
1186 
1187  CHECK(query_mem_desc.getQueryDescriptionType() ==
1189  query_mem_desc.getQueryDescriptionType() ==
1191 
1192  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1193  ? 0
1194  : query_mem_desc.getRowSize() / sizeof(int64_t);
1195 
1196  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1197  ? sizeof(int64_t)
1198  : query_mem_desc.getEffectiveKeyWidth();
1199  // for multi-column group by
1200  llvm::Value* group_key = nullptr;
1201  llvm::Value* key_size_lv = nullptr;
1202 
1203  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1204  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1205  if (query_mem_desc.getQueryDescriptionType() ==
1207  group_key =
1208  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1209  } else if (query_mem_desc.getQueryDescriptionType() ==
1211  group_key =
1212  col_width_size == sizeof(int32_t)
1213  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1214  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1215  }
1216  CHECK(group_key);
1217  CHECK(key_size_lv);
1218  }
1219 
1220  int32_t subkey_idx = 0;
1221  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1222  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1223  const auto col_range_info = getExprRangeInfo(group_expr.get());
1224  const auto translated_null_value = static_cast<int64_t>(
1225  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1226  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1227  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1228  : checked_int64_t(col_range_info.max) +
1229  (col_range_info.bucket ? col_range_info.bucket : 1));
1230 
1231  const bool col_has_nulls =
1232  query_mem_desc.getQueryDescriptionType() ==
1234  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1235  ? query_mem_desc.hasNulls()
1236  : col_range_info.has_nulls)
1237  : false;
1238 
1239  const auto group_expr_lvs =
1240  executor_->groupByColumnCodegen(group_expr.get(),
1241  col_width_size,
1242  co,
1243  col_has_nulls,
1244  translated_null_value,
1245  diamond_codegen,
1246  array_loops,
1247  query_mem_desc.threadsShareMemory());
1248  const auto group_expr_lv = group_expr_lvs.translated_value;
1249  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1250  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1251  return codegenSingleColumnPerfectHash(query_mem_desc,
1252  co,
1253  &*groups_buffer,
1254  group_expr_lv,
1255  group_expr_lvs.original_value,
1256  row_size_quad);
1257  } else {
1258  // store the sub-key to the buffer
1259  LL_BUILDER.CreateStore(group_expr_lv,
1260  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1261  }
1262  }
1263  if (query_mem_desc.getQueryDescriptionType() ==
1265  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1267  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1268  } else if (query_mem_desc.getQueryDescriptionType() ==
1271  &*groups_buffer,
1272  group_key,
1273  key_size_lv,
1274  query_mem_desc,
1275  col_width_size,
1276  row_size_quad);
1277  }
1278  CHECK(false);
1279  return std::make_tuple(nullptr, nullptr);
1280 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
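The per-column loop above maps NULL group keys to a "translated" value one bucket past the maximum of the column's range, so NULLs occupy their own slot in the perfect-hash layout; checked_int64_t guards the addition against overflow. A small standalone illustration of that arithmetic, with made-up range values and a manual check standing in for checked_int64_t:

#include <cstdint>
#include <iostream>
#include <limits>
#include <stdexcept>

int64_t translated_null_value(int64_t max_val, int64_t bucket) {
  const int64_t step = bucket ? bucket : 1;
  // the real code relies on checked_int64_t; a manual overflow check stands in here
  if (max_val > std::numeric_limits<int64_t>::max() - step) {
    throw std::overflow_error("column range too large");
  }
  return max_val + step;
}

int main() {
  // unbucketed column with values up to 1000: NULL keys map to 1001
  std::cout << translated_null_value(1000, 0) << "\n";
  // column bucketed by 3600 (e.g. timestamps grouped by hour): NULL keys map to 90000
  std::cout << translated_null_value(86400, 3600) << "\n";
  return 0;
}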

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1377 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1384  {
1385  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1386  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1387  CHECK(key_width == sizeof(int32_t));
1388  group_key =
1389  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1390  }
1391  std::vector<llvm::Value*> func_args{
1392  groups_buffer,
1393  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1394  &*group_key,
1395  &*key_size_lv,
1396  LL_INT(static_cast<int32_t>(key_width))};
1397  std::string func_name{"get_group_value"};
1398  if (query_mem_desc.didOutputColumnar()) {
1399  func_name += "_columnar_slot";
1400  } else {
1401  func_args.push_back(LL_INT(row_size_quad));
1402  }
1403  if (co.with_dynamic_watchdog) {
1404  func_name += "_with_watchdog";
1405  }
1406  if (query_mem_desc.didOutputColumnar()) {
1407  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1408  } else {
1409  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1410  }
1411 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
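The body above selects one of the get_group_value* runtime functions by composing its name from the output layout and the dynamic-watchdog flag (only the row-wise variant receives row_size_quad as an extra argument). The string composition, reproduced as a standalone helper purely for clarity:

#include <iostream>
#include <string>

// mirrors the name selection shown above; the runtime functions themselves live elsewhere
std::string baseline_hash_func_name(bool output_columnar, bool with_dynamic_watchdog) {
  std::string func_name{"get_group_value"};
  if (output_columnar) {
    func_name += "_columnar_slot";
  }
  if (with_dynamic_watchdog) {
    func_name += "_with_watchdog";
  }
  return func_name;
}

int main() {
  std::cout << baseline_hash_func_name(false, true) << "\n";   // get_group_value_with_watchdog
  std::cout << baseline_hash_func_name(true, false) << "\n";   // get_group_value_columnar_slot
  return 0;
}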

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1333 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1338  {
1339  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1340  CHECK(query_mem_desc.getQueryDescriptionType() ==
1342  // compute the index (perfect hash)
1343  auto perfect_hash_func = codegenPerfectHashFunction();
1344  auto hash_lv =
1345  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1346 
1347  if (query_mem_desc.didOutputColumnar()) {
1348  if (!query_mem_desc.hasKeylessHash()) {
1349  const std::string set_matching_func_name{
1350  "set_matching_group_value_perfect_hash_columnar"};
1351  const std::vector<llvm::Value*> set_matching_func_arg{
1352  groups_buffer,
1353  hash_lv,
1354  group_key,
1355  key_size_lv,
1356  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1357  query_mem_desc.getEntryCount())};
1358  emitCall(set_matching_func_name, set_matching_func_arg);
1359  }
1360  return std::make_tuple(groups_buffer, hash_lv);
1361  } else {
1362  if (query_mem_desc.hasKeylessHash()) {
1363  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1364  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1365  nullptr);
1366  } else {
1367  return std::make_tuple(
1368  emitCall(
1369  "get_matching_group_value_perfect_hash",
1370  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1371  nullptr);
1372  }
1373  }
1374 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1072 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), RelAlgExecutionUnit::use_bump_allocator, and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1076  {
1077  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1079  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1080  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1081  CHECK(!group_expr);
1082  if (!query_mem_desc.didOutputColumnar()) {
1083  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1084  }
1085  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1086  ? 0
1087  : query_mem_desc.getRowSize() / sizeof(int64_t);
1088  CodeGenerator code_generator(executor_);
1089  if (query_mem_desc.useStreamingTopN()) {
1090  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1091  CHECK_GE(only_order_entry.tle_no, int(1));
1092  const size_t target_idx = only_order_entry.tle_no - 1;
1093  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1094  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1095  const auto chosen_bytes =
1096  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1097  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1098  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1100  std::string fname = "get_bin_from_k_heap";
1101  const auto& oe_ti = order_entry_expr->get_type_info();
1102  llvm::Value* null_key_lv = nullptr;
1103  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1104  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1105  switch (bit_width) {
1106  case 32:
1107  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1108  break;
1109  case 64:
1110  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1111  break;
1112  default:
1113  CHECK(false);
1114  }
1115  fname += "_int" + std::to_string(bit_width) + "_t";
1116  } else {
1117  CHECK(oe_ti.is_fp());
1118  if (order_entry_lv->getType()->isDoubleTy()) {
1119  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1120  } else {
1121  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1122  }
1123  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1124  }
1125  const auto key_slot_idx =
1126  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1127  return emitCall(
1128  fname,
1129  {groups_buffer,
1130  LL_INT(n),
1131  LL_INT(row_size_quad),
1132  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1133  LL_BOOL(only_order_entry.is_desc),
1134  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1135  LL_BOOL(only_order_entry.nulls_first),
1136  null_key_lv,
1137  order_entry_lv});
1138  } else {
1139  llvm::Value* output_buffer_entry_count_lv{nullptr};
1140  if (ra_exe_unit_.use_bump_allocator) {
1141  output_buffer_entry_count_lv =
1142  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1143  CHECK(output_buffer_entry_count_lv);
1144  }
1145  const auto group_expr_lv =
1146  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1147  std::vector<llvm::Value*> args{
1148  groups_buffer,
1149  output_buffer_entry_count_lv
1150  ? output_buffer_entry_count_lv
1151  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1152  group_expr_lv,
1153  code_generator.posArg(nullptr)};
1154  if (query_mem_desc.didOutputColumnar()) {
1155  const auto columnar_output_offset =
1156  emitCall("get_columnar_scan_output_offset", args);
1157  return columnar_output_offset;
1158  }
1159  args.push_back(LL_INT(row_size_quad));
1160  return emitCall("get_scan_output_slot", args);
1161  }
1162 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define ROW_FUNC
#define LL_BUILDER
const std::list< Analyzer::OrderEntry > order_entries
#define LL_INT(v)
#define CHECK_GE(x, y)
Definition: Logger.h:210
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
size_t get_heap_key_slot_index(const std::vector< Analyzer::Expr * > &target_exprs, const size_t target_idx)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::string to_string(char const *&&v)
#define LL_BOOL(v)
const size_t limit
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:162
const SortInfo sort_info
#define LL_FP(v)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
QueryDescriptionType getQueryDescriptionType() const
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
const RelAlgExecutionUnit & ra_exe_unit_
const size_t offset
size_t getColOffInBytes(const size_t col_idx) const
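
To make the row_size_quad argument above concrete, here is a minimal worked example with made-up sizes (not taken from any real query): row-wise output measures the row in 64-bit quadwords, while columnar output passes 0.

#include <cstdint>

// Hypothetical row layout: one 8-byte key plus three 8-byte aggregate slots.
constexpr int32_t kRowBytes = 8 + 3 * 8;  // 32 bytes, a multiple of sizeof(int64_t) as checked above
constexpr int32_t kRowSizeQuad = kRowBytes / static_cast<int32_t>(sizeof(int64_t));  // 4 quadwords
static_assert(kRowSizeQuad == 4, "columnar output would pass 0 instead");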

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1413 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, executor_, get_int_type(), getBucketedCardinality(), getExprRangeInfo(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1413  {
1414  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1415  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1416  auto ft = llvm::FunctionType::get(
1417  get_int_type(32, LL_CONTEXT),
1418  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1419  false);
1420  auto key_hash_func = llvm::Function::Create(ft,
1421  llvm::Function::ExternalLinkage,
1422  "perfect_key_hash",
1423  executor_->cgen_state_->module_);
1424  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1425  mark_function_always_inline(key_hash_func);
1426  auto& key_buff_arg = *key_hash_func->args().begin();
1427  llvm::Value* key_buff_lv = &key_buff_arg;
1428  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1429  llvm::IRBuilder<> key_hash_func_builder(bb);
1430  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1431  std::vector<int64_t> cardinalities;
1432  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1433  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1434  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1435  cardinalities.push_back(getBucketedCardinality(col_range_info));
1436  }
1437  size_t dim_idx = 0;
1438  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1439  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1440  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1441  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1442  auto crt_term_lv =
1443  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1444  if (col_range_info.bucket) {
1445  crt_term_lv =
1446  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1447  }
1448  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1449  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1450  LL_INT(cardinalities[prev_dim_idx]));
1451  }
1452  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1453  ++dim_idx;
1454  }
1455  key_hash_func_builder.CreateRet(
1456  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1457  return key_hash_func;
1458 }
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
#define LL_CONTEXT
void mark_function_always_inline(llvm::Function *func)
#define LL_INT(v)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define CHECK_GT(x, y)
Definition: Logger.h:209
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:197
ColRangeInfo getExprRangeInfo(const Analyzer::Expr *expr) const
const RelAlgExecutionUnit & ra_exe_unit_
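
For reference, the IR emitted above computes a row-major index over the per-column bucketed cardinalities. The following is a plain C++ sketch of the same arithmetic, a simplified model rather than the generated code itself, assuming per-dimension min, bucket, and cardinality values:

#include <cstddef>
#include <cstdint>
#include <vector>

struct DimInfo {
  int64_t min;
  int64_t bucket;       // 0 means "no bucketing"
  int64_t cardinality;  // bucketed cardinality of this group-by column
};

// Simplified model of the generated perfect_key_hash helper: each key component is
// normalized to [0, cardinality) and the components combine like a row-major index.
int32_t perfect_key_hash_model(const int64_t* key, const std::vector<DimInfo>& dims) {
  int64_t hash = 0;
  for (size_t i = 0; i < dims.size(); ++i) {
    int64_t term = key[i] - dims[i].min;
    if (dims[i].bucket) {
      term /= dims[i].bucket;
    }
    for (size_t prev = 0; prev < i; ++prev) {
      term *= dims[prev].cardinality;  // scale by the cardinalities of earlier dimensions
    }
    hash += term;
  }
  return static_cast<int32_t>(hash);  // the generated function truncates to 32 bits
}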

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1283 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1289  {
1290  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1291  CHECK(query_mem_desc.usesGetGroupValueFast());
1292  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1293  ? "get_columnar_group_bin_offset"
1294  : "get_group_value_fast"};
1295  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1296  get_group_fn_name += "_keyless";
1297  }
1298  if (query_mem_desc.interleavedBins(co.device_type)) {
1299  CHECK(!query_mem_desc.didOutputColumnar());
1300  CHECK(query_mem_desc.hasKeylessHash());
1301  get_group_fn_name += "_semiprivate";
1302  }
1303  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1304  &*group_expr_lv_translated};
1305  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1306  query_mem_desc.mustUseBaselineSort()) {
1307  get_group_fn_name += "_with_original_key";
1308  get_group_fn_args.push_back(group_expr_lv_original);
1309  }
1310  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1311  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1312  if (!query_mem_desc.hasKeylessHash()) {
1313  if (!query_mem_desc.didOutputColumnar()) {
1314  get_group_fn_args.push_back(LL_INT(row_size_quad));
1315  }
1316  } else {
1317  if (!query_mem_desc.didOutputColumnar()) {
1318  get_group_fn_args.push_back(LL_INT(row_size_quad));
1319  }
1320  if (query_mem_desc.interleavedBins(co.device_type)) {
1321  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1322  get_group_fn_args.push_back(warp_idx);
1323  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1324  }
1325  }
1326  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1327  return std::make_tuple(&*groups_buffer,
1328  emitCall(get_group_fn_name, get_group_fn_args));
1329  }
1330  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1331 }
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
bool interleavedBins(const ExecutorDeviceType) const
#define CHECK(condition)
Definition: Logger.h:197
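
As a rough model of what the selected runtime helper does with these arguments (an assumption for illustration; the real get_group_value_fast also initializes the key and has keyless and columnar variants), the bin is the translated key offset by the range minimum and optionally divided by the bucket size:

#include <cstdint>

// Simplified sketch: locate the start of a group's row in a row-wise perfect-hash buffer.
int64_t* get_group_value_fast_model(int64_t* groups_buffer,
                                    const int64_t key,
                                    const int64_t min_key,
                                    const int64_t bucket,
                                    const int32_t row_size_quad) {
  int64_t bin = key - min_key;
  if (bucket) {
    bin /= bucket;
  }
  return groups_buffer + bin * row_size_quad;  // columnar output returns an offset instead
}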

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction window_func,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
DiamondCodegen diamond_codegen 
)
private

Definition at line 1511 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, codegenOutputSlot(), COUNT, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1515  {
1516  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1517  const auto window_func_context =
1518  WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1519  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1520  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1521  ? 0
1522  : query_mem_desc.getRowSize() / sizeof(int64_t);
1523  auto arg_it = ROW_FUNC->arg_begin();
1524  auto groups_buffer = arg_it++;
1525  CodeGenerator code_generator(executor_);
1526  if (!window_func_context->getRowNumber()) {
1527  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1528  window_func_context->setRowNumber(emitCall(
1529  "row_number_window_func",
1530  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1531  code_generator.posArg(nullptr)}));
1532  }
1533  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1534  get_int_type(32, LL_CONTEXT));
1535  llvm::Value* entry_count_lv =
1536  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1537  std::vector<llvm::Value*> args{
1538  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1539  if (query_mem_desc.didOutputColumnar()) {
1540  const auto columnar_output_offset =
1541  emitCall("get_columnar_scan_output_offset", args);
1542  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1543  }
1544  args.push_back(LL_INT(row_size_quad));
1545  return emitCall("get_scan_output_slot", args);
1546  }
1547  auto arg_it = ROW_FUNC->arg_begin();
1548  auto groups_buffer = arg_it++;
1549  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1550 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:1447
#define ROW_FUNC
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:42
llvm::Value * codegenOutputSlot(llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo arg_type,
const TargetInfo agg_info,
llvm::Value *  target 
)
private

Definition at line 1460 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1462  {
1463  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1464  const auto& agg_type = agg_info.sql_type;
1465  const size_t chosen_bytes = agg_type.get_size();
1466 
1467  bool need_conversion{false};
1468  llvm::Value* arg_null{nullptr};
1469  llvm::Value* agg_null{nullptr};
1470  llvm::Value* target_to_cast{target};
1471  if (arg_type.is_fp()) {
1472  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1473  if (agg_type.is_fp()) {
1474  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1475  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1476  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1477  need_conversion = true;
1478  }
1479  } else {
1480  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1481  return target;
1482  }
1483  } else {
1484  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1485  if (agg_type.is_fp()) {
1486  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1487  need_conversion = true;
1488  target_to_cast = executor_->castToFP(target, arg_type, agg_type);
1489  } else {
1490  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1491  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1492  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1493  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1494  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1495  need_conversion = true;
1496  }
1497  }
1498  }
1499  if (need_conversion) {
1500  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1501  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1502  return LL_BUILDER.CreateSelect(
1503  cmp,
1504  agg_null,
1505  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1506  } else {
1507  return target;
1508  }
1509 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:321
#define LL_BUILDER
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
bool is_fp() const
Definition: sqltypes.h:482
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLAgg agg_kind
Definition: TargetInfo.h:41
Definition: sqldefs.h:76
#define CHECK(condition)
Definition: Logger.h:197
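
The select generated above amounts to a null-sentinel translation. A scalar C++ sketch of the same idea, purely illustrative since the real code operates on LLVM IR values and bit widths:

// If the incoming value equals the argument type's null sentinel, substitute the
// aggregate type's null sentinel; otherwise cast the value to the aggregate type.
template <typename ArgT, typename AggT>
AggT convert_null_if_any_model(const ArgT value, const ArgT arg_null, const AggT agg_null) {
  return value == arg_null ? agg_null : static_cast<AggT>(value);
}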

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 2016 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenApproxMedian(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

2017  {
2018  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2019  return executor_->cgen_state_->emitCall(fname, args);
2020 }
#define AUTOMATIC_IR_METADATA(CGENSTATE)

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1833 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1833  {
1834  CHECK_LT(off, 0);
1835  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1836  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1837  LL_BUILDER.CreateBitCast(lit_buff_lv,
1838  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1839  LL_INT(off)));
1840 }
#define ROW_FUNC
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:162
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo col_range_info)
staticprivate

Definition at line 292 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

292  {
293  checked_int64_t crt_col_cardinality =
294  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
295  if (col_range_info.bucket) {
296  crt_col_cardinality /= col_range_info.bucket;
297  }
298  return static_cast<int64_t>(crt_col_cardinality +
299  (1 + (col_range_info.has_nulls ? 1 : 0)));
300 }
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t
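
A quick worked restatement of the formula above, with made-up values: a column in [0, 99] with bucket size 10 and nulls present yields 99 / 10 + 1 + 1 = 11 entries.

#include <cstdint>

// Same arithmetic as getBucketedCardinality above, without the checked integers.
int64_t bucketed_cardinality_model(const int64_t min,
                                   const int64_t max,
                                   const int64_t bucket,
                                   const bool has_nulls) {
  int64_t cardinality = max - min;
  if (bucket) {
    cardinality /= bucket;  // one entry per bucket instead of one per value
  }
  return cardinality + 1 + (has_nulls ? 1 : 0);  // +1 for max itself, +1 for the null key
}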

+ Here is the caller graph for this function:

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 135 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), getBucketedCardinality(), getExprRangeInfo(), GPU, group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, SortInfo::order_entries, RelAlgExecutionUnit::quals, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

135  {
136  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
137  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
138  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
139  // can expect this to be true anyway for grouped queries since the precise version
140  // uses significantly more memory.
141  const int64_t baseline_threshold =
142  has_count_distinct(ra_exe_unit_)
143  ? (device_type_ == ExecutorDeviceType::GPU ? (Executor::baseline_threshold / 4)
144  : Executor::baseline_threshold)
145  : Executor::baseline_threshold;
146  if (ra_exe_unit_.groupby_exprs.size() != 1) {
147  try {
148  checked_int64_t cardinality{1};
149  bool has_nulls{false};
150  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
151  auto col_range_info = getExprRangeInfo(groupby_expr.get());
152  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
153  // going through baseline hash if a non-integer type is encountered
154  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
155  }
156  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
157  CHECK_GE(crt_col_cardinality, 0);
158  cardinality *= crt_col_cardinality;
159  if (col_range_info.has_nulls) {
160  has_nulls = true;
161  }
162  }
163  // For zero or high cardinalities, use baseline layout.
164  if (!cardinality || cardinality > baseline_threshold) {
165  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
166  }
167  return {QueryDescriptionType::GroupByPerfectHash,
168  0,
169  int64_t(cardinality),
170  0,
171  has_nulls};
172  } catch (...) { // overflow when computing cardinality
173  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
174  }
175  }
176  // For single-column group by on high-precision timestamps, force baseline hash due to wide ranges
177  // we are likely to encounter when applying quals to the expression range
178  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
179  // the range is small enough
180  if (ra_exe_unit_.groupby_exprs.front() &&
181  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
182  ra_exe_unit_.simple_quals.size() > 0) {
183  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
184  }
185  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
186  if (!ra_exe_unit_.groupby_exprs.front()) {
187  return col_range_info;
188  }
189  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
190  const int64_t col_count =
191  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
192  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
193  if (has_count_distinct(ra_exe_unit_)) {
194  max_entry_count = std::min(max_entry_count, baseline_threshold);
195  }
196  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
197  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
198  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
199 
200  const bool has_filters =
201  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
202  if (has_filters &&
203  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
204  // if filters are present, we can use the filter to narrow the cardinality of the
205  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
206  // off attempting perfect hash (since we know the range will be made of
207  // monotonically increasing numbers from min to max for dictionary encoded strings)
208  // and failing later due to excessive memory use.
209  // Check the conditions where baseline hash can provide a performance increase and
210  // return baseline hash (potentially forcing an estimator query) as the range type.
211  // Otherwise, return col_range_info which will likely be perfect hash, though could
212  // be baseline from a previous call of this function prior to the estimator query.
213  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
214  // TODO(adb): allow some sorts to pass through this block by centralizing sort
215  // algorithm decision making
216  if (has_count_distinct(ra_exe_unit_) &&
217  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
218  // always use baseline hash for column range too big for perfect hash with count
219  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
220  // hash group by in this case.
221  return {QueryDescriptionType::GroupByBaselineHash,
222  col_range_info.min,
223  col_range_info.max,
224  0,
225  col_range_info.has_nulls};
226  } else {
227  // use original col range for sort
228  return col_range_info;
229  }
230  }
231  // if filters are present and the filtered range is less than the cardinality of
232  // the column, consider baseline hash
233  if (group_cardinality_estimation_ &&
234  cardinality_estimate_less_than_column_range(*group_cardinality_estimation_,
235  col_range_info)) {
236  return {QueryDescriptionType::GroupByBaselineHash,
237  col_range_info.min,
238  col_range_info.max,
239  0,
240  col_range_info.has_nulls};
241  }
242  }
243  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(),
244  *executor_->catalog_)) &&
245  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
246  !col_range_info.bucket) {
247  return {QueryDescriptionType::GroupByBaselineHash,
248  col_range_info.min,
249  col_range_info.max,
250  0,
251  col_range_info.has_nulls};
252  }
253  return col_range_info;
254 }
std::vector< Analyzer::Expr * > target_exprs
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t
bool is_column_range_too_big_for_perfect_hash(const ColRangeInfo &col_range_info, const int64_t max_entry_count)
bool expr_is_rowid(const Analyzer::Expr *expr, const Catalog_Namespace::Catalog &cat)
const std::list< Analyzer::OrderEntry > order_entries
static const size_t baseline_threshold
Definition: Execute.h:1021
#define CHECK_GE(x, y)
Definition: Logger.h:210
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool has_count_distinct(const RelAlgExecutionUnit &ra_exe_unit)
const SortInfo sort_info
const ExecutorDeviceType device_type_
bool cardinality_estimate_less_than_column_range(const int64_t cardinality_estimate, const ColRangeInfo &col_range_info)
const std::optional< int64_t > group_cardinality_estimation_
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:197
ColRangeInfo getExprRangeInfo(const Analyzer::Expr *expr) const
const RelAlgExecutionUnit & ra_exe_unit_
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
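
To make the multi-column branch above concrete, here is a small sketch of the cardinality-product check it performs. The helper name and overflow handling are illustrative (the real code relies on boost checked integers), but the decision is the same: the per-column bucketed cardinalities are multiplied, and zero cardinality, overflow, or a product above the baseline threshold sends the query to baseline hash.

#include <cstdint>
#include <optional>
#include <vector>

// Returns the combined perfect-hash entry count, or nullopt when baseline hash should be used.
std::optional<int64_t> combined_perfect_hash_cardinality(
    const std::vector<int64_t>& per_column_cardinalities,
    const int64_t baseline_threshold) {
  int64_t product = 1;
  for (const int64_t cardinality : per_column_cardinalities) {
    if (cardinality <= 0 || product > baseline_threshold / cardinality) {
      return std::nullopt;  // zero/unknown cardinality, or the product would exceed the threshold
    }
    product *= cardinality;
  }
  return product;
}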

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ColRangeInfo GroupByAndAggregate::getExprRangeInfo ( const Analyzer::Expr expr) const
private

Definition at line 256 of file GroupByAndAggregate.cpp.

References CHECK, Double, executor_, Float, getExpressionRange(), GroupByBaselineHash, GroupByPerfectHash, Integer, Invalid, NonGroupedAggregate, Projection, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::simple_quals.

Referenced by codegenGroupBy(), codegenPerfectHashFunction(), getColRangeInfo(), gpuCanHandleOrderEntries(), and initCountDistinctDescriptors().

256  {
257  if (!expr) {
258  return {QueryDescriptionType::Projection, 0, 0, 0, false};
259  }
260 
261  const auto expr_range = getExpressionRange(
262  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
263  switch (expr_range.getType()) {
264  case ExpressionRangeType::Integer: {
265  if (expr_range.getIntMin() > expr_range.getIntMax()) {
266  return {
267  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
268  }
269  return {QueryDescriptionType::GroupByPerfectHash,
270  expr_range.getIntMin(),
271  expr_range.getIntMax(),
272  expr_range.getBucket(),
273  expr_range.hasNulls()};
274  }
275  case ExpressionRangeType::Float:
276  case ExpressionRangeType::Double: {
277  if (expr_range.getFpMin() > expr_range.getFpMax()) {
278  return {
279  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
280  }
281  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
282  }
283  case ExpressionRangeType::Invalid:
284  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
285  default:
286  CHECK(false);
287  }
288  CHECK(false);
289  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
290 }
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
const std::vector< InputTableInfo > & query_infos_
#define CHECK(condition)
Definition: Logger.h:197
const RelAlgExecutionUnit & ra_exe_unit_
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

KeylessInfo GroupByAndAggregate::getKeylessInfo ( const std::vector< Analyzer::Expr * > &  target_expr_list,
const bool  is_group_by 
) const
private

This function goes through all target expressions and answers two questions:

  1. Is it possible to use a keyless hash?
  2. If so, which aggregate expression should be used to represent the key's presence when needed (e.g., to detect empty entries in the result set)?

NOTE: Keyless hash is currently only valid with single-column group by.
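
As a sketch of why question 2 matters (hypothetical helper, not part of the class): with keyless hash the group key is implicit in the bin index, so an empty bin can only be recognized by the chosen aggregate column still holding its initial value, which the checks below guarantee real data can never produce.

#include <cstddef>
#include <cstdint>

// Hypothetical helper: decide whether a keyless-hash bin is empty by inspecting the
// aggregate column selected by getKeylessInfo(). This is only sound because that
// selection guarantees real aggregate results can never equal the slot's initial value.
bool keyless_bin_is_empty(const int64_t* agg_column,
                          const size_t bin,
                          const int64_t agg_init_value) {
  return agg_column[bin] == agg_init_value;
}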

Definition at line 701 of file GroupByAndAggregate.cpp.

References agg_arg(), CHECK, constrained_not_null(), Double, executor_, Float, g_bigint_count, get_agg_initial_val(), get_compact_type(), get_target_info(), getExpressionRange(), Integer, Invalid, is_distinct_target(), kAVG, kCOUNT, kMAX, kMIN, kSUM, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, and takes_float_argument().

Referenced by initQueryMemoryDescriptorImpl().

703  {
704  bool keyless{true}, found{false};
705  int32_t num_agg_expr{0};
706  int32_t index{0};
707  for (const auto target_expr : target_expr_list) {
708  const auto agg_info = get_target_info(target_expr, g_bigint_count);
709  const auto chosen_type = get_compact_type(agg_info);
710  if (agg_info.is_agg) {
711  num_agg_expr++;
712  }
713  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
714  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
715  CHECK(agg_expr);
716  const auto arg_expr = agg_arg(target_expr);
717  const bool float_argument_input = takes_float_argument(agg_info);
718  switch (agg_info.agg_kind) {
719  case kAVG:
720  ++index;
721  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
722  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
723  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
724  expr_range_info.hasNulls()) {
725  break;
726  }
727  }
728  found = true;
729  break;
730  case kCOUNT:
731  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
732  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
733  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
734  expr_range_info.hasNulls()) {
735  break;
736  }
737  }
738  found = true;
739  break;
740  case kSUM: {
741  auto arg_ti = arg_expr->get_type_info();
742  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
743  arg_ti.set_notnull(true);
744  }
745  if (!arg_ti.get_notnull()) {
746  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
747  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
748  !expr_range_info.hasNulls()) {
749  found = true;
750  }
751  } else {
752  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
753  switch (expr_range_info.getType()) {
754  case ExpressionRangeType::Float:
755  case ExpressionRangeType::Double:
756  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
757  found = true;
758  }
759  break;
760  case ExpressionRangeType::Integer:
761  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
762  found = true;
763  }
764  break;
765  default:
766  break;
767  }
768  }
769  break;
770  }
771  case kMIN: {
772  CHECK(agg_expr && agg_expr->get_arg());
773  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
774  if (arg_ti.is_string() || arg_ti.is_buffer()) {
775  break;
776  }
777  auto expr_range_info =
778  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
779  auto init_max = get_agg_initial_val(agg_info.agg_kind,
780  chosen_type,
781  is_group_by || float_argument_input,
782  float_argument_input ? sizeof(float) : 8);
783  switch (expr_range_info.getType()) {
784  case ExpressionRangeType::Float:
785  case ExpressionRangeType::Double: {
786  auto double_max =
787  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
788  if (expr_range_info.getFpMax() < double_max) {
789  found = true;
790  }
791  break;
792  }
793  case ExpressionRangeType::Integer:
794  if (expr_range_info.getIntMax() < init_max) {
795  found = true;
796  }
797  break;
798  default:
799  break;
800  }
801  break;
802  }
803  case kMAX: {
804  CHECK(agg_expr && agg_expr->get_arg());
805  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
806  if (arg_ti.is_string() || arg_ti.is_buffer()) {
807  break;
808  }
809  auto expr_range_info =
810  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
811  // NULL sentinel and init value for kMAX are identical, which results in
812  // ambiguity in detecting empty keys in presence of nulls.
813  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
814  expr_range_info.hasNulls()) {
815  break;
816  }
817  auto init_min = get_agg_initial_val(agg_info.agg_kind,
818  chosen_type,
819  is_group_by || float_argument_input,
820  float_argument_input ? sizeof(float) : 8);
821  switch (expr_range_info.getType()) {
822  case ExpressionRangeType::Float:
823  case ExpressionRangeType::Double: {
824  auto double_min =
825  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
826  if (expr_range_info.getFpMin() > double_min) {
827  found = true;
828  }
829  break;
830  }
831  case ExpressionRangeType::Integer:
832  if (expr_range_info.getIntMin() > init_min) {
833  found = true;
834  }
835  break;
836  default:
837  break;
838  }
839  break;
840  }
841  default:
842  keyless = false;
843  break;
844  }
845  }
846  if (!keyless) {
847  break;
848  }
849  if (!found) {
850  ++index;
851  }
852  }
853 
854  // shouldn't use keyless for projection only
855  return {
856  keyless && found,
857  index,
858  };
859 }
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:79
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:134
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
bool g_bigint_count
Definition: sqldefs.h:75
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:130
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
const std::vector< InputTableInfo > & query_infos_
Definition: sqldefs.h:76
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:197
const RelAlgExecutionUnit & ra_exe_unit_
Definition: sqldefs.h:74
Definition: sqldefs.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo col_range_info,
const size_t  shard_count 
) const
private

Definition at line 341 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

342  {
343  size_t device_count{0};
344  if (device_type_ == ExecutorDeviceType::GPU) {
345  auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
346  CHECK(cuda_mgr);
347  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
348  CHECK_GT(device_count, 0u);
349  }
350 
351  int64_t bucket{col_range_info.bucket};
352 
353  if (shard_count) {
354  CHECK(!col_range_info.bucket);
355  /*
356  When a node has fewer devices than the shard count:
357  a) In a distributed setup, the minimum distance between two keys would be
358  device_count, because shards are stored consecutively across the physical tables.
359  For example, if a shard column has values 0 to 9 and there are 3 shards on each
360  leaf, node 1 would hold values 0,1,2,6,7,8 and node 2 would hold values 3,4,5,9.
361  If each leaf node has only 1 device, all of that node's keys are loaded onto that
362  single device.
363 
364  b) In a single-node setup, the distance would be the minimum of device_count and
365  (shard_count - device_count). For example, on a single-node server with 3 devices,
366  a shard column with values 0 to 9 in a table with 4 shards maps keys to devices as:
367  device 1 - 4,8,3,7; device 2 - 1,5,9; device 3 - 2,6. The bucket value would be
368  4 (shards) - 3 (devices) = 1, i.e. the minimum of device_count and the difference.
369 
370  When a node has a device count equal to or greater than the shard count, the
371  minimum distance is always at least shard_count * number of leaf nodes.
372 
373  */
374  if (device_count < shard_count) {
375  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
376  : std::min(device_count, shard_count - device_count);
377  } else {
378  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
379  }
380  }
381 
382  return bucket;
383 }
#define CHECK_GT(x, y)
Definition: Logger.h:209
const ExecutorDeviceType device_type_
#define CHECK(condition)
Definition: Logger.h:197
size_t g_leaf_count
Definition: ParserNode.cpp:74
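
A condensed restatement of the branch above, with a worked example in the trailing comment (a sketch using the same inputs as the member function; the values are made up):

#include <algorithm>
#include <cstddef>
#include <cstdint>

// Same decision as the body of getShardedTopBucket, without the executor plumbing.
int64_t sharded_top_bucket_model(const size_t device_count,
                                 const size_t shard_count,
                                 const size_t leaf_count) {
  if (device_count < shard_count) {
    return leaf_count ? std::max(device_count, static_cast<size_t>(1))
                      : std::min(device_count, shard_count - device_count);
  }
  return shard_count * std::max(leaf_count, static_cast<size_t>(1));
}
// e.g. single node (leaf_count 0), 3 devices, 4 shards -> min(3, 4 - 3) = 1;
// 2 leaf nodes with devices >= shards -> shard_count * 2.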

+ Here is the caller graph for this function:

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 861 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, Analyzer::AggExpr::get_arg(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

862  {
863  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
864  return false;
865  }
866  for (const auto& order_entry : order_entries) {
867  CHECK_GE(order_entry.tle_no, 1);
868  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
869  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
870  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
871  return false;
872  }
873  // TODO(alex): relax the restrictions
874  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
875  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
876  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
877  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
878  return false;
879  }
880  if (agg_expr->get_arg()) {
881  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
882  if (arg_ti.is_fp()) {
883  return false;
884  }
885  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
886  // TODO(adb): QMD not actually initialized here?
887  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
888  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
889  expr_range_info.has_nulls) &&
890  order_entry.is_desc == order_entry.nulls_first) {
891  return false;
892  }
893  }
894  const auto& target_ti = target_expr->get_type_info();
895  CHECK(!target_ti.is_buffer());
896  if (!target_ti.is_integer()) {
897  return false;
898  }
899  }
900  return true;
901 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_GE(x, y)
Definition: Logger.h:210
Expr * get_arg() const
Definition: Analyzer.h:1096
Definition: sqldefs.h:73
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:208
#define CHECK(condition)
Definition: Logger.h:197
ColRangeInfo getExprRangeInfo(const Analyzer::Expr *expr) const
const RelAlgExecutionUnit & ra_exe_unit_
Definition: sqldefs.h:74
Definition: sqldefs.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

CountDistinctDescriptors GroupByAndAggregate::initCountDistinctDescriptors ( )
private

Definition at line 605 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK, CHECK_GE, device_type_, g_bigint_count, g_enable_watchdog, g_hll_precision_bits, Analyzer::AggExpr::get_arg(), get_count_distinct_sub_bitmap_count(), get_target_info(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, hll_size_for_rate(), Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, kINT, Projection, ra_exe_unit_, StdSet, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

605  {
606  CountDistinctDescriptors count_distinct_descriptors;
607  for (const auto target_expr : ra_exe_unit_.target_exprs) {
608  auto agg_info = get_target_info(target_expr, g_bigint_count);
609  if (is_distinct_target(agg_info)) {
610  CHECK(agg_info.is_agg);
611  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
612  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
613  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
614  if (arg_ti.is_bytes()) {
615  throw std::runtime_error(
616  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
617  }
618  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_buffer()) {
619  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
620  }
621  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
622  throw std::runtime_error(
623  "APPROX_COUNT_DISTINCT on geometry columns not supported");
624  }
625  if (agg_info.is_distinct && arg_ti.is_geometry()) {
626  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
627  }
628  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
629  auto arg_range_info =
630  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
631  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
632  int64_t bitmap_sz_bits{0};
633  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
634  const auto error_rate = agg_expr->get_error_rate();
635  if (error_rate) {
636  CHECK(error_rate->get_type_info().get_type() == kINT);
637  CHECK_GE(error_rate->get_constval().intval, 1);
638  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
639  } else {
640  bitmap_sz_bits = g_hll_precision_bits;
641  }
642  }
643  if (arg_range_info.isEmpty()) {
644  count_distinct_descriptors.emplace_back(
645  CountDistinctDescriptor{CountDistinctImplType::Bitmap,
646  0,
647  64,
648  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
649  device_type_,
650  1});
651  continue;
652  }
653  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
654  !(arg_ti.is_buffer() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
655  // implementation for arrays
656  count_distinct_impl_type = CountDistinctImplType::Bitmap;
657  if (agg_info.agg_kind == kCOUNT) {
658  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
659  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
660  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
661  count_distinct_impl_type = CountDistinctImplType::StdSet;
662  }
663  }
664  }
665  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
666  count_distinct_impl_type == CountDistinctImplType::StdSet &&
667  !(arg_ti.is_array() || arg_ti.is_geometry())) {
668  count_distinct_impl_type = CountDistinctImplType::Bitmap;
669  }
670 
671  if (g_enable_watchdog && !(arg_range_info.isEmpty()) &&
672  count_distinct_impl_type == CountDistinctImplType::StdSet) {
673  throw WatchdogException("Cannot use a fast path for COUNT distinct");
674  }
675  const auto sub_bitmap_count =
676  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit_, device_type_);
677  count_distinct_descriptors.emplace_back(
678  CountDistinctDescriptor{count_distinct_impl_type,
679  arg_range_info.min,
680  bitmap_sz_bits,
681  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
682  device_type_,
683  sub_bitmap_count});
684  } else {
685  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
686  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
687  }
688  }
689  return count_distinct_descriptors;
690 }
std::vector< Analyzer::Expr * > target_exprs
bool g_enable_watchdog
int hll_size_for_rate(const int err_percent)
Definition: HyperLogLog.h:115
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:79
#define CHECK_GE(x, y)
Definition: Logger.h:210
Expr * get_arg() const
Definition: Analyzer.h:1096
int g_hll_precision_bits
size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
std::vector< CountDistinctDescriptor > CountDistinctDescriptors
Definition: CountDistinct.h:35
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:130
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
const ExecutorDeviceType device_type_
Definition: sqldefs.h:76
CountDistinctImplType
#define CHECK(condition)
Definition: Logger.h:197
ColRangeInfo getExprRangeInfo(const Analyzer::Expr *expr) const
Definition: sqltypes.h:44
const RelAlgExecutionUnit & ra_exe_unit_
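
To put numbers on the bitmap path above (illustrative values, not from the source): an exact COUNT(DISTINCT) over a perfect-hash-able range needs one bit per possible value, and the code falls back to the std::set-based implementation when that count is non-positive or exceeds MAX_BITMAP_BITS.

#include <cstdint>

// One bit per possible value in [min_val, max_val]; e.g. a column in [0, 999999]
// needs 1,000,000 bits (~122 KiB) per group.
int64_t count_distinct_bitmap_bits(const int64_t min_val, const int64_t max_val) {
  return max_val - min_val + 1;
}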

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo render_info,
const bool  output_columnar_hint 
)
private

Definition at line 385 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

390  {
391  const auto shard_count =
392  device_type_ == ExecutorDeviceType::GPU
393  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
394  : 0;
395  bool sort_on_gpu_hint =
396  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
397  !ra_exe_unit_.sort_info.order_entries.empty() &&
398  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries);
399  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
400  // but the total output buffer size would be too big or it's a sharded top query.
401  // For the sake of managing risk, use the new result set way very selectively for
402  // this case only (alongside the baseline layout we've enabled for a while now).
403  bool must_use_baseline_sort = shard_count;
404  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
405  while (true) {
406  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
407  max_groups_buffer_entry_count,
408  crt_min_byte_width,
409  sort_on_gpu_hint,
410  render_info,
411  must_use_baseline_sort,
412  output_columnar_hint);
413  CHECK(query_mem_desc);
414  if (query_mem_desc->sortOnGpu() &&
415  (query_mem_desc->getBufferSizeBytes(device_type_) +
416  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
417  2 * 1024 * 1024 * 1024L) {
418  must_use_baseline_sort = true;
419  sort_on_gpu_hint = false;
420  } else {
421  break;
422  }
423  }
424  return query_mem_desc;
425 }
bool gpuCanHandleOrderEntries(const std::list< Analyzer::OrderEntry > &order_entries)
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
const std::list< Analyzer::OrderEntry > order_entries
const SortInfo sort_info
const ExecutorDeviceType device_type_
#define CHECK(condition)
Definition: Logger.h:197
const RelAlgExecutionUnit & ra_exe_unit_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
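
The retry loop above abandons the GPU-sort layout once the sort buffers would no longer fit comfortably on the device. A sketch of that condition, with a stand-in for the align_to_int64 helper referenced above:

#include <cstddef>
#include <cstdint>

// Stand-in for the align_to_int64 helper referenced above (assumption for illustration).
inline size_t align_to_int64_model(const size_t addr) {
  return (addr + 7) & ~static_cast<size_t>(7);
}

// GPU sort is abandoned (and baseline sort forced) once the output buffer plus the
// int32 index buffer used for sorting would exceed 2 GiB.
bool gpu_sort_buffers_too_big(const size_t buffer_size_bytes, const size_t entry_count) {
  return buffer_size_bytes + align_to_int64_model(entry_count * sizeof(int32_t)) >
         2ull * 1024 * 1024 * 1024;
}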

+ Here is the call graph for this function:

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 427 of file GroupByAndAggregate.cpp.

References addTransientStringLiterals(), device_type_, executor_, g_enable_watchdog, get_col_byte_widths(), getColRangeInfo(), getKeylessInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), initCountDistinctDescriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), RelAlgExecutionUnit::target_exprs, and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

434  {
435  addTransientStringLiterals();
436 
437  const auto count_distinct_descriptors = initCountDistinctDescriptors();
438 
439  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs);
440 
441  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
442 
443  auto col_range_info_nosharding = getColRangeInfo();
444 
445  const auto shard_count =
446  device_type_ == ExecutorDeviceType::GPU
447  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
448  : 0;
449 
450  const auto col_range_info =
451  ColRangeInfo{col_range_info_nosharding.hash_type_,
452  col_range_info_nosharding.min,
453  col_range_info_nosharding.max,
454  getShardedTopBucket(col_range_info_nosharding, shard_count),
455  col_range_info_nosharding.has_nulls};
456 
457  // Non-grouped aggregates do not support accessing aggregated ranges
458  // Keyless hash is currently only supported with single-column perfect hash
459  const auto keyless_info = !(is_group_by && col_range_info.hash_type_ ==
460  QueryDescriptionType::GroupByPerfectHash)
461  ? KeylessInfo{false, -1}
462  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
463 
464  if (g_enable_watchdog &&
465  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
466  max_groups_buffer_entry_count > 120000000) ||
467  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
468  ra_exe_unit_.groupby_exprs.size() == 1 &&
469  (col_range_info.max - col_range_info.min) /
470  std::max(col_range_info.bucket, int64_t(1)) >
471  130000000))) {
472  throw WatchdogException("Query would use too much memory");
473  }
474  try {
475  return QueryMemoryDescriptor::init(executor_,
476  ra_exe_unit_,
477  query_infos_,
478  col_range_info,
479  keyless_info,
480  allow_multifrag,
481  device_type_,
482  crt_min_byte_width,
483  sort_on_gpu_hint,
484  shard_count,
485  max_groups_buffer_entry_count,
486  render_info,
487  count_distinct_descriptors,
488  must_use_baseline_sort,
489  output_columnar_hint,
490  /*streaming_top_n_hint=*/true);
491  } catch (const StreamingTopNOOM& e) {
492  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
493  return QueryMemoryDescriptor::init(executor_,
494  ra_exe_unit_,
495  query_infos_,
496  col_range_info,
497  keyless_info,
498  allow_multifrag,
499  device_type_,
500  crt_min_byte_width,
501  sort_on_gpu_hint,
502  shard_count,
503  max_groups_buffer_entry_count,
504  render_info,
505  count_distinct_descriptors,
506  must_use_baseline_sort,
507  output_columnar_hint,
508  /*streaming_top_n_hint=*/false);
509  }
510 }
std::vector< Analyzer::Expr * > target_exprs
bool g_enable_watchdog
#define LOG(tag)
Definition: Logger.h:188
ColRangeInfo getColRangeInfo()
QueryDescriptionType hash_type_
static std::unique_ptr< QueryMemoryDescriptor > init(const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const CountDistinctDescriptors count_distinct_descriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
CountDistinctDescriptors initCountDistinctDescriptors()
KeylessInfo getKeylessInfo(const std::vector< Analyzer::Expr * > &target_expr_list, const bool is_group_by) const
int64_t getShardedTopBucket(const ColRangeInfo &col_range_info, const size_t shard_count) const
const std::vector< InputTableInfo > & query_infos_
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list)
const ExecutorDeviceType device_type_
const RelAlgExecutionUnit & ra_exe_unit_
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value const *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions co 
) const
private

Definition at line 30 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

33  {
34  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
35  llvm::isa<llvm::AllocaInst>(val_ptr) &&
36  val_ptr->getType() ==
37  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
38  "agg_id" == agg_base_name);
39 }
ExecutorDeviceType device_type

+ Here is the caller graph for this function:

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 41 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

41  {
42  executor_->cgen_state_->ir_builder_.CreateCall(
43  executor_->cgen_state_->module_->getFunction("force_sync"));
44 }

+ Here is the caller graph for this function:

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog catalog 
)
static

Definition at line 2039 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

2041  {
2042  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
2043  return 0;
2044  }
2045  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2046  const auto grouped_col_expr =
2047  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
2048  if (!grouped_col_expr) {
2049  continue;
2050  }
2051  if (grouped_col_expr->get_table_id() <= 0) {
2052  return 0;
2053  }
2054  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
2055  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
2056  return td->nShards;
2057  }
2058  }
2059  return 0;
2060 }
const std::list< Analyzer::OrderEntry > order_entries
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
const size_t limit
const SortInfo sort_info
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 308 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 309 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 306 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 307 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 310 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 311 of file GroupByAndAggregate.h.

Member Data Documentation

const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 304 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

bool GroupByAndAggregate::output_columnar_
private

Definition at line 301 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private
std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 300 of file GroupByAndAggregate.h.

Referenced by addTransientStringLiterals().


The documentation for this class was generated from the following files:

GroupByAndAggregate.h
GroupByAndAggregate.cpp
MaxwellCodegenPatch.cpp