OmniSciDB  cde582ebc3
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
llvm::Value * codegenVarlenOutputBuffer (const QueryMemoryDescriptor &query_mem_desc)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored. More...
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
void codegenApproxQuantile (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 61 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 398 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, and ra_exe_unit_.

405  : executor_(executor)
406  , ra_exe_unit_(ra_exe_unit)
407  , query_infos_(query_infos)
408  , row_set_mem_owner_(row_set_mem_owner)
409  , device_type_(device_type)
410  , group_cardinality_estimation_(group_cardinality_estimation) {
411  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
412  if (!groupby_expr) {
413  continue;
414  }
415  const auto& groupby_ti = groupby_expr->get_type_info();
416  if (groupby_ti.is_bytes()) {
417  throw std::runtime_error(
418  "Cannot group by string columns which are not dictionary encoded.");
419  }
420  if (groupby_ti.is_buffer()) {
421  throw std::runtime_error("Group by buffer not supported");
422  }
423  if (groupby_ti.is_geometry()) {
424  throw std::runtime_error("Group by geometry not supported");
425  }
426  }
427 }
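
A compilable distillation of the constructor's type screening, using a hypothetical stand-in for SQLTypeInfo (the real predicates are is_bytes(), is_buffer() and is_geometry(), as in the listing above):

#include <stdexcept>

// Hypothetical stand-in for the SQLTypeInfo predicates the constructor uses.
struct GroupByTypeFlags {
  bool is_bytes;     // none-encoded (non-dictionary) string
  bool is_buffer;    // array / varlen buffer
  bool is_geometry;  // POINT, LINESTRING, POLYGON, ...
};

// Same checks, same order, same error messages as the constructor: any
// unsupported group-by type aborts query compilation with a runtime_error.
void validate_groupby_type(const GroupByTypeFlags& ti) {
  if (ti.is_bytes) {
    throw std::runtime_error(
        "Cannot group by string columns which are not dictionary encoded.");
  }
  if (ti.is_buffer) {
    throw std::runtime_error("Group by buffer not supported");
  }
  if (ti.is_geometry) {
    throw std::runtime_error("Group by geometry not supported");
  }
}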

Member Function Documentation

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 2041 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

2041  {
2042  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2043  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
2044  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
2045  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
2046 
2047  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
2048 }
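The generated check behaves like the scalar sketch below (illustrative only; the real error path is the block that emitErrorCheck() registers under the label "rc"):

// Rough scalar equivalent of the emitted IR: retCode is compared against
// zero (rc_check_condition) and any non-zero value takes the error path.
int check_error_code(int ret_code) {
  if (ret_code != 0) {
    return ret_code;  // error path: propagate the code to the caller
  }
  return 0;  // ok path: fall through into the rest of the row function
}
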

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 905 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), codegenVarlenOutputBuffer(), DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, LLVM_ALIGN, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

909  {
910  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
911  CHECK(filter_result);
912 
913  bool can_return_error = false;
914  llvm::BasicBlock* filter_false{nullptr};
915 
916  {
917  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
918 
919  if (executor_->isArchMaxwell(co.device_type)) {
920  prependForceSync();
921  }
922  DiamondCodegen filter_cfg(filter_result,
923  executor_,
924  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
925  "filter", // filter_true and filter_false basic blocks
926  nullptr,
927  false);
928  filter_false = filter_cfg.cond_false_;
929 
930  if (is_group_by) {
931  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
932  !query_mem_desc.useStreamingTopN()) {
933  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
934  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
935  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
936  llvm::Value* old_total_matched_val{nullptr};
937  if (co.device_type == ExecutorDeviceType::GPU) {
938  old_total_matched_val =
939  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
940  total_matched_ptr,
941  LL_INT(int32_t(1)),
942 #if LLVM_VERSION_MAJOR > 12
943  LLVM_ALIGN(8),
944 #endif
945  llvm::AtomicOrdering::Monotonic);
946  } else {
947  old_total_matched_val = LL_BUILDER.CreateLoad(
948  total_matched_ptr->getType()->getPointerElementType(), total_matched_ptr);
949  LL_BUILDER.CreateStore(
950  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
951  total_matched_ptr);
952  }
953  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
954  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
955  }
956 
957  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
958  auto varlen_output_buffer = codegenVarlenOutputBuffer(query_mem_desc);
959  if (query_mem_desc.usesGetGroupValueFast() ||
960  query_mem_desc.getQueryDescriptionType() ==
961  QueryDescriptionType::Projection) {
962  if (query_mem_desc.getGroupbyColCount() > 1) {
963  filter_cfg.setChainToNext();
964  }
965  // Don't generate null checks if the group slot is guaranteed to be non-null,
966  // as it's the case for get_group_value_fast* family.
967  can_return_error = codegenAggCalls(agg_out_ptr_w_idx,
968  varlen_output_buffer,
969  {},
970  query_mem_desc,
971  co,
972  gpu_smem_context,
973  filter_cfg);
974  } else {
975  {
976  llvm::Value* nullcheck_cond{nullptr};
977  if (query_mem_desc.didOutputColumnar()) {
978  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
979  LL_INT(int32_t(0)));
980  } else {
981  nullcheck_cond = LL_BUILDER.CreateICmpNE(
982  std::get<0>(agg_out_ptr_w_idx),
983  llvm::ConstantPointerNull::get(
984  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
985  }
986  DiamondCodegen nullcheck_cfg(
987  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
988  codegenAggCalls(agg_out_ptr_w_idx,
989  varlen_output_buffer,
990  {},
991  query_mem_desc,
992  co,
993  gpu_smem_context,
994  filter_cfg);
995  }
996  can_return_error = true;
997  if (query_mem_desc.getQueryDescriptionType() ==
998  QueryDescriptionType::Projection &&
999  query_mem_desc.useStreamingTopN()) {
1000  // Ignore rejection on pushing current row to top-K heap.
1001  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
1002  } else {
1003  CodeGenerator code_generator(executor_);
1004  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
1005  // TODO(alex): remove the trunc once pos is converted to 32 bits
1006  code_generator.posArg(nullptr),
1007  get_int_type(32, LL_CONTEXT))));
1008  }
1009  }
1010  } else {
1011  if (ra_exe_unit_.estimator) {
1012  std::stack<llvm::BasicBlock*> array_loops;
1013  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
1014  } else {
1015  auto arg_it = ROW_FUNC->arg_begin();
1016  std::vector<llvm::Value*> agg_out_vec;
1017  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1018  agg_out_vec.push_back(&*arg_it++);
1019  }
1020  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1021  /*varlen_output_buffer=*/nullptr,
1022  agg_out_vec,
1023  query_mem_desc,
1024  co,
1025  gpu_smem_context,
1026  filter_cfg);
1027  }
1028  }
1029  }
1030 
1031  if (ra_exe_unit_.join_quals.empty()) {
1032  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1033  } else if (sc_false) {
1034  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1035  LL_BUILDER.SetInsertPoint(sc_false);
1036  LL_BUILDER.CreateBr(filter_false);
1037  LL_BUILDER.SetInsertPoint(saved_insert_block);
1038  }
1039 
1040  return can_return_error;
1041 }
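Stripped of IR plumbing, the row function assembled here has roughly the shape sketched below. This is a simplified summary of the control flow in the listing, not engine code; the Projection total_matched bookkeeping, the GPU atomic increment, and the join-quals return path are omitted.

// Simplified control-flow skeleton of the generated row function.
// filter_result comes from the WHERE-clause codegen; the comments stand in
// for the IR emitted by the helpers named in the listing.
int row_func_skeleton(bool filter_result, bool is_group_by, bool has_estimator) {
  if (!filter_result) {  // DiamondCodegen routes the row to filter_false
    return 0;
  }
  if (is_group_by) {
    // codegenGroupBy() resolves this row's output slot from its key, then
    // codegenAggCalls() updates the aggregate columns in that slot.
  } else if (has_estimator) {
    // codegenEstimator() feeds the row into the estimator's bitmap.
  } else {
    // Without GROUP BY, aggregates accumulate into row-function arguments
    // (the agg_out_vec loop in the listing).
  }
  return 0;  // a non-zero return code signals an error to the caller
}
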

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1855 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), CUR_FUNC, executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1857  {
1858  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1859  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1860  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1861  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1862 
1863  // TODO(alex): handle arrays uniformly?
1864  CodeGenerator code_generator(executor_);
1865  if (target_expr) {
1866  const auto& target_ti = target_expr->get_type_info();
1867  if (target_ti.is_buffer() &&
1868  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1869  const auto target_lvs =
1870  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1871  : code_generator.codegen(
1872  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1873  if (!func_expr && !arr_expr) {
1874  // Something with the chunk transport is code that was generated from a source
1875  // other than an ARRAY[] expression
1876  if (target_ti.is_bytes()) {
1877  CHECK_EQ(size_t(3), target_lvs.size());
1878  return {target_lvs[1], target_lvs[2]};
1879  }
1880  CHECK(target_ti.is_array());
1881  CHECK_EQ(size_t(1), target_lvs.size());
1882  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1883  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1884  const auto i8p_ty =
1885  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1886  const auto& elem_ti = target_ti.get_elem_type();
1887  return {
1888  executor_->cgen_state_->emitExternalCall(
1889  "array_buff",
1890  i8p_ty,
1891  {target_lvs.front(), code_generator.posArg(target_expr)}),
1892  executor_->cgen_state_->emitExternalCall(
1893  "array_size",
1894  i32_ty,
1895  {target_lvs.front(),
1896  code_generator.posArg(target_expr),
1897  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1898  } else {
1899  if (agg_expr) {
1900  throw std::runtime_error(
1901  "Using array[] operator as argument to an aggregate operator is not "
1902  "supported");
1903  }
1904  CHECK(func_expr || arr_expr);
1905  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1906  CHECK_EQ(size_t(1), target_lvs.size());
1907  const auto prefix = target_ti.get_buffer_name();
1908  CHECK(target_ti.is_array() || target_ti.is_bytes());
1909  const auto target_lv = LL_BUILDER.CreateLoad(
1910  target_lvs[0]->getType()->getPointerElementType(), target_lvs[0]);
1911  // const auto target_lv_type = target_lvs[0]->getType();
1912  // CHECK(target_lv_type->isStructTy());
1913  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1914  const auto i8p_ty = llvm::PointerType::get(
1915  get_int_type(8, executor_->cgen_state_->context_), 0);
1916  const auto ptr = LL_BUILDER.CreatePointerCast(
1917  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1918  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1919  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1920  const auto nullcheck_ok_bb =
1921  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_nullcheck_ok_bb", CUR_FUNC);
1922  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
1923  LL_CONTEXT, prefix + "_nullcheck_fail_bb", CUR_FUNC);
1924 
1925  // TODO(adb): probably better to zext the bool
1926  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1927  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1928  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1929 
1930  const auto ret_bb =
1931  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_return", CUR_FUNC);
1932  LL_BUILDER.SetInsertPoint(ret_bb);
1933  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
1934  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1935  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1936  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1937  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1938  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1939  executor_->cgen_state_->emitExternalCall(
1940  "register_buffer_with_executor_rsm",
1941  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1942  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1943  LL_BUILDER.CreateBr(ret_bb);
1944  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1945  LL_BUILDER.CreateBr(ret_bb);
1946 
1947  LL_BUILDER.SetInsertPoint(ret_bb);
1948  return {result_phi, size};
1949  }
1950  CHECK_EQ(size_t(2), target_lvs.size());
1951  return {target_lvs[0], target_lvs[1]};
1952  }
1953  }
1954  if (target_ti.is_geometry() &&
1955  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1956  auto generate_coord_lvs =
1957  [&](auto* selected_target_expr,
1958  bool const fetch_columns) -> std::vector<llvm::Value*> {
1959  const auto target_lvs =
1960  code_generator.codegen(selected_target_expr, fetch_columns, co);
1961  if (dynamic_cast<const Analyzer::GeoOperator*>(target_expr) &&
1962  target_expr->get_type_info().is_geometry()) {
1963  // return a pointer to the temporary alloca
1964  return target_lvs;
1965  }
1966  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
1967  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
1968  if (geo_uoper || geo_binoper) {
1969  CHECK(target_expr->get_type_info().is_geometry());
1970  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1971  target_lvs.size());
1972  return target_lvs;
1973  }
1974  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1975  target_lvs.size());
1976 
1977  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1978  const auto i8p_ty =
1979  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1980  std::vector<llvm::Value*> coords;
1981  size_t ctr = 0;
1982  for (const auto& target_lv : target_lvs) {
1983  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1984  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1985  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1986  // coords array (TINYINT). Subsequent arrays are regular INT.
1987 
1988  const size_t elem_sz = ctr == 0 ? 1 : 4;
1989  ctr++;
1990  int32_t fixlen = -1;
1991  if (target_ti.get_type() == kPOINT) {
1992  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1993  if (col_var) {
1994  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1995  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1996  fixlen = coords_cd->columnType.get_size();
1997  }
1998  }
1999  }
2000  if (fixlen > 0) {
2001  coords.push_back(executor_->cgen_state_->emitExternalCall(
2002  "fast_fixlen_array_buff",
2003  i8p_ty,
2004  {target_lv, code_generator.posArg(selected_target_expr)}));
2005  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
2006  continue;
2007  }
2008  coords.push_back(executor_->cgen_state_->emitExternalCall(
2009  "array_buff",
2010  i8p_ty,
2011  {target_lv, code_generator.posArg(selected_target_expr)}));
2012  coords.push_back(executor_->cgen_state_->emitExternalCall(
2013  "array_size",
2014  i32_ty,
2015  {target_lv,
2016  code_generator.posArg(selected_target_expr),
2017  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
2018  }
2019  return coords;
2020  };
2021 
2022  if (agg_expr) {
2023  return generate_coord_lvs(agg_expr->get_arg(), true);
2024  } else {
2025  return generate_coord_lvs(target_expr,
2026  !executor_->plan_state_->allow_lazy_fetch_);
2027  }
2028  }
2029  }
2030  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2031  : code_generator.codegen(
2032  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
2033 }
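For array-typed targets the method hands the aggregate a pointer/size pair through the array_buff and array_size runtime helpers. The sketch below models array_size's contract as used in the listing; VarlenDatum is an assumed, simplified layout, not the engine's actual chunk format.

#include <cstdint>

// Assumed flattened array value: payload pointer plus byte length.
struct VarlenDatum {
  const int8_t* pointer;  // array payload (what array_buff yields)
  uint32_t length;        // payload length in bytes
};

// array_size converts the stored byte length into an element count by
// shifting by log2(element size), e.g. length >> 3 for 8-byte elements
// (log2_bytes(8) == 3 in the listing above).
uint32_t array_elem_count(const VarlenDatum& ad, uint32_t elem_log_sz) {
  return ad.length >> elem_log_sz;
}
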

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
llvm::Value *  varlen_output_buffer,
const std::vector< llvm::Value * > &  agg_out_vec,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1538 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1545  {
1546  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1547  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1548  // TODO(alex): unify the two cases, the output for non-group by queries
1549  // should be a contiguous buffer
1550  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1551  bool can_return_error = false;
1552  if (is_group_by) {
1553  CHECK(agg_out_vec.empty());
1554  } else {
1555  CHECK(!agg_out_vec.empty());
1556  }
1557 
1558  // output buffer is casted into a byte stream to be able to handle data elements of
1559  // different sizes (only used when actual column width sizes are used)
1560  llvm::Value* output_buffer_byte_stream{nullptr};
1561  llvm::Value* out_row_idx{nullptr};
1562  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1563  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1564  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1565  std::get<0>(agg_out_ptr_w_idx),
1566  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1567  output_buffer_byte_stream->setName("out_buff_b_stream");
1568  CHECK(std::get<1>(agg_out_ptr_w_idx));
1569  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1570  llvm::Type::getInt64Ty(LL_CONTEXT));
1571  out_row_idx->setName("out_row_idx");
1572  }
1573 
1574  TargetExprCodegenBuilder target_builder(ra_exe_unit_, is_group_by);
1575  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1576  ++target_idx) {
1577  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1578  CHECK(target_expr);
1579 
1580  target_builder(target_expr, executor_, query_mem_desc, co);
1581  }
1582 
1583  target_builder.codegen(this,
1584  executor_,
1585  query_mem_desc,
1586  co,
1587  gpu_smem_context,
1588  agg_out_ptr_w_idx,
1589  agg_out_vec,
1590  output_buffer_byte_stream,
1591  out_row_idx,
1592  varlen_output_buffer,
1593  diamond_codegen);
1594 
1595  for (auto target_expr : ra_exe_unit_.target_exprs) {
1596  CHECK(target_expr);
1597  executor_->plan_state_->isLazyFetchColumn(target_expr);
1598  }
1599 
1600  return can_return_error;
1601 }
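The columnar projection fast path views the output buffer as a byte stream and widens the row index to 64 bits so that targets of different widths can share one base pointer. The two conversions, in scalar form (illustrative names):

#include <cstdint>

// Mirrors the CreateBitCast above: the group output buffer is reinterpreted
// as int8_t* ("out_buff_b_stream"); per-target offsets are computed later by
// codegenAggColumnPtr().
int8_t* as_byte_stream(int64_t* groups_buffer) {
  return reinterpret_cast<int8_t*>(groups_buffer);
}

// Mirrors the CreateZExt above: the columnar row index ("out_row_idx") is
// zero-extended to 64 bits before any offset arithmetic.
uint64_t widen_row_idx(uint32_t row_idx) {
  return static_cast<uint64_t>(row_idx);
}
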

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1606 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1613  {
1614  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1615  llvm::Value* agg_col_ptr{nullptr};
1616  if (query_mem_desc.didOutputColumnar()) {
1617  // TODO(Saman): remove the second columnar branch, and support all query description
1618  // types through the first branch. Then, input arguments should also be cleaned up
1619  if (!g_cluster &&
1620  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1621  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1622  chosen_bytes == 8);
1623  CHECK(output_buffer_byte_stream);
1624  CHECK(out_row_idx);
1625  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1626  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1627  auto out_per_col_byte_idx =
1628 #ifdef _WIN32
1629  LL_BUILDER.CreateShl(out_row_idx, __lzcnt(chosen_bytes) - 1);
1630 #else
1631  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1632 #endif
1633  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1634  LL_INT(static_cast<int64_t>(col_off)));
1635  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1636  auto output_ptr = LL_BUILDER.CreateGEP(
1637  output_buffer_byte_stream->getType()->getScalarType()->getPointerElementType(),
1638  output_buffer_byte_stream,
1639  byte_offset);
1640  agg_col_ptr = LL_BUILDER.CreateBitCast(
1641  output_ptr,
1642  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1643  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1644  } else {
1645  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1646  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1647  col_off /= chosen_bytes;
1648  CHECK(std::get<1>(agg_out_ptr_w_idx));
1649  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1650  auto* bit_cast = LL_BUILDER.CreateBitCast(
1651  std::get<0>(agg_out_ptr_w_idx),
1652  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1653  agg_col_ptr = LL_BUILDER.CreateGEP(
1654  bit_cast->getType()->getScalarType()->getPointerElementType(),
1655  bit_cast,
1656  offset);
1657  }
1658  } else {
1659  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1660  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1661  col_off /= chosen_bytes;
1662  auto* bit_cast = LL_BUILDER.CreateBitCast(
1663  std::get<0>(agg_out_ptr_w_idx),
1664  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1665  agg_col_ptr = LL_BUILDER.CreateGEP(
1666  bit_cast->getType()->getScalarType()->getPointerElementType(),
1667  bit_cast,
1668  LL_INT(col_off));
1669  }
1670  CHECK(agg_col_ptr);
1671  return agg_col_ptr;
1672 }
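In the columnar projection branch the slot address is the column's start offset plus the row index scaled by the target width, with the multiplication emitted as a left shift. A runnable sketch of the same arithmetic (the function name is illustrative; __builtin_ffs is the same builtin the listing uses on non-Windows builds):

#include <cassert>
#include <cstdint>

// Columnar slot addressing as emitted above: byte offset into the output
// byte stream is col_off plus out_row_idx scaled by the chosen width.
// ffs(chosen_bytes) - 1 equals log2(chosen_bytes) because chosen_bytes is
// always a power of two (1, 2, 4 or 8).
int8_t* agg_col_ptr(int8_t* output_buffer_byte_stream,
                    uint64_t out_row_idx,
                    uint32_t col_off,
                    uint32_t chosen_bytes) {
  assert(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
         chosen_bytes == 8);
  const uint32_t log2_bytes = __builtin_ffs(chosen_bytes) - 1;
  const uint64_t byte_offset = (out_row_idx << log2_bytes) + col_off;
  return output_buffer_byte_stream + byte_offset;
}
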

void GroupByAndAggregate::codegenApproxQuantile ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1802 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, g_bigint_count, SQLTypeInfo::get_notnull(), get_target_info(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1807  {
1808  if (device_type == ExecutorDeviceType::GPU) {
1809  throw QueryMustRunOnCpu();
1810  }
1811  llvm::BasicBlock *calc, *skip;
1812  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1813  auto const arg_ti =
1814  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1815  bool const nullable = !arg_ti.get_notnull();
1816 
1817  auto* cs = executor_->cgen_state_.get();
1818  auto& irb = cs->ir_builder_;
1819  if (nullable) {
1820  auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1821  auto* const skip_cond = arg_ti.is_fp()
1822  ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1823  : irb.CreateICmpEQ(agg_args.back(), null_value);
1824  calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_quantile");
1825  skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_quantile");
1826  irb.CreateCondBr(skip_cond, skip, calc);
1827  cs->current_func_->getBasicBlockList().push_back(calc);
1828  irb.SetInsertPoint(calc);
1829  }
1830  if (!arg_ti.is_fp()) {
1831  auto const agg_info = get_target_info(target_expr, g_bigint_count);
1832  agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
1833  }
1834  cs->emitExternalCall(
1835  "agg_approx_quantile", llvm::Type::getVoidTy(cs->context_), agg_args);
1836  if (nullable) {
1837  irb.CreateBr(skip);
1838  cs->current_func_->getBasicBlockList().push_back(skip);
1839  irb.SetInsertPoint(skip);
1840  }
1841 }
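For nullable arguments the runtime call is wrapped in a skip branch, and non-floating-point inputs are cast to double first. A scalar model of the generated behavior (accumulate stands in for the agg_approx_quantile runtime function; the t-digest internals are out of scope here):

// Scalar model of the emitted null-skip diamond: null inputs bypass the
// quantile accumulator entirely.
void approx_quantile_row(double value, bool is_null,
                         void (*accumulate)(double)) {
  if (is_null) {
    return;  // "skip_approx_quantile" block
  }
  accumulate(value);  // "calc_approx_quantile" block
}
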

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1733 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1738  {
1739  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1740  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1741  const auto& arg_ti =
1742  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1743  if (arg_ti.is_fp()) {
1744  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1745  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1746  }
1747  const auto& count_distinct_descriptor =
1748  query_mem_desc.getCountDistinctDescriptor(target_idx);
1749  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1750  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1751  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1752  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1753  if (device_type == ExecutorDeviceType::GPU) {
1754  const auto base_dev_addr = getAdditionalLiteral(-1);
1755  const auto base_host_addr = getAdditionalLiteral(-2);
1756  agg_args.push_back(base_dev_addr);
1757  agg_args.push_back(base_host_addr);
1758  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1759  } else {
1760  emitCall("agg_approximate_count_distinct", agg_args);
1761  }
1762  return;
1763  }
1764  std::string agg_fname{"agg_count_distinct"};
1765  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1766  agg_fname += "_bitmap";
1767  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1768  }
1769  if (agg_info.skip_null_val) {
1770  auto null_lv = executor_->cgen_state_->castToTypeIn(
1771  (arg_ti.is_fp()
1772  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1773  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1774  64);
1775  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1776  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1777  agg_fname += "_skip_val";
1778  agg_args.push_back(null_lv);
1779  }
1780  if (device_type == ExecutorDeviceType::GPU) {
1781  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1782  agg_fname += "_gpu";
1783  const auto base_dev_addr = getAdditionalLiteral(-1);
1784  const auto base_host_addr = getAdditionalLiteral(-2);
1785  agg_args.push_back(base_dev_addr);
1786  agg_args.push_back(base_host_addr);
1787  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1788  CHECK_EQ(size_t(0),
1789  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1790  count_distinct_descriptor.sub_bitmap_count);
1791  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1792  count_distinct_descriptor.sub_bitmap_count)));
1793  }
1794  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1795  emitCall(agg_fname, agg_args);
1796  } else {
1797  executor_->cgen_state_->emitExternalCall(
1798  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1799  }
1800 }
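For the regular COUNT(DISTINCT) path (APPROX_COUNT_DISTINCT calls its own runtime functions), the entry-point name is assembled from a base plus suffixes, exactly as in the listing:

#include <string>

// Bitmap implementations append "_bitmap", nullable arguments append
// "_skip_val", and GPU execution appends "_gpu", yielding names such as
// "agg_count_distinct_bitmap_skip_val_gpu".
std::string count_distinct_fname(bool is_bitmap, bool skip_null_val,
                                 bool is_gpu) {
  std::string agg_fname{"agg_count_distinct"};
  if (is_bitmap) {
    agg_fname += "_bitmap";
  }
  if (skip_null_val) {
    agg_fname += "_skip_val";
  }
  if (is_gpu) {
    agg_fname += "_gpu";
  }
  return agg_fname;
}
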

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1674 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1677  {
1678  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1679  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1680  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1681  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1682  estimator_comp_count_lv);
1683  int32_t subkey_idx = 0;
1684  for (const auto& estimator_arg_comp : estimator_arg) {
1685  const auto estimator_arg_comp_lvs =
1686  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1687  query_mem_desc.getEffectiveKeyWidth(),
1688  co,
1689  false,
1690  0,
1691  diamond_codegen,
1692  array_loops,
1693  true);
1694  CHECK(!estimator_arg_comp_lvs.original_value);
1695  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1696  // store the sub-key to the buffer
1697  LL_BUILDER.CreateStore(
1698  estimator_arg_comp_lv,
1699  LL_BUILDER.CreateGEP(
1700  estimator_key_lv->getType()->getScalarType()->getPointerElementType(),
1701  estimator_key_lv,
1702  LL_INT(subkey_idx++)));
1703  }
1704  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1705  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1706  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1707  const auto estimator_comp_bytes_lv =
1708  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1709  const auto bitmap_size_lv =
1710  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1711  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1712  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1713 }
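Each estimator component becomes one 64-bit sub-key in a contiguous key buffer, and the runtime function receives the bitmap plus the key bytes and their length (component count times sizeof(int64_t), matching estimator_comp_bytes_lv). A sketch of that packing (illustrative helper):

#include <cstdint>
#include <cstring>
#include <vector>

// Models the key buffer built above: component values stored back to back
// as int64_t slots, then handed to the estimator's runtime function as a
// raw byte span to hash into its bitmap.
std::vector<int8_t> pack_estimator_key(const std::vector<int64_t>& components) {
  std::vector<int8_t> key_bytes(components.size() * sizeof(int64_t));
  std::memcpy(key_bytes.data(), components.data(), key_bytes.size());
  return key_bytes;
}
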

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1130 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, query_infos_, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1133  {
1134  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1135  auto arg_it = ROW_FUNC->arg_begin();
1136  auto groups_buffer = arg_it++;
1137 
1138  std::stack<llvm::BasicBlock*> array_loops;
1139 
1140  // TODO(Saman): move this logic outside of this function.
1141  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1142  if (query_mem_desc.didOutputColumnar()) {
1143  return std::make_tuple(
1144  &*groups_buffer,
1145  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1146  } else {
1147  return std::make_tuple(
1148  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1149  nullptr);
1150  }
1151  }
1152 
1153  CHECK(query_mem_desc.getQueryDescriptionType() ==
1154  QueryDescriptionType::GroupByBaselineHash ||
1155  query_mem_desc.getQueryDescriptionType() ==
1156  QueryDescriptionType::GroupByPerfectHash);
1157 
1158  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1159  ? 0
1160  : query_mem_desc.getRowSize() / sizeof(int64_t);
1161 
1162  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1163  ? sizeof(int64_t)
1164  : query_mem_desc.getEffectiveKeyWidth();
1165  // for multi-column group by
1166  llvm::Value* group_key = nullptr;
1167  llvm::Value* key_size_lv = nullptr;
1168 
1169  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1170  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1171  if (query_mem_desc.getQueryDescriptionType() ==
1172  QueryDescriptionType::GroupByPerfectHash) {
1173  group_key =
1174  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1175  } else if (query_mem_desc.getQueryDescriptionType() ==
1176  QueryDescriptionType::GroupByBaselineHash) {
1177  group_key =
1178  col_width_size == sizeof(int32_t)
1179  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1180  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1181  }
1182  CHECK(group_key);
1183  CHECK(key_size_lv);
1184  }
1185 
1186  int32_t subkey_idx = 0;
1187  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1188  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1189  const auto col_range_info =
1190  get_expr_range_info(ra_exe_unit_, query_infos_, group_expr.get(), executor_);
1191  const auto translated_null_value = static_cast<int64_t>(
1192  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1193  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1194  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1195  : checked_int64_t(col_range_info.max) +
1196  (col_range_info.bucket ? col_range_info.bucket : 1));
1197 
1198  const bool col_has_nulls =
1199  query_mem_desc.getQueryDescriptionType() ==
1200  QueryDescriptionType::GroupByPerfectHash
1201  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1202  ? query_mem_desc.hasNulls()
1203  : col_range_info.has_nulls)
1204  : false;
1205 
1206  const auto group_expr_lvs =
1207  executor_->groupByColumnCodegen(group_expr.get(),
1208  col_width_size,
1209  co,
1210  col_has_nulls,
1211  translated_null_value,
1212  diamond_codegen,
1213  array_loops,
1214  query_mem_desc.threadsShareMemory());
1215  const auto group_expr_lv = group_expr_lvs.translated_value;
1216  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1217  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1218  return codegenSingleColumnPerfectHash(query_mem_desc,
1219  co,
1220  &*groups_buffer,
1221  group_expr_lv,
1222  group_expr_lvs.original_value,
1223  row_size_quad);
1224  } else {
1225  // store the sub-key to the buffer
1226  LL_BUILDER.CreateStore(
1227  group_expr_lv,
1228  LL_BUILDER.CreateGEP(
1229  group_key->getType()->getScalarType()->getPointerElementType(),
1230  group_key,
1231  LL_INT(subkey_idx++)));
1232  }
1233  }
1234  if (query_mem_desc.getQueryDescriptionType() ==
1235  QueryDescriptionType::GroupByPerfectHash) {
1236  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1237  return codegenMultiColumnPerfectHash(
1238  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1239  } else if (query_mem_desc.getQueryDescriptionType() ==
1240  QueryDescriptionType::GroupByBaselineHash) {
1241  return codegenMultiColumnBaselineHash(co,
1242  &*groups_buffer,
1243  group_key,
1244  key_size_lv,
1245  query_mem_desc,
1246  col_width_size,
1247  row_size_quad);
1248  }
1249  CHECK(false);
1250  return std::make_tuple(nullptr, nullptr);
1251 }
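The method picks one of four slot-lookup strategies. A sketch of the dispatch visible above (the enum and function are an illustrative summary, not engine code):

// Which slot-lookup codegen path a query gets, per the branches above.
enum class GroupBySlotStrategy {
  ProjectionOutputSlot,     // Projection: codegenOutputSlot()
  SingleColumnPerfectHash,  // one key with a dense range: get_group_value_fast*
  MultiColumnPerfectHash,   // several keys with a small cross product
  MultiColumnBaselineHash   // general case: get_group_value*
};

GroupBySlotStrategy choose_strategy(bool is_projection,
                                    bool single_col_perfect_hash,
                                    bool perfect_hash) {
  if (is_projection) {
    return GroupBySlotStrategy::ProjectionOutputSlot;
  }
  if (single_col_perfect_hash) {
    return GroupBySlotStrategy::SingleColumnPerfectHash;
  }
  return perfect_hash ? GroupBySlotStrategy::MultiColumnPerfectHash
                      : GroupBySlotStrategy::MultiColumnBaselineHash;
}
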

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1362 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1369  {
1370  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1371  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1372  CHECK(key_width == sizeof(int32_t));
1373  group_key =
1374  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1375  }
1376  std::vector<llvm::Value*> func_args{
1377  groups_buffer,
1378  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1379  &*group_key,
1380  &*key_size_lv,
1381  LL_INT(static_cast<int32_t>(key_width))};
1382  std::string func_name{"get_group_value"};
1383  if (query_mem_desc.didOutputColumnar()) {
1384  func_name += "_columnar_slot";
1385  } else {
1386  func_args.push_back(LL_INT(row_size_quad));
1387  }
1388  if (co.with_dynamic_watchdog) {
1389  func_name += "_with_watchdog";
1390  }
1391  if (query_mem_desc.didOutputColumnar()) {
1392  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1393  } else {
1394  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1395  }
1396 }
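The baseline-hash lookup resolves to one of four runtime entry points; the name assembly below mirrors the listing exactly:

#include <string>

// Columnar output appends "_columnar_slot"; the dynamic watchdog appends
// "_with_watchdog", e.g. "get_group_value_columnar_slot_with_watchdog".
std::string baseline_hash_fname(bool output_columnar,
                                bool with_dynamic_watchdog) {
  std::string func_name{"get_group_value"};
  if (output_columnar) {
    func_name += "_columnar_slot";
  }
  if (with_dynamic_watchdog) {
    func_name += "_with_watchdog";
  }
  return func_name;
}
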

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1318 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1323  {
1324  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1325  CHECK(query_mem_desc.getQueryDescriptionType() ==
1326  QueryDescriptionType::GroupByPerfectHash);
1327  // compute the index (perfect hash)
1328  auto perfect_hash_func = codegenPerfectHashFunction();
1329  auto hash_lv =
1330  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1331 
1332  if (query_mem_desc.didOutputColumnar()) {
1333  if (!query_mem_desc.hasKeylessHash()) {
1334  const std::string set_matching_func_name{
1335  "set_matching_group_value_perfect_hash_columnar"};
1336  const std::vector<llvm::Value*> set_matching_func_arg{
1337  groups_buffer,
1338  hash_lv,
1339  group_key,
1340  key_size_lv,
1341  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1342  query_mem_desc.getEntryCount())};
1343  emitCall(set_matching_func_name, set_matching_func_arg);
1344  }
1345  return std::make_tuple(groups_buffer, hash_lv);
1346  } else {
1347  if (query_mem_desc.hasKeylessHash()) {
1348  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1349  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1350  nullptr);
1351  } else {
1352  return std::make_tuple(
1353  emitCall(
1354  "get_matching_group_value_perfect_hash",
1355  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1356  nullptr);
1357  }
1358  }
1359 }
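The index produced by codegenPerfectHashFunction() is a mixed-radix number over the bucketed key ranges. A runnable sketch of that hash, assuming every key lies inside its declared [min, max] range (which is what makes the hash perfect and collision-free):

#include <cstdint>
#include <vector>

// hash = sum over dims of normalized_key_i * prod of cardinalities of the
// earlier dims; buckets[dim] == 0 means the column is not bucketed.
int32_t perfect_key_hash(const std::vector<int64_t>& keys,
                         const std::vector<int64_t>& mins,
                         const std::vector<int64_t>& buckets,
                         const std::vector<int64_t>& cardinalities) {
  int64_t hash = 0;
  int64_t stride = 1;
  for (size_t dim = 0; dim < keys.size(); ++dim) {
    int64_t term = keys[dim] - mins[dim];  // normalize to [0, cardinality)
    if (buckets[dim]) {
      term /= buckets[dim];
    }
    hash += term * stride;
    stride *= cardinalities[dim];
  }
  return static_cast<int32_t>(hash);
}
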

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1043 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, anonymous_namespace{Utm.h}::n, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1047  {
1048  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1049  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1050  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1051  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1052  CHECK(!group_expr);
1053  if (!query_mem_desc.didOutputColumnar()) {
1054  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1055  }
1056  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1057  ? 0
1058  : query_mem_desc.getRowSize() / sizeof(int64_t);
1059  CodeGenerator code_generator(executor_);
1060  if (query_mem_desc.useStreamingTopN()) {
1061  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1062  CHECK_GE(only_order_entry.tle_no, int(1));
1063  const size_t target_idx = only_order_entry.tle_no - 1;
1064  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1065  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1066  const auto chosen_bytes =
1067  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1068  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1069  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1070  const size_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1071  std::string fname = "get_bin_from_k_heap";
1072  const auto& oe_ti = order_entry_expr->get_type_info();
1073  llvm::Value* null_key_lv = nullptr;
1074  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1075  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1076  switch (bit_width) {
1077  case 32:
1078  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1079  break;
1080  case 64:
1081  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1082  break;
1083  default:
1084  CHECK(false);
1085  }
1086  fname += "_int" + std::to_string(bit_width) + "_t";
1087  } else {
1088  CHECK(oe_ti.is_fp());
1089  if (order_entry_lv->getType()->isDoubleTy()) {
1090  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1091  } else {
1092  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1093  }
1094  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1095  }
1096  const auto key_slot_idx =
1097  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1098  return emitCall(
1099  fname,
1100  {groups_buffer,
1101  LL_INT(n),
1102  LL_INT(row_size_quad),
1103  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1104  LL_BOOL(only_order_entry.is_desc),
1105  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1106  LL_BOOL(only_order_entry.nulls_first),
1107  null_key_lv,
1108  order_entry_lv});
1109  } else {
1110  auto* arg = get_arg_by_name(ROW_FUNC, "max_matched");
1111  const auto output_buffer_entry_count_lv =
1112  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1113  arg = get_arg_by_name(ROW_FUNC, "old_total_matched");
1114  const auto group_expr_lv =
1115  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1116  std::vector<llvm::Value*> args{groups_buffer,
1117  output_buffer_entry_count_lv,
1118  group_expr_lv,
1119  code_generator.posArg(nullptr)};
1120  if (query_mem_desc.didOutputColumnar()) {
1121  const auto columnar_output_offset =
1122  emitCall("get_columnar_scan_output_offset", args);
1123  return columnar_output_offset;
1124  }
1125  args.push_back(LL_INT(row_size_quad));
1126  return emitCall("get_scan_output_slot", args);
1127  }
1128 }
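
The streaming top-n branch above assembles the runtime heap-probe name from the physical type of the order-by key. A minimal standalone sketch of that naming scheme (the helper name and parameters are hypothetical; the runtime function names are the ones in the listing):

#include <cstddef>
#include <string>

// Hypothetical helper mirroring the name assembly in codegenOutputSlot.
std::string heap_bin_fn_name(const bool is_fp, const bool is_double, const size_t bit_width) {
  std::string fname = "get_bin_from_k_heap";
  if (!is_fp) {
    fname += "_int" + std::to_string(bit_width) + "_t";  // 32- or 64-bit integer keys
  } else {
    fname += is_double ? "_double" : "_float";
  }
  return fname;
}

For a BIGINT order-by key this yields get_bin_from_k_heap_int64_t, and the heap size n passed to it is the sort offset plus the limit, so LIMIT 10 OFFSET 5 probes a 15-entry heap.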

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1398 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), get_int_type(), getBucketedCardinality(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), query_infos_, and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
  auto ft = llvm::FunctionType::get(
      get_int_type(32, LL_CONTEXT),
      std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
      false);
  auto key_hash_func = llvm::Function::Create(ft,
                                              llvm::Function::ExternalLinkage,
                                              "perfect_key_hash",
                                              executor_->cgen_state_->module_);
  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
  mark_function_always_inline(key_hash_func);
  auto& key_buff_arg = *key_hash_func->args().begin();
  llvm::Value* key_buff_lv = &key_buff_arg;
  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
  llvm::IRBuilder<> key_hash_func_builder(bb);
  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
  std::vector<int64_t> cardinalities;
  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
    auto col_range_info =
        get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
    CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
    cardinalities.push_back(getBucketedCardinality(col_range_info));
  }
  size_t dim_idx = 0;
  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
    auto* gep = key_hash_func_builder.CreateGEP(
        key_buff_lv->getType()->getScalarType()->getPointerElementType(),
        key_buff_lv,
        LL_INT(dim_idx));
    auto key_comp_lv =
        key_hash_func_builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
    auto col_range_info =
        get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
    auto crt_term_lv =
        key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
    if (col_range_info.bucket) {
      crt_term_lv =
          key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
    }
    for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
      crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
                                                    LL_INT(cardinalities[prev_dim_idx]));
    }
    hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
    ++dim_idx;
  }
  key_hash_func_builder.CreateRet(
      key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
  return key_hash_func;
}
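
The IR emitted above is equivalent to the following scalar computation: each key component is rebased to its column minimum, optionally divided by the bucket size, then scaled by the cardinalities of all preceding dimensions, i.e. a row-major linearization of the multi-dimensional group key. A sketch under those assumptions (function name hypothetical):

#include <cstdint>
#include <vector>

int32_t perfect_key_hash_scalar(const std::vector<int64_t>& key,
                                const std::vector<int64_t>& mins,
                                const std::vector<int64_t>& buckets,  // 0 == unbucketed
                                const std::vector<int64_t>& cardinalities) {
  int64_t hash = 0;
  for (size_t dim = 0; dim < key.size(); ++dim) {
    int64_t term = key[dim] - mins[dim];  // rebase to the column minimum
    if (buckets[dim]) {
      term /= buckets[dim];               // collapse bucketed ranges
    }
    for (size_t prev = 0; prev < dim; ++prev) {
      term *= cardinalities[prev];        // scale by preceding dimensions
    }
    hash += term;
  }
  return static_cast<int32_t>(hash);      // mirrors the final CreateTrunc
}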

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1268 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  CHECK(query_mem_desc.usesGetGroupValueFast());
  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
                                    ? "get_columnar_group_bin_offset"
                                    : "get_group_value_fast"};
  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
    get_group_fn_name += "_keyless";
  }
  if (query_mem_desc.interleavedBins(co.device_type)) {
    CHECK(!query_mem_desc.didOutputColumnar());
    CHECK(query_mem_desc.hasKeylessHash());
    get_group_fn_name += "_semiprivate";
  }
  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
                                              &*group_expr_lv_translated};
  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
      query_mem_desc.mustUseBaselineSort()) {
    get_group_fn_name += "_with_original_key";
    get_group_fn_args.push_back(group_expr_lv_original);
  }
  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
  if (!query_mem_desc.hasKeylessHash()) {
    if (!query_mem_desc.didOutputColumnar()) {
      get_group_fn_args.push_back(LL_INT(row_size_quad));
    }
  } else {
    if (!query_mem_desc.didOutputColumnar()) {
      get_group_fn_args.push_back(LL_INT(row_size_quad));
    }
    if (query_mem_desc.interleavedBins(co.device_type)) {
      auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
      get_group_fn_args.push_back(warp_idx);
      get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
    }
  }
  if (get_group_fn_name == "get_columnar_group_bin_offset") {
    return std::make_tuple(&*groups_buffer,
                           emitCall(get_group_fn_name, get_group_fn_args));
  }
  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
}
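
The runtime entry point is thus selected by a few descriptor properties. A sketch of the naming logic in isolation (helper name hypothetical; the suffixes and ordering are the ones in the listing, where the bin index is conceptually (key - min_val) / bucket):

#include <string>

std::string group_value_fn_name(const bool columnar,
                                const bool keyless,
                                const bool interleaved,
                                const bool with_original_key) {
  std::string name =
      columnar ? "get_columnar_group_bin_offset" : "get_group_value_fast";
  if (!columnar && keyless) {
    name += "_keyless";
  }
  if (interleaved) {  // only valid for row-wise, keyless hash output
    name += "_semiprivate";
  }
  if (!columnar && !keyless && with_original_key) {
    name += "_with_original_key";  // baseline-sort variant keeps the original key
  }
  return name;
}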

llvm::Value * GroupByAndAggregate::codegenVarlenOutputBuffer ( const QueryMemoryDescriptor &  query_mem_desc)
private

Definition at line 1253 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, executor_, QueryMemoryDescriptor::hasVarlenOutput(), LL_CONTEXT, and ROW_FUNC.

Referenced by codegen().

{
  if (!query_mem_desc.hasVarlenOutput()) {
    return nullptr;
  }

  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  auto arg_it = ROW_FUNC->arg_begin();
  arg_it++; /* groups_buffer */
  auto varlen_output_buffer = arg_it++;
  CHECK(varlen_output_buffer->getType() == llvm::Type::getInt64PtrTy(LL_CONTEXT));
  return varlen_output_buffer;
}

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction *  window_func,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1502 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, codegenOutputSlot(), CodeGenerator::codegenWindowPosition(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  const auto window_func_context =
      WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
    const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
                                      ? 0
                                      : query_mem_desc.getRowSize() / sizeof(int64_t);
    auto arg_it = ROW_FUNC->arg_begin();
    auto groups_buffer = arg_it++;
    CodeGenerator code_generator(executor_);
    auto window_pos_lv = code_generator.codegenWindowPosition(
        window_func_context, code_generator.posArg(nullptr));
    const auto pos_in_window =
        LL_BUILDER.CreateTrunc(window_pos_lv, get_int_type(32, LL_CONTEXT));
    llvm::Value* entry_count_lv =
        LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
    std::vector<llvm::Value*> args{
        &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
    if (query_mem_desc.didOutputColumnar()) {
      const auto columnar_output_offset =
          emitCall("get_columnar_scan_output_offset", args);
      return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
    }
    args.push_back(LL_INT(row_size_quad));
    return emitCall("get_scan_output_slot", args);
  }
  auto arg_it = ROW_FUNC->arg_begin();
  auto groups_buffer = arg_it++;
  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
}

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo &  arg_type,
const TargetInfo &  agg_info,
llvm::Value *  target 
)
private

Definition at line 1451 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  const auto& agg_type = agg_info.sql_type;
  const size_t chosen_bytes = agg_type.get_size();

  bool need_conversion{false};
  llvm::Value* arg_null{nullptr};
  llvm::Value* agg_null{nullptr};
  llvm::Value* target_to_cast{target};
  if (arg_type.is_fp()) {
    arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
    if (agg_type.is_fp()) {
      agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
      if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
              static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
        need_conversion = true;
      }
    } else {
      CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
      return target;
    }
  } else {
    arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
    if (agg_type.is_fp()) {
      agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
      need_conversion = true;
      target_to_cast = executor_->castToFP(target, arg_type, agg_type);
    } else {
      agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
      if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
           static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
          (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
           static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
        need_conversion = true;
      }
    }
  }
  if (need_conversion) {
    auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
                                : LL_BUILDER.CreateICmpEQ(target, arg_null);
    return LL_BUILDER.CreateSelect(
        cmp,
        agg_null,
        executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
  } else {
    return target;
  }
}
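
The select emitted above behaves like the scalar function below: when the argument equals its own type's null sentinel, substitute the aggregate type's sentinel; otherwise cast the value to the aggregate width. A sketch assuming an int32 argument aggregated at int64 width, with minimum-value sentinels standing in for inline_int_null_val() / inline_fp_null_val():

#include <cstdint>
#include <limits>

int64_t convert_null_if_any_scalar(const int32_t arg) {
  constexpr int32_t arg_null = std::numeric_limits<int32_t>::min();  // assumed sentinel
  constexpr int64_t agg_null = std::numeric_limits<int64_t>::min();  // assumed sentinel
  // Mirrors the emitted compare + select: remap the null, widen everything else.
  return arg == arg_null ? agg_null : static_cast<int64_t>(arg);
}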

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 2035 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  return executor_->cgen_state_->emitCall(fname, args);
}

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1843 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

{
  CHECK_LT(off, 0);
  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
  auto* bit_cast = LL_BUILDER.CreateBitCast(
      lit_buff_lv, llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0));
  auto* gep =
      LL_BUILDER.CreateGEP(bit_cast->getType()->getScalarType()->getPointerElementType(),
                           bit_cast,
                           LL_INT(off));
  return LL_BUILDER.CreateLoad(gep->getType()->getPointerElementType(), gep);
}
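
Since off is checked to be negative, the GEP above indexes below the literals pointer. A scalar picture of that layout (buffer arrangement assumed):

#include <cstdint>

// Additional 64-bit literals live below the "literals" argument, so off = -1
// reads the first additional slot, off = -2 the second, and so on.
int64_t read_additional_literal(const int64_t* lit_buff, const int32_t off) {
  return lit_buff[off];  // off < 0, mirroring CHECK_LT(off, 0) above
}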

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo &  col_range_info)
static private

Definition at line 364 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

{
  checked_int64_t crt_col_cardinality =
      checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
  if (col_range_info.bucket) {
    crt_col_cardinality /= col_range_info.bucket;
  }
  return static_cast<int64_t>(crt_col_cardinality +
                              (1 + (col_range_info.has_nulls ? 1 : 0)));
}
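
A worked example of the formula above: a column with min = 0, max = 9, bucket size 3 and nulls present occupies ((9 - 0) / 3) + 1 + 1 = 5 entries, one per bucket plus one slot for the null key.

// Example values, checked at compile time.
static_assert((9 - 0) / 3 + 1 + 1 == 5, "bucketed cardinality of [0,9], bucket 3, nulls");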

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 241 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), getBucketedCardinality(), GPU, group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, SortInfo::order_entries, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

{
  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
  // can expect this to be true anyway for grouped queries since the precise version
  // uses significantly more memory.
  const int64_t baseline_threshold =
      has_count_distinct(ra_exe_unit_) && device_type_ == ExecutorDeviceType::GPU
          ? Executor::baseline_threshold / 4
          : Executor::baseline_threshold;
  if (ra_exe_unit_.groupby_exprs.size() != 1) {
    try {
      checked_int64_t cardinality{1};
      bool has_nulls{false};
      for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
        auto col_range_info = get_expr_range_info(
            ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
        if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
          // going through baseline hash if a non-integer type is encountered
          return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
        }
        auto crt_col_cardinality = getBucketedCardinality(col_range_info);
        CHECK_GE(crt_col_cardinality, 0);
        cardinality *= crt_col_cardinality;
        if (col_range_info.has_nulls) {
          has_nulls = true;
        }
      }
      // For zero or high cardinalities, use baseline layout.
      if (!cardinality || cardinality > baseline_threshold) {
        return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
      }
      return {QueryDescriptionType::GroupByPerfectHash,
              0,
              int64_t(cardinality),
              0,
              has_nulls};
    } catch (...) {  // overflow when computing cardinality
      return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
    }
  }
  // For single column groupby on high timestamps, force baseline hash due to wide ranges
  // we are likely to encounter when applying quals to the expression range
  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
  // the range is small enough
  if (ra_exe_unit_.groupby_exprs.front() &&
      ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
      ra_exe_unit_.simple_quals.size() > 0) {
    return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
  }
  const auto col_range_info = get_expr_range_info(
      ra_exe_unit_, query_infos_, ra_exe_unit_.groupby_exprs.front().get(), executor_);
  if (!ra_exe_unit_.groupby_exprs.front()) {
    return col_range_info;
  }
  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
  const int64_t col_count =
      ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
  if (has_count_distinct(ra_exe_unit_)) {
    max_entry_count = std::min(max_entry_count, baseline_threshold);
  }
  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
    CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);

    const bool has_filters =
        !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
    if (has_filters &&
        is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
      // if filters are present, we can use the filter to narrow the cardinality of the
      // group by in the case of ranges too big for perfect hash. Otherwise, we are better
      // off attempting perfect hash (since we know the range will be made of
      // monotonically increasing numbers from min to max for dictionary encoded strings)
      // and failing later due to excessive memory use.
      // Check the conditions where baseline hash can provide a performance increase and
      // return baseline hash (potentially forcing an estimator query) as the range type.
      // Otherwise, return col_range_info which will likely be perfect hash, though could
      // be baseline from a previous call of this function prior to the estimator query.
      if (!ra_exe_unit_.sort_info.order_entries.empty()) {
        // TODO(adb): allow some sorts to pass through this block by centralizing sort
        // algorithm decision making
        if (has_count_distinct(ra_exe_unit_) &&
            is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
          // always use baseline hash for column range too big for perfect hash with count
          // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
          // hash group by in this case.
          return {QueryDescriptionType::GroupByBaselineHash,
                  col_range_info.min,
                  col_range_info.max,
                  0,
                  col_range_info.has_nulls};
        } else {
          // use original col range for sort
          return col_range_info;
        }
      }
      // if filters are present and the filtered range is less than the cardinality of
      // the column, consider baseline hash
      if (group_cardinality_estimation_ &&
          cardinality_estimate_less_than_column_range(*group_cardinality_estimation_,
                                                      col_range_info)) {
        return {QueryDescriptionType::GroupByBaselineHash,
                col_range_info.min,
                col_range_info.max,
                0,
                col_range_info.has_nulls};
      }
    }
  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(),
                             *executor_->catalog_)) &&
             is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
             !col_range_info.bucket) {
    return {QueryDescriptionType::GroupByBaselineHash,
            col_range_info.min,
            col_range_info.max,
            0,
            col_range_info.has_nulls};
  }
  return col_range_info;
}
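
A worked example of the perfect-hash size cap above: with one group-by expression and three target expressions, col_count = 4, so max_entry_count = (1 << 30) / (4 * sizeof(int64_t)) = 33,554,432 entries before the count-distinct clamp is applied.

static_assert((1LL << 30) / (4 * 8) == 33554432, "max_entry_count for col_count = 4");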

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo &  col_range_info,
const size_t  shard_count 
) const
private

Definition at line 429 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

{
  size_t device_count{0};
  if (device_type_ == ExecutorDeviceType::GPU) {
    device_count = executor_->cudaMgr()->getDeviceCount();
    CHECK_GT(device_count, 0u);
  }

  int64_t bucket{col_range_info.bucket};

  if (shard_count) {
    CHECK(!col_range_info.bucket);
    /*
      when a node has fewer devices than the shard count,
      a) In a distributed setup, the minimum distance between two keys would be
      device_count because shards are stored consecutively across the physical tables,
      i.e. if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
      would have values 0,1,2,6,7,8 and node 2 would have values 3,4,5,9. If each leaf
      node has only 1 device, all the keys from each node are loaded on that
      device.

      b) In a single node setup, the distance would be the minimum of device_count and
      shard_count - device_count. For example: if a single node server running on 3
      devices has a shard column with values 0 to 9 in a table with 4 shards, the
      device-to-fragment key mapping would be: device 1 - 4,8,3,7; device 2 - 1,5,9;
      device 3 - 2,6. The bucket value would be 4 (shards) - 3 (devices) = 1, i.e. the
      minimum of device_count and the difference.

      When a node has a device count equal to or greater than the shard count, the
      minimum distance is always at least shard_count * number of leaf nodes.
    */
    if (device_count < shard_count) {
      bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
                            : std::min(device_count, shard_count - device_count);
    } else {
      bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
    }
  }

  return bucket;
}
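
A worked example of the two branches above: 4 shards on 3 devices (single node, g_leaf_count = 0) gives bucket = min(3, 4 - 3) = 1, while 2 shards on a node with 4 devices gives bucket = 2 * max(0, 1) = 2.

#include <algorithm>

static_assert(std::min<size_t>(3, 4 - 3) == 1, "fewer devices than shards");
static_assert(2 * std::max<size_t>(0, 1) == 2, "devices >= shards, single node");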

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 862 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, executor_, Analyzer::AggExpr::get_arg(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Analyzer::Expr::get_type_info(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

{
  if (order_entries.size() > 1) {  // TODO(alex): lift this restriction
    return false;
  }
  for (const auto& order_entry : order_entries) {
    CHECK_GE(order_entry.tle_no, 1);
    CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
    const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
    if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
      return false;
    }
    // TODO(alex): relax the restrictions
    auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
    if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
        agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
        agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
      return false;
    }
    if (agg_expr->get_arg()) {
      const auto& arg_ti = agg_expr->get_arg()->get_type_info();
      if (arg_ti.is_fp()) {
        return false;
      }
      auto expr_range_info =
          get_expr_range_info(ra_exe_unit_, query_infos_, agg_expr->get_arg(), executor_);
      // TODO(adb): QMD not actually initialized here?
      if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
             /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
           expr_range_info.has_nulls) &&
          order_entry.is_desc == order_entry.nulls_first) {
        return false;
      }
    }
    const auto& target_ti = target_expr->get_type_info();
    CHECK(!target_ti.is_buffer());
    if (!target_ti.is_integer()) {
      return false;
    }
  }
  return true;
}

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo *  render_info,
const bool  output_columnar_hint 
)
private

Definition at line 737 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

{
  const auto shard_count =
      device_type_ == ExecutorDeviceType::GPU
          ? shard_count_for_top_groups(ra_exe_unit_, *executor_->catalog_)
          : 0;
  bool sort_on_gpu_hint =
      device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
      !ra_exe_unit_.sort_info.order_entries.empty() &&
      gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
  // but the total output buffer size would be too big or it's a sharded top query.
  // For the sake of managing risk, use the new result set way very selectively for
  // this case only (alongside the baseline layout we've enabled for a while now).
  bool must_use_baseline_sort = shard_count;
  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
  while (true) {
    query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
                                                   max_groups_buffer_entry_count,
                                                   crt_min_byte_width,
                                                   sort_on_gpu_hint,
                                                   render_info,
                                                   must_use_baseline_sort,
                                                   output_columnar_hint);
    CHECK(query_mem_desc);
    if (query_mem_desc->sortOnGpu() &&
        (query_mem_desc->getBufferSizeBytes(device_type_) +
         align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
            2 * 1024 * 1024 * 1024LL) {
      must_use_baseline_sort = true;
      sort_on_gpu_hint = false;
    } else {
      break;
    }
  }
  return query_mem_desc;
}
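
The loop above retries descriptor creation at most once more: if GPU sort would need over 2GB (the output buffer plus an int32 index per entry, 8-byte aligned), it flips to baseline sort. A standalone sketch of that size check (helper name hypothetical):

#include <cstddef>
#include <cstdint>

bool gpu_sort_buffer_too_big(const size_t buffer_bytes, const size_t entry_count) {
  // Same rounding as align_to_int64: round up to the next multiple of 8 bytes.
  const auto align_to_int64 = [](const size_t n) { return (n + 7) & ~size_t(7); };
  return buffer_bytes + align_to_int64(entry_count * sizeof(int32_t)) >
         size_t(2) * 1024 * 1024 * 1024;
}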

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo *  render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 779 of file GroupByAndAggregate.cpp.

References device_type_, executor_, g_enable_watchdog, g_watchdog_baseline_max_groups, anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info(), getColRangeInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

{
  const auto count_distinct_descriptors = init_count_distinct_descriptors(
      ra_exe_unit_, query_infos_, device_type_, executor_);

  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};

  auto col_range_info_nosharding = getColRangeInfo();

  const auto shard_count =
      device_type_ == ExecutorDeviceType::GPU
          ? shard_count_for_top_groups(ra_exe_unit_, *executor_->catalog_)
          : 0;

  const auto col_range_info =
      ColRangeInfo{col_range_info_nosharding.hash_type_,
                   col_range_info_nosharding.min,
                   col_range_info_nosharding.max,
                   getShardedTopBucket(col_range_info_nosharding, shard_count),
                   col_range_info_nosharding.has_nulls};

  // Non-grouped aggregates do not support accessing aggregated ranges
  // Keyless hash is currently only supported with single-column perfect hash
  const auto keyless_info =
      !(is_group_by &&
        col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash)
          ? KeylessInfo{false, -1}
          : get_keyless_info(ra_exe_unit_, query_infos_, is_group_by, executor_);

  if (g_enable_watchdog &&
      ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
        max_groups_buffer_entry_count > g_watchdog_baseline_max_groups) ||
       (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
        ra_exe_unit_.groupby_exprs.size() == 1 &&
        (col_range_info.max - col_range_info.min) /
                std::max(col_range_info.bucket, int64_t(1)) >
            130000000))) {
    throw WatchdogException("Query would use too much memory");
  }
  try {
    return QueryMemoryDescriptor::init(executor_,
                                       ra_exe_unit_,
                                       query_infos_,
                                       col_range_info,
                                       keyless_info,
                                       allow_multifrag,
                                       device_type_,
                                       crt_min_byte_width,
                                       sort_on_gpu_hint,
                                       shard_count,
                                       max_groups_buffer_entry_count,
                                       render_info,
                                       count_distinct_descriptors,
                                       must_use_baseline_sort,
                                       output_columnar_hint,
                                       /*streaming_top_n_hint=*/true);
  } catch (const StreamingTopNOOM& e) {
    LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
    return QueryMemoryDescriptor::init(executor_,
                                       ra_exe_unit_,
                                       query_infos_,
                                       col_range_info,
                                       keyless_info,
                                       allow_multifrag,
                                       device_type_,
                                       crt_min_byte_width,
                                       sort_on_gpu_hint,
                                       shard_count,
                                       max_groups_buffer_entry_count,
                                       render_info,
                                       count_distinct_descriptors,
                                       must_use_baseline_sort,
                                       output_columnar_hint,
                                       /*streaming_top_n_hint=*/false);
  }
}
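
A worked example of the perfect-hash watchdog guard above: a single group-by column spanning a range of 2,000,000,000 values with bucket size 10 implies 200,000,000 hash entries, which exceeds the 130,000,000 limit, so the query is rejected when the watchdog is enabled.

static_assert(2000000000LL / 10 > 130000000, "perfect-hash watchdog example");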

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value const *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions &  co 
) const
private

Definition at line 29 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

{
  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
          llvm::isa<llvm::AllocaInst>(val_ptr) &&
          val_ptr->getType() ==
              llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
          "agg_id" == agg_base_name);
}

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 40 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

{
  executor_->cgen_state_->ir_builder_.CreateCall(
      executor_->cgen_state_->module_->getFunction("force_sync"));
}

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog catalog 
)
static

Definition at line 2058 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

{
  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
    return 0;
  }
  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
    const auto grouped_col_expr =
        dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
    if (!grouped_col_expr) {
      continue;
    }
    if (grouped_col_expr->get_table_id() <= 0) {
      return 0;
    }
    const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
    if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
      return td->nShards;
    }
  }
  return 0;
}

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 214 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 215 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 212 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 213 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 216 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 217 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private

const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 210 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

bool GroupByAndAggregate::output_columnar_
private

Definition at line 207 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 206 of file GroupByAndAggregate.h.


The documentation for this class was generated from the following files: