OmniSciDB  d2f719934e
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
llvm::Value * codegenVarlenOutputBuffer (const QueryMemoryDescriptor &query_mem_desc)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored.
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
void codegenApproxQuantile (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 61 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 316 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, and ra_exe_unit_.

323  : executor_(executor)
324  , ra_exe_unit_(ra_exe_unit)
325  , query_infos_(query_infos)
326  , row_set_mem_owner_(row_set_mem_owner)
327  , device_type_(device_type)
328  , group_cardinality_estimation_(group_cardinality_estimation) {
329  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
330  if (!groupby_expr) {
331  continue;
332  }
333  const auto& groupby_ti = groupby_expr->get_type_info();
334  if (groupby_ti.is_bytes()) {
335  throw std::runtime_error(
336  "Cannot group by string columns which are not dictionary encoded.");
337  }
338  if (groupby_ti.is_buffer()) {
339  throw std::runtime_error("Group by buffer not supported");
340  }
341  if (groupby_ti.is_geometry()) {
342  throw std::runtime_error("Group by geometry not supported");
343  }
344  }
345 }
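The constructor performs no code generation itself; it only records the inputs and screens every group-by expression's type, skipping null group-by expressions. As a minimal sketch of the acceptance rule it enforces (the SQLTypeInfo predicates are the ones called above; the standalone wrapper function is purely illustrative):

// Illustrative only: the type screening applied to each non-null
// group-by expression in the constructor's loop.
bool groupby_type_is_supported(const SQLTypeInfo& groupby_ti) {
  if (groupby_ti.is_bytes()) {
    return false;  // none-encoded strings cannot be grouped on
  }
  if (groupby_ti.is_buffer()) {
    return false;  // buffer-backed types (e.g. arrays) are rejected
  }
  if (groupby_ti.is_geometry()) {
    return false;  // geometry types are rejected
  }
  return true;
}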

Member Function Documentation

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 1934 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

1934  {
1935  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1936  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1937  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1938  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1939 
1940  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1941 }
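In terms of runtime behavior, the generated IR compares the call's return code against zero and diverts non-zero values to the error path. A scalar C++ analog (a sketch; the actual branch target is whatever emitErrorCheck wires up in the row function):

#include <cstdint>

int32_t check_error_code_analog(int32_t ret_code) {
  if (ret_code != 0) {  // rc_check_condition fails
    return ret_code;    // take the error path labeled "rc"
  }
  return 0;             // continue processing the row
}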

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 824 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), codegenVarlenOutputBuffer(), DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, i, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, LLVM_ALIGN, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

828  {
829  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
830  CHECK(filter_result);
831 
832  bool can_return_error = false;
833  llvm::BasicBlock* filter_false{nullptr};
834 
835  {
836  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
837 
 838  if (executor_->isArchMaxwell(co.device_type)) {
 839  prependForceSync();
 840  }
841  DiamondCodegen filter_cfg(filter_result,
842  executor_,
843  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
844  "filter", // filter_true and filter_false basic blocks
845  nullptr,
846  false);
847  filter_false = filter_cfg.cond_false_;
848 
 849  if (is_group_by) {
 850  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
 851  !query_mem_desc.useStreamingTopN()) {
852  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
853  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
854  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
 855  llvm::Value* old_total_matched_val{nullptr};
 856  if (co.device_type == ExecutorDeviceType::GPU) {
 857  old_total_matched_val =
858  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
859  total_matched_ptr,
860  LL_INT(int32_t(1)),
861 #if LLVM_VERSION_MAJOR > 12
862  LLVM_ALIGN(8),
863 #endif
864  llvm::AtomicOrdering::Monotonic);
865  } else {
866  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
867  LL_BUILDER.CreateStore(
868  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
869  total_matched_ptr);
870  }
871  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
872  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
873  }
874 
875  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
876  auto varlen_output_buffer = codegenVarlenOutputBuffer(query_mem_desc);
 877  if (query_mem_desc.usesGetGroupValueFast() ||
 878  query_mem_desc.getQueryDescriptionType() ==
 879  QueryDescriptionType::GroupByPerfectHash) {
 880  if (query_mem_desc.getGroupbyColCount() > 1) {
881  filter_cfg.setChainToNext();
882  }
883  // Don't generate null checks if the group slot is guaranteed to be non-null,
884  // as it's the case for get_group_value_fast* family.
885  can_return_error = codegenAggCalls(agg_out_ptr_w_idx,
886  varlen_output_buffer,
 887  {},
 888  query_mem_desc,
 889  co,
890  gpu_smem_context,
891  filter_cfg);
892  } else {
893  {
894  llvm::Value* nullcheck_cond{nullptr};
895  if (query_mem_desc.didOutputColumnar()) {
896  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
897  LL_INT(int32_t(0)));
898  } else {
899  nullcheck_cond = LL_BUILDER.CreateICmpNE(
900  std::get<0>(agg_out_ptr_w_idx),
901  llvm::ConstantPointerNull::get(
902  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
903  }
904  DiamondCodegen nullcheck_cfg(
905  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
906  codegenAggCalls(agg_out_ptr_w_idx,
907  varlen_output_buffer,
 908  {},
 909  query_mem_desc,
 910  co,
911  gpu_smem_context,
912  filter_cfg);
913  }
914  can_return_error = true;
 915  if (query_mem_desc.getQueryDescriptionType() ==
 916  QueryDescriptionType::Projection &&
 917  query_mem_desc.useStreamingTopN()) {
918  // Ignore rejection on pushing current row to top-K heap.
919  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
920  } else {
921  CodeGenerator code_generator(executor_);
922  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
923  // TODO(alex): remove the trunc once pos is converted to 32 bits
924  code_generator.posArg(nullptr),
925  get_int_type(32, LL_CONTEXT))));
926  }
927  }
928  } else {
929  if (ra_exe_unit_.estimator) {
930  std::stack<llvm::BasicBlock*> array_loops;
931  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
932  } else {
933  auto arg_it = ROW_FUNC->arg_begin();
934  std::vector<llvm::Value*> agg_out_vec;
935  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
936  agg_out_vec.push_back(&*arg_it++);
937  }
938  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
939  /*varlen_output_buffer=*/nullptr,
940  agg_out_vec,
941  query_mem_desc,
942  co,
943  gpu_smem_context,
944  filter_cfg);
945  }
946  }
947  }
948 
949  if (ra_exe_unit_.join_quals.empty()) {
950  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
951  } else if (sc_false) {
952  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
953  LL_BUILDER.SetInsertPoint(sc_false);
954  LL_BUILDER.CreateBr(filter_false);
955  LL_BUILDER.SetInsertPoint(saved_insert_block);
956  }
957 
958  return can_return_error;
959 }

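Condensed to pseudocode, the row-function skeleton emitted above looks roughly like the following sketch (illustrative names; not literal generated code):

// Sketch of the per-row control flow produced by codegen().
int32_t row_func_sketch(bool filter_result, bool is_group_by, bool has_estimator) {
  if (filter_result) {  // DiamondCodegen "filter" diamond
    if (is_group_by) {
      // codegenGroupBy() binds the output slot; unless the slot is
      // guaranteed non-null (get_group_value_fast* family), a
      // "groupby_nullcheck" diamond guards the aggregate updates, and a
      // failed slot allocation returns -pos. codegenAggCalls() then
      // emits the per-target updates.
    } else if (has_estimator) {
      // codegenEstimator() feeds the row's sub-keys to the estimator.
    } else {
      // Aggregate-only: outputs are the row function's own arguments.
    }
  }
  return 0;  // with join quals, sc_false is instead branched to filter_false
}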

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1749 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), CUR_FUNC, executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1751  {
1752  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1753  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1754  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1755  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1756 
1757  // TODO(alex): handle arrays uniformly?
1758  CodeGenerator code_generator(executor_);
1759  if (target_expr) {
1760  const auto& target_ti = target_expr->get_type_info();
1761  if (target_ti.is_buffer() &&
1762  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1763  const auto target_lvs =
1764  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1765  : code_generator.codegen(
1766  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1767  if (!func_expr && !arr_expr) {
1768  // Something with the chunk transport is code that was generated from a source
1769  // other than an ARRAY[] expression
1770  if (target_ti.is_bytes()) {
1771  CHECK_EQ(size_t(3), target_lvs.size());
1772  return {target_lvs[1], target_lvs[2]};
1773  }
1774  CHECK(target_ti.is_array());
1775  CHECK_EQ(size_t(1), target_lvs.size());
1776  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1777  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1778  const auto i8p_ty =
1779  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1780  const auto& elem_ti = target_ti.get_elem_type();
1781  return {
1782  executor_->cgen_state_->emitExternalCall(
1783  "array_buff",
1784  i8p_ty,
1785  {target_lvs.front(), code_generator.posArg(target_expr)}),
1786  executor_->cgen_state_->emitExternalCall(
1787  "array_size",
1788  i32_ty,
1789  {target_lvs.front(),
1790  code_generator.posArg(target_expr),
1791  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1792  } else {
1793  if (agg_expr) {
1794  throw std::runtime_error(
1795  "Using array[] operator as argument to an aggregate operator is not "
1796  "supported");
1797  }
1798  CHECK(func_expr || arr_expr);
1799  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1800  CHECK_EQ(size_t(1), target_lvs.size());
1801  const auto prefix = target_ti.get_buffer_name();
1802  CHECK(target_ti.is_array() || target_ti.is_bytes());
1803  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1804  // const auto target_lv_type = target_lvs[0]->getType();
1805  // CHECK(target_lv_type->isStructTy());
1806  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1807  const auto i8p_ty = llvm::PointerType::get(
1808  get_int_type(8, executor_->cgen_state_->context_), 0);
1809  const auto ptr = LL_BUILDER.CreatePointerCast(
1810  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1811  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1812  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1813  const auto nullcheck_ok_bb =
1814  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_nullcheck_ok_bb", CUR_FUNC);
1815  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
1816  LL_CONTEXT, prefix + "_nullcheck_fail_bb", CUR_FUNC);
1817 
1818  // TODO(adb): probably better to zext the bool
1819  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1820  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1821  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1822 
1823  const auto ret_bb =
1824  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_return", CUR_FUNC);
1825  LL_BUILDER.SetInsertPoint(ret_bb);
1826  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
1827  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1828  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1829  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1830  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1831  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1832  executor_->cgen_state_->emitExternalCall(
1833  "register_buffer_with_executor_rsm",
1834  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1835  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1836  LL_BUILDER.CreateBr(ret_bb);
1837  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1838  LL_BUILDER.CreateBr(ret_bb);
1839 
1840  LL_BUILDER.SetInsertPoint(ret_bb);
1841  return {result_phi, size};
1842  }
1843  CHECK_EQ(size_t(2), target_lvs.size());
1844  return {target_lvs[0], target_lvs[1]};
1845  }
1846  }
1847  if (target_ti.is_geometry() &&
1848  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1849  auto generate_coord_lvs =
1850  [&](auto* selected_target_expr,
1851  bool const fetch_columns) -> std::vector<llvm::Value*> {
1852  const auto target_lvs =
1853  code_generator.codegen(selected_target_expr, fetch_columns, co);
1854  if (dynamic_cast<const Analyzer::GeoOperator*>(target_expr) &&
1855  target_expr->get_type_info().is_geometry()) {
1856  // return a pointer to the temporary alloca
1857  return target_lvs;
1858  }
1859  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
1860  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
1861  if (geo_uoper || geo_binoper) {
1862  CHECK(target_expr->get_type_info().is_geometry());
1863  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1864  target_lvs.size());
1865  return target_lvs;
1866  }
1867  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1868  target_lvs.size());
1869 
1870  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1871  const auto i8p_ty =
1872  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1873  std::vector<llvm::Value*> coords;
1874  size_t ctr = 0;
1875  for (const auto& target_lv : target_lvs) {
1876  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1877  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1878  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1879  // coords array (TINYINT). Subsequent arrays are regular INT.
1880 
1881  const size_t elem_sz = ctr == 0 ? 1 : 4;
1882  ctr++;
1883  int32_t fixlen = -1;
1884  if (target_ti.get_type() == kPOINT) {
1885  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1886  if (col_var) {
1887  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1888  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1889  fixlen = coords_cd->columnType.get_size();
1890  }
1891  }
1892  }
1893  if (fixlen > 0) {
1894  coords.push_back(executor_->cgen_state_->emitExternalCall(
1895  "fast_fixlen_array_buff",
1896  i8p_ty,
1897  {target_lv, code_generator.posArg(selected_target_expr)}));
1898  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1899  continue;
1900  }
1901  coords.push_back(executor_->cgen_state_->emitExternalCall(
1902  "array_buff",
1903  i8p_ty,
1904  {target_lv, code_generator.posArg(selected_target_expr)}));
1905  coords.push_back(executor_->cgen_state_->emitExternalCall(
1906  "array_size",
1907  i32_ty,
1908  {target_lv,
1909  code_generator.posArg(selected_target_expr),
1910  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1911  }
1912  return coords;
1913  };
1914 
1915  if (agg_expr) {
1916  return generate_coord_lvs(agg_expr->get_arg(), true);
1917  } else {
1918  return generate_coord_lvs(target_expr,
1919  !executor_->plan_state_->allow_lazy_fetch_);
1920  }
1921  }
1922  }
1923  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1924  : code_generator.codegen(
1925  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1926 }

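For buffer-typed targets the method returns a (pointer, length) pair rather than a single value: none-encoded strings already carry both in target_lvs[1] and target_lvs[2], while arrays go through the array_buff and array_size runtime helpers. A sketch of the resulting shape (the struct is illustrative, not a type in the code base; the units of the length value are whatever array_size defines):

#include <cstdint>

struct VarlenAggArg {
  const int8_t* buf;  // from array_buff(chunk, pos)
  int32_t size;       // from array_size(chunk, pos, log2_bytes(elem_size))
};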

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
llvm::Value *  varlen_output_buffer,
const std::vector< llvm::Value * > &  agg_out_vec,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1446 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1453  {
1454  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1455  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1456  // TODO(alex): unify the two cases, the output for non-group by queries
1457  // should be a contiguous buffer
1458  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1459  bool can_return_error = false;
1460  if (is_group_by) {
1461  CHECK(agg_out_vec.empty());
1462  } else {
1463  CHECK(!agg_out_vec.empty());
1464  }
1465 
1466  // output buffer is casted into a byte stream to be able to handle data elements of
1467  // different sizes (only used when actual column width sizes are used)
1468  llvm::Value* output_buffer_byte_stream{nullptr};
1469  llvm::Value* out_row_idx{nullptr};
 1470  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
 1471  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
 1472  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1473  std::get<0>(agg_out_ptr_w_idx),
1474  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1475  output_buffer_byte_stream->setName("out_buff_b_stream");
1476  CHECK(std::get<1>(agg_out_ptr_w_idx));
1477  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1478  llvm::Type::getInt64Ty(LL_CONTEXT));
1479  out_row_idx->setName("out_row_idx");
1480  }
1481 
1482  TargetExprCodegenBuilder target_builder(ra_exe_unit_, is_group_by);
1483  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1484  ++target_idx) {
1485  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1486  CHECK(target_expr);
1487 
1488  target_builder(target_expr, executor_, query_mem_desc, co);
1489  }
1490 
1491  target_builder.codegen(this,
1492  executor_,
1493  query_mem_desc,
1494  co,
1495  gpu_smem_context,
1496  agg_out_ptr_w_idx,
1497  agg_out_vec,
1498  output_buffer_byte_stream,
1499  out_row_idx,
1500  varlen_output_buffer,
1501  diamond_codegen);
1502 
1503  for (auto target_expr : ra_exe_unit_.target_exprs) {
1504  CHECK(target_expr);
1505  executor_->plan_state_->isLazyFetchColumn(target_expr);
1506  }
1507 
1508  return can_return_error;
1509 }

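The byte-stream cast above exists because columnar projection slots may have different widths per target, so a single int8_t* base plus a per-target byte offset addresses every slot uniformly. A sketch of the addressing it sets up (the concrete offset math is emitted by codegenAggColumnPtr(), documented next):

#include <cstddef>
#include <cstdint>

int8_t* slot_address(int8_t* out_buff_b_stream,  // buffer viewed as bytes
                     size_t col_off,             // target's column byte offset
                     uint64_t out_row_idx,       // row index in the output
                     size_t chosen_bytes) {      // this target's slot width
  return out_buff_b_stream + col_off + out_row_idx * chosen_bytes;
}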

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1514 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1521  {
1522  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1523  llvm::Value* agg_col_ptr{nullptr};
1524  if (query_mem_desc.didOutputColumnar()) {
1525  // TODO(Saman): remove the second columnar branch, and support all query description
1526  // types through the first branch. Then, input arguments should also be cleaned up
 1527  if (!g_cluster &&
 1528  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
 1529  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1530  chosen_bytes == 8);
1531  CHECK(output_buffer_byte_stream);
1532  CHECK(out_row_idx);
1533  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1534  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1535  auto out_per_col_byte_idx =
1536 #ifdef _WIN32
1537  LL_BUILDER.CreateShl(out_row_idx, __lzcnt(chosen_bytes) - 1);
1538 #else
1539  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1540 #endif
1541  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1542  LL_INT(static_cast<int64_t>(col_off)));
1543  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1544  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1545  agg_col_ptr = LL_BUILDER.CreateBitCast(
1546  output_ptr,
1547  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1548  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1549  } else {
1550  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1551  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1552  col_off /= chosen_bytes;
1553  CHECK(std::get<1>(agg_out_ptr_w_idx));
1554  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1555  agg_col_ptr = LL_BUILDER.CreateGEP(
1556  LL_BUILDER.CreateBitCast(
1557  std::get<0>(agg_out_ptr_w_idx),
1558  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1559  offset);
1560  }
1561  } else {
1562  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1563  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1564  col_off /= chosen_bytes;
1565  agg_col_ptr = LL_BUILDER.CreateGEP(
1566  LL_BUILDER.CreateBitCast(
1567  std::get<0>(agg_out_ptr_w_idx),
1568  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1569  LL_INT(col_off));
1570  }
1571  CHECK(agg_col_ptr);
1572  return agg_col_ptr;
1573 }

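On the columnar fast path the row-index multiplication is performed with a shift: for the power-of-two widths asserted above, __builtin_ffs(chosen_bytes) - 1 equals log2(chosen_bytes). A standalone check of that identity (GCC/Clang builtin assumed):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (uint32_t chosen_bytes : {1u, 2u, 4u, 8u}) {
    const int shift = __builtin_ffs(chosen_bytes) - 1;  // log2 for powers of two
    for (uint64_t out_row_idx : {0ull, 1ull, 17ull, 1000ull}) {
      // the byte offset built above is col_off + (out_row_idx << shift)
      assert((out_row_idx << shift) == out_row_idx * chosen_bytes);
    }
  }
  return 0;
}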

void GroupByAndAggregate::codegenApproxQuantile ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1699 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, g_bigint_count, SQLTypeInfo::get_notnull(), get_target_info(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1704  {
1705  if (device_type == ExecutorDeviceType::GPU) {
1706  throw QueryMustRunOnCpu();
1707  }
1708  llvm::BasicBlock *calc, *skip;
1709  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1710  auto const arg_ti =
1711  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1712  bool const nullable = !arg_ti.get_notnull();
1713 
1714  auto* cs = executor_->cgen_state_.get();
1715  auto& irb = cs->ir_builder_;
1716  if (nullable) {
1717  auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1718  auto* const skip_cond = arg_ti.is_fp()
1719  ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1720  : irb.CreateICmpEQ(agg_args.back(), null_value);
1721  calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_quantile");
1722  skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_quantile");
1723  irb.CreateCondBr(skip_cond, skip, calc);
1724  cs->current_func_->getBasicBlockList().push_back(calc);
1725  irb.SetInsertPoint(calc);
1726  }
1727  if (!arg_ti.is_fp()) {
1728  auto const agg_info = get_target_info(target_expr, g_bigint_count);
1729  agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
1730  }
1731  cs->emitExternalCall(
1732  "agg_approx_quantile", llvm::Type::getVoidTy(cs->context_), agg_args);
1733  if (nullable) {
1734  irb.CreateBr(skip);
1735  cs->current_func_->getBasicBlockList().push_back(skip);
1736  irb.SetInsertPoint(skip);
1737  }
1738 }

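Reduced to scalar control flow, the calc/skip diamond above behaves like this sketch (agg_approx_quantile is the real runtime entry point; the stub only marks where it is invoked):

// Scalar analog of the generated calc/skip branches.
void agg_approx_quantile_stub(void* /*quantile_digest*/, double /*val*/) {}

void approx_quantile_row(void* quantile_digest, double val, bool is_null_value) {
  if (is_null_value) {
    return;  // "skip_approx_quantile": NULL inputs do not touch the digest
  }
  // "calc_approx_quantile": non-FP inputs were cast to FP beforehand
  agg_approx_quantile_stub(quantile_digest, val);
}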

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1630 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1635  {
1636  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1637  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1638  const auto& arg_ti =
1639  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1640  if (arg_ti.is_fp()) {
1641  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1642  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1643  }
1644  const auto& count_distinct_descriptor =
1645  query_mem_desc.getCountDistinctDescriptor(target_idx);
1646  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1647  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1648  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1649  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1650  if (device_type == ExecutorDeviceType::GPU) {
1651  const auto base_dev_addr = getAdditionalLiteral(-1);
1652  const auto base_host_addr = getAdditionalLiteral(-2);
1653  agg_args.push_back(base_dev_addr);
1654  agg_args.push_back(base_host_addr);
1655  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1656  } else {
1657  emitCall("agg_approximate_count_distinct", agg_args);
1658  }
1659  return;
1660  }
1661  std::string agg_fname{"agg_count_distinct"};
1662  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1663  agg_fname += "_bitmap";
1664  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1665  }
1666  if (agg_info.skip_null_val) {
1667  auto null_lv = executor_->cgen_state_->castToTypeIn(
1668  (arg_ti.is_fp()
1669  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1670  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1671  64);
1672  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1673  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1674  agg_fname += "_skip_val";
1675  agg_args.push_back(null_lv);
1676  }
1677  if (device_type == ExecutorDeviceType::GPU) {
1678  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1679  agg_fname += "_gpu";
1680  const auto base_dev_addr = getAdditionalLiteral(-1);
1681  const auto base_host_addr = getAdditionalLiteral(-2);
1682  agg_args.push_back(base_dev_addr);
1683  agg_args.push_back(base_host_addr);
1684  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1685  CHECK_EQ(size_t(0),
1686  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1687  count_distinct_descriptor.sub_bitmap_count);
1688  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1689  count_distinct_descriptor.sub_bitmap_count)));
1690  }
1691  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1692  emitCall(agg_fname, agg_args);
1693  } else {
1694  executor_->cgen_state_->emitExternalCall(
1695  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1696  }
1697 }

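The runtime function name is assembled from the count-distinct descriptor, mirroring the branches above. A standalone sketch of that assembly (the suffix semantics are as in the listing):

#include <string>

std::string count_distinct_runtime_fname(bool is_bitmap, bool skip_null_val, bool is_gpu) {
  std::string fname{"agg_count_distinct"};
  if (is_bitmap) {
    fname += "_bitmap";    // also appends min_val as an extra argument
  }
  if (skip_null_val) {
    fname += "_skip_val";  // appends the 64-bit null sentinel
  }
  if (is_gpu) {
    fname += "_gpu";       // appends device/host base addresses and bitmap geometry
  }
  return fname;
}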

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1575 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1578  {
1579  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1580  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1581  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1582  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1583  estimator_comp_count_lv);
1584  int32_t subkey_idx = 0;
1585  for (const auto& estimator_arg_comp : estimator_arg) {
1586  const auto estimator_arg_comp_lvs =
1587  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1588  query_mem_desc.getEffectiveKeyWidth(),
1589  co,
1590  false,
1591  0,
1592  diamond_codegen,
1593  array_loops,
1594  true);
1595  CHECK(!estimator_arg_comp_lvs.original_value);
1596  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1597  // store the sub-key to the buffer
1598  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1599  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1600  }
1601  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1602  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1603  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1604  const auto estimator_comp_bytes_lv =
1605  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1606  const auto bitmap_size_lv =
1607  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1608  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1609  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1610 }

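The emitted call marshals four arguments: the estimator buffer (the row function's first argument, cast to bytes), its size, the stacked 64-bit sub-keys, and their total byte size. An illustrative view of that layout (the struct is not a real type in the code base):

#include <cstdint>

struct EstimatorCallArgs {
  int8_t* bitmap;           // estimator buffer as i8*
  uint32_t bitmap_size;     // ra_exe_unit_.estimator->getBufferSize()
  const int8_t* key_bytes;  // one int64_t sub-key per estimator component
  int32_t key_size;         // component count * sizeof(int64_t)
};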

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1046 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, query_infos_, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1049  {
1050  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1051  auto arg_it = ROW_FUNC->arg_begin();
1052  auto groups_buffer = arg_it++;
1053 
1054  std::stack<llvm::BasicBlock*> array_loops;
1055 
 1056  // TODO(Saman): move this logic outside of this function.
 1057  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
 1058  if (query_mem_desc.didOutputColumnar()) {
1059  return std::make_tuple(
1060  &*groups_buffer,
1061  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1062  } else {
1063  return std::make_tuple(
1064  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1065  nullptr);
1066  }
1067  }
1068 
 1069  CHECK(query_mem_desc.getQueryDescriptionType() ==
 1070  QueryDescriptionType::GroupByBaselineHash ||
 1071  query_mem_desc.getQueryDescriptionType() ==
 1072  QueryDescriptionType::GroupByPerfectHash);
 1073 
1074  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1075  ? 0
1076  : query_mem_desc.getRowSize() / sizeof(int64_t);
1077 
1078  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1079  ? sizeof(int64_t)
1080  : query_mem_desc.getEffectiveKeyWidth();
1081  // for multi-column group by
1082  llvm::Value* group_key = nullptr;
1083  llvm::Value* key_size_lv = nullptr;
1084 
1085  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1086  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
 1087  if (query_mem_desc.getQueryDescriptionType() ==
 1088  QueryDescriptionType::GroupByPerfectHash) {
 1089  group_key =
1090  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
 1091  } else if (query_mem_desc.getQueryDescriptionType() ==
 1092  QueryDescriptionType::GroupByBaselineHash) {
 1093  group_key =
1094  col_width_size == sizeof(int32_t)
1095  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1096  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1097  }
1098  CHECK(group_key);
1099  CHECK(key_size_lv);
1100  }
1101 
1102  int32_t subkey_idx = 0;
1103  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1104  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
 1105  const auto col_range_info =
 1106  get_expr_range_info(ra_exe_unit_, query_infos_, group_expr.get(), executor_);
 1107  const auto translated_null_value = static_cast<int64_t>(
1108  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1109  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1110  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1111  : checked_int64_t(col_range_info.max) +
1112  (col_range_info.bucket ? col_range_info.bucket : 1));
1113 
 1114  const bool col_has_nulls =
 1115  query_mem_desc.getQueryDescriptionType() ==
 1116  QueryDescriptionType::GroupByPerfectHash
 1117  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1118  ? query_mem_desc.hasNulls()
1119  : col_range_info.has_nulls)
1120  : false;
1121 
1122  const auto group_expr_lvs =
1123  executor_->groupByColumnCodegen(group_expr.get(),
1124  col_width_size,
1125  co,
1126  col_has_nulls,
1127  translated_null_value,
1128  diamond_codegen,
1129  array_loops,
1130  query_mem_desc.threadsShareMemory());
1131  const auto group_expr_lv = group_expr_lvs.translated_value;
1132  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1133  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1134  return codegenSingleColumnPerfectHash(query_mem_desc,
1135  co,
1136  &*groups_buffer,
1137  group_expr_lv,
1138  group_expr_lvs.original_value,
1139  row_size_quad);
1140  } else {
1141  // store the sub-key to the buffer
1142  LL_BUILDER.CreateStore(group_expr_lv,
1143  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1144  }
1145  }
 1146  if (query_mem_desc.getQueryDescriptionType() ==
 1147  QueryDescriptionType::GroupByPerfectHash) {
 1148  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
 1149  return codegenMultiColumnPerfectHash(
 1150  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
 1151  } else if (query_mem_desc.getQueryDescriptionType() ==
 1152  QueryDescriptionType::GroupByBaselineHash) {
 1153  return codegenMultiColumnBaselineHash(co,
 1154  &*groups_buffer,
1155  group_key,
1156  key_size_lv,
1157  query_mem_desc,
1158  col_width_size,
1159  row_size_quad);
1160  }
1161  CHECK(false);
1162  return std::make_tuple(nullptr, nullptr);
1163 }

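A worked example of the translated NULL key computed above: NULL group values are remapped to one bucket past the column's maximum, so they occupy a dedicated slot without colliding with real keys. A standalone check of the arithmetic:

#include <cassert>
#include <cstdint>

int64_t translated_null_value(int64_t max_val, int64_t bucket) {
  return max_val + (bucket ? bucket : 1);
}

int main() {
  assert(translated_null_value(100, 10) == 110);  // bucketed column
  assert(translated_null_value(100, 0) == 101);   // unbucketed: max + 1
  return 0;
}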

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1274 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1281  {
1282  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1283  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1284  CHECK(key_width == sizeof(int32_t));
1285  group_key =
1286  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1287  }
1288  std::vector<llvm::Value*> func_args{
1289  groups_buffer,
1290  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1291  &*group_key,
1292  &*key_size_lv,
1293  LL_INT(static_cast<int32_t>(key_width))};
1294  std::string func_name{"get_group_value"};
1295  if (query_mem_desc.didOutputColumnar()) {
1296  func_name += "_columnar_slot";
1297  } else {
1298  func_args.push_back(LL_INT(row_size_quad));
1299  }
1300  if (co.with_dynamic_watchdog) {
1301  func_name += "_with_watchdog";
1302  }
1303  if (query_mem_desc.didOutputColumnar()) {
1304  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1305  } else {
1306  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1307  }
1308 }

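The runtime entry point is chosen by appending suffixes for columnar output and the dynamic watchdog, exactly as in the listing. A standalone sketch of the name assembly:

#include <string>

std::string baseline_group_value_fname(bool output_columnar, bool with_dynamic_watchdog) {
  std::string func_name{"get_group_value"};
  if (output_columnar) {
    func_name += "_columnar_slot";
  } else {
    // row-wise layout additionally passes row_size_quad as an argument
  }
  if (with_dynamic_watchdog) {
    func_name += "_with_watchdog";
  }
  return func_name;
}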

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1230 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1235  {
1236  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1237  CHECK(query_mem_desc.getQueryDescriptionType() ==
1239  // compute the index (perfect hash)
1240  auto perfect_hash_func = codegenPerfectHashFunction();
1241  auto hash_lv =
1242  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1243 
1244  if (query_mem_desc.didOutputColumnar()) {
1245  if (!query_mem_desc.hasKeylessHash()) {
1246  const std::string set_matching_func_name{
1247  "set_matching_group_value_perfect_hash_columnar"};
1248  const std::vector<llvm::Value*> set_matching_func_arg{
1249  groups_buffer,
1250  hash_lv,
1251  group_key,
1252  key_size_lv,
1253  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1254  query_mem_desc.getEntryCount())};
1255  emitCall(set_matching_func_name, set_matching_func_arg);
1256  }
1257  return std::make_tuple(groups_buffer, hash_lv);
1258  } else {
1259  if (query_mem_desc.hasKeylessHash()) {
1260  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1261  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1262  nullptr);
1263  } else {
1264  return std::make_tuple(
1265  emitCall(
1266  "get_matching_group_value_perfect_hash",
1267  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1268  nullptr);
1269  }
1270  }
1271 }

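The index fed to these runtime calls is produced by the helper emitted in codegenPerfectHashFunction() (listing below): each key component is offset by its column's minimum, divided by its bucket size if any, and scaled by the cardinalities of all preceding components. A worked scalar version of that hash:

#include <cassert>
#include <cstdint>
#include <vector>

int64_t perfect_key_hash(const std::vector<int64_t>& key,
                         const std::vector<int64_t>& mins,
                         const std::vector<int64_t>& buckets,  // 0 means unbucketed
                         const std::vector<int64_t>& cards) {
  int64_t hash = 0;
  for (size_t dim = 0; dim < key.size(); ++dim) {
    int64_t term = key[dim] - mins[dim];
    if (buckets[dim]) {
      term /= buckets[dim];
    }
    for (size_t prev = 0; prev < dim; ++prev) {
      term *= cards[prev];  // scale by the cardinalities of prior dimensions
    }
    hash += term;
  }
  return hash;
}

int main() {
  // Two group-by columns: values in [0, 9] (cardinality 10) and [5, 7] (cardinality 3).
  assert(perfect_key_hash({3, 6}, {0, 5}, {0, 0}, {10, 3}) == 3 + 1 * 10);
  return 0;
}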

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 961 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, anonymous_namespace{Utm.h}::n, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

 965  {
 966  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
 967  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
 968  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
969  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
970  CHECK(!group_expr);
971  if (!query_mem_desc.didOutputColumnar()) {
972  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
973  }
974  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
975  ? 0
976  : query_mem_desc.getRowSize() / sizeof(int64_t);
977  CodeGenerator code_generator(executor_);
978  if (query_mem_desc.useStreamingTopN()) {
979  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
980  CHECK_GE(only_order_entry.tle_no, int(1));
981  const size_t target_idx = only_order_entry.tle_no - 1;
982  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
983  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
984  const auto chosen_bytes =
985  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
986  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
 987  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
 988  const uint32_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
 989  std::string fname = "get_bin_from_k_heap";
990  const auto& oe_ti = order_entry_expr->get_type_info();
991  llvm::Value* null_key_lv = nullptr;
992  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
993  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
994  switch (bit_width) {
995  case 32:
996  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
997  break;
998  case 64:
999  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1000  break;
1001  default:
1002  CHECK(false);
1003  }
1004  fname += "_int" + std::to_string(bit_width) + "_t";
1005  } else {
1006  CHECK(oe_ti.is_fp());
1007  if (order_entry_lv->getType()->isDoubleTy()) {
1008  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1009  } else {
1010  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1011  }
1012  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1013  }
 1014  const auto key_slot_idx =
 1015  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
 1016  return emitCall(
1017  fname,
1018  {groups_buffer,
1019  LL_INT(n),
1020  LL_INT(row_size_quad),
1021  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1022  LL_BOOL(only_order_entry.is_desc),
1023  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1024  LL_BOOL(only_order_entry.nulls_first),
1025  null_key_lv,
1026  order_entry_lv});
1027  } else {
1028  const auto output_buffer_entry_count_lv =
1029  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1030  const auto group_expr_lv =
1031  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1032  std::vector<llvm::Value*> args{groups_buffer,
1033  output_buffer_entry_count_lv,
1034  group_expr_lv,
1035  code_generator.posArg(nullptr)};
1036  if (query_mem_desc.didOutputColumnar()) {
1037  const auto columnar_output_offset =
1038  emitCall("get_columnar_scan_output_offset", args);
1039  return columnar_output_offset;
1040  }
1041  args.push_back(LL_INT(row_size_quad));
1042  return emitCall("get_scan_output_slot", args);
1043  }
1044 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1310 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), get_int_type(), getBucketedCardinality(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), query_infos_, and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1310  {
1311  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1312  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1313  auto ft = llvm::FunctionType::get(
1314  get_int_type(32, LL_CONTEXT),
1315  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1316  false);
1317  auto key_hash_func = llvm::Function::Create(ft,
1318  llvm::Function::ExternalLinkage,
1319  "perfect_key_hash",
1320  executor_->cgen_state_->module_);
1321  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1322  mark_function_always_inline(key_hash_func);
1323  auto& key_buff_arg = *key_hash_func->args().begin();
1324  llvm::Value* key_buff_lv = &key_buff_arg;
1325  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1326  llvm::IRBuilder<> key_hash_func_builder(bb);
1327  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1328  std::vector<int64_t> cardinalities;
1329  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1330  auto col_range_info =
1331  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1332  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1333  cardinalities.push_back(getBucketedCardinality(col_range_info));
1334  }
1335  size_t dim_idx = 0;
1336  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1337  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1338  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1339  auto col_range_info =
1340  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1341  auto crt_term_lv =
1342  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1343  if (col_range_info.bucket) {
1344  crt_term_lv =
1345  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1346  }
1347  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1348  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1349  LL_INT(cardinalities[prev_dim_idx]));
1350  }
1351  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1352  ++dim_idx;
1353  }
1354  key_hash_func_builder.CreateRet(
1355  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1356  return key_hash_func;
1357 }
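
The generated IR computes a mixed-radix index over the group-by key. A host-side equivalent of the emitted perfect_key_hash (a sketch, assuming per-dimension min, bucket, and bucketed cardinality have been collected exactly as in the loops above):

#include <cstddef>
#include <cstdint>
#include <vector>

struct DimInfo {
  int64_t min;
  int64_t bucket;       // 0 means no bucketing
  int64_t cardinality;  // bucketed cardinality of the dimension
};

// Each key component is normalized to (key - min) / bucket, scaled by the
// product of the cardinalities of all preceding dimensions, then summed and
// truncated to 32 bits, as in the IR above.
int32_t perfect_key_hash(const int64_t* key_buff, const std::vector<DimInfo>& dims) {
  int64_t hash = 0;
  for (size_t dim_idx = 0; dim_idx < dims.size(); ++dim_idx) {
    int64_t term = key_buff[dim_idx] - dims[dim_idx].min;
    if (dims[dim_idx].bucket) {
      term /= dims[dim_idx].bucket;
    }
    for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
      term *= dims[prev_dim_idx].cardinality;
    }
    hash += term;
  }
  return static_cast<int32_t>(hash);
}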

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1180 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1186  {
1187  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1188  CHECK(query_mem_desc.usesGetGroupValueFast());
1189  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1190  ? "get_columnar_group_bin_offset"
1191  : "get_group_value_fast"};
1192  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1193  get_group_fn_name += "_keyless";
1194  }
1195  if (query_mem_desc.interleavedBins(co.device_type)) {
1196  CHECK(!query_mem_desc.didOutputColumnar());
1197  CHECK(query_mem_desc.hasKeylessHash());
1198  get_group_fn_name += "_semiprivate";
1199  }
1200  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1201  &*group_expr_lv_translated};
1202  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1203  query_mem_desc.mustUseBaselineSort()) {
1204  get_group_fn_name += "_with_original_key";
1205  get_group_fn_args.push_back(group_expr_lv_original);
1206  }
1207  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1208  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1209  if (!query_mem_desc.hasKeylessHash()) {
1210  if (!query_mem_desc.didOutputColumnar()) {
1211  get_group_fn_args.push_back(LL_INT(row_size_quad));
1212  }
1213  } else {
1214  if (!query_mem_desc.didOutputColumnar()) {
1215  get_group_fn_args.push_back(LL_INT(row_size_quad));
1216  }
1217  if (query_mem_desc.interleavedBins(co.device_type)) {
1218  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1219  get_group_fn_args.push_back(warp_idx);
1220  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1221  }
1222  }
1223  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1224  return std::make_tuple(&*groups_buffer,
1225  emitCall(get_group_fn_name, get_group_fn_args));
1226  }
1227  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1228 }
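
The runtime helpers selected above are not shown on this page, but the argument list (min_val, bucket, row_size_quad) implies the usual perfect-hash binning. A rough sketch of what the plain get_group_value_fast variant is assumed to compute, ignoring key initialization and the keyless, columnar, and interleaved variants:

#include <cstdint>

// Assumption, not the actual runtime: map the key to its bin via
// (key - min_val) / bucket and return a pointer to that row in the groups
// buffer; row_size_quad is the row stride in 8-byte quadwords.
int64_t* get_group_value_fast_sketch(int64_t* groups_buffer,
                                     const int64_t key,
                                     const int64_t min_val,
                                     const int64_t bucket,
                                     const uint32_t row_size_quad) {
  int64_t bin = key - min_val;
  if (bucket) {
    bin /= bucket;
  }
  return groups_buffer + bin * row_size_quad;
}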

llvm::Value * GroupByAndAggregate::codegenVarlenOutputBuffer ( const QueryMemoryDescriptor query_mem_desc)
private

Definition at line 1165 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, executor_, QueryMemoryDescriptor::hasVarlenOutput(), LL_CONTEXT, and ROW_FUNC.

Referenced by codegen().

1166  {
1167  if (!query_mem_desc.hasVarlenOutput()) {
1168  return nullptr;
1169  }
1170 
1171  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1172  auto arg_it = ROW_FUNC->arg_begin();
1173  arg_it++; /* groups_buffer */
1174  auto varlen_output_buffer = arg_it++;
1175  CHECK(varlen_output_buffer->getType() == llvm::Type::getInt64PtrTy(LL_CONTEXT));
1176  return varlen_output_buffer;
1177 }

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction window_func,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
DiamondCodegen diamond_codegen 
)
private

Definition at line 1410 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, codegenOutputSlot(), CodeGenerator::codegenWindowPosition(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1414  {
1415  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1416  const auto window_func_context =
1417  WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1418  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1419  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1420  ? 0
1421  : query_mem_desc.getRowSize() / sizeof(int64_t);
1422  auto arg_it = ROW_FUNC->arg_begin();
1423  auto groups_buffer = arg_it++;
1424  CodeGenerator code_generator(executor_);
1425  auto window_pos_lv = code_generator.codegenWindowPosition(
1426  window_func_context, code_generator.posArg(nullptr));
1427  const auto pos_in_window =
1428  LL_BUILDER.CreateTrunc(window_pos_lv, get_int_type(32, LL_CONTEXT));
1429  llvm::Value* entry_count_lv =
1430  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1431  std::vector<llvm::Value*> args{
1432  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1433  if (query_mem_desc.didOutputColumnar()) {
1434  const auto columnar_output_offset =
1435  emitCall("get_columnar_scan_output_offset", args);
1436  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1437  }
1438  args.push_back(LL_INT(row_size_quad));
1439  return emitCall("get_scan_output_slot", args);
1440  }
1441  auto arg_it = ROW_FUNC->arg_begin();
1442  auto groups_buffer = arg_it++;
1443  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1444 }

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo arg_type,
const TargetInfo agg_info,
llvm::Value *  target 
)
private

Definition at line 1359 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1361  {
1362  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1363  const auto& agg_type = agg_info.sql_type;
1364  const size_t chosen_bytes = agg_type.get_size();
1365 
1366  bool need_conversion{false};
1367  llvm::Value* arg_null{nullptr};
1368  llvm::Value* agg_null{nullptr};
1369  llvm::Value* target_to_cast{target};
1370  if (arg_type.is_fp()) {
1371  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1372  if (agg_type.is_fp()) {
1373  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1374  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1375  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1376  need_conversion = true;
1377  }
1378  } else {
1379  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1380  return target;
1381  }
1382  } else {
1383  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1384  if (agg_type.is_fp()) {
1385  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1386  need_conversion = true;
1387  target_to_cast = executor_->castToFP(target, arg_type, agg_type);
1388  } else {
1389  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1390  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1391  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1392  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1393  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1394  need_conversion = true;
1395  }
1396  }
1397  }
1398  if (need_conversion) {
1399  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1400  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1401  return LL_BUILDER.CreateSelect(
1402  cmp,
1403  agg_null,
1404  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1405  } else {
1406  return target;
1407  }
1408 }
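
As a concrete case of why the conversion is needed: an aggregate over a SMALLINT column may accumulate into a wider 64-bit slot, so a 16-bit null sentinel in the argument must be replaced with the aggregate type's own sentinel rather than merely sign-extended. A scalar analogue of the CreateSelect above for the integer-to-integer case (the sentinel parameters are illustrative stand-ins for inline_int_null_val()):

#include <cstdint>

// If the argument equals its own null sentinel, substitute the aggregate's
// (wider) null sentinel; otherwise widen the value itself.
int64_t convert_null_if_any_sketch(const int16_t arg,
                                   const int16_t arg_null,
                                   const int64_t agg_null) {
  return arg == arg_null ? agg_null : static_cast<int64_t>(arg);
}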

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 1928 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

1929  {
1930  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1931  return executor_->cgen_state_->emitCall(fname, args);
1932 }

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1740 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1740  {
1741  CHECK_LT(off, 0);
1742  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1743  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1744  LL_BUILDER.CreateBitCast(lit_buff_lv,
1745  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1746  LL_INT(off)));
1747 }
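
A scalar sketch of the load above, assuming (as CHECK_LT(off, 0) implies) that additional literals live at negative 64-bit slot offsets relative to the row function's literals argument:

#include <cstdint>

// Reinterpret the literal buffer as 64-bit slots and index backwards; off is
// required to be negative, matching the CHECK_LT above.
int64_t additional_literal(const int8_t* lit_buff, const int32_t off) {
  return reinterpret_cast<const int64_t*>(lit_buff)[off];
}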

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo col_range_info)
static private

Definition at line 298 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

298  {
299  checked_int64_t crt_col_cardinality =
300  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
301  if (col_range_info.bucket) {
302  crt_col_cardinality /= col_range_info.bucket;
303  }
304  return static_cast<int64_t>(crt_col_cardinality +
305  (1 + (col_range_info.has_nulls ? 1 : 0)));
306 }
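
For example, a column with range [0, 99], a bucket size of 10, and nulls present yields 99 / 10 + 1 + 1 = 11 entries. A plain-arithmetic sketch, without the checked_int64_t protection the real implementation uses to turn range overflow into an exception:

#include <cstdint>

// (max - min) / bucket buckets, plus one slot for the inclusive upper bound
// and one for nulls when present.
int64_t bucketed_cardinality_sketch(const int64_t min,
                                    const int64_t max,
                                    const int64_t bucket,
                                    const bool has_nulls) {
  int64_t card = max - min;
  if (bucket) {
    card /= bucket;
  }
  return card + 1 + (has_nulls ? 1 : 0);
}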

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 175 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), getBucketedCardinality(), GPU, group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, SortInfo::order_entries, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

175  {
176  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
177  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
178  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
179  // can expect this to be true anyway for grouped queries since the precise version
180  // uses significantly more memory.
181  const int64_t baseline_threshold =
182  has_count_distinct(ra_exe_unit_) && device_type_ == ExecutorDeviceType::GPU
183  ? (Executor::baseline_threshold / 4)
184  : Executor::baseline_threshold;
185 
186  if (ra_exe_unit_.groupby_exprs.size() != 1) {
187  try {
188  checked_int64_t cardinality{1};
189  bool has_nulls{false};
190  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
191  auto col_range_info = get_expr_range_info(
192  ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
193  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
194  // going through baseline hash if a non-integer type is encountered
195  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
196  }
197  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
198  CHECK_GE(crt_col_cardinality, 0);
199  cardinality *= crt_col_cardinality;
200  if (col_range_info.has_nulls) {
201  has_nulls = true;
202  }
203  }
204  // For zero or high cardinalities, use baseline layout.
205  if (!cardinality || cardinality > baseline_threshold) {
206  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
207  }
208  return {QueryDescriptionType::GroupByPerfectHash,
209  0,
210  int64_t(cardinality),
211  0,
212  has_nulls};
213  } catch (...) { // overflow when computing cardinality
214  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
215  }
216  }
217  // For single column groupby on high timestamps, force baseline hash due to wide ranges
218  // we are likely to encounter when applying quals to the expression range
219  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
220  // the range is small enough
221  if (ra_exe_unit_.groupby_exprs.front() &&
222  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
223  ra_exe_unit_.simple_quals.size() > 0) {
224  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
225  }
226  const auto col_range_info = get_expr_range_info(
227  ra_exe_unit_, query_infos_, ra_exe_unit_.groupby_exprs.front().get(), executor_);
228  if (!ra_exe_unit_.groupby_exprs.front()) {
229  return col_range_info;
230  }
231  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
232  const int64_t col_count =
233  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
234  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
235  if (has_count_distinct(ra_exe_unit_)) {
236  max_entry_count = std::min(max_entry_count, baseline_threshold);
237  }
238  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
239  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
240  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
241 
242  const bool has_filters =
243  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
244  if (has_filters &&
245  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
246  // if filters are present, we can use the filter to narrow the cardinality of the
247  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
248  // off attempting perfect hash (since we know the range will be made of
249  // monotonically increasing numbers from min to max for dictionary encoded strings)
250  // and failing later due to excessive memory use.
251  // Check the conditions where baseline hash can provide a performance increase and
252  // return baseline hash (potentially forcing an estimator query) as the range type.
253  // Otherwise, return col_range_info which will likely be perfect hash, though could
254  // be baseline from a previous call of this function prior to the estimator query.
255  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
256  // TODO(adb): allow some sorts to pass through this block by centralizing sort
257  // algorithm decision making
258  if (has_count_distinct(ra_exe_unit_) &&
259  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
260  // always use baseline hash for column range too big for perfect hash with count
261  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
262  // hash group by in this case.
263  return {QueryDescriptionType::GroupByBaselineHash,
264  col_range_info.min,
265  col_range_info.max,
266  0,
267  col_range_info.has_nulls};
268  } else {
269  // use original col range for sort
270  return col_range_info;
271  }
272  }
273  // if filters are present and the filtered range is less than the cardinality of
274  // the column, consider baseline hash
275  if (group_cardinality_estimation_ &&
276  cardinality_estimate_less_than_column_range(*group_cardinality_estimation_,
277  col_range_info)) {
278  return {QueryDescriptionType::GroupByBaselineHash,
279  col_range_info.min,
280  col_range_info.max,
281  0,
282  col_range_info.has_nulls};
283  }
284  }
285  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(),
286  *executor_->catalog_)) &&
287  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
288  !col_range_info.bucket) {
289  return {QueryDescriptionType::GroupByBaselineHash,
290  col_range_info.min,
291  col_range_info.max,
292  0,
293  col_range_info.has_nulls};
294  }
295  return col_range_info;
296 }
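
To illustrate the multi-column branch at the top: grouping by two columns with bucketed cardinalities 1,000 and 2,000 gives a combined cardinality of 2,000,000, so perfect hash is kept only while that product is non-zero and at or below baseline_threshold; an overflow of the checked multiplication likewise falls back to baseline hash. A minimal sketch of that decision, with an explicit overflow guard in place of checked_int64_t:

#include <cstdint>
#include <vector>

// Returns true when the product of per-column bucketed cardinalities stays
// within the baseline threshold; zero cardinality or overflow means baseline
// hash, mirroring the catch block above.
bool use_perfect_hash_sketch(const std::vector<int64_t>& cardinalities,
                             const int64_t baseline_threshold) {
  int64_t cardinality = 1;
  for (const auto card : cardinalities) {
    if (card <= 0 || cardinality > baseline_threshold / card) {
      return false;
    }
    cardinality *= card;
  }
  return true;
}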

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo col_range_info,
const size_t  shard_count 
) const
private

Definition at line 347 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

348  {
349  size_t device_count{0};
350  if (device_type_ == ExecutorDeviceType::GPU) {
351  device_count = executor_->cudaMgr()->getDeviceCount();
352  CHECK_GT(device_count, 0u);
353  }
354 
355  int64_t bucket{col_range_info.bucket};
356 
357  if (shard_count) {
358  CHECK(!col_range_info.bucket);
359  /*
360  when a node has fewer devices than shard count,
361  a) In a distributed setup, the minimum distance between two keys would be
362  device_count because shards are stored consecutively across the physical tables,
363  i.e if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
364  would have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf
365  node has only 1 device, in this case, all the keys from each node are loaded on
366  the device each.
367 
368  b) In a single node setup, the distance would be the minimum of device_count and
369  (shard_count - device_count). For example: if a single node server running on
370  3 devices has a shard column with values 0 to 9 in a table with 4 shards, the
371  device to fragment keys mapping would be: device 1 - 4,8,3,7 device 2 - 1,5,9
372  device 3 - 2, 6 The bucket value would be 4(shards) - 3(devices) = 1 i.e. minimum
373  of device_count or difference.
374 
375  When a node has device count equal to or more than shard count then the
376  minimum distance is always at least shard_count * no of leaf nodes.
377  */
378  if (device_count < shard_count) {
379  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
380  : std::min(device_count, shard_count - device_count);
381  } else {
382  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
383  }
384  }
385 
386  return bucket;
387 }
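
Plugging the comment's example into the branch above: a single node (g_leaf_count == 0) with 3 devices and a 4-shard table takes the device_count < shard_count path and picks min(3, 4 - 3) = 1. A standalone version of the same choice:

#include <algorithm>
#include <cstddef>
#include <cstdint>

// Shard-aware bucket choice for sharded top-N queries, as in the branch above.
int64_t sharded_top_bucket_sketch(const size_t device_count,
                                  const size_t shard_count,
                                  const size_t leaf_count) {
  if (device_count < shard_count) {
    return leaf_count ? std::max(device_count, static_cast<size_t>(1))
                      : std::min(device_count, shard_count - device_count);
  }
  return shard_count * std::max(leaf_count, static_cast<size_t>(1));
}
// sharded_top_bucket_sketch(3, 4, 0) == 1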

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 781 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, executor_, Analyzer::AggExpr::get_arg(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Analyzer::Expr::get_type_info(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

782  {
783  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
784  return false;
785  }
786  for (const auto& order_entry : order_entries) {
787  CHECK_GE(order_entry.tle_no, 1);
788  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
789  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
790  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
791  return false;
792  }
793  // TODO(alex): relax the restrictions
794  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
795  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
796  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
797  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
798  return false;
799  }
800  if (agg_expr->get_arg()) {
801  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
802  if (arg_ti.is_fp()) {
803  return false;
804  }
805  auto expr_range_info =
806  get_expr_range_info(ra_exe_unit_, query_infos_, agg_expr->get_arg(), executor_);
807  // TODO(adb): QMD not actually initialized here?
808  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
809  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
810  expr_range_info.has_nulls) &&
811  order_entry.is_desc == order_entry.nulls_first) {
812  return false;
813  }
814  }
815  const auto& target_ti = target_expr->get_type_info();
816  CHECK(!target_ti.is_buffer());
817  if (!target_ti.is_integer()) {
818  return false;
819  }
820  }
821  return true;
822 }

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo render_info,
const bool  output_columnar_hint 
)
private

Definition at line 656 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

661  {
662  const auto shard_count =
663  device_type_ == ExecutorDeviceType::GPU
664  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
665  : 0;
666  bool sort_on_gpu_hint =
667  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
668  !ra_exe_unit_.sort_info.order_entries.empty() &&
669  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
670  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
671  // but the total output buffer size would be too big or it's a sharded top query.
672  // For the sake of managing risk, use the new result set way very selectively for
673  // this case only (alongside the baseline layout we've enabled for a while now).
674  bool must_use_baseline_sort = shard_count;
675  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
676  while (true) {
677  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
678  max_groups_buffer_entry_count,
679  crt_min_byte_width,
680  sort_on_gpu_hint,
681  render_info,
682  must_use_baseline_sort,
683  output_columnar_hint);
684  CHECK(query_mem_desc);
685  if (query_mem_desc->sortOnGpu() &&
686  (query_mem_desc->getBufferSizeBytes(device_type_) +
687  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
688  2 * 1024 * 1024 * 1024LL) {
689  must_use_baseline_sort = true;
690  sort_on_gpu_hint = false;
691  } else {
692  break;
693  }
694  }
695  return query_mem_desc;
696 }
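
The retry loop abandons the GPU sort when the output buffer plus an int64-aligned 32-bit row index per entry would exceed 2GB. The guard, isolated as a standalone predicate (a sketch; the inlined align_to_int64 lambda mimics the helper referenced above):

#include <cstddef>
#include <cstdint>

// True when the sort-on-GPU buffers would not fit the 2GB budget, forcing the
// baseline-sort retry on the next loop iteration.
bool gpu_sort_buffer_too_big(const size_t buffer_size_bytes, const size_t entry_count) {
  const auto align_to_int64 = [](const size_t n) { return (n + 7) & ~size_t(7); };
  return buffer_size_bytes + align_to_int64(entry_count * sizeof(int32_t)) >
         size_t(2) * 1024 * 1024 * 1024;
}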

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 698 of file GroupByAndAggregate.cpp.

References device_type_, executor_, g_enable_watchdog, g_watchdog_baseline_max_groups, anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info(), getColRangeInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

705  {
706  const auto count_distinct_descriptors = init_count_distinct_descriptors(
707  ra_exe_unit_, query_infos_, device_type_, executor_);
708 
709  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
710 
711  auto col_range_info_nosharding = getColRangeInfo();
712 
713  const auto shard_count =
714  device_type_ == ExecutorDeviceType::GPU
715  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
716  : 0;
717 
718  const auto col_range_info =
719  ColRangeInfo{col_range_info_nosharding.hash_type_,
720  col_range_info_nosharding.min,
721  col_range_info_nosharding.max,
722  getShardedTopBucket(col_range_info_nosharding, shard_count),
723  col_range_info_nosharding.has_nulls};
724 
725  // Non-grouped aggregates do not support accessing aggregated ranges
726  // Keyless hash is currently only supported with single-column perfect hash
727  const auto keyless_info =
728  !(is_group_by &&
729  col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash)
730  ? KeylessInfo{false, -1}
731  : get_keyless_info(ra_exe_unit_, query_infos_, is_group_by, executor_);
732 
733  if (g_enable_watchdog &&
734  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
735  max_groups_buffer_entry_count > g_watchdog_baseline_max_groups) ||
736  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
737  ra_exe_unit_.groupby_exprs.size() == 1 &&
738  (col_range_info.max - col_range_info.min) /
739  std::max(col_range_info.bucket, int64_t(1)) >
740  130000000))) {
741  throw WatchdogException("Query would use too much memory");
742  }
743  try {
744  return QueryMemoryDescriptor::init(executor_,
745  ra_exe_unit_,
746  query_infos_,
747  col_range_info,
748  keyless_info,
749  allow_multifrag,
750  device_type_,
751  crt_min_byte_width,
752  sort_on_gpu_hint,
753  shard_count,
754  max_groups_buffer_entry_count,
755  render_info,
756  count_distinct_descriptors,
757  must_use_baseline_sort,
758  output_columnar_hint,
759  /*streaming_top_n_hint=*/true);
760  } catch (const StreamingTopNOOM& e) {
761  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
762  return QueryMemoryDescriptor::init(executor_,
763  ra_exe_unit_,
764  query_infos_,
765  col_range_info,
766  keyless_info,
767  allow_multifrag,
768  device_type_,
769  crt_min_byte_width,
770  sort_on_gpu_hint,
771  shard_count,
772  max_groups_buffer_entry_count,
773  render_info,
774  count_distinct_descriptors,
775  must_use_baseline_sort,
776  output_columnar_hint,
777  /*streaming_top_n_hint=*/false);
778  }
779 }

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value const val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions co 
) const
private

Definition at line 29 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

32  {
33  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
34  llvm::isa<llvm::AllocaInst>(val_ptr) &&
35  val_ptr->getType() ==
36  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
37  "agg_id" == agg_base_name);
38 }

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 40 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

40  {
41  executor_->cgen_state_->ir_builder_.CreateCall(
42  executor_->cgen_state_->module_->getFunction("force_sync"));
43 }


size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog catalog 
)
static

Definition at line 1951 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

1953  {
1954  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1955  return 0;
1956  }
1957  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1958  const auto grouped_col_expr =
1959  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1960  if (!grouped_col_expr) {
1961  continue;
1962  }
1963  if (grouped_col_expr->get_table_id() <= 0) {
1964  return 0;
1965  }
1966  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1967  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1968  return td->nShards;
1969  }
1970  }
1971  return 0;
1972 }

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 214 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 215 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 212 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 213 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 216 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 217 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private
const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 210 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

bool GroupByAndAggregate::output_columnar_
private

Definition at line 207 of file GroupByAndAggregate.h.

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 206 of file GroupByAndAggregate.h.


The documentation for this class was generated from the following files: