OmniSciDB  fe05a0c208
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored. More...
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
void codegenApproxMedian (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 61 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 316 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, and ra_exe_unit_.

323  : executor_(executor)
324  , ra_exe_unit_(ra_exe_unit)
325  , query_infos_(query_infos)
326  , row_set_mem_owner_(row_set_mem_owner)
327  , device_type_(device_type)
328  , group_cardinality_estimation_(group_cardinality_estimation) {
329  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
330  if (!groupby_expr) {
331  continue;
332  }
333  const auto& groupby_ti = groupby_expr->get_type_info();
334  if (groupby_ti.is_bytes()) {
335  throw std::runtime_error(
336  "Cannot group by string columns which are not dictionary encoded.");
337  }
338  if (groupby_ti.is_buffer()) {
339  throw std::runtime_error("Group by buffer not supported");
340  }
341  if (groupby_ti.is_geometry()) {
342  throw std::runtime_error("Group by geometry not supported");
343  }
344  }
345 }

Member Function Documentation

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 1902 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

1902  {
1903  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1904  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1905  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1906  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1907 
1908  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1909 }

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 826 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, i, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

830  {
831  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
832  CHECK(filter_result);
833 
834  bool can_return_error = false;
835  llvm::BasicBlock* filter_false{nullptr};
836 
837  {
838  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
839 
840  if (executor_->isArchMaxwell(co.device_type)) {
842  }
843  DiamondCodegen filter_cfg(filter_result,
844  executor_,
845  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
846  "filter", // filter_true and filter_false basic blocks
847  nullptr,
848  false);
849  filter_false = filter_cfg.cond_false_;
850 
851  if (is_group_by) {
853  !query_mem_desc.useStreamingTopN()) {
854  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
855  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
856  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
857  llvm::Value* old_total_matched_val{nullptr};
859  old_total_matched_val =
860  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
861  total_matched_ptr,
862  LL_INT(int32_t(1)),
863  llvm::AtomicOrdering::Monotonic);
864  } else {
865  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
866  LL_BUILDER.CreateStore(
867  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
868  total_matched_ptr);
869  }
870  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
871  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
872  }
873 
874  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
875  if (query_mem_desc.usesGetGroupValueFast() ||
876  query_mem_desc.getQueryDescriptionType() ==
878  if (query_mem_desc.getGroupbyColCount() > 1) {
879  filter_cfg.setChainToNext();
880  }
881  // Don't generate null checks if the group slot is guaranteed to be non-null,
882  // as it's the case for get_group_value_fast* family.
883  can_return_error = codegenAggCalls(
884  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
885  } else {
886  {
887  llvm::Value* nullcheck_cond{nullptr};
888  if (query_mem_desc.didOutputColumnar()) {
889  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
890  LL_INT(int32_t(0)));
891  } else {
892  nullcheck_cond = LL_BUILDER.CreateICmpNE(
893  std::get<0>(agg_out_ptr_w_idx),
894  llvm::ConstantPointerNull::get(
895  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
896  }
897  DiamondCodegen nullcheck_cfg(
898  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
900  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
901  }
902  can_return_error = true;
903  if (query_mem_desc.getQueryDescriptionType() ==
905  query_mem_desc.useStreamingTopN()) {
906  // Ignore rejection on pushing current row to top-K heap.
907  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
908  } else {
909  CodeGenerator code_generator(executor_);
910  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
911  // TODO(alex): remove the trunc once pos is converted to 32 bits
912  code_generator.posArg(nullptr),
913  get_int_type(32, LL_CONTEXT))));
914  }
915  }
916  } else {
917  if (ra_exe_unit_.estimator) {
918  std::stack<llvm::BasicBlock*> array_loops;
919  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
920  } else {
921  auto arg_it = ROW_FUNC->arg_begin();
922  std::vector<llvm::Value*> agg_out_vec;
923  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
924  agg_out_vec.push_back(&*arg_it++);
925  }
926  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
927  agg_out_vec,
928  query_mem_desc,
929  co,
930  gpu_smem_context,
931  filter_cfg);
932  }
933  }
934  }
935 
936  if (ra_exe_unit_.join_quals.empty()) {
937  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
938  } else if (sc_false) {
939  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
940  LL_BUILDER.SetInsertPoint(sc_false);
941  LL_BUILDER.CreateBr(filter_false);
942  LL_BUILDER.SetInsertPoint(saved_insert_block);
943  }
944 
945  return can_return_error;
946 }
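
The matched-row bookkeeping above (crt_matched, total_matched, old_total_matched) reserves one output slot per row that survives the filter. A minimal plain-C++ sketch of that pattern, with hypothetical helper names (the generated IR uses an atomicrmw add with monotonic ordering when the counter is shared between threads, and a plain load/add/store otherwise):

#include <atomic>
#include <cstdint>

// Shared counter: fetch_add returns the previous value, which becomes this
// row's output slot index (mirrors the atomicrmw add in the IR above).
int32_t claim_output_slot_shared(std::atomic<int32_t>& total_matched) {
  return total_matched.fetch_add(1, std::memory_order_relaxed);
}

// Private counter: plain load, add, store.
int32_t claim_output_slot_private(int32_t& total_matched) {
  const int32_t old_total_matched = total_matched;
  total_matched = old_total_matched + 1;
  return old_total_matched;
}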

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1722 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), CUR_FUNC, executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1724  {
1725  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1726  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1727  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1728  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1729 
1730  // TODO(alex): handle arrays uniformly?
1731  CodeGenerator code_generator(executor_);
1732  if (target_expr) {
1733  const auto& target_ti = target_expr->get_type_info();
1734  if (target_ti.is_buffer() &&
1735  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1736  const auto target_lvs =
1737  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1738  : code_generator.codegen(
1739  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1740  if (!func_expr && !arr_expr) {
1741  // Something with the chunk transport is code that was generated from a source
1742  // other than an ARRAY[] expression
1743  if (target_ti.is_bytes()) {
1744  CHECK_EQ(size_t(3), target_lvs.size());
1745  return {target_lvs[1], target_lvs[2]};
1746  }
1747  CHECK(target_ti.is_array());
1748  CHECK_EQ(size_t(1), target_lvs.size());
1749  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1750  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1751  const auto i8p_ty =
1752  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1753  const auto& elem_ti = target_ti.get_elem_type();
1754  return {
1755  executor_->cgen_state_->emitExternalCall(
1756  "array_buff",
1757  i8p_ty,
1758  {target_lvs.front(), code_generator.posArg(target_expr)}),
1759  executor_->cgen_state_->emitExternalCall(
1760  "array_size",
1761  i32_ty,
1762  {target_lvs.front(),
1763  code_generator.posArg(target_expr),
1764  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1765  } else {
1766  if (agg_expr) {
1767  throw std::runtime_error(
1768  "Using array[] operator as argument to an aggregate operator is not "
1769  "supported");
1770  }
1771  CHECK(func_expr || arr_expr);
1772  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1773  CHECK_EQ(size_t(1), target_lvs.size());
1774  const auto prefix = target_ti.get_buffer_name();
1775  CHECK(target_ti.is_array() || target_ti.is_bytes());
1776  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1777  // const auto target_lv_type = target_lvs[0]->getType();
1778  // CHECK(target_lv_type->isStructTy());
1779  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1780  const auto i8p_ty = llvm::PointerType::get(
1781  get_int_type(8, executor_->cgen_state_->context_), 0);
1782  const auto ptr = LL_BUILDER.CreatePointerCast(
1783  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1784  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1785  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1786  const auto nullcheck_ok_bb =
1787  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_nullcheck_ok_bb", CUR_FUNC);
1788  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
1789  LL_CONTEXT, prefix + "_nullcheck_fail_bb", CUR_FUNC);
1790 
1791  // TODO(adb): probably better to zext the bool
1792  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1793  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1794  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1795 
1796  const auto ret_bb =
1797  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_return", CUR_FUNC);
1798  LL_BUILDER.SetInsertPoint(ret_bb);
1799  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
1800  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1801  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1802  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1803  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1804  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1805  executor_->cgen_state_->emitExternalCall(
1806  "register_buffer_with_executor_rsm",
1807  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1808  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1809  LL_BUILDER.CreateBr(ret_bb);
1810  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1811  LL_BUILDER.CreateBr(ret_bb);
1812 
1813  LL_BUILDER.SetInsertPoint(ret_bb);
1814  return {result_phi, size};
1815  }
1816  CHECK_EQ(size_t(2), target_lvs.size());
1817  return {target_lvs[0], target_lvs[1]};
1818  }
1819  }
1820  if (target_ti.is_geometry() &&
1821  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1822  auto generate_coord_lvs =
1823  [&](auto* selected_target_expr,
1824  bool const fetch_columns) -> std::vector<llvm::Value*> {
1825  const auto target_lvs =
1826  code_generator.codegen(selected_target_expr, fetch_columns, co);
1827  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
1828  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
1829  if (geo_uoper || geo_binoper) {
1830  CHECK(target_expr->get_type_info().is_geometry());
1831  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1832  target_lvs.size());
1833  return target_lvs;
1834  }
1835  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1836  target_lvs.size());
1837 
1838  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1839  const auto i8p_ty =
1840  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1841  std::vector<llvm::Value*> coords;
1842  size_t ctr = 0;
1843  for (const auto& target_lv : target_lvs) {
1844  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1845  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1846  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1847  // coords array (TINYINT). Subsequent arrays are regular INT.
1848 
1849  const size_t elem_sz = ctr == 0 ? 1 : 4;
1850  ctr++;
1851  int32_t fixlen = -1;
1852  if (target_ti.get_type() == kPOINT) {
1853  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1854  if (col_var) {
1855  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1856  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1857  fixlen = coords_cd->columnType.get_size();
1858  }
1859  }
1860  }
1861  if (fixlen > 0) {
1862  coords.push_back(executor_->cgen_state_->emitExternalCall(
1863  "fast_fixlen_array_buff",
1864  i8p_ty,
1865  {target_lv, code_generator.posArg(selected_target_expr)}));
1866  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1867  continue;
1868  }
1869  coords.push_back(executor_->cgen_state_->emitExternalCall(
1870  "array_buff",
1871  i8p_ty,
1872  {target_lv, code_generator.posArg(selected_target_expr)}));
1873  coords.push_back(executor_->cgen_state_->emitExternalCall(
1874  "array_size",
1875  i32_ty,
1876  {target_lv,
1877  code_generator.posArg(selected_target_expr),
1878  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1879  }
1880  return coords;
1881  };
1882 
1883  if (agg_expr) {
1884  return generate_coord_lvs(agg_expr->get_arg(), true);
1885  } else {
1886  return generate_coord_lvs(target_expr,
1887  !executor_->plan_state_->allow_lazy_fetch_);
1888  }
1889  }
1890  }
1891  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1892  : code_generator.codegen(
1893  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1894 }

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const std::vector< llvm::Value * > &  agg_out_vec,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1426 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1432  {
1433  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1434  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1435  // TODO(alex): unify the two cases, the output for non-group by queries
1436  // should be a contiguous buffer
1437  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1438  bool can_return_error = false;
1439  if (is_group_by) {
1440  CHECK(agg_out_vec.empty());
1441  } else {
1442  CHECK(!agg_out_vec.empty());
1443  }
1444 
1445  // output buffer is casted into a byte stream to be able to handle data elements of
1446  // different sizes (only used when actual column width sizes are used)
1447  llvm::Value* output_buffer_byte_stream{nullptr};
1448  llvm::Value* out_row_idx{nullptr};
1449  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1451  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1452  std::get<0>(agg_out_ptr_w_idx),
1453  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1454  output_buffer_byte_stream->setName("out_buff_b_stream");
1455  CHECK(std::get<1>(agg_out_ptr_w_idx));
1456  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1457  llvm::Type::getInt64Ty(LL_CONTEXT));
1458  out_row_idx->setName("out_row_idx");
1459  }
1460 
1461  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1462  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1463  ++target_idx) {
1464  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1465  CHECK(target_expr);
1466 
1467  target_builder(target_expr, executor_, co);
1468  }
1469 
1470  target_builder.codegen(this,
1471  executor_,
1472  query_mem_desc,
1473  co,
1474  gpu_smem_context,
1475  agg_out_ptr_w_idx,
1476  agg_out_vec,
1477  output_buffer_byte_stream,
1478  out_row_idx,
1479  diamond_codegen);
1480 
1481  for (auto target_expr : ra_exe_unit_.target_exprs) {
1482  CHECK(target_expr);
1483  executor_->plan_state_->isLazyFetchColumn(target_expr);
1484  }
1485 
1486  return can_return_error;
1487 }

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1492 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1499  {
1500  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1501  llvm::Value* agg_col_ptr{nullptr};
1502  if (query_mem_desc.didOutputColumnar()) {
1503  // TODO(Saman): remove the second columnar branch, and support all query description
1504  // types through the first branch. Then, input arguments should also be cleaned up
1505  if (!g_cluster &&
1507  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1508  chosen_bytes == 8);
1509  CHECK(output_buffer_byte_stream);
1510  CHECK(out_row_idx);
1511  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1512  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1513  auto out_per_col_byte_idx =
1514  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1515  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1516  LL_INT(static_cast<int64_t>(col_off)));
1517  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1518  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1519  agg_col_ptr = LL_BUILDER.CreateBitCast(
1520  output_ptr,
1521  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1522  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1523  } else {
1524  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1525  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1526  col_off /= chosen_bytes;
1527  CHECK(std::get<1>(agg_out_ptr_w_idx));
1528  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1529  agg_col_ptr = LL_BUILDER.CreateGEP(
1530  LL_BUILDER.CreateBitCast(
1531  std::get<0>(agg_out_ptr_w_idx),
1532  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1533  offset);
1534  }
1535  } else {
1536  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1537  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1538  col_off /= chosen_bytes;
1539  agg_col_ptr = LL_BUILDER.CreateGEP(
1540  LL_BUILDER.CreateBitCast(
1541  std::get<0>(agg_out_ptr_w_idx),
1542  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1543  LL_INT(col_off));
1544  }
1545  CHECK(agg_col_ptr);
1546  return agg_col_ptr;
1547 }
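
In the columnar branch the returned pointer is simply the byte stream plus a per-row offset, and the shift by __builtin_ffs(chosen_bytes) - 1 is a multiplication by the power-of-two slot width. A standalone sketch of the address arithmetic (hypothetical helper name):

#include <cassert>
#include <cstdint>

int8_t* columnar_agg_col_ptr(int8_t* output_buffer_byte_stream,
                             const uint64_t out_row_idx,
                             const uint64_t col_off,        // byte offset of the column
                             const uint32_t chosen_bytes) { // padded slot width: 1, 2, 4 or 8
  assert(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 || chosen_bytes == 8);
  // out_row_idx << log2(chosen_bytes) == out_row_idx * chosen_bytes
  const uint64_t out_per_col_byte_idx =
      out_row_idx << (__builtin_ffs(static_cast<int>(chosen_bytes)) - 1);
  return output_buffer_byte_stream + out_per_col_byte_idx + col_off;
}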

void GroupByAndAggregate::codegenApproxMedian ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1673 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, g_bigint_count, SQLTypeInfo::get_notnull(), get_target_info(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1677  {
1678  if (device_type == ExecutorDeviceType::GPU) {
1679  throw QueryMustRunOnCpu();
1680  }
1681  llvm::BasicBlock *calc, *skip;
1682  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1683  auto const arg_ti =
1684  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1685  bool const nullable = !arg_ti.get_notnull();
1686 
1687  auto* cs = executor_->cgen_state_.get();
1688  auto& irb = cs->ir_builder_;
1689  if (nullable) {
1690  auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1691  auto* const skip_cond = arg_ti.is_fp()
1692  ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1693  : irb.CreateICmpEQ(agg_args.back(), null_value);
1694  calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_median");
1695  skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_median");
1696  irb.CreateCondBr(skip_cond, skip, calc);
1697  cs->current_func_->getBasicBlockList().push_back(calc);
1698  irb.SetInsertPoint(calc);
1699  }
1700  if (!arg_ti.is_fp()) {
1701  auto const agg_info = get_target_info(target_expr, g_bigint_count);
1702  agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
1703  }
1704  cs->emitExternalCall(
1705  "agg_approx_median", llvm::Type::getVoidTy(cs->context_), agg_args);
1706  if (nullable) {
1707  irb.CreateBr(skip);
1708  cs->current_func_->getBasicBlockList().push_back(skip);
1709  irb.SetInsertPoint(skip);
1710  }
1711 }
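
On the CPU path the emitted code is a null-skip diamond around the runtime call, with non-floating-point arguments cast to double first. A scalar sketch of the equivalent control flow (hypothetical helper; the actual aggregation is done by the agg_approx_median runtime function):

#include <cstdint>
#include <optional>

void approx_median_row(const std::optional<int64_t> arg,
                       void (*agg_approx_median)(double)) {
  if (!arg) {
    return;  // skip_approx_median: null inputs do not contribute
  }
  // calc_approx_median: non-fp arguments go through castToFP before the call
  agg_approx_median(static_cast<double>(*arg));
}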

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1604 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1609  {
1610  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1611  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1612  const auto& arg_ti =
1613  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1614  if (arg_ti.is_fp()) {
1615  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1616  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1617  }
1618  const auto& count_distinct_descriptor =
1619  query_mem_desc.getCountDistinctDescriptor(target_idx);
1620  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1621  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1622  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1623  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1624  if (device_type == ExecutorDeviceType::GPU) {
1625  const auto base_dev_addr = getAdditionalLiteral(-1);
1626  const auto base_host_addr = getAdditionalLiteral(-2);
1627  agg_args.push_back(base_dev_addr);
1628  agg_args.push_back(base_host_addr);
1629  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1630  } else {
1631  emitCall("agg_approximate_count_distinct", agg_args);
1632  }
1633  return;
1634  }
1635  std::string agg_fname{"agg_count_distinct"};
1636  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1637  agg_fname += "_bitmap";
1638  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1639  }
1640  if (agg_info.skip_null_val) {
1641  auto null_lv = executor_->cgen_state_->castToTypeIn(
1642  (arg_ti.is_fp()
1643  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1644  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1645  64);
1646  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1647  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1648  agg_fname += "_skip_val";
1649  agg_args.push_back(null_lv);
1650  }
1651  if (device_type == ExecutorDeviceType::GPU) {
1652  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1653  agg_fname += "_gpu";
1654  const auto base_dev_addr = getAdditionalLiteral(-1);
1655  const auto base_host_addr = getAdditionalLiteral(-2);
1656  agg_args.push_back(base_dev_addr);
1657  agg_args.push_back(base_host_addr);
1658  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1659  CHECK_EQ(size_t(0),
1660  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1661  count_distinct_descriptor.sub_bitmap_count);
1662  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1663  count_distinct_descriptor.sub_bitmap_count)));
1664  }
1665  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1666  emitCall(agg_fname, agg_args);
1667  } else {
1668  executor_->cgen_state_->emitExternalCall(
1669  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1670  }
1671 }
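
For a COUNT(DISTINCT) target with a Bitmap descriptor, the emitted agg_count_distinct_bitmap call records each value as a single bit relative to min_val, and the final count is the population count of the bitmap. A conceptual C++ sketch of that idea (hypothetical helper names, not the actual runtime implementation):

#include <cstddef>
#include <cstdint>
#include <vector>

// Record one row's value; val is assumed to lie within the bitmap's range.
void count_distinct_bitmap_set(std::vector<uint8_t>& bitmap,
                               const int64_t val,
                               const int64_t min_val) {
  const uint64_t bit = static_cast<uint64_t>(val - min_val);
  bitmap[bit >> 3] |= static_cast<uint8_t>(1u << (bit & 7));
}

// COUNT(DISTINCT) result: number of set bits.
size_t count_distinct_bitmap_result(const std::vector<uint8_t>& bitmap) {
  size_t count = 0;
  for (const auto byte : bitmap) {
    count += __builtin_popcount(byte);
  }
  return count;
}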

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1549 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1552  {
1553  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1554  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1555  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1556  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1557  estimator_comp_count_lv);
1558  int32_t subkey_idx = 0;
1559  for (const auto& estimator_arg_comp : estimator_arg) {
1560  const auto estimator_arg_comp_lvs =
1561  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1562  query_mem_desc.getEffectiveKeyWidth(),
1563  co,
1564  false,
1565  0,
1566  diamond_codegen,
1567  array_loops,
1568  true);
1569  CHECK(!estimator_arg_comp_lvs.original_value);
1570  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1571  // store the sub-key to the buffer
1572  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1573  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1574  }
1575  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1576  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1577  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1578  const auto estimator_comp_bytes_lv =
1579  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1580  const auto bitmap_size_lv =
1581  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1582  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1583  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1584 }
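
The estimator key built above is a buffer with one 64-bit slot per estimator argument, which is then handed to the estimator's runtime function as raw bytes alongside the estimator bitmap. A hypothetical sketch of that packing:

#include <cstdint>
#include <utility>
#include <vector>

// key holds one translated 64-bit sub-key per estimator argument.
std::pair<const int8_t*, int32_t> estimator_key_bytes(const std::vector<int64_t>& key) {
  return {reinterpret_cast<const int8_t*>(key.data()),
          static_cast<int32_t>(key.size() * sizeof(int64_t))};  // estimator_comp_bytes_lv
}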

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1040 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, query_infos_, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1043  {
1044  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1045  auto arg_it = ROW_FUNC->arg_begin();
1046  auto groups_buffer = arg_it++;
1047 
1048  std::stack<llvm::BasicBlock*> array_loops;
1049 
1050  // TODO(Saman): move this logic outside of this function.
1052  if (query_mem_desc.didOutputColumnar()) {
1053  return std::make_tuple(
1054  &*groups_buffer,
1055  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1056  } else {
1057  return std::make_tuple(
1058  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1059  nullptr);
1060  }
1061  }
1062 
1063  CHECK(query_mem_desc.getQueryDescriptionType() ==
1065  query_mem_desc.getQueryDescriptionType() ==
1067 
1068  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1069  ? 0
1070  : query_mem_desc.getRowSize() / sizeof(int64_t);
1071 
1072  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1073  ? sizeof(int64_t)
1074  : query_mem_desc.getEffectiveKeyWidth();
1075  // for multi-column group by
1076  llvm::Value* group_key = nullptr;
1077  llvm::Value* key_size_lv = nullptr;
1078 
1079  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1080  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1081  if (query_mem_desc.getQueryDescriptionType() ==
1083  group_key =
1084  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1085  } else if (query_mem_desc.getQueryDescriptionType() ==
1087  group_key =
1088  col_width_size == sizeof(int32_t)
1089  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1090  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1091  }
1092  CHECK(group_key);
1093  CHECK(key_size_lv);
1094  }
1095 
1096  int32_t subkey_idx = 0;
1097  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1098  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1099  const auto col_range_info =
1101  const auto translated_null_value = static_cast<int64_t>(
1102  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1103  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1104  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1105  : checked_int64_t(col_range_info.max) +
1106  (col_range_info.bucket ? col_range_info.bucket : 1));
1107 
1108  const bool col_has_nulls =
1109  query_mem_desc.getQueryDescriptionType() ==
1111  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1112  ? query_mem_desc.hasNulls()
1113  : col_range_info.has_nulls)
1114  : false;
1115 
1116  const auto group_expr_lvs =
1117  executor_->groupByColumnCodegen(group_expr.get(),
1118  col_width_size,
1119  co,
1120  col_has_nulls,
1121  translated_null_value,
1122  diamond_codegen,
1123  array_loops,
1124  query_mem_desc.threadsShareMemory());
1125  const auto group_expr_lv = group_expr_lvs.translated_value;
1126  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1127  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1128  return codegenSingleColumnPerfectHash(query_mem_desc,
1129  co,
1130  &*groups_buffer,
1131  group_expr_lv,
1132  group_expr_lvs.original_value,
1133  row_size_quad);
1134  } else {
1135  // store the sub-key to the buffer
1136  LL_BUILDER.CreateStore(group_expr_lv,
1137  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1138  }
1139  }
1140  if (query_mem_desc.getQueryDescriptionType() ==
1142  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1144  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1145  } else if (query_mem_desc.getQueryDescriptionType() ==
1148  &*groups_buffer,
1149  group_key,
1150  key_size_lv,
1151  query_mem_desc,
1152  col_width_size,
1153  row_size_quad);
1154  }
1155  CHECK(false);
1156  return std::make_tuple(nullptr, nullptr);
1157 }
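
Each group-by key component is translated so that NULL is mapped one bucket past the column maximum (max + bucket, or max + 1 when the column is unbucketed), giving nulls their own group slot. A worked example with hypothetical range values:

#include <cstdint>

int64_t translated_null_value(const int64_t max_val, const int64_t bucket) {
  return max_val + (bucket ? bucket : 1);
}

// e.g. a column with max = 100 and bucket size 10 maps NULL to 110;
// an unbucketed column with max = 100 maps NULL to 101.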

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1254 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1261  {
1262  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1263  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1264  CHECK(key_width == sizeof(int32_t));
1265  group_key =
1266  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1267  }
1268  std::vector<llvm::Value*> func_args{
1269  groups_buffer,
1270  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1271  &*group_key,
1272  &*key_size_lv,
1273  LL_INT(static_cast<int32_t>(key_width))};
1274  std::string func_name{"get_group_value"};
1275  if (query_mem_desc.didOutputColumnar()) {
1276  func_name += "_columnar_slot";
1277  } else {
1278  func_args.push_back(LL_INT(row_size_quad));
1279  }
1280  if (co.with_dynamic_watchdog) {
1281  func_name += "_with_watchdog";
1282  }
1283  if (query_mem_desc.didOutputColumnar()) {
1284  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1285  } else {
1286  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1287  }
1288 }

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1210 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1215  {
1216  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1217  CHECK(query_mem_desc.getQueryDescriptionType() ==
1219  // compute the index (perfect hash)
1220  auto perfect_hash_func = codegenPerfectHashFunction();
1221  auto hash_lv =
1222  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1223 
1224  if (query_mem_desc.didOutputColumnar()) {
1225  if (!query_mem_desc.hasKeylessHash()) {
1226  const std::string set_matching_func_name{
1227  "set_matching_group_value_perfect_hash_columnar"};
1228  const std::vector<llvm::Value*> set_matching_func_arg{
1229  groups_buffer,
1230  hash_lv,
1231  group_key,
1232  key_size_lv,
1233  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1234  query_mem_desc.getEntryCount())};
1235  emitCall(set_matching_func_name, set_matching_func_arg);
1236  }
1237  return std::make_tuple(groups_buffer, hash_lv);
1238  } else {
1239  if (query_mem_desc.hasKeylessHash()) {
1240  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1241  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1242  nullptr);
1243  } else {
1244  return std::make_tuple(
1245  emitCall(
1246  "get_matching_group_value_perfect_hash",
1247  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1248  nullptr);
1249  }
1250  }
1251 }

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 948 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), RelAlgExecutionUnit::use_bump_allocator, and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

952  {
953  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
955  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
956  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
957  CHECK(!group_expr);
958  if (!query_mem_desc.didOutputColumnar()) {
959  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
960  }
961  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
962  ? 0
963  : query_mem_desc.getRowSize() / sizeof(int64_t);
964  CodeGenerator code_generator(executor_);
965  if (query_mem_desc.useStreamingTopN()) {
966  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
967  CHECK_GE(only_order_entry.tle_no, int(1));
968  const size_t target_idx = only_order_entry.tle_no - 1;
969  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
970  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
971  const auto chosen_bytes =
972  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
973  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
974  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
976  std::string fname = "get_bin_from_k_heap";
977  const auto& oe_ti = order_entry_expr->get_type_info();
978  llvm::Value* null_key_lv = nullptr;
979  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
980  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
981  switch (bit_width) {
982  case 32:
983  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
984  break;
985  case 64:
986  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
987  break;
988  default:
989  CHECK(false);
990  }
991  fname += "_int" + std::to_string(bit_width) + "_t";
992  } else {
993  CHECK(oe_ti.is_fp());
994  if (order_entry_lv->getType()->isDoubleTy()) {
995  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
996  } else {
997  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
998  }
999  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1000  }
1001  const auto key_slot_idx =
1003  return emitCall(
1004  fname,
1005  {groups_buffer,
1006  LL_INT(n),
1007  LL_INT(row_size_quad),
1008  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1009  LL_BOOL(only_order_entry.is_desc),
1010  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1011  LL_BOOL(only_order_entry.nulls_first),
1012  null_key_lv,
1013  order_entry_lv});
1014  } else {
1015  llvm::Value* output_buffer_entry_count_lv{nullptr};
1017  output_buffer_entry_count_lv =
1018  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1019  CHECK(output_buffer_entry_count_lv);
1020  }
1021  const auto group_expr_lv =
1022  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1023  std::vector<llvm::Value*> args{
1024  groups_buffer,
1025  output_buffer_entry_count_lv
1026  ? output_buffer_entry_count_lv
1027  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1028  group_expr_lv,
1029  code_generator.posArg(nullptr)};
1030  if (query_mem_desc.didOutputColumnar()) {
1031  const auto columnar_output_offset =
1032  emitCall("get_columnar_scan_output_offset", args);
1033  return columnar_output_offset;
1034  }
1035  args.push_back(LL_INT(row_size_quad));
1036  return emitCall("get_scan_output_slot", args);
1037  }
1038 }
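
In the streaming top-N branch the runtime function name is suffixed according to the order-by expression's type. A small sketch of that name assembly (hypothetical standalone helper):

#include <cstddef>
#include <string>

std::string heap_bin_function_name(const bool is_fp,
                                   const bool is_double,
                                   const size_t int_bit_width) {
  std::string fname = "get_bin_from_k_heap";
  if (!is_fp) {
    fname += "_int" + std::to_string(int_bit_width) + "_t";  // e.g. get_bin_from_k_heap_int64_t
  } else {
    fname += is_double ? "_double" : "_float";
  }
  return fname;
}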
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:211
#define ROW_FUNC
#define LL_BUILDER
const std::list< Analyzer::OrderEntry > order_entries
#define LL_INT(v)
#define CHECK_GE(x, y)
Definition: Logger.h:216
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
size_t get_heap_key_slot_index(const std::vector< Analyzer::Expr * > &target_exprs, const size_t target_idx)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::string to_string(char const *&&v)
#define LL_BOOL(v)
const size_t limit
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
const SortInfo sort_info
#define LL_FP(v)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
QueryDescriptionType getQueryDescriptionType() const
#define CHECK_LT(x, y)
Definition: Logger.h:213
#define CHECK(condition)
Definition: Logger.h:203
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
const RelAlgExecutionUnit & ra_exe_unit_
const size_t offset
size_t getColOffInBytes(const size_t col_idx) const
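The runtime function name built above ("get_bin_from_k_heap" plus a type suffix) depends only on the physical type of the ORDER BY key. A minimal host-side sketch of the same suffix selection, using a simplified stand-in for SQLTypeInfo rather than the real API, might look like:

#include <string>

// Hypothetical, simplified stand-in for the type info consulted above.
struct OrderKeyType {
  bool is_fp;      // floating-point key?
  bool is_double;  // if floating point: double vs. float
  int int_bits;    // if integer/decimal/time: 32 or 64
};

// Mirrors the suffix logic: integer-like keys get "_int<N>_t",
// floating-point keys get "_float" or "_double".
std::string k_heap_function_name(const OrderKeyType& ti) {
  std::string fname = "get_bin_from_k_heap";
  if (!ti.is_fp) {
    fname += "_int" + std::to_string(ti.int_bits) + "_t";  // e.g. get_bin_from_k_heap_int64_t
  } else {
    fname += ti.is_double ? "_double" : "_float";
  }
  return fname;
}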

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1290 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), get_int_type(), getBucketedCardinality(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), query_infos_, and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1290  {
1291  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1292  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1293  auto ft = llvm::FunctionType::get(
1294  get_int_type(32, LL_CONTEXT),
1295  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1296  false);
1297  auto key_hash_func = llvm::Function::Create(ft,
1298  llvm::Function::ExternalLinkage,
1299  "perfect_key_hash",
1300  executor_->cgen_state_->module_);
1301  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1302  mark_function_always_inline(key_hash_func);
1303  auto& key_buff_arg = *key_hash_func->args().begin();
1304  llvm::Value* key_buff_lv = &key_buff_arg;
1305  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1306  llvm::IRBuilder<> key_hash_func_builder(bb);
1307  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1308  std::vector<int64_t> cardinalities;
1309  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1310  auto col_range_info =
1311  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1312  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1313  cardinalities.push_back(getBucketedCardinality(col_range_info));
1314  }
1315  size_t dim_idx = 0;
1316  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1317  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1318  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1319  auto col_range_info =
1320  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1321  auto crt_term_lv =
1322  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1323  if (col_range_info.bucket) {
1324  crt_term_lv =
1325  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1326  }
1327  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1328  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1329  LL_INT(cardinalities[prev_dim_idx]));
1330  }
1331  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1332  ++dim_idx;
1333  }
1334  key_hash_func_builder.CreateRet(
1335  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1336  return key_hash_func;
1337 }
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
ColRangeInfo get_expr_range_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
#define LL_CONTEXT
void mark_function_always_inline(llvm::Function *func)
#define LL_INT(v)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define CHECK_GT(x, y)
Definition: Logger.h:215
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const std::vector< InputTableInfo > & query_infos_
#define CHECK(condition)
Definition: Logger.h:203
const RelAlgExecutionUnit & ra_exe_unit_
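The IR emitted here computes a mixed-radix index over the group-by key: each component is normalized by its column's minimum (and bucket size, if any) and then scaled by the product of the bucketed cardinalities of the preceding columns. A hedged host-side sketch of the same arithmetic, with the per-dimension range info passed in explicitly instead of baked in as IR constants, could be:

#include <cstdint>
#include <vector>

// Hypothetical per-dimension range info mirroring the constants the
// generated function embeds (min, bucket, bucketed cardinality).
struct DimInfo {
  int64_t min;
  int64_t bucket;       // 0 means "no bucketing"
  int64_t cardinality;  // bucketed cardinality of this group-by column
};

// Equivalent of the emitted IR: a mixed-radix index where each normalized
// key component is scaled by the product of the cardinalities of the
// preceding dimensions, then truncated to 32 bits.
int32_t perfect_key_hash(const std::vector<int64_t>& key,
                         const std::vector<DimInfo>& dims) {
  int64_t hash = 0;
  for (size_t dim_idx = 0; dim_idx < key.size(); ++dim_idx) {
    int64_t term = key[dim_idx] - dims[dim_idx].min;
    if (dims[dim_idx].bucket) {
      term /= dims[dim_idx].bucket;
    }
    for (size_t prev = 0; prev < dim_idx; ++prev) {
      term *= dims[prev].cardinality;
    }
    hash += term;
  }
  return static_cast<int32_t>(hash);  // truncated to i32, as in the IR
}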

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1160 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1166  {
1167  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1168  CHECK(query_mem_desc.usesGetGroupValueFast());
1169  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1170  ? "get_columnar_group_bin_offset"
1171  : "get_group_value_fast"};
1172  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1173  get_group_fn_name += "_keyless";
1174  }
1175  if (query_mem_desc.interleavedBins(co.device_type)) {
1176  CHECK(!query_mem_desc.didOutputColumnar());
1177  CHECK(query_mem_desc.hasKeylessHash());
1178  get_group_fn_name += "_semiprivate";
1179  }
1180  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1181  &*group_expr_lv_translated};
1182  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1183  query_mem_desc.mustUseBaselineSort()) {
1184  get_group_fn_name += "_with_original_key";
1185  get_group_fn_args.push_back(group_expr_lv_original);
1186  }
1187  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1188  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1189  if (!query_mem_desc.hasKeylessHash()) {
1190  if (!query_mem_desc.didOutputColumnar()) {
1191  get_group_fn_args.push_back(LL_INT(row_size_quad));
1192  }
1193  } else {
1194  if (!query_mem_desc.didOutputColumnar()) {
1195  get_group_fn_args.push_back(LL_INT(row_size_quad));
1196  }
1197  if (query_mem_desc.interleavedBins(co.device_type)) {
1198  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1199  get_group_fn_args.push_back(warp_idx);
1200  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1201  }
1202  }
1203  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1204  return std::make_tuple(&*groups_buffer,
1205  emitCall(get_group_fn_name, get_group_fn_args));
1206  }
1207  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1208 }
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
ExecutorDeviceType device_type
bool interleavedBins(const ExecutorDeviceType) const
#define CHECK(condition)
Definition: Logger.h:203
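The runtime helper invoked here is selected purely from QueryMemoryDescriptor flags. The sketch below restates that name construction with plain booleans standing in for the descriptor queries (didOutputColumnar, hasKeylessHash, interleavedBins, mustUseBaselineSort); it illustrates the selection logic only, not the actual codegen path:

#include <string>

// Hypothetical restatement of the runtime-function selection above.
std::string group_value_fast_name(bool columnar,
                                  bool keyless,
                                  bool interleaved,
                                  bool with_original_key) {
  std::string name =
      columnar ? "get_columnar_group_bin_offset" : "get_group_value_fast";
  if (!columnar && keyless) {
    name += "_keyless";
  }
  if (interleaved) {  // implies keyless, row-wise output in the real code
    name += "_semiprivate";
  }
  if (!columnar && !keyless && !interleaved && with_original_key) {
    name += "_with_original_key";
  }
  return name;
}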

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction window_func,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
DiamondCodegen diamond_codegen 
)
private

Definition at line 1390 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, codegenOutputSlot(), CodeGenerator::codegenWindowPosition(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1394  {
1395  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1396  const auto window_func_context =
1397  WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1398  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1399  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1400  ? 0
1401  : query_mem_desc.getRowSize() / sizeof(int64_t);
1402  auto arg_it = ROW_FUNC->arg_begin();
1403  auto groups_buffer = arg_it++;
1404  CodeGenerator code_generator(executor_);
1405  auto window_pos_lv = code_generator.codegenWindowPosition(
1406  window_func_context, code_generator.posArg(nullptr));
1407  const auto pos_in_window =
1408  LL_BUILDER.CreateTrunc(window_pos_lv, get_int_type(32, LL_CONTEXT));
1409  llvm::Value* entry_count_lv =
1410  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1411  std::vector<llvm::Value*> args{
1412  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1413  if (query_mem_desc.didOutputColumnar()) {
1414  const auto columnar_output_offset =
1415  emitCall("get_columnar_scan_output_offset", args);
1416  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1417  }
1418  args.push_back(LL_INT(row_size_quad));
1419  return emitCall("get_scan_output_slot", args);
1420  }
1421  auto arg_it = ROW_FUNC->arg_begin();
1422  auto groups_buffer = arg_it++;
1423  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1424 }
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:1447
#define ROW_FUNC
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:42
llvm::Value * codegenOutputSlot(llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo arg_type,
const TargetInfo agg_info,
llvm::Value *  target 
)
private

Definition at line 1339 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1341  {
1342  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1343  const auto& agg_type = agg_info.sql_type;
1344  const size_t chosen_bytes = agg_type.get_size();
1345 
1346  bool need_conversion{false};
1347  llvm::Value* arg_null{nullptr};
1348  llvm::Value* agg_null{nullptr};
1349  llvm::Value* target_to_cast{target};
1350  if (arg_type.is_fp()) {
1351  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1352  if (agg_type.is_fp()) {
1353  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1354  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1355  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1356  need_conversion = true;
1357  }
1358  } else {
1359  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1360  return target;
1361  }
1362  } else {
1363  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1364  if (agg_type.is_fp()) {
1365  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1366  need_conversion = true;
1367  target_to_cast = executor_->castToFP(target, arg_type, agg_type);
1368  } else {
1369  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1370  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1371  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1372  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1373  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1374  need_conversion = true;
1375  }
1376  }
1377  }
1378  if (need_conversion) {
1379  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1380  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1381  return LL_BUILDER.CreateSelect(
1382  cmp,
1383  agg_null,
1384  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1385  } else {
1386  return target;
1387  }
1388 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
#define LL_BUILDER
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
bool is_fp() const
Definition: sqltypes.h:493
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLAgg agg_kind
Definition: TargetInfo.h:41
Definition: sqldefs.h:76
#define CHECK(condition)
Definition: Logger.h:203
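Conceptually, the select emitted above maps the argument type's null sentinel to the aggregate type's null sentinel and otherwise widens the value to the aggregate slot width. A scalar sketch for the common int32-argument/int64-slot case, with illustrative sentinels rather than the values returned by inline_int_null_val(), might be:

#include <cstdint>

// Scalar analogue of the generated select: if the value equals the
// argument type's null sentinel, substitute the aggregate type's sentinel,
// otherwise sign-extend the value. The sentinels below are assumed for
// illustration only.
int64_t convert_null_if_any_i32_to_i64(int32_t arg_val) {
  constexpr int32_t arg_null = INT32_MIN;  // assumed 32-bit null sentinel
  constexpr int64_t agg_null = INT64_MIN;  // assumed 64-bit null sentinel
  return arg_val == arg_null ? agg_null : static_cast<int64_t>(arg_val);
}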

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 1896 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

1897  {
1898  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1899  return executor_->cgen_state_->emitCall(fname, args);
1900 }
#define AUTOMATIC_IR_METADATA(CGENSTATE)

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1713 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1713  {
1714  CHECK_LT(off, 0);
1715  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1716  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1717  LL_BUILDER.CreateBitCast(lit_buff_lv,
1718  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1719  LL_INT(off)));
1720 }
#define ROW_FUNC
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
#define CHECK_LT(x, y)
Definition: Logger.h:213
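The load generated here treats the literal buffer as an array of 64-bit slots and indexes it with a negative offset. A host-side analogue, under the assumption that additional literals are laid out behind the base pointer exactly as the generated code expects, would be:

#include <cstdint>

// Reinterpret the literal buffer as 64-bit slots and read the slot at a
// negative index; off is expected to be negative (CHECK_LT(off, 0) above).
int64_t get_additional_literal_sketch(const int8_t* lit_buff, int32_t off) {
  const auto* slots = reinterpret_cast<const int64_t*>(lit_buff);
  return slots[off];
}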

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo col_range_info)
staticprivate

Definition at line 298 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

298  {
299  checked_int64_t crt_col_cardinality =
300  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
301  if (col_range_info.bucket) {
302  crt_col_cardinality /= col_range_info.bucket;
303  }
304  return static_cast<int64_t>(crt_col_cardinality +
305  (1 + (col_range_info.has_nulls ? 1 : 0)));
306 }
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t
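In plain arithmetic, the bucketed cardinality is the number of buckets spanned by [min, max], plus one, plus one extra slot when NULLs are present. A standalone sketch, using a 128-bit intermediate in place of boost's checked_int64_t for the overflow check, could look like:

#include <cstdint>
#include <stdexcept>

// Number of group slots needed for the column range: bucketed span of
// [min, max] plus one, plus a NULL slot when has_nulls is set.
int64_t bucketed_cardinality(int64_t min, int64_t max, int64_t bucket, bool has_nulls) {
  __int128 card = static_cast<__int128>(max) - static_cast<__int128>(min);
  if (bucket) {
    card /= bucket;
  }
  card += 1 + (has_nulls ? 1 : 0);
  if (card > INT64_MAX) {
    throw std::overflow_error("cardinality overflows int64");
  }
  return static_cast<int64_t>(card);
}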

+ Here is the caller graph for this function:

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 175 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), getBucketedCardinality(), GPU, group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, SortInfo::order_entries, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

175  {
176  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
177  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
178  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
179  // can expect this to be true anyway for grouped queries since the precise version
180  // uses significantly more memory.
181  const int64_t baseline_threshold =
182  has_count_distinct(ra_exe_unit_) && device_type_ == ExecutorDeviceType::GPU
183  ? (Executor::baseline_threshold / 4)
184  : Executor::baseline_threshold;
185 
186  if (ra_exe_unit_.groupby_exprs.size() != 1) {
187  try {
188  checked_int64_t cardinality{1};
189  bool has_nulls{false};
190  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
191  auto col_range_info = get_expr_range_info(
192  ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
193  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
194  // going through baseline hash if a non-integer type is encountered
195  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
196  }
197  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
198  CHECK_GE(crt_col_cardinality, 0);
199  cardinality *= crt_col_cardinality;
200  if (col_range_info.has_nulls) {
201  has_nulls = true;
202  }
203  }
204  // For zero or high cardinalities, use baseline layout.
205  if (!cardinality || cardinality > baseline_threshold) {
206  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
207  }
208  return {QueryDescriptionType::GroupByPerfectHash,
209  0,
210  int64_t(cardinality),
211  0,
212  has_nulls};
213  } catch (...) { // overflow when computing cardinality
214  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
215  }
216  }
217  // For single column groupby on high-precision timestamps, force baseline hash due to wide ranges
218  // we are likely to encounter when applying quals to the expression range
219  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
220  // the range is small enough
221  if (ra_exe_unit_.groupby_exprs.front() &&
222  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
223  ra_exe_unit_.simple_quals.size() > 0) {
224  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
225  }
226  const auto col_range_info = get_expr_range_info(
227  ra_exe_unit_, query_infos_, ra_exe_unit_.groupby_exprs.front().get(), executor_);
228  if (!ra_exe_unit_.groupby_exprs.front()) {
229  return col_range_info;
230  }
231  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
232  const int64_t col_count =
233  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
234  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
235  if (has_count_distinct(ra_exe_unit_)) {
236  max_entry_count = std::min(max_entry_count, baseline_threshold);
237  }
238  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
239  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
240  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
241 
242  const bool has_filters =
243  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
244  if (has_filters &&
245  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
246  // if filters are present, we can use the filter to narrow the cardinality of the
247  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
248  // off attempting perfect hash (since we know the range will be made of
249  // monotonically increasing numbers from min to max for dictionary encoded strings)
250  // and failing later due to excessive memory use.
251  // Check the conditions where baseline hash can provide a performance increase and
252  // return baseline hash (potentially forcing an estimator query) as the range type.
253  // Otherwise, return col_range_info which will likely be perfect hash, though could
254  // be baseline from a previous call of this function prior to the estimator query.
255  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
256  // TODO(adb): allow some sorts to pass through this block by centralizing sort
257  // algorithm decision making
258  if (has_count_distinct(ra_exe_unit_) &&
259  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
260  // always use baseline hash for column range too big for perfect hash with count
261  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
262  // hash group by in this case.
263  return {QueryDescriptionType::GroupByBaselineHash,
264  col_range_info.min,
265  col_range_info.max,
266  0,
267  col_range_info.has_nulls};
268  } else {
269  // use original col range for sort
270  return col_range_info;
271  }
272  }
273  // if filters are present and the filtered range is less than the cardinality of
274  // the column, consider baseline hash
275  if (group_cardinality_estimation_ &&
276  cardinality_estimate_less_than_column_range(*group_cardinality_estimation_,
277  col_range_info)) {
278  return {QueryDescriptionType::GroupByBaselineHash,
279  col_range_info.min,
280  col_range_info.max,
281  0,
282  col_range_info.has_nulls};
283  }
284  }
285  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(),
286  *executor_->catalog_)) &&
287  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
288  !col_range_info.bucket) {
289  return {QueryDescriptionType::GroupByBaselineHash,
290  col_range_info.min,
291  col_range_info.max,
292  0,
293  col_range_info.has_nulls};
294  }
295  return col_range_info;
296 }
std::vector< Analyzer::Expr * > target_exprs
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
ColRangeInfo get_expr_range_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t
bool is_column_range_too_big_for_perfect_hash(const ColRangeInfo &col_range_info, const int64_t max_entry_count)
bool expr_is_rowid(const Analyzer::Expr *expr, const Catalog_Namespace::Catalog &cat)
const std::list< Analyzer::OrderEntry > order_entries
static const size_t baseline_threshold
Definition: Execute.h:1046
#define CHECK_GE(x, y)
Definition: Logger.h:216
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool has_count_distinct(const RelAlgExecutionUnit &ra_exe_unit)
const SortInfo sort_info
const std::vector< InputTableInfo > & query_infos_
const ExecutorDeviceType device_type_
bool cardinality_estimate_less_than_column_range(const int64_t cardinality_estimate, const ColRangeInfo &col_range_info)
const std::optional< int64_t > group_cardinality_estimation_
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:203
const RelAlgExecutionUnit & ra_exe_unit_
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
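For the multi-column case above, the layout choice boils down to whether the product of the per-column bucketed cardinalities stays positive and below the baseline threshold. A simplified sketch of that decision, with the cardinalities and threshold supplied directly instead of derived from ColRangeInfo and Executor state, might be:

#include <cstdint>
#include <vector>

enum class HashLayout { PerfectHash, BaselineHash };

// Multiply the per-column bucketed cardinalities; fall back to baseline
// hash when the product is zero or exceeds the threshold. A 128-bit
// accumulator stands in for the checked arithmetic used in the real code.
HashLayout choose_group_by_layout(const std::vector<int64_t>& col_cardinalities,
                                  int64_t baseline_threshold) {
  __int128 cardinality = 1;
  for (const auto card : col_cardinalities) {
    cardinality *= card;
    if (cardinality == 0 || cardinality > baseline_threshold) {
      return HashLayout::BaselineHash;  // zero or too high for perfect hash
    }
  }
  return HashLayout::PerfectHash;
}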

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo col_range_info,
const size_t  shard_count 
) const
private

Definition at line 347 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

348  {
349  size_t device_count{0};
350  if (device_type_ == ExecutorDeviceType::GPU) {
351  auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
352  CHECK(cuda_mgr);
353  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
354  CHECK_GT(device_count, 0u);
355  }
356 
357  int64_t bucket{col_range_info.bucket};
358 
359  if (shard_count) {
360  CHECK(!col_range_info.bucket);
361  /*
362  when a node has fewer devices than shard count,
363  a) In a distributed setup, the minimum distance between two keys would be
364  device_count because shards are stored consecutively across the physical tables,
365  i.e if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
366  would have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf
367  node has only 1 device, in this case, all the keys from each node are loaded on
368  the device each.
369 
370  b) In a single node setup, the distance would be minimum of device_count or
371  difference of device_count - shard_count. For example: If a single node server
372  running on 3 devices a shard column has values 0 to 9 in a table with 4 shards,
373  device to fragment keys mapping would be: device 1 - 4,8,3,7 device 2 - 1,5,9
374  device 3 - 2, 6 The bucket value would be 4(shards) - 3(devices) = 1 i.e. minimum
375  of device_count or difference.
376 
377  When a node has device count equal to or more than shard count then the
378  minimum distance is always at least shard_count * no of leaf nodes.
379  */
380  if (device_count < shard_count) {
381  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
382  : std::min(device_count, shard_count - device_count);
383  } else {
384  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
385  }
386  }
387 
388  return bucket;
389 }
#define CHECK_GT(x, y)
Definition: Logger.h:215
const ExecutorDeviceType device_type_
#define CHECK(condition)
Definition: Logger.h:203
size_t g_leaf_count
Definition: ParserNode.cpp:76
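Stripped of the catalog and CUDA manager plumbing, the bucket choice reduces to a comparison between the device count and the shard count. The sketch below restates it with those counts passed in directly (device_count, shard_count, and g_leaf_count are read from the catalog and server state in the real code):

#include <algorithm>
#include <cstddef>
#include <cstdint>

// Minimum key stride on a device for a sharded top-k group by: with fewer
// devices than shards the stride shrinks, otherwise it is at least
// shard_count per leaf node.
int64_t sharded_top_bucket(size_t device_count, size_t shard_count, size_t leaf_count) {
  if (device_count < shard_count) {
    return leaf_count ? std::max(device_count, size_t(1))
                      : std::min(device_count, shard_count - device_count);
  }
  return shard_count * std::max(leaf_count, size_t(1));
}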

+ Here is the caller graph for this function:

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 783 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, executor_, Analyzer::AggExpr::get_arg(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Analyzer::Expr::get_type_info(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

784  {
785  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
786  return false;
787  }
788  for (const auto& order_entry : order_entries) {
789  CHECK_GE(order_entry.tle_no, 1);
790  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
791  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
792  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
793  return false;
794  }
795  // TODO(alex): relax the restrictions
796  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
797  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
798  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
799  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
800  return false;
801  }
802  if (agg_expr->get_arg()) {
803  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
804  if (arg_ti.is_fp()) {
805  return false;
806  }
807  auto expr_range_info =
808  get_expr_range_info(ra_exe_unit_, query_infos_, agg_expr->get_arg(), executor_);
809  // TODO(adb): QMD not actually initialized here?
810  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
811  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
812  expr_range_info.has_nulls) &&
813  order_entry.is_desc == order_entry.nulls_first) {
814  return false;
815  }
816  }
817  const auto& target_ti = target_expr->get_type_info();
818  CHECK(!target_ti.is_buffer());
819  if (!target_ti.is_integer()) {
820  return false;
821  }
822  }
823  return true;
824 }
std::vector< Analyzer::Expr * > target_exprs
ColRangeInfo get_expr_range_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
#define CHECK_GE(x, y)
Definition: Logger.h:216
Expr * get_arg() const
Definition: Analyzer.h:1096
Definition: sqldefs.h:73
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
const std::vector< InputTableInfo > & query_infos_
#define CHECK_LE(x, y)
Definition: Logger.h:214
#define CHECK(condition)
Definition: Logger.h:203
const RelAlgExecutionUnit & ra_exe_unit_
Definition: sqldefs.h:74
Definition: sqldefs.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo render_info,
const bool  output_columnar_hint 
)
private

Definition at line 658 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

663  {
664  const auto shard_count =
665  device_type_ == ExecutorDeviceType::GPU
666  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
667  : 0;
668  bool sort_on_gpu_hint =
669  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
670  !ra_exe_unit_.sort_info.order_entries.empty() &&
671  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries);
672  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
673  // but the total output buffer size would be too big or it's a sharded top query.
674  // For the sake of managing risk, use the new result set way very selectively for
675  // this case only (alongside the baseline layout we've enabled for a while now).
676  bool must_use_baseline_sort = shard_count;
677  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
678  while (true) {
679  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
680  max_groups_buffer_entry_count,
681  crt_min_byte_width,
682  sort_on_gpu_hint,
683  render_info,
684  must_use_baseline_sort,
685  output_columnar_hint);
686  CHECK(query_mem_desc);
687  if (query_mem_desc->sortOnGpu() &&
688  (query_mem_desc->getBufferSizeBytes(device_type_) +
689  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
690  2 * 1024 * 1024 * 1024LL) {
691  must_use_baseline_sort = true;
692  sort_on_gpu_hint = false;
693  } else {
694  break;
695  }
696  }
697  return query_mem_desc;
698 }
bool gpuCanHandleOrderEntries(const std::list< Analyzer::OrderEntry > &order_entries)
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
const std::list< Analyzer::OrderEntry > order_entries
const SortInfo sort_info
const ExecutorDeviceType device_type_
#define CHECK(condition)
Definition: Logger.h:203
const RelAlgExecutionUnit & ra_exe_unit_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
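The loop above abandons the GPU sort path when the output buffer, plus an int64-aligned int32 index per entry, would exceed 2GB. A small sketch of that size guard, with hypothetical helper names standing in for the QueryMemoryDescriptor accessors, could be:

#include <cstddef>
#include <cstdint>

// Round an address or size up to the next 8-byte boundary.
inline size_t align_to_int64_sketch(size_t addr) {
  return (addr + 7) & ~size_t(7);
}

// True when the GPU sort path should be abandoned: the output buffer plus
// one aligned int32 index per entry exceeds the (assumed) 2GB limit.
bool gpu_sort_buffer_too_big(size_t buffer_size_bytes, size_t entry_count) {
  constexpr size_t kMaxGpuSortBytes = 2ULL * 1024 * 1024 * 1024;  // 2GB
  return buffer_size_bytes + align_to_int64_sketch(entry_count * sizeof(int32_t)) >
         kMaxGpuSortBytes;
}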

+ Here is the call graph for this function:

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 700 of file GroupByAndAggregate.cpp.

References device_type_, executor_, g_enable_watchdog, anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info(), getColRangeInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

707  {
708  const auto count_distinct_descriptors = init_count_distinct_descriptors(
709  ra_exe_unit_, query_infos_, device_type_, executor_);
710 
711  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
712 
713  auto col_range_info_nosharding = getColRangeInfo();
714 
715  const auto shard_count =
716  device_type_ == ExecutorDeviceType::GPU
717  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
718  : 0;
719 
720  const auto col_range_info =
721  ColRangeInfo{col_range_info_nosharding.hash_type_,
722  col_range_info_nosharding.min,
723  col_range_info_nosharding.max,
724  getShardedTopBucket(col_range_info_nosharding, shard_count),
725  col_range_info_nosharding.has_nulls};
726 
727  // Non-grouped aggregates do not support accessing aggregated ranges
728  // Keyless hash is currently only supported with single-column perfect hash
729  const auto keyless_info =
730  !(is_group_by &&
731  col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash)
732  ? KeylessInfo{false, -1}
733  : get_keyless_info(ra_exe_unit_, query_infos_, is_group_by, executor_);
734 
735  if (g_enable_watchdog &&
736  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
737  max_groups_buffer_entry_count > 120000000) ||
738  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
739  ra_exe_unit_.groupby_exprs.size() == 1 &&
740  (col_range_info.max - col_range_info.min) /
741  std::max(col_range_info.bucket, int64_t(1)) >
742  130000000))) {
743  throw WatchdogException("Query would use too much memory");
744  }
745  try {
746  return QueryMemoryDescriptor::init(executor_,
747  ra_exe_unit_,
748  query_infos_,
749  col_range_info,
750  keyless_info,
751  allow_multifrag,
752  device_type_,
753  crt_min_byte_width,
754  sort_on_gpu_hint,
755  shard_count,
756  max_groups_buffer_entry_count,
757  render_info,
758  count_distinct_descriptors,
759  must_use_baseline_sort,
760  output_columnar_hint,
761  /*streaming_top_n_hint=*/true);
762  } catch (const StreamingTopNOOM& e) {
763  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
764  return QueryMemoryDescriptor::init(executor_,
765  ra_exe_unit_,
766  query_infos_,
767  col_range_info,
768  keyless_info,
769  allow_multifrag,
770  device_type_,
771  crt_min_byte_width,
772  sort_on_gpu_hint,
773  shard_count,
774  max_groups_buffer_entry_count,
775  render_info,
776  count_distinct_descriptors,
777  must_use_baseline_sort,
778  output_columnar_hint,
779  /*streaming_top_n_hint=*/false);
780  }
781 }
bool g_enable_watchdog
KeylessInfo get_keyless_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool is_group_by, Executor *executor)
#define LOG(tag)
Definition: Logger.h:194
ColRangeInfo getColRangeInfo()
QueryDescriptionType hash_type_
CountDistinctDescriptors init_count_distinct_descriptors(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ExecutorDeviceType device_type, Executor *executor)
static std::unique_ptr< QueryMemoryDescriptor > init(const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const CountDistinctDescriptors count_distinct_descriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
int64_t getShardedTopBucket(const ColRangeInfo &col_range_info, const size_t shard_count) const
const std::vector< InputTableInfo > & query_infos_
const ExecutorDeviceType device_type_
const RelAlgExecutionUnit & ra_exe_unit_
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value const val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions co 
) const
private

Definition at line 30 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

33  {
34  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
35  llvm::isa<llvm::AllocaInst>(val_ptr) &&
36  val_ptr->getType() ==
37  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
38  "agg_id" == agg_base_name);
39 }
ExecutorDeviceType device_type

+ Here is the caller graph for this function:

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 41 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

41  {
42  executor_->cgen_state_->ir_builder_.CreateCall(
43  executor_->cgen_state_->module_->getFunction("force_sync"));
44 }

+ Here is the caller graph for this function:

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog catalog 
)
static

Definition at line 1919 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

1921  {
1922  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1923  return 0;
1924  }
1925  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1926  const auto grouped_col_expr =
1927  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1928  if (!grouped_col_expr) {
1929  continue;
1930  }
1931  if (grouped_col_expr->get_table_id() <= 0) {
1932  return 0;
1933  }
1934  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1935  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1936  return td->nShards;
1937  }
1938  }
1939  return 0;
1940 }
const std::list< Analyzer::OrderEntry > order_entries
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
const size_t limit
const SortInfo sort_info
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 211 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 212 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 209 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 210 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 213 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 214 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private
const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 207 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

bool GroupByAndAggregate::output_columnar_
private

Definition at line 204 of file GroupByAndAggregate.h.

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 203 of file GroupByAndAggregate.h.


The documentation for this class was generated from the following files:
GroupByAndAggregate.h
GroupByAndAggregate.cpp
MaxwellCodegenPatch.cpp