OmniSciDB  21ac014ffc
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
llvm::Value * codegenVarlenOutputBuffer (const QueryMemoryDescriptor &query_mem_desc)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored. More...
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
void codegenApproxQuantile (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 61 of file GroupByAndAggregate.h.
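
This class drives code generation for the group-by and aggregation portion of a query's row function. The sketch below is a hypothetical call site, not code from the repository: executor, ra_exe_unit, query_infos, row_set_mem_owner, filter_result, sc_false, query_mem_desc, co and gpu_smem_context are all assumed to be supplied by the surrounding Executor/compilation pipeline.

  // Hypothetical usage sketch; all inputs come from the enclosing pipeline.
  GroupByAndAggregate group_by_and_agg(executor,
                                       ExecutorDeviceType::CPU,
                                       ra_exe_unit,
                                       query_infos,
                                       row_set_mem_owner,
                                       std::nullopt);  // no cardinality estimate

  // Emit the group-by/aggregation IR into the row function; the return value
  // indicates whether the generated code can report a runtime error.
  const bool can_return_error = group_by_and_agg.codegen(
      filter_result, sc_false, query_mem_desc, co, gpu_smem_context);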

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 316 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, and ra_exe_unit_.

323  : executor_(executor)
324  , ra_exe_unit_(ra_exe_unit)
325  , query_infos_(query_infos)
326  , row_set_mem_owner_(row_set_mem_owner)
327  , device_type_(device_type)
328  , group_cardinality_estimation_(group_cardinality_estimation) {
329  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
330  if (!groupby_expr) {
331  continue;
332  }
333  const auto& groupby_ti = groupby_expr->get_type_info();
334  if (groupby_ti.is_bytes()) {
335  throw std::runtime_error(
336  "Cannot group by string columns which are not dictionary encoded.");
337  }
338  if (groupby_ti.is_buffer()) {
339  throw std::runtime_error("Group by buffer not supported");
340  }
341  if (groupby_ti.is_geometry()) {
342  throw std::runtime_error("Group by geometry not supported");
343  }
344  }
345 }

Member Function Documentation

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 1927 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

1927  {
1928  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1929  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1930  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1931  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1932 
1933  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1934 }

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 824 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), codegenVarlenOutputBuffer(), DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, i, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

828  {
829  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
830  CHECK(filter_result);
831 
832  bool can_return_error = false;
833  llvm::BasicBlock* filter_false{nullptr};
834 
835  {
836  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
837 
838  if (executor_->isArchMaxwell(co.device_type)) {
839  prependForceSync();
840  }
841  DiamondCodegen filter_cfg(filter_result,
842  executor_,
843  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
844  "filter", // filter_true and filter_false basic blocks
845  nullptr,
846  false);
847  filter_false = filter_cfg.cond_false_;
848 
849  if (is_group_by) {
850  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
851  !query_mem_desc.useStreamingTopN()) {
852  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
853  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
854  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
855  llvm::Value* old_total_matched_val{nullptr};
856  if (co.device_type == ExecutorDeviceType::GPU) {
857  old_total_matched_val =
858  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
859  total_matched_ptr,
860  LL_INT(int32_t(1)),
861  llvm::AtomicOrdering::Monotonic);
862  } else {
863  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
864  LL_BUILDER.CreateStore(
865  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
866  total_matched_ptr);
867  }
868  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
869  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
870  }
871 
872  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
873  auto varlen_output_buffer = codegenVarlenOutputBuffer(query_mem_desc);
874  if (query_mem_desc.usesGetGroupValueFast() ||
875  query_mem_desc.getQueryDescriptionType() ==
876  QueryDescriptionType::Projection) {
877  if (query_mem_desc.getGroupbyColCount() > 1) {
878  filter_cfg.setChainToNext();
879  }
880  // Don't generate null checks if the group slot is guaranteed to be non-null,
881  // as it's the case for get_group_value_fast* family.
882  can_return_error = codegenAggCalls(agg_out_ptr_w_idx,
883  varlen_output_buffer,
884  {},
885  query_mem_desc,
886  co,
887  gpu_smem_context,
888  filter_cfg);
889  } else {
890  {
891  llvm::Value* nullcheck_cond{nullptr};
892  if (query_mem_desc.didOutputColumnar()) {
893  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
894  LL_INT(int32_t(0)));
895  } else {
896  nullcheck_cond = LL_BUILDER.CreateICmpNE(
897  std::get<0>(agg_out_ptr_w_idx),
898  llvm::ConstantPointerNull::get(
899  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
900  }
901  DiamondCodegen nullcheck_cfg(
902  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
903  codegenAggCalls(agg_out_ptr_w_idx,
904  varlen_output_buffer,
905  {},
906  query_mem_desc,
907  co,
908  gpu_smem_context,
909  filter_cfg);
910  }
911  can_return_error = true;
912  if (query_mem_desc.getQueryDescriptionType() ==
913  QueryDescriptionType::Projection &&
914  query_mem_desc.useStreamingTopN()) {
915  // Ignore rejection on pushing current row to top-K heap.
916  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
917  } else {
918  CodeGenerator code_generator(executor_);
919  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
920  // TODO(alex): remove the trunc once pos is converted to 32 bits
921  code_generator.posArg(nullptr),
922  get_int_type(32, LL_CONTEXT))));
923  }
924  }
925  } else {
926  if (ra_exe_unit_.estimator) {
927  std::stack<llvm::BasicBlock*> array_loops;
928  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
929  } else {
930  auto arg_it = ROW_FUNC->arg_begin();
931  std::vector<llvm::Value*> agg_out_vec;
932  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
933  agg_out_vec.push_back(&*arg_it++);
934  }
935  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
936  /*varlen_output_buffer=*/nullptr,
937  agg_out_vec,
938  query_mem_desc,
939  co,
940  gpu_smem_context,
941  filter_cfg);
942  }
943  }
944  }
945 
946  if (ra_exe_unit_.join_quals.empty()) {
947  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
948  } else if (sc_false) {
949  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
950  LL_BUILDER.SetInsertPoint(sc_false);
951  LL_BUILDER.CreateBr(filter_false);
952  LL_BUILDER.SetInsertPoint(saved_insert_block);
953  }
954 
955  return can_return_error;
956 }
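
For the Projection, non-streaming-top-N case, the IR emitted at lines 852-869 simply claims the next output slot for each matching row. A standalone C++ analogue of the CPU path (illustrative only; the GPU path replaces the load/add/store with an atomic add):

  #include <cstdint>

  // Mirror of the matched-row bookkeeping: mark the row as matched, then
  // post-increment the shared counter and hand back the old value, which the
  // emitted IR stores into the "old_total_matched" argument.
  inline int32_t claim_output_slot(int32_t& crt_matched, int32_t& total_matched) {
    crt_matched = 1;
    const int32_t old_total_matched = total_matched;
    total_matched = old_total_matched + 1;
    return old_total_matched;
  }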

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1742 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), CUR_FUNC, executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1744  {
1745  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1746  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1747  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1748  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1749 
1750  // TODO(alex): handle arrays uniformly?
1751  CodeGenerator code_generator(executor_);
1752  if (target_expr) {
1753  const auto& target_ti = target_expr->get_type_info();
1754  if (target_ti.is_buffer() &&
1755  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1756  const auto target_lvs =
1757  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1758  : code_generator.codegen(
1759  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1760  if (!func_expr && !arr_expr) {
1761  // Something with the chunk transport is code that was generated from a source
1762  // other than an ARRAY[] expression
1763  if (target_ti.is_bytes()) {
1764  CHECK_EQ(size_t(3), target_lvs.size());
1765  return {target_lvs[1], target_lvs[2]};
1766  }
1767  CHECK(target_ti.is_array());
1768  CHECK_EQ(size_t(1), target_lvs.size());
1769  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1770  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1771  const auto i8p_ty =
1772  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1773  const auto& elem_ti = target_ti.get_elem_type();
1774  return {
1775  executor_->cgen_state_->emitExternalCall(
1776  "array_buff",
1777  i8p_ty,
1778  {target_lvs.front(), code_generator.posArg(target_expr)}),
1779  executor_->cgen_state_->emitExternalCall(
1780  "array_size",
1781  i32_ty,
1782  {target_lvs.front(),
1783  code_generator.posArg(target_expr),
1784  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1785  } else {
1786  if (agg_expr) {
1787  throw std::runtime_error(
1788  "Using array[] operator as argument to an aggregate operator is not "
1789  "supported");
1790  }
1791  CHECK(func_expr || arr_expr);
1792  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1793  CHECK_EQ(size_t(1), target_lvs.size());
1794  const auto prefix = target_ti.get_buffer_name();
1795  CHECK(target_ti.is_array() || target_ti.is_bytes());
1796  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1797  // const auto target_lv_type = target_lvs[0]->getType();
1798  // CHECK(target_lv_type->isStructTy());
1799  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1800  const auto i8p_ty = llvm::PointerType::get(
1801  get_int_type(8, executor_->cgen_state_->context_), 0);
1802  const auto ptr = LL_BUILDER.CreatePointerCast(
1803  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1804  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1805  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1806  const auto nullcheck_ok_bb =
1807  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_nullcheck_ok_bb", CUR_FUNC);
1808  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
1809  LL_CONTEXT, prefix + "_nullcheck_fail_bb", CUR_FUNC);
1810 
1811  // TODO(adb): probably better to zext the bool
1812  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1813  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1814  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1815 
1816  const auto ret_bb =
1817  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_return", CUR_FUNC);
1818  LL_BUILDER.SetInsertPoint(ret_bb);
1819  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
1820  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1821  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1822  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1823  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1824  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1825  executor_->cgen_state_->emitExternalCall(
1826  "register_buffer_with_executor_rsm",
1827  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1828  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1829  LL_BUILDER.CreateBr(ret_bb);
1830  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1831  LL_BUILDER.CreateBr(ret_bb);
1832 
1833  LL_BUILDER.SetInsertPoint(ret_bb);
1834  return {result_phi, size};
1835  }
1836  CHECK_EQ(size_t(2), target_lvs.size());
1837  return {target_lvs[0], target_lvs[1]};
1838  }
1839  }
1840  if (target_ti.is_geometry() &&
1841  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1842  auto generate_coord_lvs =
1843  [&](auto* selected_target_expr,
1844  bool const fetch_columns) -> std::vector<llvm::Value*> {
1845  const auto target_lvs =
1846  code_generator.codegen(selected_target_expr, fetch_columns, co);
1847  if (dynamic_cast<const Analyzer::GeoOperator*>(target_expr) &&
1848  target_expr->get_type_info().is_geometry()) {
1849  // return a pointer to the temporary alloca
1850  return target_lvs;
1851  }
1852  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
1853  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
1854  if (geo_uoper || geo_binoper) {
1855  CHECK(target_expr->get_type_info().is_geometry());
1856  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1857  target_lvs.size());
1858  return target_lvs;
1859  }
1860  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1861  target_lvs.size());
1862 
1863  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1864  const auto i8p_ty =
1865  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1866  std::vector<llvm::Value*> coords;
1867  size_t ctr = 0;
1868  for (const auto& target_lv : target_lvs) {
1869  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1870  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1871  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1872  // coords array (TINYINT). Subsequent arrays are regular INT.
1873 
1874  const size_t elem_sz = ctr == 0 ? 1 : 4;
1875  ctr++;
1876  int32_t fixlen = -1;
1877  if (target_ti.get_type() == kPOINT) {
1878  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1879  if (col_var) {
1880  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1881  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1882  fixlen = coords_cd->columnType.get_size();
1883  }
1884  }
1885  }
1886  if (fixlen > 0) {
1887  coords.push_back(executor_->cgen_state_->emitExternalCall(
1888  "fast_fixlen_array_buff",
1889  i8p_ty,
1890  {target_lv, code_generator.posArg(selected_target_expr)}));
1891  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1892  continue;
1893  }
1894  coords.push_back(executor_->cgen_state_->emitExternalCall(
1895  "array_buff",
1896  i8p_ty,
1897  {target_lv, code_generator.posArg(selected_target_expr)}));
1898  coords.push_back(executor_->cgen_state_->emitExternalCall(
1899  "array_size",
1900  i32_ty,
1901  {target_lv,
1902  code_generator.posArg(selected_target_expr),
1903  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1904  }
1905  return coords;
1906  };
1907 
1908  if (agg_expr) {
1909  return generate_coord_lvs(agg_expr->get_arg(), true);
1910  } else {
1911  return generate_coord_lvs(target_expr,
1912  !executor_->plan_state_->allow_lazy_fetch_);
1913  }
1914  }
1915  }
1916  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1917  : code_generator.codegen(
1918  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1919 }

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
llvm::Value *  varlen_output_buffer,
const std::vector< llvm::Value * > &  agg_out_vec,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1443 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1450  {
1451  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1452  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1453  // TODO(alex): unify the two cases, the output for non-group by queries
1454  // should be a contiguous buffer
1455  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1456  bool can_return_error = false;
1457  if (is_group_by) {
1458  CHECK(agg_out_vec.empty());
1459  } else {
1460  CHECK(!agg_out_vec.empty());
1461  }
1462 
1463  // output buffer is casted into a byte stream to be able to handle data elements of
1464  // different sizes (only used when actual column width sizes are used)
1465  llvm::Value* output_buffer_byte_stream{nullptr};
1466  llvm::Value* out_row_idx{nullptr};
1467  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1468  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1469  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1470  std::get<0>(agg_out_ptr_w_idx),
1471  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1472  output_buffer_byte_stream->setName("out_buff_b_stream");
1473  CHECK(std::get<1>(agg_out_ptr_w_idx));
1474  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1475  llvm::Type::getInt64Ty(LL_CONTEXT));
1476  out_row_idx->setName("out_row_idx");
1477  }
1478 
1479  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1480  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1481  ++target_idx) {
1482  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1483  CHECK(target_expr);
1484 
1485  target_builder(target_expr, executor_, co);
1486  }
1487 
1488  target_builder.codegen(this,
1489  executor_,
1490  query_mem_desc,
1491  co,
1492  gpu_smem_context,
1493  agg_out_ptr_w_idx,
1494  agg_out_vec,
1495  output_buffer_byte_stream,
1496  out_row_idx,
1497  varlen_output_buffer,
1498  diamond_codegen);
1499 
1500  for (auto target_expr : ra_exe_unit_.target_exprs) {
1501  CHECK(target_expr);
1502  executor_->plan_state_->isLazyFetchColumn(target_expr);
1503  }
1504 
1505  return can_return_error;
1506 }

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1511 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1518  {
1519  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1520  llvm::Value* agg_col_ptr{nullptr};
1521  if (query_mem_desc.didOutputColumnar()) {
1522  // TODO(Saman): remove the second columnar branch, and support all query description
1523  // types through the first branch. Then, input arguments should also be cleaned up
1524  if (!g_cluster &&
1525  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1526  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1527  chosen_bytes == 8);
1528  CHECK(output_buffer_byte_stream);
1529  CHECK(out_row_idx);
1530  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1531  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1532  auto out_per_col_byte_idx =
1533  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1534  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1535  LL_INT(static_cast<int64_t>(col_off)));
1536  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1537  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1538  agg_col_ptr = LL_BUILDER.CreateBitCast(
1539  output_ptr,
1540  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1541  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1542  } else {
1543  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1544  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1545  col_off /= chosen_bytes;
1546  CHECK(std::get<1>(agg_out_ptr_w_idx));
1547  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1548  agg_col_ptr = LL_BUILDER.CreateGEP(
1549  LL_BUILDER.CreateBitCast(
1550  std::get<0>(agg_out_ptr_w_idx),
1551  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1552  offset);
1553  }
1554  } else {
1555  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1556  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1557  col_off /= chosen_bytes;
1558  agg_col_ptr = LL_BUILDER.CreateGEP(
1559  LL_BUILDER.CreateBitCast(
1560  std::get<0>(agg_out_ptr_w_idx),
1561  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1562  LL_INT(col_off));
1563  }
1564  CHECK(agg_col_ptr);
1565  return agg_col_ptr;
1566 }
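
The offset arithmetic above can be restated in plain C++; the snippet is illustrative only and assumes chosen_bytes is a power of two, as the CHECK in the columnar branch requires.

  #include <cstdint>

  // Columnar (Projection) branch: the slot for target column `agg_out_off` and
  // output row `out_row_idx` starts at col_off + out_row_idx * chosen_bytes in
  // the byte stream; the IR writes the multiplication as a shift by
  // log2(chosen_bytes).
  inline int8_t* columnar_agg_col_ptr(int8_t* output_buffer_byte_stream,
                                      uint64_t out_row_idx,
                                      uint32_t col_off,
                                      uint32_t chosen_bytes) {
    return output_buffer_byte_stream + col_off + out_row_idx * chosen_bytes;
  }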

void GroupByAndAggregate::codegenApproxQuantile ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1692 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, g_bigint_count, SQLTypeInfo::get_notnull(), get_target_info(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1697  {
1698  if (device_type == ExecutorDeviceType::GPU) {
1699  throw QueryMustRunOnCpu();
1700  }
1701  llvm::BasicBlock *calc, *skip;
1702  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1703  auto const arg_ti =
1704  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1705  bool const nullable = !arg_ti.get_notnull();
1706 
1707  auto* cs = executor_->cgen_state_.get();
1708  auto& irb = cs->ir_builder_;
1709  if (nullable) {
1710  auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1711  auto* const skip_cond = arg_ti.is_fp()
1712  ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1713  : irb.CreateICmpEQ(agg_args.back(), null_value);
1714  calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_quantile");
1715  skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_quantile");
1716  irb.CreateCondBr(skip_cond, skip, calc);
1717  cs->current_func_->getBasicBlockList().push_back(calc);
1718  irb.SetInsertPoint(calc);
1719  }
1720  if (!arg_ti.is_fp()) {
1721  auto const agg_info = get_target_info(target_expr, g_bigint_count);
1722  agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
1723  }
1724  cs->emitExternalCall(
1725  "agg_approx_quantile", llvm::Type::getVoidTy(cs->context_), agg_args);
1726  if (nullable) {
1727  irb.CreateBr(skip);
1728  cs->current_func_->getBasicBlockList().push_back(skip);
1729  irb.SetInsertPoint(skip);
1730  }
1731 }

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1623 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1628  {
1629  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1630  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1631  const auto& arg_ti =
1632  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1633  if (arg_ti.is_fp()) {
1634  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1635  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1636  }
1637  const auto& count_distinct_descriptor =
1638  query_mem_desc.getCountDistinctDescriptor(target_idx);
1639  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1640  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1641  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1642  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1643  if (device_type == ExecutorDeviceType::GPU) {
1644  const auto base_dev_addr = getAdditionalLiteral(-1);
1645  const auto base_host_addr = getAdditionalLiteral(-2);
1646  agg_args.push_back(base_dev_addr);
1647  agg_args.push_back(base_host_addr);
1648  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1649  } else {
1650  emitCall("agg_approximate_count_distinct", agg_args);
1651  }
1652  return;
1653  }
1654  std::string agg_fname{"agg_count_distinct"};
1655  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1656  agg_fname += "_bitmap";
1657  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1658  }
1659  if (agg_info.skip_null_val) {
1660  auto null_lv = executor_->cgen_state_->castToTypeIn(
1661  (arg_ti.is_fp()
1662  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1663  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1664  64);
1665  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1666  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1667  agg_fname += "_skip_val";
1668  agg_args.push_back(null_lv);
1669  }
1670  if (device_type == ExecutorDeviceType::GPU) {
1671  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1672  agg_fname += "_gpu";
1673  const auto base_dev_addr = getAdditionalLiteral(-1);
1674  const auto base_host_addr = getAdditionalLiteral(-2);
1675  agg_args.push_back(base_dev_addr);
1676  agg_args.push_back(base_host_addr);
1677  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1678  CHECK_EQ(size_t(0),
1679  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1680  count_distinct_descriptor.sub_bitmap_count);
1681  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1682  count_distinct_descriptor.sub_bitmap_count)));
1683  }
1684  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1685  emitCall(agg_fname, agg_args);
1686  } else {
1687  executor_->cgen_state_->emitExternalCall(
1688  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1689  }
1690 }

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1568 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1571  {
1572  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1573  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1574  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1575  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1576  estimator_comp_count_lv);
1577  int32_t subkey_idx = 0;
1578  for (const auto& estimator_arg_comp : estimator_arg) {
1579  const auto estimator_arg_comp_lvs =
1580  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1581  query_mem_desc.getEffectiveKeyWidth(),
1582  co,
1583  false,
1584  0,
1585  diamond_codegen,
1586  array_loops,
1587  true);
1588  CHECK(!estimator_arg_comp_lvs.original_value);
1589  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1590  // store the sub-key to the buffer
1591  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1592  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1593  }
1594  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1595  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1596  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1597  const auto estimator_comp_bytes_lv =
1598  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1599  const auto bitmap_size_lv =
1600  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1601  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1602  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1603 }

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1043 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, query_infos_, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1046  {
1047  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1048  auto arg_it = ROW_FUNC->arg_begin();
1049  auto groups_buffer = arg_it++;
1050 
1051  std::stack<llvm::BasicBlock*> array_loops;
1052 
1053  // TODO(Saman): move this logic outside of this function.
1054  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1055  if (query_mem_desc.didOutputColumnar()) {
1056  return std::make_tuple(
1057  &*groups_buffer,
1058  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1059  } else {
1060  return std::make_tuple(
1061  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1062  nullptr);
1063  }
1064  }
1065 
1066  CHECK(query_mem_desc.getQueryDescriptionType() ==
1067  QueryDescriptionType::GroupByBaselineHash ||
1068  query_mem_desc.getQueryDescriptionType() ==
1069  QueryDescriptionType::GroupByPerfectHash);
1070 
1071  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1072  ? 0
1073  : query_mem_desc.getRowSize() / sizeof(int64_t);
1074 
1075  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1076  ? sizeof(int64_t)
1077  : query_mem_desc.getEffectiveKeyWidth();
1078  // for multi-column group by
1079  llvm::Value* group_key = nullptr;
1080  llvm::Value* key_size_lv = nullptr;
1081 
1082  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1083  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1084  if (query_mem_desc.getQueryDescriptionType() ==
1085  QueryDescriptionType::GroupByPerfectHash) {
1086  group_key =
1087  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1088  } else if (query_mem_desc.getQueryDescriptionType() ==
1089  QueryDescriptionType::GroupByBaselineHash) {
1090  group_key =
1091  col_width_size == sizeof(int32_t)
1092  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1093  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1094  }
1095  CHECK(group_key);
1096  CHECK(key_size_lv);
1097  }
1098 
1099  int32_t subkey_idx = 0;
1100  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1101  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1102  const auto col_range_info =
1103  get_expr_range_info(ra_exe_unit_, query_infos_, group_expr.get(), executor_);
1104  const auto translated_null_value = static_cast<int64_t>(
1105  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1106  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1107  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1108  : checked_int64_t(col_range_info.max) +
1109  (col_range_info.bucket ? col_range_info.bucket : 1));
1110 
1111  const bool col_has_nulls =
1112  query_mem_desc.getQueryDescriptionType() ==
1113  QueryDescriptionType::GroupByPerfectHash
1114  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1115  ? query_mem_desc.hasNulls()
1116  : col_range_info.has_nulls)
1117  : false;
1118 
1119  const auto group_expr_lvs =
1120  executor_->groupByColumnCodegen(group_expr.get(),
1121  col_width_size,
1122  co,
1123  col_has_nulls,
1124  translated_null_value,
1125  diamond_codegen,
1126  array_loops,
1127  query_mem_desc.threadsShareMemory());
1128  const auto group_expr_lv = group_expr_lvs.translated_value;
1129  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1130  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1131  return codegenSingleColumnPerfectHash(query_mem_desc,
1132  co,
1133  &*groups_buffer,
1134  group_expr_lv,
1135  group_expr_lvs.original_value,
1136  row_size_quad);
1137  } else {
1138  // store the sub-key to the buffer
1139  LL_BUILDER.CreateStore(group_expr_lv,
1140  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1141  }
1142  }
1143  if (query_mem_desc.getQueryDescriptionType() ==
1144  QueryDescriptionType::GroupByPerfectHash) {
1145  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1146  return codegenMultiColumnPerfectHash(
1147  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1148  } else if (query_mem_desc.getQueryDescriptionType() ==
1149  QueryDescriptionType::GroupByBaselineHash) {
1150  return codegenMultiColumnBaselineHash(co,
1151  &*groups_buffer,
1152  group_key,
1153  key_size_lv,
1154  query_mem_desc,
1155  col_width_size,
1156  row_size_quad);
1157  }
1158  CHECK(false);
1159  return std::make_tuple(nullptr, nullptr);
1160 }
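
Summarizing the dispatch above: Projection queries get a per-row output slot, single-column perfect-hash group-bys take the fast path, and multi-column keys go through either the perfect-hash or the baseline-hash helper. The sketch below restates that routing with a local stand-in enum (illustrative only; the real QueryDescriptionType enum lives elsewhere in OmniSciDB):

  #include <string>

  // Local stand-in for the QueryDescriptionType values referenced in the listing.
  enum class QueryDescription { Projection, GroupByPerfectHash, GroupByBaselineHash };

  // Names the helper that codegenGroupBy hands control to for each case.
  inline std::string group_by_codegen_path(QueryDescription type,
                                           bool single_column_perfect_hash) {
    switch (type) {
      case QueryDescription::Projection:
        return "codegenOutputSlot";
      case QueryDescription::GroupByPerfectHash:
        return single_column_perfect_hash ? "codegenSingleColumnPerfectHash"
                                          : "codegenMultiColumnPerfectHash";
      case QueryDescription::GroupByBaselineHash:
        return "codegenMultiColumnBaselineHash";
    }
    return "unsupported";
  }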

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1271 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1278  {
1279  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1280  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1281  CHECK(key_width == sizeof(int32_t));
1282  group_key =
1283  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1284  }
1285  std::vector<llvm::Value*> func_args{
1286  groups_buffer,
1287  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1288  &*group_key,
1289  &*key_size_lv,
1290  LL_INT(static_cast<int32_t>(key_width))};
1291  std::string func_name{"get_group_value"};
1292  if (query_mem_desc.didOutputColumnar()) {
1293  func_name += "_columnar_slot";
1294  } else {
1295  func_args.push_back(LL_INT(row_size_quad));
1296  }
1297  if (co.with_dynamic_watchdog) {
1298  func_name += "_with_watchdog";
1299  }
1300  if (query_mem_desc.didOutputColumnar()) {
1301  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1302  } else {
1303  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1304  }
1305 }
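
The baseline-hash runtime entry point is chosen by suffixing a base name, mirroring the flags tested above; a small illustrative reconstruction:

  #include <string>

  // "get_group_value" gains "_columnar_slot" for columnar output and
  // "_with_watchdog" when the dynamic watchdog is enabled (row-wise output also
  // appends row_size_quad to the argument list, as in the listing).
  inline std::string baseline_group_value_function(bool output_columnar,
                                                   bool with_dynamic_watchdog) {
    std::string func_name{"get_group_value"};
    if (output_columnar) {
      func_name += "_columnar_slot";
    }
    if (with_dynamic_watchdog) {
      func_name += "_with_watchdog";
    }
    return func_name;
  }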

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1227 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1232  {
1233  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1234  CHECK(query_mem_desc.getQueryDescriptionType() ==
1235  QueryDescriptionType::GroupByPerfectHash);
1236  // compute the index (perfect hash)
1237  auto perfect_hash_func = codegenPerfectHashFunction();
1238  auto hash_lv =
1239  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1240 
1241  if (query_mem_desc.didOutputColumnar()) {
1242  if (!query_mem_desc.hasKeylessHash()) {
1243  const std::string set_matching_func_name{
1244  "set_matching_group_value_perfect_hash_columnar"};
1245  const std::vector<llvm::Value*> set_matching_func_arg{
1246  groups_buffer,
1247  hash_lv,
1248  group_key,
1249  key_size_lv,
1250  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1251  query_mem_desc.getEntryCount())};
1252  emitCall(set_matching_func_name, set_matching_func_arg);
1253  }
1254  return std::make_tuple(groups_buffer, hash_lv);
1255  } else {
1256  if (query_mem_desc.hasKeylessHash()) {
1257  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1258  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1259  nullptr);
1260  } else {
1261  return std::make_tuple(
1262  emitCall(
1263  "get_matching_group_value_perfect_hash",
1264  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1265  nullptr);
1266  }
1267  }
1268 }

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 958 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

962  {
963  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
964  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
965  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
966  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
967  CHECK(!group_expr);
968  if (!query_mem_desc.didOutputColumnar()) {
969  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
970  }
971  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
972  ? 0
973  : query_mem_desc.getRowSize() / sizeof(int64_t);
974  CodeGenerator code_generator(executor_);
975  if (query_mem_desc.useStreamingTopN()) {
976  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
977  CHECK_GE(only_order_entry.tle_no, int(1));
978  const size_t target_idx = only_order_entry.tle_no - 1;
979  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
980  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
981  const auto chosen_bytes =
982  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
983  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
984  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
985  const auto n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
986  std::string fname = "get_bin_from_k_heap";
987  const auto& oe_ti = order_entry_expr->get_type_info();
988  llvm::Value* null_key_lv = nullptr;
989  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
990  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
991  switch (bit_width) {
992  case 32:
993  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
994  break;
995  case 64:
996  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
997  break;
998  default:
999  CHECK(false);
1000  }
1001  fname += "_int" + std::to_string(bit_width) + "_t";
1002  } else {
1003  CHECK(oe_ti.is_fp());
1004  if (order_entry_lv->getType()->isDoubleTy()) {
1005  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1006  } else {
1007  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1008  }
1009  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1010  }
1011  const auto key_slot_idx =
1012  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1013  return emitCall(
1014  fname,
1015  {groups_buffer,
1016  LL_INT(n),
1017  LL_INT(row_size_quad),
1018  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1019  LL_BOOL(only_order_entry.is_desc),
1020  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1021  LL_BOOL(only_order_entry.nulls_first),
1022  null_key_lv,
1023  order_entry_lv});
1024  } else {
1025  const auto output_buffer_entry_count_lv =
1026  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1027  const auto group_expr_lv =
1028  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1029  std::vector<llvm::Value*> args{groups_buffer,
1030  output_buffer_entry_count_lv,
1031  group_expr_lv,
1032  code_generator.posArg(nullptr)};
1033  if (query_mem_desc.didOutputColumnar()) {
1034  const auto columnar_output_offset =
1035  emitCall("get_columnar_scan_output_offset", args);
1036  return columnar_output_offset;
1037  }
1038  args.push_back(LL_INT(row_size_quad));
1039  return emitCall("get_scan_output_slot", args);
1040  }
1041 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
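For orientation, the streaming top-N branch above dispatches to a type-specialized runtime helper. The sketch below is a standalone illustration (not an OmniSciDB API) of how the helper name is chosen from the ORDER BY key type and the padded slot width.

#include <cstddef>
#include <stdexcept>
#include <string>

// Standalone restatement of the helper-name selection above: integer, decimal
// and time keys pick an _int32_t/_int64_t variant based on the padded slot
// width; floating-point keys pick _float or _double.
std::string heap_helper_name(bool is_integral_key, size_t bit_width) {
  std::string fname = "get_bin_from_k_heap";
  if (is_integral_key) {
    if (bit_width != 32 && bit_width != 64) {
      throw std::runtime_error("unsupported integer key width");
    }
    fname += "_int" + std::to_string(bit_width) + "_t";
  } else {
    fname += bit_width == 64 ? "_double" : "_float";
  }
  return fname;
}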

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1307 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), get_int_type(), getBucketedCardinality(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), query_infos_, and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1307  {
1308  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1309  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1310  auto ft = llvm::FunctionType::get(
1311  get_int_type(32, LL_CONTEXT),
1312  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1313  false);
1314  auto key_hash_func = llvm::Function::Create(ft,
1315  llvm::Function::ExternalLinkage,
1316  "perfect_key_hash",
1317  executor_->cgen_state_->module_);
1318  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1319  mark_function_always_inline(key_hash_func);
1320  auto& key_buff_arg = *key_hash_func->args().begin();
1321  llvm::Value* key_buff_lv = &key_buff_arg;
1322  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1323  llvm::IRBuilder<> key_hash_func_builder(bb);
1324  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1325  std::vector<int64_t> cardinalities;
1326  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1327  auto col_range_info =
1328  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1329  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1330  cardinalities.push_back(getBucketedCardinality(col_range_info));
1331  }
1332  size_t dim_idx = 0;
1333  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1334  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1335  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1336  auto col_range_info =
1337  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1338  auto crt_term_lv =
1339  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1340  if (col_range_info.bucket) {
1341  crt_term_lv =
1342  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1343  }
1344  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1345  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1346  LL_INT(cardinalities[prev_dim_idx]));
1347  }
1348  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1349  ++dim_idx;
1350  }
1351  key_hash_func_builder.CreateRet(
1352  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1353  return key_hash_func;
1354 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
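The generated perfect_key_hash computes a mixed-radix index over the group-by key: each component is normalized by its column's min and bucket, then scaled by the product of the bucketed cardinalities of the preceding dimensions. A minimal host-side sketch of the same arithmetic, assuming the per-dimension min, bucket, and cardinality are already known:

#include <cstdint>
#include <vector>

struct DimInfo {
  int64_t min;
  int64_t bucket;       // 0 means no bucketing
  int64_t cardinality;  // bucketed cardinality of this dimension
};

// Mixed-radix hash mirroring the IR emitted by codegenPerfectHashFunction():
// normalize each key component, scale by the cardinalities of all previous
// dimensions, sum, and truncate to 32 bits.
int32_t perfect_key_hash(const std::vector<int64_t>& key,
                         const std::vector<DimInfo>& dims) {
  int64_t hash = 0;
  for (size_t dim_idx = 0; dim_idx < key.size(); ++dim_idx) {
    int64_t term = key[dim_idx] - dims[dim_idx].min;
    if (dims[dim_idx].bucket) {
      term /= dims[dim_idx].bucket;
    }
    for (size_t prev = 0; prev < dim_idx; ++prev) {
      term *= dims[prev].cardinality;
    }
    hash += term;
  }
  return static_cast<int32_t>(hash);
}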

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1177 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1183  {
1184  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1185  CHECK(query_mem_desc.usesGetGroupValueFast());
1186  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1187  ? "get_columnar_group_bin_offset"
1188  : "get_group_value_fast"};
1189  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1190  get_group_fn_name += "_keyless";
1191  }
1192  if (query_mem_desc.interleavedBins(co.device_type)) {
1193  CHECK(!query_mem_desc.didOutputColumnar());
1194  CHECK(query_mem_desc.hasKeylessHash());
1195  get_group_fn_name += "_semiprivate";
1196  }
1197  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1198  &*group_expr_lv_translated};
1199  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1200  query_mem_desc.mustUseBaselineSort()) {
1201  get_group_fn_name += "_with_original_key";
1202  get_group_fn_args.push_back(group_expr_lv_original);
1203  }
1204  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1205  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1206  if (!query_mem_desc.hasKeylessHash()) {
1207  if (!query_mem_desc.didOutputColumnar()) {
1208  get_group_fn_args.push_back(LL_INT(row_size_quad));
1209  }
1210  } else {
1211  if (!query_mem_desc.didOutputColumnar()) {
1212  get_group_fn_args.push_back(LL_INT(row_size_quad));
1213  }
1214  if (query_mem_desc.interleavedBins(co.device_type)) {
1215  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1216  get_group_fn_args.push_back(warp_idx);
1217  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1218  }
1219  }
1220  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1221  return std::make_tuple(&*groups_buffer,
1222  emitCall(get_group_fn_name, get_group_fn_args));
1223  }
1224  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1225 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
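As a reading aid for the listing above, the runtime entry point name is assembled from the query layout: columnar output selects get_columnar_group_bin_offset, keyless hash appends _keyless, interleaved bins append _semiprivate, and baseline-sort queries switch to the _with_original_key variant. A hypothetical helper that reproduces only this naming decision:

#include <string>

// Not part of OmniSciDB; a readable restatement of the branches above that
// pick the runtime group-value function.
std::string group_value_fast_name(bool output_columnar,
                                  bool keyless_hash,
                                  bool interleaved_bins,
                                  bool must_use_baseline_sort,
                                  bool has_original_key) {
  std::string name =
      output_columnar ? "get_columnar_group_bin_offset" : "get_group_value_fast";
  if (!output_columnar && keyless_hash) {
    name += "_keyless";
  }
  if (interleaved_bins) {  // implies keyless hash and row-wise output
    name += "_semiprivate";
  }
  if (has_original_key && name == "get_group_value_fast" && must_use_baseline_sort) {
    name += "_with_original_key";
  }
  return name;
}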

llvm::Value * GroupByAndAggregate::codegenVarlenOutputBuffer ( const QueryMemoryDescriptor query_mem_desc)
private

Definition at line 1162 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, executor_, QueryMemoryDescriptor::hasVarlenOutput(), LL_CONTEXT, and ROW_FUNC.

Referenced by codegen().

1163  {
1164  if (!query_mem_desc.hasVarlenOutput()) {
1165  return nullptr;
1166  }
1167 
1168  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1169  auto arg_it = ROW_FUNC->arg_begin();
1170  arg_it++; /* groups_buffer */
1171  auto varlen_output_buffer = arg_it++;
1172  CHECK(varlen_output_buffer->getType() == llvm::Type::getInt64PtrTy(LL_CONTEXT));
1173  return varlen_output_buffer;
1174 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction window_func,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
DiamondCodegen diamond_codegen 
)
private

Definition at line 1407 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, codegenOutputSlot(), CodeGenerator::codegenWindowPosition(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1411  {
1412  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1413  const auto window_func_context =
1414  WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1415  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1416  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1417  ? 0
1418  : query_mem_desc.getRowSize() / sizeof(int64_t);
1419  auto arg_it = ROW_FUNC->arg_begin();
1420  auto groups_buffer = arg_it++;
1421  CodeGenerator code_generator(executor_);
1422  auto window_pos_lv = code_generator.codegenWindowPosition(
1423  window_func_context, code_generator.posArg(nullptr));
1424  const auto pos_in_window =
1425  LL_BUILDER.CreateTrunc(window_pos_lv, get_int_type(32, LL_CONTEXT));
1426  llvm::Value* entry_count_lv =
1427  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1428  std::vector<llvm::Value*> args{
1429  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1430  if (query_mem_desc.didOutputColumnar()) {
1431  const auto columnar_output_offset =
1432  emitCall("get_columnar_scan_output_offset", args);
1433  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1434  }
1435  args.push_back(LL_INT(row_size_quad));
1436  return emitCall("get_scan_output_slot", args);
1437  }
1438  auto arg_it = ROW_FUNC->arg_begin();
1439  auto groups_buffer = arg_it++;
1440  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1441 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo arg_type,
const TargetInfo agg_info,
llvm::Value *  target 
)
private

Definition at line 1356 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1358  {
1359  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1360  const auto& agg_type = agg_info.sql_type;
1361  const size_t chosen_bytes = agg_type.get_size();
1362 
1363  bool need_conversion{false};
1364  llvm::Value* arg_null{nullptr};
1365  llvm::Value* agg_null{nullptr};
1366  llvm::Value* target_to_cast{target};
1367  if (arg_type.is_fp()) {
1368  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1369  if (agg_type.is_fp()) {
1370  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1371  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1372  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1373  need_conversion = true;
1374  }
1375  } else {
1376  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1377  return target;
1378  }
1379  } else {
1380  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1381  if (agg_type.is_fp()) {
1382  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1383  need_conversion = true;
1384  target_to_cast = executor_->castToFP(target, arg_type, agg_type);
1385  } else {
1386  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1387  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1388  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1389  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1390  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1391  need_conversion = true;
1392  }
1393  }
1394  }
1395  if (need_conversion) {
1396  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1397  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1398  return LL_BUILDER.CreateSelect(
1399  cmp,
1400  agg_null,
1401  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1402  } else {
1403  return target;
1404  }
1405 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
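Conceptually, convertNullIfAny substitutes the aggregate type's NULL sentinel whenever the argument type and the aggregate type use different sentinels (for example an int32 column aggregated into an int64 slot), and otherwise just widens the value. A scalar sketch with illustrative sentinel values (the real sentinels come from inline_int_null_val()/inline_fp_null_val()):

#include <cstdint>

// Illustrative sentinels only; the real values depend on the SQL types.
constexpr int32_t kNullInt32 = INT32_MIN;
constexpr int64_t kNullInt64 = INT64_MIN;

// If the argument equals its type's NULL sentinel, substitute the aggregate
// type's sentinel; otherwise widen the value. This mirrors the
// select(arg == arg_null, agg_null, cast(arg)) emitted by convertNullIfAny().
int64_t convert_null_if_any(int32_t arg) {
  return arg == kNullInt32 ? kNullInt64 : static_cast<int64_t>(arg);
}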

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 1921 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

1922  {
1923  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1924  return executor_->cgen_state_->emitCall(fname, args);
1925 }

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1733 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1733  {
1734  CHECK_LT(off, 0);
1735  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1736  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1737  LL_BUILDER.CreateBitCast(lit_buff_lv,
1738  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1739  LL_INT(off)));
1740 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo col_range_info)
staticprivate

Definition at line 298 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

298  {
299  checked_int64_t crt_col_cardinality =
300  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
301  if (col_range_info.bucket) {
302  crt_col_cardinality /= col_range_info.bucket;
303  }
304  return static_cast<int64_t>(crt_col_cardinality +
305  (1 + (col_range_info.has_nulls ? 1 : 0)));
306 }

+ Here is the caller graph for this function:
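In other words, the bucketed cardinality is (max - min) / bucket, plus one slot for the max value itself and one more when NULLs are present, computed with overflow-checked arithmetic. An unchecked restatement:

#include <cstdint>

// Unchecked restatement of getBucketedCardinality(); the real implementation
// uses boost's checked_int64_t so overflow throws instead of wrapping.
int64_t bucketed_cardinality(int64_t min, int64_t max, int64_t bucket, bool has_nulls) {
  int64_t cardinality = max - min;
  if (bucket) {
    cardinality /= bucket;
  }
  return cardinality + 1 + (has_nulls ? 1 : 0);
}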

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 175 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), getBucketedCardinality(), GPU, group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, SortInfo::order_entries, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

175  {
176  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
177  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
178  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
179  // can expect this to be true anyway for grouped queries since the precise version
180  // uses significantly more memory.
181  const int64_t baseline_threshold =
182  has_count_distinct(ra_exe_unit_)
183  ? (device_type_ == ExecutorDeviceType::GPU ? (Executor::baseline_threshold / 4)
184  : Executor::baseline_threshold)
185  : Executor::baseline_threshold;
186  if (ra_exe_unit_.groupby_exprs.size() != 1) {
187  try {
188  checked_int64_t cardinality{1};
189  bool has_nulls{false};
190  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
191  auto col_range_info = get_expr_range_info(
192  ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
193  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
194  // going through baseline hash if a non-integer type is encountered
195  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
196  }
197  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
198  CHECK_GE(crt_col_cardinality, 0);
199  cardinality *= crt_col_cardinality;
200  if (col_range_info.has_nulls) {
201  has_nulls = true;
202  }
203  }
204  // For zero or high cardinalities, use baseline layout.
205  if (!cardinality || cardinality > baseline_threshold) {
206  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
207  }
208  return {QueryDescriptionType::GroupByPerfectHash,
209  0,
210  int64_t(cardinality),
211  0,
212  has_nulls};
213  } catch (...) { // overflow when computing cardinality
214  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
215  }
216  }
217  // For single column groupby on high timestamps, force baseline hash due to wide ranges
218  // we are likely to encounter when applying quals to the expression range
219  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
220  // the range is small enough
221  if (ra_exe_unit_.groupby_exprs.front() &&
222  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
223  ra_exe_unit_.simple_quals.size() > 0) {
224  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
225  }
226  const auto col_range_info = get_expr_range_info(
227  ra_exe_unit_, query_infos_, ra_exe_unit_.groupby_exprs.front().get(), executor_);
228  if (!ra_exe_unit_.groupby_exprs.front()) {
229  return col_range_info;
230  }
231  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
232  const int64_t col_count =
233  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
234  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
235  if (has_count_distinct(ra_exe_unit_)) {
236  max_entry_count = std::min(max_entry_count, baseline_threshold);
237  }
238  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
239  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
240  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
241 
242  const bool has_filters =
243  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
244  if (has_filters &&
245  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
246  // if filters are present, we can use the filter to narrow the cardinality of the
247  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
248  // off attempting perfect hash (since we know the range will be made of
249  // monotonically increasing numbers from min to max for dictionary encoded strings)
250  // and failing later due to excessive memory use.
251  // Check the conditions where baseline hash can provide a performance increase and
252  // return baseline hash (potentially forcing an estimator query) as the range type.
253  // Otherwise, return col_range_info which will likely be perfect hash, though could
254  // be baseline from a previous call of this function prior to the estimator query.
255  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
256  // TODO(adb): allow some sorts to pass through this block by centralizing sort
257  // algorithm decision making
258  if (has_count_distinct(ra_exe_unit_) &&
259  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
260  // always use baseline hash for column range too big for perfect hash with count
261  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
262  // hash group by in this case.
263  return {QueryDescriptionType::GroupByBaselineHash,
264  col_range_info.min,
265  col_range_info.max,
266  0,
267  col_range_info.has_nulls};
268  } else {
269  // use original col range for sort
270  return col_range_info;
271  }
272  }
273  // if filters are present and the filtered range is less than the cardinality of
274  // the column, consider baseline hash
275  if (group_cardinality_estimation_ &&
276  cardinality_estimate_less_than_column_range(*group_cardinality_estimation_,
277  col_range_info)) {
278  return {QueryDescriptionType::GroupByBaselineHash,
279  col_range_info.min,
280  col_range_info.max,
281  0,
282  col_range_info.has_nulls};
283  }
284  }
285  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(),
286  *executor_->catalog_)) &&
287  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
288  !col_range_info.bucket) {
289  return {QueryDescriptionType::GroupByBaselineHash,
290  col_range_info.min,
291  col_range_info.max,
292  0,
293  col_range_info.has_nulls};
294  }
295  return col_range_info;
296 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
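A worked example of the multi-column branch: grouping on two perfect-hashable columns with bucketed cardinalities 1,000 and 500 gives a combined cardinality of 500,000, which stays on perfect hash as long as it does not exceed the baseline threshold; a zero cardinality or an overflow falls back to baseline hash. A condensed sketch of that decision (threshold handling simplified, checked arithmetic replaced by an explicit pre-check):

#include <cstdint>
#include <vector>

enum class HashLayout { PerfectHash, BaselineHash };

// Condensed restatement of the multi-column path in getColRangeInfo():
// multiply the per-column bucketed cardinalities, falling back to baseline
// hash on zero cardinality or when the product would exceed the threshold.
HashLayout choose_group_by_layout(const std::vector<int64_t>& col_cardinalities,
                                  int64_t baseline_threshold) {
  int64_t cardinality = 1;
  for (const auto card : col_cardinalities) {
    if (card <= 0 || cardinality > baseline_threshold / card) {
      return HashLayout::BaselineHash;
    }
    cardinality *= card;
  }
  return HashLayout::PerfectHash;
}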

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo col_range_info,
const size_t  shard_count 
) const
private

Definition at line 347 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

348  {
349  size_t device_count{0};
350  if (device_type_ == ExecutorDeviceType::GPU) {
351  device_count = executor_->cudaMgr()->getDeviceCount();
352  CHECK_GT(device_count, 0u);
353  }
354 
355  int64_t bucket{col_range_info.bucket};
356 
357  if (shard_count) {
358  CHECK(!col_range_info.bucket);
359  /*
360  when a node has fewer devices than shard count,
361  a) In a distributed setup, the minimum distance between two keys would be
362  device_count because shards are stored consecutively across the physical tables,
363  i.e if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
364  would have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf
365  node has only 1 device, in this case, all the keys from each node are loaded on
366  the device each.
367 
368  b) In a single node setup, the distance would be minimum of device_count or
369  difference of device_count - shard_count. For example: If a single node server
370  running on 3 devices a shard column has values 0 to 9 in a table with 4 shards,
371  device to fragment keys mapping would be: device 1 - 4,8,3,7 device 2 - 1,5,9
372  device 3 - 2, 6 The bucket value would be 4(shards) - 3(devices) = 1 i.e. minimum
373  of device_count or difference.
374 
375  When a node has device count equal to or more than shard count then the
376  minimum distance is always at least shard_count * no of leaf nodes.
377  */
378  if (device_count < shard_count) {
379  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
380  : std::min(device_count, shard_count - device_count);
381  } else {
382  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
383  }
384  }
385 
386  return bucket;
387 }

+ Here is the caller graph for this function:
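Following the comment in the listing: with 4 shards and 3 GPUs on a single node the bucket becomes min(3, 4 - 3) = 1, while a node with at least as many devices as shards uses shard_count * max(leaf_count, 1). A standalone restatement:

#include <algorithm>
#include <cstddef>
#include <cstdint>

// Restates the shard-aware bucket choice in getShardedTopBucket();
// leaf_count > 0 stands for a distributed (multi-leaf) deployment.
int64_t sharded_top_bucket(size_t shard_count, size_t device_count, size_t leaf_count) {
  if (shard_count == 0) {
    return 0;  // caller keeps the column's own bucket in this case
  }
  if (device_count < shard_count) {
    return leaf_count ? std::max(device_count, size_t(1))
                      : std::min(device_count, shard_count - device_count);
  }
  return shard_count * std::max(leaf_count, size_t(1));
}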

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 781 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, executor_, Analyzer::AggExpr::get_arg(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Analyzer::Expr::get_type_info(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

782  {
783  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
784  return false;
785  }
786  for (const auto& order_entry : order_entries) {
787  CHECK_GE(order_entry.tle_no, 1);
788  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
789  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
790  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
791  return false;
792  }
793  // TODO(alex): relax the restrictions
794  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
795  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
796  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
797  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
798  return false;
799  }
800  if (agg_expr->get_arg()) {
801  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
802  if (arg_ti.is_fp()) {
803  return false;
804  }
805  auto expr_range_info =
806  get_expr_range_info(ra_exe_unit_, query_infos_, agg_expr->get_arg(), executor_);
807  // TODO(adb): QMD not actually initialized here?
808  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
809  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
810  expr_range_info.has_nulls) &&
811  order_entry.is_desc == order_entry.nulls_first) {
812  return false;
813  }
814  }
815  const auto& target_ti = target_expr->get_type_info();
816  CHECK(!target_ti.is_buffer());
817  if (!target_ti.is_integer()) {
818  return false;
819  }
820  }
821  return true;
822 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
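In practice this accepts only a single ORDER BY entry whose target is a plain COUNT-style aggregate (no DISTINCT, AVG, MIN, MAX, or APPROX_COUNT_DISTINCT) with an integer target type; when the aggregate has an argument, that argument must not be floating point and the null ordering must satisfy is_desc != nulls_first. A hypothetical summary of those checks:

// Hypothetical summary of the per-entry checks in gpuCanHandleOrderEntries();
// any false value forces the sort off the GPU fast path.
struct OrderEntryTraits {
  bool target_is_aggregate;  // ORDER BY target is an Analyzer::AggExpr
  bool is_plain_count_like;  // not DISTINCT / AVG / MIN / MAX / APPROX_COUNT_DISTINCT
  bool arg_is_fp;            // aggregate argument (if any) is floating point
  bool null_order_is_safe;   // is_desc != nulls_first when the argument can be NULL
  bool target_is_integer;    // target type is integer
};

bool gpu_can_sort_entry(const OrderEntryTraits& t) {
  return t.target_is_aggregate && t.is_plain_count_like && !t.arg_is_fp &&
         t.null_order_is_safe && t.target_is_integer;
}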

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo render_info,
const bool  output_columnar_hint 
)
private

Definition at line 656 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

661  {
662  const auto shard_count =
663  device_type_ == ExecutorDeviceType::GPU
664  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
665  : 0;
666  bool sort_on_gpu_hint =
667  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
668  !ra_exe_unit_.sort_info.order_entries.empty() &&
669  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
670  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
671  // but the total output buffer size would be too big or it's a sharded top query.
672  // For the sake of managing risk, use the new result set way very selectively for
673  // this case only (alongside the baseline layout we've enabled for a while now).
674  bool must_use_baseline_sort = shard_count;
675  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
676  while (true) {
677  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
678  max_groups_buffer_entry_count,
679  crt_min_byte_width,
680  sort_on_gpu_hint,
681  render_info,
682  must_use_baseline_sort,
683  output_columnar_hint);
684  CHECK(query_mem_desc);
685  if (query_mem_desc->sortOnGpu() &&
686  (query_mem_desc->getBufferSizeBytes(device_type_) +
687  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
688  2 * 1024 * 1024 * 1024LL) {
689  must_use_baseline_sort = true;
690  sort_on_gpu_hint = false;
691  } else {
692  break;
693  }
694  }
695  return query_mem_desc;
696 }

+ Here is the call graph for this function:
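The loop above retries descriptor construction when GPU sort is chosen but the output buffer plus the int32 index vector would exceed 2 GB, switching to baseline sort and clearing the GPU-sort hint on the retry. A compact, self-contained restatement with the descriptor stubbed out (type and field names here are illustrative, not the real QueryMemoryDescriptor API):

#include <cstdint>
#include <memory>

struct DescStub {
  bool sort_on_gpu;
  int64_t buffer_bytes;
  int64_t entry_count;
};

// Hypothetical stand-in for initQueryMemoryDescriptorImpl(); only the fields
// needed for the size check are modeled.
std::unique_ptr<DescStub> build_descriptor(bool sort_on_gpu_hint,
                                           bool /*must_use_baseline_sort*/) {
  return std::make_unique<DescStub>(DescStub{sort_on_gpu_hint, 3LL << 30, 1 << 20});
}

// Mirrors the retry loop: if the GPU-sorted buffer plus the int32 index
// vector would exceed 2 GB, rebuild with baseline sort and no GPU-sort hint.
std::unique_ptr<DescStub> init_query_memory_descriptor(bool sort_on_gpu_hint) {
  bool must_use_baseline_sort = false;
  constexpr int64_t kMaxGpuSortBytes = 2LL * 1024 * 1024 * 1024;
  while (true) {
    auto desc = build_descriptor(sort_on_gpu_hint, must_use_baseline_sort);
    const auto idx_bytes = desc->entry_count * static_cast<int64_t>(sizeof(int32_t));
    if (desc->sort_on_gpu && desc->buffer_bytes + idx_bytes > kMaxGpuSortBytes) {
      must_use_baseline_sort = true;
      sort_on_gpu_hint = false;
      continue;
    }
    return desc;
  }
}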

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 698 of file GroupByAndAggregate.cpp.

References device_type_, executor_, g_enable_watchdog, g_watchdog_baseline_max_groups, anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info(), getColRangeInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

705  {
706  const auto count_distinct_descriptors = init_count_distinct_descriptors(
707  ra_exe_unit_, query_infos_, device_type_, executor_);
708
709  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
710 
711  auto col_range_info_nosharding = getColRangeInfo();
712 
713  const auto shard_count =
714  device_type_ == ExecutorDeviceType::GPU
715  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
716  : 0;
717 
718  const auto col_range_info =
719  ColRangeInfo{col_range_info_nosharding.hash_type_,
720  col_range_info_nosharding.min,
721  col_range_info_nosharding.max,
722  getShardedTopBucket(col_range_info_nosharding, shard_count),
723  col_range_info_nosharding.has_nulls};
724 
725  // Non-grouped aggregates do not support accessing aggregated ranges
726  // Keyless hash is currently only supported with single-column perfect hash
727  const auto keyless_info =
728  !(is_group_by &&
729  col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash)
730  ? KeylessInfo{false, -1}
731  : get_keyless_info(ra_exe_unit_, query_infos_, is_group_by, executor_);
732
733  if (g_enable_watchdog &&
734  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
735  max_groups_buffer_entry_count > g_watchdog_baseline_max_groups) ||
736  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
737  ra_exe_unit_.groupby_exprs.size() == 1 &&
738  (col_range_info.max - col_range_info.min) /
739  std::max(col_range_info.bucket, int64_t(1)) >
740  130000000))) {
741  throw WatchdogException("Query would use too much memory");
742  }
743  try {
744  return QueryMemoryDescriptor::init(executor_,
745  ra_exe_unit_,
746  query_infos_,
747  col_range_info,
748  keyless_info,
749  allow_multifrag,
750  device_type_,
751  crt_min_byte_width,
752  sort_on_gpu_hint,
753  shard_count,
754  max_groups_buffer_entry_count,
755  render_info,
756  count_distinct_descriptors,
757  must_use_baseline_sort,
758  output_columnar_hint,
759  /*streaming_top_n_hint=*/true);
760  } catch (const StreamingTopNOOM& e) {
761  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
762  return QueryMemoryDescriptor::init(executor_,
763  ra_exe_unit_,
764  query_infos_,
765  col_range_info,
766  keyless_info,
767  allow_multifrag,
768  device_type_,
769  crt_min_byte_width,
770  sort_on_gpu_hint,
771  shard_count,
772  max_groups_buffer_entry_count,
773  render_info,
774  count_distinct_descriptors,
775  must_use_baseline_sort,
776  output_columnar_hint,
777  /*streaming_top_n_hint=*/false);
778  }
779 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
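The watchdog guard above rejects, for example, a single-column perfect-hash group by whose (max - min) / max(bucket, 1) exceeds 130,000,000 entries, or a baseline-hash group by asked for more than g_watchdog_baseline_max_groups entries. A small sketch of that check (the baseline limit is a configurable global in the real system, passed in here as a parameter):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <stdexcept>

// Mirrors the watchdog guard in initQueryMemoryDescriptorImpl().
void check_group_by_watchdog(bool baseline_hash,
                             bool single_col_perfect_hash,
                             int64_t min_val,
                             int64_t max_val,
                             int64_t bucket,
                             size_t max_groups_buffer_entry_count,
                             size_t baseline_max_groups) {
  const bool baseline_too_big =
      baseline_hash && max_groups_buffer_entry_count > baseline_max_groups;
  const bool perfect_hash_too_big =
      single_col_perfect_hash &&
      (max_val - min_val) / std::max(bucket, int64_t(1)) > 130000000LL;
  if (baseline_too_big || perfect_hash_too_big) {
    throw std::runtime_error("Query would use too much memory");
  }
}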

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value const val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions co 
) const
private

Definition at line 29 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

32  {
33  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
34  llvm::isa<llvm::AllocaInst>(val_ptr) &&
35  val_ptr->getType() ==
36  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
37  "agg_id" == agg_base_name);
38 }

+ Here is the caller graph for this function:

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 40 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

40  {
41  executor_->cgen_state_->ir_builder_.CreateCall(
42  executor_->cgen_state_->module_->getFunction("force_sync"));
43 }

+ Here is the caller graph for this function:

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog catalog 
)
static

Definition at line 1944 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

1946  {
1947  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1948  return 0;
1949  }
1950  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1951  const auto grouped_col_expr =
1952  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1953  if (!grouped_col_expr) {
1954  continue;
1955  }
1956  if (grouped_col_expr->get_table_id() <= 0) {
1957  return 0;
1958  }
1959  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1960  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1961  return td->nShards;
1962  }
1963  }
1964  return 0;
1965 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
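So a non-zero shard count is only reported for a sharded top-k query: exactly one ORDER BY entry, a LIMIT, and a group-by column that is the table's shard key. For instance, a hypothetical SELECT user_id, COUNT(*) FROM events GROUP BY user_id ORDER BY 2 DESC LIMIT 10 would return nShards when events is sharded on user_id, and 0 when the table is unsharded or the grouping column is not the shard key.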

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 214 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 215 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 212 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 213 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 216 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 217 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private
const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 210 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

bool GroupByAndAggregate::output_columnar_
private

Definition at line 207 of file GroupByAndAggregate.h.

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 206 of file GroupByAndAggregate.h.


The documentation for this class was generated from the following files: