OmniSciDB  06b3bd477c
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>

Classes

struct  DiamondCodegen
 

Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner >)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
void addTransientStringLiterals ()
 
CountDistinctDescriptors initCountDistinctDescriptors ()
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
ColRangeInfo getExprRangeInfo (const Analyzer::Expr *expr) const
 
KeylessInfo getKeylessInfo (const std::vector< Analyzer::Expr * > &target_expr_list, const bool is_group_by) const
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored.
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, GroupByAndAggregate::DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 125 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)

Definition at line 244 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, and ra_exe_unit_.

244 GroupByAndAggregate::GroupByAndAggregate(
245     Executor* executor,
246     const ExecutorDeviceType device_type,
247     const RelAlgExecutionUnit& ra_exe_unit,
248     const std::vector<InputTableInfo>& query_infos,
249     std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
250  : executor_(executor)
251  , ra_exe_unit_(ra_exe_unit)
252  , query_infos_(query_infos)
253  , row_set_mem_owner_(row_set_mem_owner)
254  , device_type_(device_type) {
255  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
256  if (!groupby_expr) {
257  continue;
258  }
259  const auto& groupby_ti = groupby_expr->get_type_info();
260  if (groupby_ti.is_string() && groupby_ti.get_compression() != kENCODING_DICT) {
261  throw std::runtime_error(
262  "Cannot group by string columns which are not dictionary encoded.");
263  }
264  if (groupby_ti.is_array()) {
265  throw std::runtime_error("Group by array not supported");
266  }
267  if (groupby_ti.is_geometry()) {
268  throw std::runtime_error("Group by geometry not supported");
269  }
270  }
271 }

Member Function Documentation

void GroupByAndAggregate::addTransientStringLiterals ( const RelAlgExecutionUnit &  ra_exe_unit,
Executor *  executor,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)
static

Definition at line 510 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::add_transient_string_literals_for_expression(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, kSINGLE_VALUE, and RelAlgExecutionUnit::target_exprs.

510 void GroupByAndAggregate::addTransientStringLiterals(
511     const RelAlgExecutionUnit& ra_exe_unit,
512     Executor* executor,
513     std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
514  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
515  add_transient_string_literals_for_expression(
516  group_expr.get(), executor, row_set_mem_owner);
517  }
518  for (const auto target_expr : ra_exe_unit.target_exprs) {
519  const auto& target_type = target_expr->get_type_info();
520  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
521  continue;
522  }
523  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
524  if (agg_expr) {
525  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
526  agg_expr->get_aggtype() == kSAMPLE) {
527  add_transient_string_literals_for_expression(
528  agg_expr->get_arg(), executor, row_set_mem_owner);
529  }
530  } else {
531  add_transient_string_literals_for_expression(
532  target_expr, executor, row_set_mem_owner);
533  }
534  }
535  row_set_mem_owner->addLiteralStringDictProxy(executor->lit_str_dict_proxy_);
536 }

void GroupByAndAggregate::addTransientStringLiterals ( )
private

Definition at line 444 of file GroupByAndAggregate.cpp.

References executor_, ra_exe_unit_, and row_set_mem_owner_.

Referenced by RelAlgExecutor::executeSort(), RelAlgExecutor::executeWorkUnit(), and initQueryMemoryDescriptorImpl().

444 void GroupByAndAggregate::addTransientStringLiterals() {
445  addTransientStringLiterals(ra_exe_unit_, executor_, row_set_mem_owner_);
446 }

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 1906 of file GroupByAndAggregate.cpp.

References executor_.

Referenced by TargetExprCodegen::codegenAggregate().

1906 void GroupByAndAggregate::checkErrorCode(llvm::Value* retCode) {
1907  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1908  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1909  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1910 
1911  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1912 }

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 881 of file GroupByAndAggregate.cpp.

References CHECK(), codegenAggCalls(), codegenEstimator(), codegenGroupBy(), GroupByAndAggregate::DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

881 bool GroupByAndAggregate::codegen(llvm::Value* filter_result,
882     llvm::BasicBlock* sc_false,
883     const QueryMemoryDescriptor& query_mem_desc,
884     const CompilationOptions& co,
885     const GpuSharedMemoryContext& gpu_smem_context) {
886  CHECK(filter_result);
887 
888  bool can_return_error = false;
889  llvm::BasicBlock* filter_false{nullptr};
890 
891  {
892  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
893 
894  if (executor_->isArchMaxwell(co.device_type)) {
895  prependForceSync();
896  }
897  DiamondCodegen filter_cfg(filter_result,
898  executor_,
899  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
900  "filter",
901  nullptr,
902  false);
903  filter_false = filter_cfg.cond_false_;
904 
905  if (is_group_by) {
906  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
907  !query_mem_desc.useStreamingTopN()) {
908  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
909  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
910  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
911  llvm::Value* old_total_matched_val{nullptr};
912  if (co.device_type == ExecutorDeviceType::GPU) {
913  old_total_matched_val =
914  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
915  total_matched_ptr,
916  LL_INT(int32_t(1)),
917  llvm::AtomicOrdering::Monotonic);
918  } else {
919  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
920  LL_BUILDER.CreateStore(
921  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
922  total_matched_ptr);
923  }
924  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
925  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
926  }
927 
928  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
929  if (query_mem_desc.usesGetGroupValueFast() ||
930  query_mem_desc.getQueryDescriptionType() ==
931  QueryDescriptionType::GroupByPerfectHash) {
932  if (query_mem_desc.getGroupbyColCount() > 1) {
933  filter_cfg.setChainToNext();
934  }
935  // Don't generate null checks if the group slot is guaranteed to be non-null,
936  // as it's the case for get_group_value_fast* family.
937  can_return_error = codegenAggCalls(
938  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
939  } else {
940  {
941  llvm::Value* nullcheck_cond{nullptr};
942  if (query_mem_desc.didOutputColumnar()) {
943  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
944  LL_INT(int32_t(0)));
945  } else {
946  nullcheck_cond = LL_BUILDER.CreateICmpNE(
947  std::get<0>(agg_out_ptr_w_idx),
948  llvm::ConstantPointerNull::get(
949  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
950  }
951  DiamondCodegen nullcheck_cfg(
952  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
953  codegenAggCalls(
954  agg_out_ptr_w_idx, {}, query_mem_desc, co, gpu_smem_context, filter_cfg);
955  }
956  can_return_error = true;
957  if (query_mem_desc.getQueryDescriptionType() ==
958  QueryDescriptionType::Projection &&
959  query_mem_desc.useStreamingTopN()) {
960  // Ignore rejection on pushing current row to top-K heap.
961  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
962  } else {
963  CodeGenerator code_generator(executor_);
964  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
965  // TODO(alex): remove the trunc once pos is converted to 32 bits
966  code_generator.posArg(nullptr),
967  get_int_type(32, LL_CONTEXT))));
968  }
969  }
970  } else {
971  if (ra_exe_unit_.estimator) {
972  std::stack<llvm::BasicBlock*> array_loops;
973  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
974  } else {
975  auto arg_it = ROW_FUNC->arg_begin();
976  std::vector<llvm::Value*> agg_out_vec;
977  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
978  agg_out_vec.push_back(&*arg_it++);
979  }
980  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
981  agg_out_vec,
982  query_mem_desc,
983  co,
984  gpu_smem_context,
985  filter_cfg);
986  }
987  }
988  }
989 
990  if (ra_exe_unit_.join_quals.empty()) {
991  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
992  } else if (sc_false) {
993  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
994  LL_BUILDER.SetInsertPoint(sc_false);
995  LL_BUILDER.CreateBr(filter_false);
996  LL_BUILDER.SetInsertPoint(saved_insert_block);
997  }
998 
999  return can_return_error;
1000 }
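
The device-type branch above (source lines 911-925) shows how an output slot is claimed by bumping the total_matched counter: when the device is the GPU, the increment is an atomic fetch-add with monotonic ordering; on CPU a plain load/add/store sequence is emitted. A minimal stand-alone C++ sketch of the equivalent runtime semantics (claim_output_slot is a hypothetical name, not an OmniSciDB function):

#include <atomic>
#include <cstdint>

// Sketch only: mirrors the IR emitted above. is_gpu corresponds to
// co.device_type == ExecutorDeviceType::GPU in the generated code.
int32_t claim_output_slot(std::atomic<int32_t>& total_matched, const bool is_gpu) {
  if (is_gpu) {
    // AtomicRMW Add with monotonic (relaxed) ordering
    return total_matched.fetch_add(1, std::memory_order_relaxed);
  }
  // single-threaded path: plain load / add / store
  const int32_t old_total_matched = total_matched.load(std::memory_order_relaxed);
  total_matched.store(old_total_matched + 1, std::memory_order_relaxed);
  return old_total_matched;
}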

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1729 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, CodeGenerator::codegen(), executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1729 std::vector<llvm::Value*> GroupByAndAggregate::codegenAggArg(
1730     const Analyzer::Expr* target_expr,
1731     const CompilationOptions& co) {
1732  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1733  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1734  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1735 
1736  // TODO(alex): handle arrays uniformly?
1737  CodeGenerator code_generator(executor_);
1738  if (target_expr) {
1739  const auto& target_ti = target_expr->get_type_info();
1740  if (target_ti.is_array() && !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1741  const auto target_lvs =
1742  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1743  : code_generator.codegen(
1744  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1745  if (!func_expr && !arr_expr) {
1746  // Something with the chunk transport is code that was generated from a source
1747  // other than an ARRAY[] expression
1748  CHECK_EQ(size_t(1), target_lvs.size());
1749  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1750  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1751  const auto i8p_ty =
1752  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1753  const auto& elem_ti = target_ti.get_elem_type();
1754  return {
1755  executor_->cgen_state_->emitExternalCall(
1756  "array_buff",
1757  i8p_ty,
1758  {target_lvs.front(), code_generator.posArg(target_expr)}),
1759  executor_->cgen_state_->emitExternalCall(
1760  "array_size",
1761  i32_ty,
1762  {target_lvs.front(),
1763  code_generator.posArg(target_expr),
1764  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1765  } else {
1766  if (agg_expr) {
1767  throw std::runtime_error(
1768  "Using array[] operator as argument to an aggregate operator is not "
1769  "supported");
1770  }
1771  CHECK(func_expr || arr_expr);
1772  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1773  CHECK_EQ(size_t(1), target_lvs.size());
1774 
1775  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1776 
1777  // const auto target_lv_type = target_lvs[0]->getType();
1778  // CHECK(target_lv_type->isStructTy());
1779  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1780  const auto i8p_ty = llvm::PointerType::get(
1781  get_int_type(8, executor_->cgen_state_->context_), 0);
1782  const auto ptr = LL_BUILDER.CreatePointerCast(
1783  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1784  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1785  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1786 
1787  const auto nullcheck_ok_bb = llvm::BasicBlock::Create(
1788  LL_CONTEXT, "arr_nullcheck_ok_bb", executor_->cgen_state_->row_func_);
1789  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
1790  LL_CONTEXT, "arr_nullcheck_fail_bb", executor_->cgen_state_->row_func_);
1791 
1792  // TODO(adb): probably better to zext the bool
1793  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1794  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1795  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1796 
1797  const auto ret_bb = llvm::BasicBlock::Create(
1798  LL_CONTEXT, "arr_return", executor_->cgen_state_->row_func_);
1799  LL_BUILDER.SetInsertPoint(ret_bb);
1800  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, "array_ptr_return");
1801  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1802 
1803  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1804  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1805  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1806 
1807  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1808  executor_->cgen_state_->emitExternalCall(
1809  "register_buffer_with_executor_rsm",
1810  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1811  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1812  LL_BUILDER.CreateBr(ret_bb);
1813 
1814  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1815  LL_BUILDER.CreateBr(ret_bb);
1816 
1817  LL_BUILDER.SetInsertPoint(ret_bb);
1818 
1819  return {result_phi, size};
1820  }
1821  CHECK_EQ(size_t(2), target_lvs.size());
1822  return {target_lvs[0], target_lvs[1]};
1823  }
1824  }
1825  if (target_ti.is_geometry() &&
1826  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1827  auto generate_coord_lvs =
1828  [&](auto* selected_target_expr,
1829  bool const fetch_columns) -> std::vector<llvm::Value*> {
1830  const auto target_lvs =
1831  code_generator.codegen(selected_target_expr, fetch_columns, co);
1832  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
1833  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
1834  if (geo_uoper || geo_binoper) {
1835  CHECK(target_expr->get_type_info().is_geometry());
1836  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1837  target_lvs.size());
1838  return target_lvs;
1839  }
1840  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1841  target_lvs.size());
1842 
1843  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1844  const auto i8p_ty =
1845  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1846  std::vector<llvm::Value*> coords;
1847  size_t ctr = 0;
1848  for (const auto& target_lv : target_lvs) {
1849  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1850  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1851  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1852  // coords array (TINYINT). Subsequent arrays are regular INT.
1853 
1854  const size_t elem_sz = ctr == 0 ? 1 : 4;
1855  ctr++;
1856  int32_t fixlen = -1;
1857  if (target_ti.get_type() == kPOINT) {
1858  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1859  if (col_var) {
1860  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1861  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1862  fixlen = coords_cd->columnType.get_size();
1863  }
1864  }
1865  }
1866  if (fixlen > 0) {
1867  coords.push_back(executor_->cgen_state_->emitExternalCall(
1868  "fast_fixlen_array_buff",
1869  i8p_ty,
1870  {target_lv, code_generator.posArg(selected_target_expr)}));
1871  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1872  continue;
1873  }
1874  coords.push_back(executor_->cgen_state_->emitExternalCall(
1875  "array_buff",
1876  i8p_ty,
1877  {target_lv, code_generator.posArg(selected_target_expr)}));
1878  coords.push_back(executor_->cgen_state_->emitExternalCall(
1879  "array_size",
1880  i32_ty,
1881  {target_lv,
1882  code_generator.posArg(selected_target_expr),
1883  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1884  }
1885  return coords;
1886  };
1887 
1888  if (agg_expr) {
1889  return generate_coord_lvs(agg_expr->get_arg(), true);
1890  } else {
1891  return generate_coord_lvs(target_expr,
1892  !executor_->plan_state_->allow_lazy_fetch_);
1893  }
1894  }
1895  }
1896  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1897  : code_generator.codegen(
1898  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1899 }
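
The null-check diamond emitted above for function- or ARRAY[]-generated arrays (source lines 1787-1819) selects between the real array buffer and a null sentinel, registering the buffer with the executor's row-set memory manager on the non-null path. A hedged C++ sketch of the runtime effect (array_agg_arg and register_buffer are illustrative stand-ins; the actual runtime call is register_buffer_with_executor_rsm):

#include <cstdint>
#include <utility>

// Sketch only: returns {buffer pointer, size}, with nullptr standing in for
// the inttoptr(0) sentinel produced on the null path of the generated IR.
std::pair<int8_t*, int32_t> array_agg_arg(int8_t* ptr,
                                          const int32_t size,
                                          const bool is_null,
                                          void (*register_buffer)(int8_t*)) {
  if (is_null) {
    return {nullptr, size};
  }
  register_buffer(ptr);  // keeps the buffer alive for the result set
  return {ptr, size};
}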

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const std::vector< llvm::Value * > &  agg_out_vec,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1482 of file GroupByAndAggregate.cpp.

References CHECK(), TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1482 bool GroupByAndAggregate::codegenAggCalls(
1483     const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
1484     const std::vector<llvm::Value*>& agg_out_vec,
1485     const QueryMemoryDescriptor& query_mem_desc,
1486     const CompilationOptions& co,
1487     const GpuSharedMemoryContext& gpu_smem_context,
1488     DiamondCodegen& diamond_codegen) {
1489  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1490  // TODO(alex): unify the two cases, the output for non-group by queries
1491  // should be a contiguous buffer
1492  const bool is_group_by{std::get<0>(agg_out_ptr_w_idx)};
1493  bool can_return_error = false;
1494  if (is_group_by) {
1495  CHECK(agg_out_vec.empty());
1496  } else {
1497  CHECK(!agg_out_vec.empty());
1498  }
1499 
1500  // output buffer is casted into a byte stream to be able to handle data elements of
1501  // different sizes (only used when actual column width sizes are used)
1502  llvm::Value* output_buffer_byte_stream{nullptr};
1503  llvm::Value* out_row_idx{nullptr};
1504  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1505  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1506  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1507  std::get<0>(agg_out_ptr_w_idx),
1508  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1509  output_buffer_byte_stream->setName("out_buff_b_stream");
1510  CHECK(std::get<1>(agg_out_ptr_w_idx));
1511  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1512  llvm::Type::getInt64Ty(LL_CONTEXT));
1513  out_row_idx->setName("out_row_idx");
1514  }
1515 
1516  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1517  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1518  ++target_idx) {
1519  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1520  CHECK(target_expr);
1521 
1522  target_builder(target_expr, executor_, co);
1523  }
1524 
1525  target_builder.codegen(this,
1526  executor_,
1527  query_mem_desc,
1528  co,
1529  gpu_smem_context,
1530  agg_out_ptr_w_idx,
1531  agg_out_vec,
1532  output_buffer_byte_stream,
1533  out_row_idx,
1534  diamond_codegen);
1535 
1536  for (auto target_expr : ra_exe_unit_.target_exprs) {
1537  CHECK(target_expr);
1538  executor_->plan_state_->isLazyFetchColumn(target_expr);
1539  }
1540 
1541  return can_return_error;
1542 }

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1547 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1547 llvm::Value* GroupByAndAggregate::codegenAggColumnPtr(
1548     llvm::Value* output_buffer_byte_stream,
1549     llvm::Value* out_row_idx,
1550     const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
1551     const QueryMemoryDescriptor& query_mem_desc,
1552     const size_t chosen_bytes,
1553     const size_t agg_out_off,
1554     const size_t target_idx) {
1555  llvm::Value* agg_col_ptr{nullptr};
1556  if (query_mem_desc.didOutputColumnar()) {
1557  // TODO(Saman): remove the second columnar branch, and support all query description
1558  // types through the first branch. Then, input arguments should also be cleaned up
1559  if (!g_cluster &&
1560  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1561  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1562  chosen_bytes == 8);
1563  CHECK(output_buffer_byte_stream);
1564  CHECK(out_row_idx);
1565  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1566  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1567  auto out_per_col_byte_idx =
1568  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1569  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1570  LL_INT(static_cast<int64_t>(col_off)));
1571  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1572  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1573  agg_col_ptr = LL_BUILDER.CreateBitCast(
1574  output_ptr,
1575  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1576  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1577  } else {
1578  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1579  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1580  col_off /= chosen_bytes;
1581  CHECK(std::get<1>(agg_out_ptr_w_idx));
1582  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1583  agg_col_ptr = LL_BUILDER.CreateGEP(
1584  LL_BUILDER.CreateBitCast(
1585  std::get<0>(agg_out_ptr_w_idx),
1586  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1587  offset);
1588  }
1589  } else {
1590  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1591  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1592  col_off /= chosen_bytes;
1593  agg_col_ptr = LL_BUILDER.CreateGEP(
1594  LL_BUILDER.CreateBitCast(
1595  std::get<0>(agg_out_ptr_w_idx),
1596  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1597  LL_INT(col_off));
1598  }
1599  CHECK(agg_col_ptr);
1600  return agg_col_ptr;
1601 }
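
In the columnar projection branch above, the slot address is col_off + out_row_idx * chosen_bytes, with the multiplication emitted as a left shift by log2(chosen_bytes), computed as __builtin_ffs(chosen_bytes) - 1; this is valid because chosen_bytes is a power of two. A minimal sketch of the same address computation (the function name is illustrative):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Sketch only: byte-stream addressing used for columnar projection output.
int8_t* agg_col_byte_ptr(int8_t* output_buffer_byte_stream,
                         const uint32_t col_off_in_bytes,
                         const uint64_t out_row_idx,
                         const size_t chosen_bytes) {
  assert(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
         chosen_bytes == 8);
  // out_row_idx * chosen_bytes, i.e. << log2(chosen_bytes)
  const uint64_t per_col_byte_idx =
      out_row_idx << (__builtin_ffs(static_cast<int>(chosen_bytes)) - 1);
  return output_buffer_byte_stream + per_col_byte_idx + col_off_in_bytes;
}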

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1652 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK(), CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1652 void GroupByAndAggregate::codegenCountDistinct(
1653     const size_t target_idx,
1654     const Analyzer::Expr* target_expr,
1655     std::vector<llvm::Value*>& agg_args,
1656     const QueryMemoryDescriptor& query_mem_desc,
1657     const ExecutorDeviceType device_type) {
1658  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1659  const auto& arg_ti =
1660  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1661  if (arg_ti.is_fp()) {
1662  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1663  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1664  }
1665  const auto& count_distinct_descriptor =
1666  query_mem_desc.getCountDistinctDescriptor(target_idx);
1667  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1668  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1669  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1670  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1671  if (device_type == ExecutorDeviceType::GPU) {
1672  const auto base_dev_addr = getAdditionalLiteral(-1);
1673  const auto base_host_addr = getAdditionalLiteral(-2);
1674  agg_args.push_back(base_dev_addr);
1675  agg_args.push_back(base_host_addr);
1676  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1677  } else {
1678  emitCall("agg_approximate_count_distinct", agg_args);
1679  }
1680  return;
1681  }
1682  std::string agg_fname{"agg_count_distinct"};
1683  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1684  agg_fname += "_bitmap";
1685  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1686  }
1687  if (agg_info.skip_null_val) {
1688  auto null_lv = executor_->cgen_state_->castToTypeIn(
1689  (arg_ti.is_fp()
1690  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1691  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1692  64);
1693  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1694  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1695  agg_fname += "_skip_val";
1696  agg_args.push_back(null_lv);
1697  }
1698  if (device_type == ExecutorDeviceType::GPU) {
1699  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1700  agg_fname += "_gpu";
1701  const auto base_dev_addr = getAdditionalLiteral(-1);
1702  const auto base_host_addr = getAdditionalLiteral(-2);
1703  agg_args.push_back(base_dev_addr);
1704  agg_args.push_back(base_host_addr);
1705  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1706  CHECK_EQ(size_t(0),
1707  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1708  count_distinct_descriptor.sub_bitmap_count);
1709  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1710  count_distinct_descriptor.sub_bitmap_count)));
1711  }
1712  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1713  emitCall(agg_fname, agg_args);
1714  } else {
1715  executor_->cgen_state_->emitExternalCall(
1716  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1717  }
1718 }
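
The bitmap path dispatched to above (agg_count_distinct_bitmap and its _skip_val/_gpu variants) amounts to setting bit (val - min_val) in a pre-sized bitmap and popcounting it when the query finishes. A simplified sketch of that idea, not the actual runtime functions:

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch only: min_val is the same value pushed as an argument above.
void bitmap_set(std::vector<uint8_t>& bitmap, const int64_t val, const int64_t min_val) {
  const uint64_t bitmap_idx = static_cast<uint64_t>(val - min_val);
  bitmap[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
}

// The distinct count is the population count of the bitmap.
size_t bitmap_count(const std::vector<uint8_t>& bitmap) {
  size_t count = 0;
  for (const auto byte : bitmap) {
    count += __builtin_popcount(byte);
  }
  return count;
}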

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
GroupByAndAggregate::DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1603 of file GroupByAndAggregate.cpp.

References CHECK(), emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1603 void GroupByAndAggregate::codegenEstimator(
1604     std::stack<llvm::BasicBlock*>& array_loops,
1605     GroupByAndAggregate::DiamondCodegen& diamond_codegen,
1606     const QueryMemoryDescriptor& query_mem_desc,
1607     const CompilationOptions& co) {
1608  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1609  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1610  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1611  estimator_comp_count_lv);
1612  int32_t subkey_idx = 0;
1613  for (const auto& estimator_arg_comp : estimator_arg) {
1614  const auto estimator_arg_comp_lvs =
1615  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1616  query_mem_desc.getEffectiveKeyWidth(),
1617  co,
1618  false,
1619  0,
1620  diamond_codegen,
1621  array_loops,
1622  true);
1623  CHECK(!estimator_arg_comp_lvs.original_value);
1624  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1625  // store the sub-key to the buffer
1626  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1627  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1628  }
1629  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1630  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1631  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1632  const auto estimator_comp_bytes_lv =
1633  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1634  const auto bitmap_size_lv =
1635  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1636  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1637  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1638 }

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1093 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), getExprRangeInfo(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, groups_buffer, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1093 std::tuple<llvm::Value*, llvm::Value*> GroupByAndAggregate::codegenGroupBy(
1094     const QueryMemoryDescriptor& query_mem_desc,
1095     const CompilationOptions& co,
1096     DiamondCodegen& diamond_codegen) {
1097  auto arg_it = ROW_FUNC->arg_begin();
1098  auto groups_buffer = arg_it++;
1099 
1100  std::stack<llvm::BasicBlock*> array_loops;
1101 
1102  // TODO(Saman): move this logic outside of this function.
1103  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1104  if (query_mem_desc.didOutputColumnar()) {
1105  return std::make_tuple(
1106  &*groups_buffer,
1107  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1108  } else {
1109  return std::make_tuple(
1110  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1111  nullptr);
1112  }
1113  }
1114 
1115  CHECK(query_mem_desc.getQueryDescriptionType() ==
1116  QueryDescriptionType::GroupByBaselineHash ||
1117  query_mem_desc.getQueryDescriptionType() ==
1118  QueryDescriptionType::GroupByPerfectHash);
1119 
1120  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1121  ? 0
1122  : query_mem_desc.getRowSize() / sizeof(int64_t);
1123 
1124  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1125  ? sizeof(int64_t)
1126  : query_mem_desc.getEffectiveKeyWidth();
1127  // for multi-column group by
1128  llvm::Value* group_key = nullptr;
1129  llvm::Value* key_size_lv = nullptr;
1130 
1131  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1132  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1133  if (query_mem_desc.getQueryDescriptionType() ==
1134  QueryDescriptionType::GroupByPerfectHash) {
1135  group_key =
1136  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1137  } else if (query_mem_desc.getQueryDescriptionType() ==
1138  QueryDescriptionType::GroupByBaselineHash) {
1139  group_key =
1140  col_width_size == sizeof(int32_t)
1141  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1142  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1143  }
1144  CHECK(group_key);
1145  CHECK(key_size_lv);
1146  }
1147 
1148  int32_t subkey_idx = 0;
1149  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1150  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1151  const auto col_range_info = getExprRangeInfo(group_expr.get());
1152  const auto translated_null_value = static_cast<int64_t>(
1153  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1154  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1155  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1156  : checked_int64_t(col_range_info.max) +
1157  (col_range_info.bucket ? col_range_info.bucket : 1));
1158 
1159  const bool col_has_nulls =
1160  query_mem_desc.getQueryDescriptionType() ==
1161  QueryDescriptionType::GroupByPerfectHash
1162  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1163  ? query_mem_desc.hasNulls()
1164  : col_range_info.has_nulls)
1165  : false;
1166 
1167  const auto group_expr_lvs =
1168  executor_->groupByColumnCodegen(group_expr.get(),
1169  col_width_size,
1170  co,
1171  col_has_nulls,
1172  translated_null_value,
1173  diamond_codegen,
1174  array_loops,
1175  query_mem_desc.threadsShareMemory());
1176  const auto group_expr_lv = group_expr_lvs.translated_value;
1177  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1178  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1179  return codegenSingleColumnPerfectHash(query_mem_desc,
1180  co,
1181  &*groups_buffer,
1182  group_expr_lv,
1183  group_expr_lvs.original_value,
1184  row_size_quad);
1185  } else {
1186  // store the sub-key to the buffer
1187  LL_BUILDER.CreateStore(group_expr_lv,
1188  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1189  }
1190  }
1191  if (query_mem_desc.getQueryDescriptionType() ==
1192  QueryDescriptionType::GroupByPerfectHash) {
1193  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1194  return codegenMultiColumnPerfectHash(
1195  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1196  } else if (query_mem_desc.getQueryDescriptionType() ==
1197  QueryDescriptionType::GroupByBaselineHash) {
1198  return codegenMultiColumnBaselineHash(co,
1199  &*groups_buffer,
1200  group_key,
1201  key_size_lv,
1202  query_mem_desc,
1203  col_width_size,
1204  row_size_quad);
1205  }
1206  CHECK(false);
1207  return std::make_tuple(nullptr, nullptr);
1208 }
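
Note the NULL handling above: translated_null_value is one bucket past the column maximum (max + bucket, or max + 1 when unbucketed), so NULL keys occupy a dedicated perfect-hash bin rather than colliding with real values. A sketch of the translation together with the bin math that the get_group_value_fast family applies afterwards (translate_and_bin_group_key is an illustrative name):

#include <cstdint>

// Sketch only: maps a (possibly NULL) group key to its perfect-hash bin.
int64_t translate_and_bin_group_key(const int64_t key,
                                    const bool is_null,
                                    const int64_t min_val,
                                    const int64_t max_val,
                                    const int64_t bucket) {
  const int64_t stride = bucket ? bucket : 1;
  const int64_t translated = is_null ? max_val + stride : key;
  return (translated - min_val) / stride;  // one bin per bucket in [min, max + stride]
}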

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1303 of file GroupByAndAggregate.cpp.

References CHECK(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), QueryMemoryDescriptor::getEntryCount(), groups_buffer, LL_BUILDER, LL_CONTEXT, LL_INT, ROW_FUNC, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1303 std::tuple<llvm::Value*, llvm::Value*> GroupByAndAggregate::codegenMultiColumnBaselineHash(
1304     const CompilationOptions& co,
1305     llvm::Value* groups_buffer,
1306     llvm::Value* group_key,
1307     llvm::Value* key_size_lv,
1308     const QueryMemoryDescriptor& query_mem_desc,
1309     const size_t key_width,
1310     const int32_t row_size_quad) {
1311  auto arg_it = ROW_FUNC->arg_begin(); // groups_buffer
1312  ++arg_it; // current match count
1313  ++arg_it; // total match count
1314  ++arg_it; // old match count
1315  ++arg_it; // output buffer slots count
1316  ++arg_it; // aggregate init values
1317  CHECK(arg_it->getName() == "agg_init_val");
1318  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1319  CHECK(key_width == sizeof(int32_t));
1320  group_key =
1321  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1322  }
1323  std::vector<llvm::Value*> func_args{
1324  groups_buffer,
1325  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1326  &*group_key,
1327  &*key_size_lv,
1328  LL_INT(static_cast<int32_t>(key_width))};
1329  std::string func_name{"get_group_value"};
1330  if (query_mem_desc.didOutputColumnar()) {
1331  func_name += "_columnar_slot";
1332  } else {
1333  func_args.push_back(LL_INT(row_size_quad));
1334  func_args.push_back(&*arg_it);
1335  }
1336  if (co.with_dynamic_watchdog) {
1337  func_name += "_with_watchdog";
1338  }
1339  if (query_mem_desc.didOutputColumnar()) {
1340  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1341  } else {
1342  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1343  }
1344 }

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1260 of file GroupByAndAggregate.cpp.

References CHECK(), codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, groups_buffer, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1260 std::tuple<llvm::Value*, llvm::Value*> GroupByAndAggregate::codegenMultiColumnPerfectHash(
1261     llvm::Value* groups_buffer,
1262     llvm::Value* group_key,
1263     llvm::Value* key_size_lv,
1264     const QueryMemoryDescriptor& query_mem_desc,
1265     const int32_t row_size_quad) {
1266  CHECK(query_mem_desc.getQueryDescriptionType() ==
1267  QueryDescriptionType::GroupByPerfectHash);
1268  // compute the index (perfect hash)
1269  auto perfect_hash_func = codegenPerfectHashFunction();
1270  auto hash_lv =
1271  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1272 
1273  if (query_mem_desc.didOutputColumnar()) {
1274  if (!query_mem_desc.hasKeylessHash()) {
1275  const std::string set_matching_func_name{
1276  "set_matching_group_value_perfect_hash_columnar"};
1277  const std::vector<llvm::Value*> set_matching_func_arg{
1278  groups_buffer,
1279  hash_lv,
1280  group_key,
1281  key_size_lv,
1282  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1283  query_mem_desc.getEntryCount())};
1284  emitCall(set_matching_func_name, set_matching_func_arg);
1285  }
1286  return std::make_tuple(groups_buffer, hash_lv);
1287  } else {
1288  if (query_mem_desc.hasKeylessHash()) {
1289  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1290  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1291  nullptr);
1292  } else {
1293  return std::make_tuple(
1294  emitCall(
1295  "get_matching_group_value_perfect_hash",
1296  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1297  nullptr);
1298  }
1299  }
1300 }

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1002 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, CHECK(), CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, groups_buffer, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), RelAlgExecutionUnit::use_bump_allocator, and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1002 llvm::Value* GroupByAndAggregate::codegenOutputSlot(
1003     llvm::Value* groups_buffer,
1004     const QueryMemoryDescriptor& query_mem_desc,
1005     const CompilationOptions& co,
1006     DiamondCodegen& diamond_codegen) {
1007  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1008  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1009  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1010  CHECK(!group_expr);
1011  if (!query_mem_desc.didOutputColumnar()) {
1012  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1013  }
1014  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1015  ? 0
1016  : query_mem_desc.getRowSize() / sizeof(int64_t);
1017  CodeGenerator code_generator(executor_);
1018  if (query_mem_desc.useStreamingTopN()) {
1019  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1020  CHECK_GE(only_order_entry.tle_no, int(1));
1021  const size_t target_idx = only_order_entry.tle_no - 1;
1022  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1023  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1024  const auto chosen_bytes =
1025  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1026  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1027  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1028  const size_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1029  std::string fname = "get_bin_from_k_heap";
1030  const auto& oe_ti = order_entry_expr->get_type_info();
1031  llvm::Value* null_key_lv = nullptr;
1032  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1033  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1034  switch (bit_width) {
1035  case 32:
1036  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1037  break;
1038  case 64:
1039  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1040  break;
1041  default:
1042  CHECK(false);
1043  }
1044  fname += "_int" + std::to_string(bit_width) + "_t";
1045  } else {
1046  CHECK(oe_ti.is_fp());
1047  if (order_entry_lv->getType()->isDoubleTy()) {
1048  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1049  } else {
1050  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1051  }
1052  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1053  }
1054  const auto key_slot_idx =
1055  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1056  return emitCall(
1057  fname,
1058  {groups_buffer,
1059  LL_INT(n),
1060  LL_INT(row_size_quad),
1061  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1062  LL_BOOL(only_order_entry.is_desc),
1063  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1064  LL_BOOL(only_order_entry.nulls_first),
1065  null_key_lv,
1066  order_entry_lv});
1067  } else {
1068  llvm::Value* output_buffer_entry_count_lv{nullptr};
1069  if (ra_exe_unit_.use_bump_allocator) {
1070  output_buffer_entry_count_lv =
1071  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1072  CHECK(output_buffer_entry_count_lv);
1073  }
1074  const auto group_expr_lv =
1075  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1076  std::vector<llvm::Value*> args{
1077  groups_buffer,
1078  output_buffer_entry_count_lv
1079  ? output_buffer_entry_count_lv
1080  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1081  group_expr_lv,
1082  code_generator.posArg(nullptr)};
1083  if (query_mem_desc.didOutputColumnar()) {
1084  const auto columnar_output_offset =
1085  emitCall("get_columnar_scan_output_offset", args);
1086  return columnar_output_offset;
1087  }
1088  args.push_back(LL_INT(row_size_quad));
1089  return emitCall("get_scan_output_slot", args);
1090  }
1091 }

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1346 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_GT, executor_, get_int_type(), getBucketedCardinality(), getExprRangeInfo(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1346 llvm::Function* GroupByAndAggregate::codegenPerfectHashFunction() {
1347  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1348  auto ft = llvm::FunctionType::get(
1349  get_int_type(32, LL_CONTEXT),
1350  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1351  false);
1352  auto key_hash_func = llvm::Function::Create(ft,
1353  llvm::Function::ExternalLinkage,
1354  "perfect_key_hash",
1355  executor_->cgen_state_->module_);
1356  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1357  mark_function_always_inline(key_hash_func);
1358  auto& key_buff_arg = *key_hash_func->args().begin();
1359  llvm::Value* key_buff_lv = &key_buff_arg;
1360  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1361  llvm::IRBuilder<> key_hash_func_builder(bb);
1362  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1363  std::vector<int64_t> cardinalities;
1364  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1365  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1366  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1367  cardinalities.push_back(getBucketedCardinality(col_range_info));
1368  }
1369  size_t dim_idx = 0;
1370  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1371  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1372  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1373  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1374  auto crt_term_lv =
1375  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1376  if (col_range_info.bucket) {
1377  crt_term_lv =
1378  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1379  }
1380  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1381  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1382  LL_INT(cardinalities[prev_dim_idx]));
1383  }
1384  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1385  ++dim_idx;
1386  }
1387  key_hash_func_builder.CreateRet(
1388  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1389  return key_hash_func;
1390 }
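
The generated perfect_key_hash computes a mixed-radix index: each key component is normalized to (key - min) / bucket, then scaled by the product of the bucketed cardinalities of the preceding dimensions. A host-side sketch of the same arithmetic follows; the Dim struct and the standalone function are illustrative stand-ins, not part of the codebase.

#include <cstdint>
#include <vector>

// Illustrative stand-in for the per-dimension data the IR bakes in:
// ColRangeInfo's min/bucket plus the bucketed cardinality.
struct Dim {
  int64_t min;
  int64_t bucket;       // 0 means no bucketing
  int64_t cardinality;  // bucketed cardinality of this key component
};

// Mirrors the CreateSub/CreateSDiv/CreateMul/CreateAdd sequence above.
int32_t perfect_key_hash(const std::vector<int64_t>& key, const std::vector<Dim>& dims) {
  int64_t hash = 0;
  for (size_t i = 0; i < key.size(); ++i) {
    int64_t term = key[i] - dims[i].min;
    if (dims[i].bucket) {
      term /= dims[i].bucket;
    }
    for (size_t j = 0; j < i; ++j) {
      term *= dims[j].cardinality;  // scale by all previous dimensions
    }
    hash += term;
  }
  return static_cast<int32_t>(hash);  // the IR truncates to i32 the same way
}

For two dimensions with cardinalities 10 and 7, the key (min0 + 3, min1 + 2) maps to 3 + 2 * 10 = 23, a dense slot in a 70-entry buffer.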

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1211 of file GroupByAndAggregate.cpp.

References CHECK(), CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), groups_buffer, QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1217  {
1218  CHECK(query_mem_desc.usesGetGroupValueFast());
1219  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1220  ? "get_columnar_group_bin_offset"
1221  : "get_group_value_fast"};
1222  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1223  get_group_fn_name += "_keyless";
1224  }
1225  if (query_mem_desc.interleavedBins(co.device_type)) {
1226  CHECK(!query_mem_desc.didOutputColumnar());
1227  CHECK(query_mem_desc.hasKeylessHash());
1228  get_group_fn_name += "_semiprivate";
1229  }
1230  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1231  &*group_expr_lv_translated};
1232  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1233  query_mem_desc.mustUseBaselineSort()) {
1234  get_group_fn_name += "_with_original_key";
1235  get_group_fn_args.push_back(group_expr_lv_original);
1236  }
1237  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1238  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1239  if (!query_mem_desc.hasKeylessHash()) {
1240  if (!query_mem_desc.didOutputColumnar()) {
1241  get_group_fn_args.push_back(LL_INT(row_size_quad));
1242  }
1243  } else {
1244  if (!query_mem_desc.didOutputColumnar()) {
1245  get_group_fn_args.push_back(LL_INT(row_size_quad));
1246  }
1247  if (query_mem_desc.interleavedBins(co.device_type)) {
1248  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1249  get_group_fn_args.push_back(warp_idx);
1250  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1251  }
1252  }
1253  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1254  return std::make_tuple(&*groups_buffer,
1255  emitCall(get_group_fn_name, get_group_fn_args));
1256  }
1257  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1258 }
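
The runtime helper is chosen by composing suffixes onto a base name, so the emitted call ends up as, e.g., get_group_value_fast_keyless_semiprivate for keyless interleaved bins. A compact restatement of the selection logic (plain C++, no IR; parameter names are illustrative):

#include <string>

// Restates the suffix composition above for the single-column
// perfect-hash case; the booleans mirror the QueryMemoryDescriptor queries.
std::string group_fn_name(bool columnar_output,
                          bool keyless_hash,
                          bool interleaved_bins,
                          bool baseline_sort_needs_original_key) {
  std::string name =
      columnar_output ? "get_columnar_group_bin_offset" : "get_group_value_fast";
  if (!columnar_output && keyless_hash) {
    name += "_keyless";
  }
  if (interleaved_bins) {  // checked above to imply keyless, row-wise output
    name += "_semiprivate";
  }
  if (name == "get_group_value_fast" && baseline_sort_needs_original_key) {
    name += "_with_original_key";
  }
  return name;
}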

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction window_func,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
DiamondCodegen diamond_codegen 
)
private

Definition at line 1442 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, CHECK(), codegenOutputSlot(), COUNT, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), groups_buffer, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1446  {
1447  const auto window_func_context =
1448  WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1449  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1450  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1451  ? 0
1452  : query_mem_desc.getRowSize() / sizeof(int64_t);
1453  auto arg_it = ROW_FUNC->arg_begin();
1454  auto groups_buffer = arg_it++;
1455  CodeGenerator code_generator(executor_);
1456  if (!window_func_context->getRowNumber()) {
1457  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1458  window_func_context->setRowNumber(emitCall(
1459  "row_number_window_func",
1460  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1461  code_generator.posArg(nullptr)}));
1462  }
1463  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1464  get_int_type(32, LL_CONTEXT));
1465  llvm::Value* entry_count_lv =
1466  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1467  std::vector<llvm::Value*> args{
1468  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1469  if (query_mem_desc.didOutputColumnar()) {
1470  const auto columnar_output_offset =
1471  emitCall("get_columnar_scan_output_offset", args);
1472  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1473  }
1474  args.push_back(LL_INT(row_size_quad));
1475  return emitCall("get_scan_output_slot", args);
1476  }
1477  auto arg_it = ROW_FUNC->arg_begin();
1478  auto groups_buffer = arg_it++;
1479  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1480 }

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo arg_type,
const TargetInfo agg_info,
llvm::Value *  target 
)
private

Definition at line 1392 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, CHECK(), executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1394  {
1395  const auto& agg_type = agg_info.sql_type;
1396  const size_t chosen_bytes = agg_type.get_size();
1397 
1398  bool need_conversion{false};
1399  llvm::Value* arg_null{nullptr};
1400  llvm::Value* agg_null{nullptr};
1401  llvm::Value* target_to_cast{target};
1402  if (arg_type.is_fp()) {
1403  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1404  if (agg_type.is_fp()) {
1405  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1406  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1407  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1408  need_conversion = true;
1409  }
1410  } else {
1411  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1412  return target;
1413  }
1414  } else {
1415  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1416  if (agg_type.is_fp()) {
1417  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1418  need_conversion = true;
1419  target_to_cast = executor_->castToFP(target);
1420  } else {
1421  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1422  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1423  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1424  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1425  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1426  need_conversion = true;
1427  }
1428  }
1429  }
1430  if (need_conversion) {
1431  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1432  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1433  return LL_BUILDER.CreateSelect(
1434  cmp,
1435  agg_null,
1436  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1437  } else {
1438  return target;
1439  }
1440 }
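
Concretely: when a 32-bit integer argument feeds an aggregate accumulated at 64 bits, the two inline NULL sentinels differ in width, so the emitted select maps the argument-side sentinel to the aggregate-side one and widens everything else. A scalar analogue, with simplified sentinel values standing in for the engine's actual inline null constants:

#include <cstdint>
#include <limits>

// Simplified stand-ins for the inline NULL sentinels of a 32-bit
// argument type and a 64-bit aggregate type.
constexpr int32_t kArgNull = std::numeric_limits<int32_t>::min();
constexpr int64_t kAggNull = std::numeric_limits<int64_t>::min();

// Scalar analogue of the ICmpEQ + CreateSelect sequence above.
int64_t convert_null_if_any(int32_t target) {
  return target == kArgNull ? kAggNull : static_cast<int64_t>(target);
}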

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 1901 of file GroupByAndAggregate.cpp.

References executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

1902  {
1903  return executor_->cgen_state_->emitCall(fname, args);
1904 }


llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1720 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1720  {
1721  CHECK_LT(off, 0);
1722  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1723  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1724  LL_BUILDER.CreateBitCast(lit_buff_lv,
1725  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1726  LL_INT(off)));
1727 }

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo col_range_info)
staticprivate

Definition at line 227 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

227  {
228  checked_int64_t crt_col_cardinality =
229  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
230  if (col_range_info.bucket) {
231  crt_col_cardinality /= col_range_info.bucket;
232  }
233  return static_cast<int64_t>(crt_col_cardinality +
234  (1 + (col_range_info.has_nulls ? 1 : 0)));
235 }
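
A worked example: the range [10, 50] with bucket 10 gives (50 - 10) / 10 = 4, plus one for the inclusive upper bin and one more if NULLs are present. The same arithmetic without the checked_int64_t overflow guard:

#include <cstdint>

// Unchecked restatement of the computation above; the real code uses
// checked_int64_t so that overflow throws instead of wrapping.
int64_t bucketed_cardinality(int64_t min, int64_t max, int64_t bucket, bool has_nulls) {
  int64_t card = max - min;
  if (bucket) {
    card /= bucket;
  }
  return card + 1 + (has_nulls ? 1 : 0);
}
// bucketed_cardinality(10, 50, 10, false) == 5
// bucketed_cardinality(10, 50, 10, true)  == 6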

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 117 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), getBucketedCardinality(), getExprRangeInfo(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), ra_exe_unit_, RelAlgExecutionUnit::simple_quals, and RelAlgExecutionUnit::target_exprs.

117  {
118  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
119  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
120  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
121  // can expect this to be true anyway for grouped queries since the precise version
122  // uses significantly more memory.
123  const int64_t baseline_threshold =
124  has_count_distinct(ra_exe_unit_)
125  ? (device_type_ == ExecutorDeviceType::GPU ? (Executor::baseline_threshold / 4)
126  : Executor::baseline_threshold)
127  : Executor::baseline_threshold;
128  if (ra_exe_unit_.groupby_exprs.size() != 1) {
129  try {
130  checked_int64_t cardinality{1};
131  bool has_nulls{false};
132  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
133  auto col_range_info = getExprRangeInfo(groupby_expr.get());
134  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
135  // going through baseline hash if a non-integer type is encountered
136  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
137  }
138  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
139  CHECK_GE(crt_col_cardinality, 0);
140  cardinality *= crt_col_cardinality;
141  if (col_range_info.has_nulls) {
142  has_nulls = true;
143  }
144  }
145  // For zero or high cardinalities, use baseline layout.
146  if (!cardinality || cardinality > baseline_threshold) {
147  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
148  }
149  return {QueryDescriptionType::GroupByPerfectHash,
150  0,
151  int64_t(cardinality),
152  0,
153  has_nulls};
154  } catch (...) { // overflow when computing cardinality
155  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
156  }
157  }
158  // For single column groupby on high timestamps, force baseline hash due to wide ranges
159  // we are likely to encounter when applying quals to the expression range
160  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
161  // the range is small enough
162  if (ra_exe_unit_.groupby_exprs.front() &&
163  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
164  ra_exe_unit_.simple_quals.size() > 0) {
165  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
166  }
167  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
168  if (!ra_exe_unit_.groupby_exprs.front()) {
169  return col_range_info;
170  }
171  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
172  const int64_t col_count =
173  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
174  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
175  if (has_count_distinct(ra_exe_unit_)) {
176  max_entry_count = std::min(max_entry_count, baseline_threshold);
177  }
178  if ((!ra_exe_unit_.groupby_exprs.front()->get_type_info().is_string() &&
179  !expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(), *executor_->catalog_)) &&
180  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
181  !col_range_info.bucket) {
182  return {QueryDescriptionType::GroupByBaselineHash,
183  col_range_info.min,
184  col_range_info.max,
185  0,
186  col_range_info.has_nulls};
187  }
188  return col_range_info;
189 }
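
For the multi-column branch, the decision reduces to multiplying the per-column bucketed cardinalities and comparing against the baseline threshold. A condensed sketch of just that test; the threshold constant here is illustrative, not Executor::baseline_threshold:

#include <cstdint>
#include <vector>

// Condensed sketch of the multi-column branch above: multiply the
// per-column bucketed cardinalities and compare against a threshold.
bool use_perfect_hash(const std::vector<int64_t>& col_cardinalities,
                      int64_t baseline_threshold = 1'000'000) {
  int64_t cardinality = 1;
  for (const auto card : col_cardinalities) {
    if (card <= 0 || cardinality > baseline_threshold / card) {
      return false;  // zero or overflowing cardinality: baseline hash
    }
    cardinality *= card;
  }
  return cardinality <= baseline_threshold;
}
// use_perfect_hash({100, 1000})       == true   (100,000 entries)
// use_perfect_hash({100000, 100000})  == false  (baseline hash)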

ColRangeInfo GroupByAndAggregate::getExprRangeInfo ( const Analyzer::Expr expr) const
private

Definition at line 191 of file GroupByAndAggregate.cpp.

References CHECK(), Double, executor_, Float, getExpressionRange(), GroupByBaselineHash, GroupByPerfectHash, Integer, Invalid, NonGroupedAggregate, Projection, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::simple_quals.

Referenced by codegenGroupBy(), codegenPerfectHashFunction(), getColRangeInfo(), gpuCanHandleOrderEntries(), and initCountDistinctDescriptors().

191  {
192  if (!expr) {
193  return {QueryDescriptionType::Projection, 0, 0, 0, false};
194  }
195 
196  const auto expr_range = getExpressionRange(
197  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
198  switch (expr_range.getType()) {
199  case ExpressionRangeType::Integer: {
200  if (expr_range.getIntMin() > expr_range.getIntMax()) {
201  return {
202  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
203  }
204  return {QueryDescriptionType::GroupByPerfectHash,
205  expr_range.getIntMin(),
206  expr_range.getIntMax(),
207  expr_range.getBucket(),
208  expr_range.hasNulls()};
209  }
210  case ExpressionRangeType::Float:
211  case ExpressionRangeType::Double: {
212  if (expr_range.getFpMin() > expr_range.getFpMax()) {
213  return {
214  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
215  }
216  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
217  }
218  case ExpressionRangeType::Invalid:
219  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
220  default:
221  CHECK(false);
222  }
223  CHECK(false);
224  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
225 }

KeylessInfo GroupByAndAggregate::getKeylessInfo ( const std::vector< Analyzer::Expr * > &  target_expr_list,
const bool  is_group_by 
) const
private

This function walks all target expressions and answers two questions:

  1. Is a keyless hash layout possible?
  2. If so, which aggregate expression should represent the key's presence (e.g., for detecting empty entries in the result set)? A toy illustration of this layout follows below.

NOTE: Keyless hash is currently only valid for single-column group by.
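
For intuition, in a keyless layout the group key is never materialized: the bin index itself encodes the key, and the representative aggregate slot (here a COUNT) doubles as the presence flag, since no matching row can leave it at its initial value. Names and layout below are hypothetical, for illustration only.

#include <cstdint>
#include <vector>

// Toy keyless-hash readout with one COUNT(*) slot per bin; assumes a
// bucket size >= 1. The key for bin i is min_val + i * bucket, and
// count == 0 marks an empty entry (COUNT's initial value).
void scan_keyless_buffer(const std::vector<int64_t>& count_slots,
                         int64_t min_val,
                         int64_t bucket) {
  for (size_t bin = 0; bin < count_slots.size(); ++bin) {
    if (count_slots[bin] == 0) {
      continue;  // empty entry: representative aggregate untouched
    }
    const int64_t key = min_val + static_cast<int64_t>(bin) * bucket;
    (void)key;  // ... emit (key, count_slots[bin]) as a result row ...
  }
}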

Definition at line 634 of file GroupByAndAggregate.cpp.

References agg_arg(), CHECK(), constrained_not_null(), Double, executor_, Float, g_bigint_count, get_agg_initial_val(), get_compact_type(), get_target_info(), getExpressionRange(), Integer, Invalid, is_distinct_target(), kAVG, kCOUNT, keyless, kMAX, kMIN, kSUM, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, and takes_float_argument().

636  {
637  bool keyless{true}, found{false};
638  int32_t num_agg_expr{0};
639  int32_t index{0};
640  for (const auto target_expr : target_expr_list) {
641  const auto agg_info = get_target_info(target_expr, g_bigint_count);
642  const auto chosen_type = get_compact_type(agg_info);
643  if (agg_info.is_agg) {
644  num_agg_expr++;
645  }
646  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
647  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
648  CHECK(agg_expr);
649  const auto arg_expr = agg_arg(target_expr);
650  const bool float_argument_input = takes_float_argument(agg_info);
651  switch (agg_info.agg_kind) {
652  case kAVG:
653  ++index;
654  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
655  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
656  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
657  expr_range_info.hasNulls()) {
658  break;
659  }
660  }
661  found = true;
662  break;
663  case kCOUNT:
664  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
665  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
666  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
667  expr_range_info.hasNulls()) {
668  break;
669  }
670  }
671  found = true;
672  break;
673  case kSUM: {
674  auto arg_ti = arg_expr->get_type_info();
675  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
676  arg_ti.set_notnull(true);
677  }
678  if (!arg_ti.get_notnull()) {
679  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
680  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
681  !expr_range_info.hasNulls()) {
682  found = true;
683  }
684  } else {
685  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
686  switch (expr_range_info.getType()) {
687  case ExpressionRangeType::Float:
688  case ExpressionRangeType::Double:
689  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
690  found = true;
691  }
692  break;
693  case ExpressionRangeType::Integer:
694  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
695  found = true;
696  }
697  break;
698  default:
699  break;
700  }
701  }
702  break;
703  }
704  case kMIN: {
705  CHECK(agg_expr && agg_expr->get_arg());
706  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
707  if (arg_ti.is_string() || arg_ti.is_array()) {
708  break;
709  }
710  auto expr_range_info =
711  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
712  auto init_max = get_agg_initial_val(agg_info.agg_kind,
713  chosen_type,
714  is_group_by || float_argument_input,
715  float_argument_input ? sizeof(float) : 8);
716  switch (expr_range_info.getType()) {
717  case ExpressionRangeType::Float:
718  case ExpressionRangeType::Double: {
719  auto double_max =
720  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
721  if (expr_range_info.getFpMax() < double_max) {
722  found = true;
723  }
724  break;
725  }
726  case ExpressionRangeType::Integer:
727  if (expr_range_info.getIntMax() < init_max) {
728  found = true;
729  }
730  break;
731  default:
732  break;
733  }
734  break;
735  }
736  case kMAX: {
737  CHECK(agg_expr && agg_expr->get_arg());
738  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
739  if (arg_ti.is_string() || arg_ti.is_array()) {
740  break;
741  }
742  auto expr_range_info =
743  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
744  // NULL sentinel and init value for kMAX are identical, which results in
745  // ambiguity in detecting empty keys in presence of nulls.
746  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
747  expr_range_info.hasNulls()) {
748  break;
749  }
750  auto init_min = get_agg_initial_val(agg_info.agg_kind,
751  chosen_type,
752  is_group_by || float_argument_input,
753  float_argument_input ? sizeof(float) : 8);
754  switch (expr_range_info.getType()) {
755  case ExpressionRangeType::Float:
756  case ExpressionRangeType::Double: {
757  auto double_min =
758  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
759  if (expr_range_info.getFpMin() > double_min) {
760  found = true;
761  }
762  break;
763  }
764  case ExpressionRangeType::Integer:
765  if (expr_range_info.getIntMin() > init_min) {
766  found = true;
767  }
768  break;
769  default:
770  break;
771  }
772  break;
773  }
774  default:
775  keyless = false;
776  break;
777  }
778  }
779  if (!keyless) {
780  break;
781  }
782  if (!found) {
783  ++index;
784  }
785  }
786 
787  // shouldn't use keyless for projection only
788  return {
789  keyless && found,
790  index,
791  };
792 }

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo col_range_info,
const size_t  shard_count 
) const
private

Definition at line 273 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK(), CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

274  {
275  size_t device_count{0};
276  if (device_type_ == ExecutorDeviceType::GPU) {
277  auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
278  CHECK(cuda_mgr);
279  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
280  CHECK_GT(device_count, 0u);
281  }
282 
283  int64_t bucket{col_range_info.bucket};
284 
285  if (shard_count) {
286  CHECK(!col_range_info.bucket);
287  /*
288  when a node has fewer devices than shard count,
289  a) In a distributed setup, the minimum distance between two keys would be
290  device_count because shards are stored consecutively across the physical tables, i.e
291  if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1 would
292  have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf node
293  has only 1 device, in this case, all the keys from each node are loaded on the
294  device each.
295 
296  b) In a single node setup, the distance would be minimum of device_count or
297  difference of device_count - shard_count. For example: If a single node server
298  running on 3 devices a shard column has values 0 to 9 in a table with 4 shards,
299  device to fragment keys mapping would be: device 1 - 4,8,3,7 device 2 - 1,5,9 device
300  3 - 2, 6 The bucket value would be 4(shards) - 3(devices) = 1 i.e. minimum of
301  device_count or difference.
302 
303  When a node has device count equal to or more than shard count then the
304  minimum distance is always at least shard_count * no of leaf nodes.
305  */
306  if (device_count < shard_count) {
307  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
308  : std::min(device_count, shard_count - device_count);
309  } else {
310  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
311  }
312  }
313 
314  return bucket;
315 }

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 794 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_GE, CHECK_LE, Analyzer::AggExpr::get_arg(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

795  {
796  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
797  return false;
798  }
799  for (const auto& order_entry : order_entries) {
800  CHECK_GE(order_entry.tle_no, 1);
801  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
802  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
803  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
804  return false;
805  }
806  // TODO(alex): relax the restrictions
807  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
808  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
809  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
810  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
811  return false;
812  }
813  if (agg_expr->get_arg()) {
814  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
815  if (arg_ti.is_fp()) {
816  return false;
817  }
818  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
819  // TODO(adb): QMD not actually initialized here?
820  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
821  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
822  expr_range_info.has_nulls) &&
823  order_entry.is_desc == order_entry.nulls_first) {
824  return false;
825  }
826  }
827  const auto& target_ti = target_expr->get_type_info();
828  CHECK(!target_ti.is_array());
829  if (!target_ti.is_integer()) {
830  return false;
831  }
832  }
833  return true;
834 }

CountDistinctDescriptors GroupByAndAggregate::initCountDistinctDescriptors ( )
private

Definition at line 538 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK(), CHECK_GE, device_type_, g_bigint_count, g_enable_watchdog, g_hll_precision_bits, Analyzer::AggExpr::get_arg(), get_count_distinct_sub_bitmap_count(), get_target_info(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, hll_size_for_rate(), Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, kENCODING_DICT, kINT, Projection, ra_exe_unit_, StdSet, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

538  {
539  CountDistinctDescriptors count_distinct_descriptors;
540  for (const auto target_expr : ra_exe_unit_.target_exprs) {
541  auto agg_info = get_target_info(target_expr, g_bigint_count);
542  if (is_distinct_target(agg_info)) {
543  CHECK(agg_info.is_agg);
544  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
545  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
546  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
547  if (arg_ti.is_string() && arg_ti.get_compression() != kENCODING_DICT) {
548  throw std::runtime_error(
549  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
550  }
551  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_array()) {
552  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
553  }
554  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
555  throw std::runtime_error(
556  "APPROX_COUNT_DISTINCT on geometry columns not supported");
557  }
558  if (agg_info.is_distinct && arg_ti.is_geometry()) {
559  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
560  }
561  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
562  auto arg_range_info =
563  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
564  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
565  int64_t bitmap_sz_bits{0};
566  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
567  const auto error_rate = agg_expr->get_error_rate();
568  if (error_rate) {
569  CHECK(error_rate->get_type_info().get_type() == kINT);
570  CHECK_GE(error_rate->get_constval().intval, 1);
571  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
572  } else {
573  bitmap_sz_bits = g_hll_precision_bits;
574  }
575  }
576  if (arg_range_info.isEmpty()) {
577  count_distinct_descriptors.emplace_back(
578  CountDistinctDescriptor{CountDistinctImplType::Bitmap,
579  0,
580  64,
581  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
582  device_type_,
583  1});
584  continue;
585  }
586  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
587  !(arg_ti.is_array() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
588  // implementation for arrays
589  count_distinct_impl_type = CountDistinctImplType::Bitmap;
590  if (agg_info.agg_kind == kCOUNT) {
591  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
592  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
593  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
594  count_distinct_impl_type = CountDistinctImplType::StdSet;
595  }
596  }
597  }
598  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
599  count_distinct_impl_type == CountDistinctImplType::StdSet &&
600  !(arg_ti.is_array() || arg_ti.is_geometry())) {
601  count_distinct_impl_type = CountDistinctImplType::Bitmap;
602  }
603 
604  if (g_enable_watchdog && !(arg_range_info.isEmpty()) &&
605  count_distinct_impl_type == CountDistinctImplType::StdSet) {
606  throw WatchdogException("Cannot use a fast path for COUNT distinct");
607  }
608  const auto sub_bitmap_count =
609  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit_, device_type_);
610  count_distinct_descriptors.emplace_back(
611  CountDistinctDescriptor{count_distinct_impl_type,
612  arg_range_info.min,
613  bitmap_sz_bits,
614  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
615  device_type_,
616  sub_bitmap_count});
617  } else {
618  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
619  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
620  }
621  }
622  return count_distinct_descriptors;
623 }
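
For scale: an exact COUNT(DISTINCT) over a column spanning [0, 999999] allocates a bitmap of one bit per possible value, about 122 KiB per group, while APPROX_COUNT_DISTINCT uses a fixed-size HLL sketch whose precision comes from g_hll_precision_bits or the requested error rate. Back-of-the-envelope helpers (illustrative only, not engine code):

#include <cstdint>

// Illustrative sizing: exact COUNT(DISTINCT) needs one bit per possible
// value in the column range (as in the bitmap_sz_bits computation above).
int64_t exact_bitmap_bits(int64_t min, int64_t max) {
  return max - min + 1;
}

int64_t bitmap_bytes(int64_t bits) {
  return (bits + 7) / 8;
}
// exact_bitmap_bits(0, 999999) == 1000000 bits -> 125000 bytes (~122 KiB)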

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo render_info,
const bool  output_columnar_hint 
)
private

Definition at line 317 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK(), device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

322  {
323  const auto shard_count =
324  device_type_ == ExecutorDeviceType::GPU
325  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
326  : 0;
327  bool sort_on_gpu_hint =
328  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
329  !ra_exe_unit_.sort_info.order_entries.empty() &&
330  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
331  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
332  // but the total output buffer size would be too big or it's a sharded top query.
333  // For the sake of managing risk, use the new result set way very selectively for
334  // this case only (alongside the baseline layout we've enabled for a while now).
335  bool must_use_baseline_sort = shard_count;
336  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
337  while (true) {
338  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
339  max_groups_buffer_entry_count,
340  crt_min_byte_width,
341  sort_on_gpu_hint,
342  render_info,
343  must_use_baseline_sort,
344  output_columnar_hint);
345  CHECK(query_mem_desc);
346  if (query_mem_desc->sortOnGpu() &&
347  (query_mem_desc->getBufferSizeBytes(device_type_) +
348  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
349  2 * 1024 * 1024 * 1024L) {
350  must_use_baseline_sort = true;
351  sort_on_gpu_hint = false;
352  } else {
353  break;
354  }
355  }
356  return query_mem_desc;
357 }
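
The retry condition above is easy to check by hand: with 64M entries and 40-byte rows, the output buffer is 2.56e9 bytes and the aligned int32 index adds another 256 MB, so the loop rebuilds the descriptor with must_use_baseline_sort = true and sort_on_gpu_hint = false. A sketch of the same test (hypothetical inputs; the real code reads them from the descriptor):

#include <cstddef>
#include <cstdint>

// The same 2 GiB ceiling the loop above enforces for GPU sort: total
// buffer bytes plus an int32 row index per entry, aligned to 8 bytes.
bool gpu_sort_buffer_too_big(size_t buffer_size_bytes, size_t entry_count) {
  const size_t kLimit = 2ULL * 1024 * 1024 * 1024;
  const size_t index_bytes = (entry_count * sizeof(int32_t) + 7) & ~size_t{7};
  return buffer_size_bytes + index_bytes > kLimit;
}
// Example: 64M entries * 40-byte rows -> 2.56e9 + 2.56e8 bytes,
// well past 2 GiB, so the descriptor is rebuilt with baseline sort.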

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 359 of file GroupByAndAggregate.cpp.

References addTransientStringLiterals(), get_col_byte_widths(), RelAlgExecutionUnit::groupby_exprs, initCountDistinctDescriptors(), and ra_exe_unit_.

Referenced by initQueryMemoryDescriptor().

366  {
367  addTransientStringLiterals();
368 
369  const auto count_distinct_descriptors = initCountDistinctDescriptors();
370 
371  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs, {});
372 
373  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
374 
375  auto col_range_info_nosharding = getColRangeInfo();
376 
377  const auto shard_count =
378  device_type_ == ExecutorDeviceType::GPU
379  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
380  : 0;
381 
382  const auto col_range_info =
383  ColRangeInfo{col_range_info_nosharding.hash_type_,
384  col_range_info_nosharding.min,
385  col_range_info_nosharding.max,
386  getShardedTopBucket(col_range_info_nosharding, shard_count),
387  col_range_info_nosharding.has_nulls};
388 
389  // Non-grouped aggregates do not support accessing aggregated ranges
390  // Keyless hash is currently only supported with single-column perfect hash
391  const auto keyless_info = !(is_group_by && col_range_info.hash_type_ ==
392  QueryDescriptionType::GroupByPerfectHash)
393  ? KeylessInfo{false, -1}
394  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
395 
396  if (g_enable_watchdog &&
397  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
398  max_groups_buffer_entry_count > 120000000) ||
399  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
400  ra_exe_unit_.groupby_exprs.size() == 1 &&
401  (col_range_info.max - col_range_info.min) /
402  std::max(col_range_info.bucket, int64_t(1)) >
403  130000000))) {
404  throw WatchdogException("Query would use too much memory");
405  }
406  try {
407  return QueryMemoryDescriptor::init(executor_,
408  ra_exe_unit_,
409  query_infos_,
410  col_range_info,
411  keyless_info,
412  allow_multifrag,
413  device_type_,
414  crt_min_byte_width,
415  sort_on_gpu_hint,
416  shard_count,
417  max_groups_buffer_entry_count,
418  render_info,
419  count_distinct_descriptors,
420  must_use_baseline_sort,
421  output_columnar_hint,
422  /*streaming_top_n_hint=*/true);
423  } catch (const StreamingTopNOOM& e) {
424  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
425  return QueryMemoryDescriptor::init(executor_,
426  ra_exe_unit_,
427  query_infos_,
428  col_range_info,
429  keyless_info,
430  allow_multifrag,
431  device_type_,
432  crt_min_byte_width,
433  sort_on_gpu_hint,
434  shard_count,
435  max_groups_buffer_entry_count,
436  render_info,
437  count_distinct_descriptors,
438  must_use_baseline_sort,
439  output_columnar_hint,
440  /*streaming_top_n_hint=*/false);
441  }
442 }

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions co 
) const
private

Definition at line 30 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

33  {
34  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
35  llvm::isa<llvm::AllocaInst>(val_ptr) &&
36  val_ptr->getType() ==
37  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
38  "agg_id" == agg_base_name);
39 }

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 41 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

41  {
42  executor_->cgen_state_->ir_builder_.CreateCall(
43  executor_->cgen_state_->module_->getFunction("force_sync"));
44 }


size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog catalog 
)
static

Definition at line 1921 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), and initQueryMemoryDescriptor().

1923  {
1924  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1925  return 0;
1926  }
1927  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1928  const auto grouped_col_expr =
1929  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1930  if (!grouped_col_expr) {
1931  continue;
1932  }
1933  if (grouped_col_expr->get_table_id() <= 0) {
1934  return 0;
1935  }
1936  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1937  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1938  return td->nShards;
1939  }
1940  }
1941  return 0;
1942 }

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 299 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 300 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 297 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 298 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 301 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 302 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private
bool GroupByAndAggregate::output_columnar_
private

Definition at line 294 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private

Definition at line 292 of file GroupByAndAggregate.h.

Referenced by getExprRangeInfo(), and getKeylessInfo().

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 293 of file GroupByAndAggregate.h.

Referenced by addTransientStringLiterals().


The documentation for this class was generated from the following files:

GroupByAndAggregate.h
GroupByAndAggregate.cpp
MaxwellCodegenPatch.cpp