OmniSciDB  c07336695a
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Classes

struct  DiamondCodegen
 

Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner >)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
 

Static Public Member Functions

static void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool supportedTypeForGpuSharedMemUsage (const SQLTypeInfo &target_type_info) const
 
bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
void addTransientStringLiterals ()
 
CountDistinctDescriptors initCountDistinctDescriptors ()
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
ColRangeInfo getExprRangeInfo (const Analyzer::Expr *expr) const
 
KeylessInfo getKeylessInfo (const std::vector< Analyzer::Expr *> &target_expr_list, const bool is_group_by) const
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const std::vector< llvm::Value *> &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value *> &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored. More...
 
void codegenEstimator (std::stack< llvm::BasicBlock *> &array_loops, GroupByAndAggregate::DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value *> &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value *> &args)
 
bool needsUnnestDoublePatch (llvm::Value *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static bool supportedExprForGpuSharedMemUsage (Analyzer::Expr *expr)
 
static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 130 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

◆ GroupByAndAggregate()

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)

Definition at line 234 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, and ra_exe_unit_.

240  : executor_(executor)
241  , ra_exe_unit_(ra_exe_unit)
242  , query_infos_(query_infos)
243  , row_set_mem_owner_(row_set_mem_owner)
244  , device_type_(device_type) {
245  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
246  if (!groupby_expr) {
247  continue;
248  }
249  const auto& groupby_ti = groupby_expr->get_type_info();
250  if (groupby_ti.is_string() && groupby_ti.get_compression() != kENCODING_DICT) {
251  throw std::runtime_error(
252  "Cannot group by string columns which are not dictionary encoded.");
253  }
254  if (groupby_ti.is_array()) {
255  throw std::runtime_error("Group by array not supported");
256  }
257  if (groupby_ti.is_geometry()) {
258  throw std::runtime_error("Group by geometry not supported");
259  }
260  }
261 }
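
The checks above amount to a simple predicate over each group-by key's type. A minimal standalone sketch, reusing the SQLTypeInfo accessors shown in the constructor body (the helper name groupby_type_supported is illustrative, not part of the class):

// Sketch: the group-by key restrictions enforced by the constructor.
bool groupby_type_supported(const SQLTypeInfo& ti) {
  if (ti.is_string() && ti.get_compression() != kENCODING_DICT) {
    return false;  // strings must be dictionary encoded
  }
  return !ti.is_array() && !ti.is_geometry();  // arrays and geometry rejected
}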

Member Function Documentation

◆ addTransientStringLiterals() [1/2]

void GroupByAndAggregate::addTransientStringLiterals ( const RelAlgExecutionUnit &  ra_exe_unit,
Executor *  executor,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)
static

Definition at line 475 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::add_transient_string_literals_for_expression(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, and RelAlgExecutionUnit::target_exprs.

478  {
479  for (const auto group_expr : ra_exe_unit.groupby_exprs) {
480  add_transient_string_literals_for_expression(
481      group_expr.get(), executor, row_set_mem_owner);
482  }
483  for (const auto target_expr : ra_exe_unit.target_exprs) {
484  const auto& target_type = target_expr->get_type_info();
485  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
486  continue;
487  }
488  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
489  if (agg_expr) {
490  if (agg_expr->get_aggtype() == kSAMPLE) {
491  add_transient_string_literals_for_expression(
492      agg_expr->get_arg(), executor, row_set_mem_owner);
493  }
494  } else {
495  add_transient_string_literals_for_expression(
496      target_expr, executor, row_set_mem_owner);
497  }
498  }
499  row_set_mem_owner->addLiteralStringDictProxy(executor->lit_str_dict_proxy_);
500 }

◆ addTransientStringLiterals() [2/2]

void GroupByAndAggregate::addTransientStringLiterals ( )
private

Definition at line 409 of file GroupByAndAggregate.cpp.

References executor_, ra_exe_unit_, and row_set_mem_owner_.

Referenced by RelAlgExecutor::executeSort(), RelAlgExecutor::executeWorkUnit(), and initQueryMemoryDescriptorImpl().

409  {
410  addTransientStringLiterals(ra_exe_unit_, executor_, row_set_mem_owner_);
411 }

◆ codegen()

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)

Definition at line 891 of file GroupByAndAggregate.cpp.

References CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), GroupByAndAggregate::DiamondCodegen::cond_false_, CompilationOptions::device_type_, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, GroupByAndAggregate::DiamondCodegen::executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), prependForceSync(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, use_streaming_top_n(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by Executor::compileBody().

894  {
895  CHECK(filter_result);
896 
897  bool can_return_error = false;
898  llvm::BasicBlock* filter_false{nullptr};
899 
900  {
901  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
902 
903  if (executor_->isArchMaxwell(co.device_type_)) {
904  prependForceSync();
905  }
906  DiamondCodegen filter_cfg(filter_result,
907  executor_,
908  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
909  "filter",
910  nullptr,
911  false);
912  filter_false = filter_cfg.cond_false_;
913 
914  if (is_group_by) {
915  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
916      !use_streaming_top_n(ra_exe_unit_, query_mem_desc.didOutputColumnar())) {
917  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
918  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
919  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
920  llvm::Value* old_total_matched_val{nullptr};
921  if (co.device_type_ == ExecutorDeviceType::GPU) {
922  old_total_matched_val =
923  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
924  total_matched_ptr,
925  LL_INT(int32_t(1)),
926  llvm::AtomicOrdering::Monotonic);
927  } else {
928  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
929  LL_BUILDER.CreateStore(
930  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
931  total_matched_ptr);
932  }
933  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
934  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
935  }
936 
937  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
938  if (query_mem_desc.usesGetGroupValueFast() ||
939  query_mem_desc.getQueryDescriptionType() ==
940          QueryDescriptionType::GroupByPerfectHash) {
941  if (query_mem_desc.getGroupbyColCount() > 1) {
942  filter_cfg.setChainToNext();
943  }
944  // Don't generate null checks if the group slot is guaranteed to be non-null,
945  // as it's the case for get_group_value_fast* family.
946  can_return_error =
947  codegenAggCalls(agg_out_ptr_w_idx, {}, query_mem_desc, co, filter_cfg);
948  } else {
949  {
950  llvm::Value* nullcheck_cond{nullptr};
951  if (query_mem_desc.didOutputColumnar()) {
952  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
953  LL_INT(int32_t(0)));
954  } else {
955  nullcheck_cond = LL_BUILDER.CreateICmpNE(
956  std::get<0>(agg_out_ptr_w_idx),
957  llvm::ConstantPointerNull::get(
958  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
959  }
960  DiamondCodegen nullcheck_cfg(
961  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
962  codegenAggCalls(agg_out_ptr_w_idx, {}, query_mem_desc, co, filter_cfg);
963  }
964  can_return_error = true;
965  if (query_mem_desc.getQueryDescriptionType() ==
966          QueryDescriptionType::Projection &&
967      use_streaming_top_n(ra_exe_unit_, query_mem_desc.didOutputColumnar())) {
968  // Ignore rejection on pushing current row to top-K heap.
969  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
970  } else {
971  CodeGenerator code_generator(executor_);
972  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
973  // TODO(alex): remove the trunc once pos is converted to 32 bits
974  code_generator.posArg(nullptr),
975  get_int_type(32, LL_CONTEXT))));
976  }
977  }
978  } else {
979  if (ra_exe_unit_.estimator) {
980  std::stack<llvm::BasicBlock*> array_loops;
981  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
982  } else {
983  auto arg_it = ROW_FUNC->arg_begin();
984  std::vector<llvm::Value*> agg_out_vec;
985  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
986  agg_out_vec.push_back(&*arg_it++);
987  }
988  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
989  agg_out_vec,
990  query_mem_desc,
991  co,
992  filter_cfg);
993  }
994  }
995  }
996 
997  if (ra_exe_unit_.join_quals.empty()) {
998  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
999  } else if (sc_false) {
1000  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1001  LL_BUILDER.SetInsertPoint(sc_false);
1002  LL_BUILDER.CreateBr(filter_false);
1003  LL_BUILDER.SetInsertPoint(saved_insert_block);
1004  }
1005 
1006  return can_return_error;
1007 }
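
For projection queries the generated IR claims an output slot by bumping total_matched; on GPU this is the AtomicRMW add with monotonic ordering seen above, which corresponds to a relaxed fetch-add. A host-side sketch of the same semantics (claim_output_row is an illustrative name):

#include <atomic>

// Returns the previous counter value, which the IR stores into
// old_total_matched; monotonic ordering maps to memory_order_relaxed.
int32_t claim_output_row(std::atomic<int32_t>& total_matched) {
  return total_matched.fetch_add(1, std::memory_order_relaxed);
}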

◆ codegenAggArg()

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1725 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_EQ, CodeGenerator::codegen(), GroupByAndAggregate::DiamondCodegen::executor_, get_int_type(), Analyzer::Expr::get_type_info(), kARRAY, kPOINT, kSAMPLE, and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1727  {
1728  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1729  // TODO(alex): handle arrays uniformly?
1730  CodeGenerator code_generator(executor_);
1731  if (target_expr) {
1732  const auto& target_ti = target_expr->get_type_info();
1733  if (target_ti.is_array() && !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1734  const auto target_lvs =
1735  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1736  : code_generator.codegen(
1737  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1738  if (target_ti.isChunkIteratorPackaging()) {
1739  // Something with the chunk transport is code that was generated from a source
1740  // other than an ARRAY[] expression
1741  CHECK_EQ(size_t(1), target_lvs.size());
1742  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1743  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1744  const auto i8p_ty =
1745  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1746  const auto& elem_ti = target_ti.get_elem_type();
1747  return {
1748  executor_->cgen_state_->emitExternalCall(
1749  "array_buff",
1750  i8p_ty,
1751  {target_lvs.front(), code_generator.posArg(target_expr)}),
1752  executor_->cgen_state_->emitExternalCall(
1753  "array_size",
1754  i32_ty,
1755  {target_lvs.front(),
1756  code_generator.posArg(target_expr),
1757  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1758  } else if (target_ti.isStandardBufferPackaging()) {
1759  if (agg_expr) {
1760  throw std::runtime_error(
1761  "Using array[] operator as argument to an aggregate operator is not "
1762  "supported");
1763  }
1764  return {target_lvs[0], target_lvs[1]};
1765  }
1766  }
1767  if (target_ti.is_geometry() &&
1768  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1769  auto generate_coord_lvs =
1770  [&](auto* selected_target_expr,
1771  bool const fetch_columns) -> std::vector<llvm::Value*> {
1772  const auto target_lvs =
1773  code_generator.codegen(selected_target_expr, fetch_columns, co);
1774  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1775  target_lvs.size());
1776 
1777  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1778  const auto i8p_ty =
1779  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1780  std::vector<llvm::Value*> coords;
1781  size_t ctr = 0;
1782  for (const auto& target_lv : target_lvs) {
1783  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1784  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1785  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1786  // coords array (TINYINT). Subsequent arrays are regular INT.
1787 
1788  const size_t elem_sz = ctr == 0 ? 1 : 4;
1789  ctr++;
1790  int32_t fixlen = -1;
1791  if (target_ti.get_type() == kPOINT) {
1792  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1793  if (col_var) {
1794  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1795  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1796  fixlen = coords_cd->columnType.get_size();
1797  }
1798  }
1799  }
1800  if (fixlen > 0) {
1801  coords.push_back(executor_->cgen_state_->emitExternalCall(
1802  "fast_fixlen_array_buff",
1803  i8p_ty,
1804  {target_lv, code_generator.posArg(selected_target_expr)}));
1805  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1806  continue;
1807  }
1808  coords.push_back(executor_->cgen_state_->emitExternalCall(
1809  "array_buff",
1810  i8p_ty,
1811  {target_lv, code_generator.posArg(selected_target_expr)}));
1812  coords.push_back(executor_->cgen_state_->emitExternalCall(
1813  "array_size",
1814  i32_ty,
1815  {target_lv,
1816  code_generator.posArg(selected_target_expr),
1817  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1818  }
1819  return coords;
1820  };
1821 
1822  if (agg_expr) {
1823  return generate_coord_lvs(agg_expr->get_arg(), true);
1824  } else {
1825  return generate_coord_lvs(target_expr,
1826  !executor_->plan_state_->allow_lazy_fetch_);
1827  }
1828  }
1829  }
1830  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1831  : code_generator.codegen(
1832  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1833 }
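
The array_size call above passes log2 of the element width so the runtime can turn a byte count into an element count with a shift. A minimal equivalent of the log2_bytes helper it relies on (a sketch, not the exact library source):

#include <cstdint>
#include <stdexcept>

inline uint32_t log2_bytes(const uint32_t bytes) {
  switch (bytes) {
    case 1:
      return 0;
    case 2:
      return 1;
    case 4:
      return 2;
    case 8:
      return 3;
    default:
      throw std::runtime_error("Unsupported element width");
  }
}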

◆ codegenAggCalls()

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value *> &  agg_out_ptr_w_idx,
const std::vector< llvm::Value *> &  agg_out_vec,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1480 of file GroupByAndAggregate.cpp.

References CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), GroupByAndAggregate::DiamondCodegen::executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1485  {
1486  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1487  // TODO(alex): unify the two cases, the output for non-group by queries
1488  // should be a contiguous buffer
1489  const bool is_group_by{std::get<0>(agg_out_ptr_w_idx)};
1490  bool can_return_error = false;
1491  if (is_group_by) {
1492  CHECK(agg_out_vec.empty());
1493  } else {
1494  CHECK(!agg_out_vec.empty());
1495  }
1496 
1497  // output buffer is casted into a byte stream to be able to handle data elements of
1498  // different sizes (only used when actual column width sizes are used)
1499  llvm::Value* output_buffer_byte_stream{nullptr};
1500  llvm::Value* out_row_idx{nullptr};
1501  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1502      query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1503  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1504  std::get<0>(agg_out_ptr_w_idx),
1505  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1506  output_buffer_byte_stream->setName("out_buff_b_stream");
1507  CHECK(std::get<1>(agg_out_ptr_w_idx));
1508  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1509  llvm::Type::getInt64Ty(LL_CONTEXT));
1510  out_row_idx->setName("out_row_idx");
1511  }
1512 
1513  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1514  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1515  ++target_idx) {
1516  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1517  CHECK(target_expr);
1518 
1519  target_builder(target_expr, executor_, co);
1520  }
1521 
1522  target_builder.codegen(this,
1523  executor_,
1524  query_mem_desc,
1525  co,
1526  agg_out_ptr_w_idx,
1527  agg_out_vec,
1528  output_buffer_byte_stream,
1529  out_row_idx,
1530  diamond_codegen);
1531 
1532  for (auto target_expr : ra_exe_unit_.target_exprs) {
1533  CHECK(target_expr);
1534  executor_->plan_state_->isLazyFetchColumn(target_expr);
1535  }
1536 
1537  return can_return_error;
1538 }

◆ codegenAggColumnPtr()

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value *> &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1543 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1550  {
1551  llvm::Value* agg_col_ptr{nullptr};
1552  if (query_mem_desc.didOutputColumnar()) {
1553  // TODO(Saman): remove the second columnar branch, and support all query description
1554  // types through the first branch. Then, input arguments should also be cleaned up
1555  if (!g_cluster &&
1556      query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1557  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1558  chosen_bytes == 8);
1559  CHECK(output_buffer_byte_stream);
1560  CHECK(out_row_idx);
1561  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1562  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1563  auto out_per_col_byte_idx =
1564  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1565  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1566  LL_INT(static_cast<int64_t>(col_off)));
1567  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1568  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1569  agg_col_ptr = LL_BUILDER.CreateBitCast(
1570  output_ptr,
1571  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1572  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1573  } else {
1574  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1575  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1576  col_off /= chosen_bytes;
1577  CHECK(std::get<1>(agg_out_ptr_w_idx));
1578  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1579  agg_col_ptr = LL_BUILDER.CreateGEP(
1580  LL_BUILDER.CreateBitCast(
1581  std::get<0>(agg_out_ptr_w_idx),
1582  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1583  offset);
1584  }
1585  } else {
1586  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1587  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1588  col_off /= chosen_bytes;
1589  agg_col_ptr = LL_BUILDER.CreateGEP(
1590  LL_BUILDER.CreateBitCast(
1591  std::get<0>(agg_out_ptr_w_idx),
1592  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1593  LL_INT(col_off));
1594  }
1595  CHECK(agg_col_ptr);
1596  return agg_col_ptr;
1597 }
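
In the columnar projection branch the slot address reduces to row index times slot width (done as a shift, since chosen_bytes is a power of two) plus the column's base offset. A host-side sketch of the same arithmetic (agg_column_ptr as a free function is illustrative):

#include <cstdint>

int8_t* agg_column_ptr(int8_t* output_buffer_byte_stream,
                       const uint64_t out_row_idx,
                       const uint32_t col_off,        // getColOffInBytes(agg_out_off)
                       const uint32_t chosen_bytes) { // 1, 2, 4 or 8
  // << log2(chosen_bytes), exactly as the generated CreateShl does
  const uint64_t per_col_byte_idx = out_row_idx << (__builtin_ffs(chosen_bytes) - 1);
  return output_buffer_byte_stream + per_col_byte_idx + col_off;
}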

◆ codegenCountDistinct()

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value *> &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1648 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK, CHECK_EQ, emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegen().

1653  {
1654  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1655  const auto& arg_ti =
1656  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1657  if (arg_ti.is_fp()) {
1658  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1659  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1660  }
1661  const auto& count_distinct_descriptor =
1662  query_mem_desc.getCountDistinctDescriptor(target_idx);
1663  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1664  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1665  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1666  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1667  if (device_type == ExecutorDeviceType::GPU) {
1668  const auto base_dev_addr = getAdditionalLiteral(-1);
1669  const auto base_host_addr = getAdditionalLiteral(-2);
1670  agg_args.push_back(base_dev_addr);
1671  agg_args.push_back(base_host_addr);
1672  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1673  } else {
1674  emitCall("agg_approximate_count_distinct", agg_args);
1675  }
1676  return;
1677  }
1678  std::string agg_fname{"agg_count_distinct"};
1679  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1680  agg_fname += "_bitmap";
1681  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1682  }
1683  if (agg_info.skip_null_val) {
1684  auto null_lv = executor_->cgen_state_->castToTypeIn(
1685  (arg_ti.is_fp()
1686  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1687  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1688  64);
1689  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1690  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1691  agg_fname += "_skip_val";
1692  agg_args.push_back(null_lv);
1693  }
1694  if (device_type == ExecutorDeviceType::GPU) {
1695  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1696  agg_fname += "_gpu";
1697  const auto base_dev_addr = getAdditionalLiteral(-1);
1698  const auto base_host_addr = getAdditionalLiteral(-2);
1699  agg_args.push_back(base_dev_addr);
1700  agg_args.push_back(base_host_addr);
1701  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1702  CHECK_EQ(size_t(0),
1703  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1704  count_distinct_descriptor.sub_bitmap_count);
1705  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1706  count_distinct_descriptor.sub_bitmap_count)));
1707  }
1708  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1709  emitCall(agg_fname, agg_args);
1710  } else {
1711  executor_->cgen_state_->emitExternalCall(
1712  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1713  }
1714 }
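
The runtime entry point is chosen by appending suffixes to a base name, so a GPU bitmap implementation with null skipping resolves to agg_count_distinct_bitmap_skip_val_gpu. A sketch of that selection (the three booleans summarize the descriptor checks in the body above):

#include <string>

std::string count_distinct_fname(const bool bitmap_impl,   // CountDistinctImplType::Bitmap
                                 const bool skip_null_val, // agg_info.skip_null_val
                                 const bool on_gpu) {      // ExecutorDeviceType::GPU
  std::string agg_fname{"agg_count_distinct"};
  if (bitmap_impl) {
    agg_fname += "_bitmap";
  }
  if (skip_null_val) {
    agg_fname += "_skip_val";
  }
  if (on_gpu) {  // the GPU path requires the bitmap implementation
    agg_fname += "_gpu";
  }
  return agg_fname;
}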

◆ codegenEstimator()

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock *> &  array_loops,
GroupByAndAggregate::DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1599 of file GroupByAndAggregate.cpp.

References CHECK, emitCall(), RelAlgExecutionUnit::estimator, GroupByAndAggregate::DiamondCodegen::executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1603  {
1604  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1605  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1606  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1607  estimator_comp_count_lv);
1608  int32_t subkey_idx = 0;
1609  for (const auto estimator_arg_comp : estimator_arg) {
1610  const auto estimator_arg_comp_lvs =
1611  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1612  query_mem_desc.getEffectiveKeyWidth(),
1613  co,
1614  false,
1615  0,
1616  diamond_codegen,
1617  array_loops,
1618  true);
1619  CHECK(!estimator_arg_comp_lvs.original_value);
1620  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1621  // store the sub-key to the buffer
1622  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1623  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1624  }
1625  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1626  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1627  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1628  const auto estimator_comp_bytes_lv =
1629  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1630  const auto bitmap_size_lv =
1631  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1632  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1633  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1634 }
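
The IR above lays out the estimator key as one 64-bit slot per argument component and hands the runtime function the bitmap plus the key as raw bytes. A host-side sketch of that layout (fill_estimator_key is an illustrative name):

#include <cstdint>
#include <vector>

void fill_estimator_key(std::vector<int64_t>& key_buff,
                        const std::vector<int64_t>& component_values) {
  key_buff.resize(component_values.size());
  size_t subkey_idx = 0;
  for (const auto v : component_values) {
    key_buff[subkey_idx++] = v;  // mirrors the CreateStore/CreateGEP pair
  }
  // The runtime call then receives key_buff.data() as raw bytes together
  // with key_buff.size() * sizeof(int64_t) as the key byte count.
}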

◆ codegenGroupBy()

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1100 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), GroupByAndAggregate::DiamondCodegen::executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), getExprRangeInfo(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, QueryMemoryDescriptor::groupColWidthsSize(), QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1103  {
1104  auto arg_it = ROW_FUNC->arg_begin();
1105  auto groups_buffer = arg_it++;
1106 
1107  std::stack<llvm::BasicBlock*> array_loops;
1108 
1109  // TODO(Saman): move this logic outside of this function.
1110  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1111  if (query_mem_desc.didOutputColumnar()) {
1112  return std::make_tuple(
1113  &*groups_buffer,
1114  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1115  } else {
1116  return std::make_tuple(
1117  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1118  nullptr);
1119  }
1120  }
1121 
1122  CHECK(query_mem_desc.getQueryDescriptionType() ==
1123            QueryDescriptionType::GroupByBaselineHash ||
1124        query_mem_desc.getQueryDescriptionType() ==
1125            QueryDescriptionType::GroupByPerfectHash);
1126 
1127  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1128  ? 0
1129  : query_mem_desc.getRowSize() / sizeof(int64_t);
1130 
1131  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1132  ? sizeof(int64_t)
1133  : query_mem_desc.getEffectiveKeyWidth();
1134  // for multi-column group by
1135  llvm::Value* group_key = nullptr;
1136  llvm::Value* key_size_lv = nullptr;
1137 
1138  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1139  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.groupColWidthsSize()));
1140  if (query_mem_desc.getQueryDescriptionType() ==
1141          QueryDescriptionType::GroupByPerfectHash) {
1142  group_key =
1143  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1144  } else if (query_mem_desc.getQueryDescriptionType() ==
1145             QueryDescriptionType::GroupByBaselineHash) {
1146  group_key =
1147  col_width_size == sizeof(int32_t)
1148  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1149  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1150  }
1151  CHECK(group_key);
1152  CHECK(key_size_lv);
1153  }
1154 
1155  int32_t subkey_idx = 0;
1156  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1157  for (const auto group_expr : ra_exe_unit_.groupby_exprs) {
1158  const auto col_range_info = getExprRangeInfo(group_expr.get());
1159  const auto translated_null_value = static_cast<int64_t>(
1160  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1161  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1162  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1163  : checked_int64_t(col_range_info.max) +
1164  (col_range_info.bucket ? col_range_info.bucket : 1));
1165 
1166  const bool col_has_nulls =
1167  query_mem_desc.getQueryDescriptionType() ==
1168          QueryDescriptionType::GroupByPerfectHash
1169  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1170  ? query_mem_desc.hasNulls()
1171  : col_range_info.has_nulls)
1172  : false;
1173 
1174  const auto group_expr_lvs =
1175  executor_->groupByColumnCodegen(group_expr.get(),
1176  col_width_size,
1177  co,
1178  col_has_nulls,
1179  translated_null_value,
1180  diamond_codegen,
1181  array_loops,
1182  query_mem_desc.threadsShareMemory());
1183  const auto group_expr_lv = group_expr_lvs.translated_value;
1184  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1185  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1186  return codegenSingleColumnPerfectHash(query_mem_desc,
1187  co,
1188  &*groups_buffer,
1189  group_expr_lv,
1190  group_expr_lvs.original_value,
1191  row_size_quad);
1192  } else {
1193  // store the sub-key to the buffer
1194  LL_BUILDER.CreateStore(group_expr_lv,
1195  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1196  }
1197  }
1198  if (query_mem_desc.getQueryDescriptionType() ==
1199          QueryDescriptionType::GroupByPerfectHash) {
1200  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1201  return codegenMultiColumnPerfectHash(
1202      &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1203  } else if (query_mem_desc.getQueryDescriptionType() ==
1204             QueryDescriptionType::GroupByBaselineHash) {
1205  return codegenMultiColumnBaselineHash(co,
1206      &*groups_buffer,
1207  group_key,
1208  key_size_lv,
1209  query_mem_desc,
1210  col_width_size,
1211  row_size_quad);
1212  }
1213  CHECK(false);
1214  return std::make_tuple(nullptr, nullptr);
1215 }
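
NULL group-by keys are translated to a value one slot (or one bucket) past the column's maximum so they land in a dedicated bin. A sketch of the computation above; e.g. a range of [10, 20] with no bucketing maps NULL to 21:

#include <cstdint>

int64_t translated_null_value(const int64_t max_val, const int64_t bucket) {
  return max_val + (bucket ? bucket : 1);  // one past the (bucketed) maximum
}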

◆ codegenMultiColumnBaselineHash()

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1301 of file GroupByAndAggregate.cpp.

References CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, ROW_FUNC, and CompilationOptions::with_dynamic_watchdog_.

Referenced by codegenGroupBy().

1308  {
1309  auto arg_it = ROW_FUNC->arg_begin(); // groups_buffer
1310  ++arg_it; // current match count
1311  ++arg_it; // total match count
1312  ++arg_it; // old match count
1313  ++arg_it; // output buffer slots count
1314  ++arg_it; // aggregate init values
1315  CHECK(arg_it->getName() == "agg_init_val");
1316  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1317  CHECK(key_width == sizeof(int32_t));
1318  group_key =
1319  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1320  }
1321  std::vector<llvm::Value*> func_args{
1322  groups_buffer,
1323  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1324  &*group_key,
1325  &*key_size_lv,
1326  LL_INT(static_cast<int32_t>(key_width))};
1327  std::string func_name{"get_group_value"};
1328  if (query_mem_desc.didOutputColumnar()) {
1329  func_name += "_columnar_slot";
1330  } else {
1331  func_args.push_back(LL_INT(row_size_quad));
1332  func_args.push_back(&*arg_it);
1333  }
1334  if (co.with_dynamic_watchdog_) {
1335  func_name += "_with_watchdog";
1336  }
1337  if (query_mem_desc.didOutputColumnar()) {
1338  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1339  } else {
1340  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1341  }
1342 }
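
As with the other group-by paths, the runtime entry point is assembled from suffixes. A sketch of the selection above (baseline_hash_fname is an illustrative name):

#include <string>

std::string baseline_hash_fname(const bool output_columnar,
                                const bool with_dynamic_watchdog) {
  std::string func_name{"get_group_value"};
  if (output_columnar) {
    func_name += "_columnar_slot";  // row-wise output instead passes row_size_quad
  }
  if (with_dynamic_watchdog) {
    func_name += "_with_watchdog";
  }
  return func_name;  // e.g. "get_group_value_columnar_slot_with_watchdog"
}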

◆ codegenMultiColumnPerfectHash()

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1267 of file GroupByAndAggregate.cpp.

References CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1272  {
1273  CHECK(query_mem_desc.getQueryDescriptionType() ==
1274        QueryDescriptionType::GroupByPerfectHash);
1275  // compute the index (perfect hash)
1276  auto perfect_hash_func = codegenPerfectHashFunction();
1277  auto hash_lv =
1278  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1279 
1280  if (query_mem_desc.didOutputColumnar()) {
1281  const std::string set_matching_func_name{
1282  "set_matching_group_value_perfect_hash_columnar"};
1283  const std::vector<llvm::Value*> set_matching_func_arg{
1284  groups_buffer,
1285  hash_lv,
1286  group_key,
1287  key_size_lv,
1288  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1289  query_mem_desc.getEntryCount())};
1290  emitCall(set_matching_func_name, set_matching_func_arg);
1291  return std::make_tuple(groups_buffer, hash_lv);
1292  } else {
1293  return std::make_tuple(
1294  emitCall("get_matching_group_value_perfect_hash",
1295  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1296  nullptr);
1297  }
1298 }

◆ codegenOutputSlot()

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1009 of file GroupByAndAggregate.cpp.

References run-benchmark-import::args, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), RelAlgExecutionUnit::use_bump_allocator, and use_streaming_top_n().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1013  {
1014  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1015  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1016  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1017  CHECK(!group_expr);
1018  if (!query_mem_desc.didOutputColumnar()) {
1019  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1020  }
1021  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1022  ? 0
1023  : query_mem_desc.getRowSize() / sizeof(int64_t);
1024  CodeGenerator code_generator(executor_);
1025  if (use_streaming_top_n(ra_exe_unit_, query_mem_desc.didOutputColumnar())) {
1026  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1027  CHECK_GE(only_order_entry.tle_no, int(1));
1028  const size_t target_idx = only_order_entry.tle_no - 1;
1029  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1030  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1031  const auto chosen_bytes =
1032  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1033  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1034  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1035  const size_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1036  std::string fname = "get_bin_from_k_heap";
1037  const auto& oe_ti = order_entry_expr->get_type_info();
1038  llvm::Value* null_key_lv = nullptr;
1039  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1040  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1041  switch (bit_width) {
1042  case 32:
1043  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1044  break;
1045  case 64:
1046  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1047  break;
1048  default:
1049  CHECK(false);
1050  }
1051  fname += "_int" + std::to_string(bit_width) + "_t";
1052  } else {
1053  CHECK(oe_ti.is_fp());
1054  if (order_entry_lv->getType()->isDoubleTy()) {
1055  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1056  } else {
1057  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1058  }
1059  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1060  }
1061  const auto key_slot_idx =
1062      get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1063  return emitCall(
1064  fname,
1065  {groups_buffer,
1066  LL_INT(n),
1067  LL_INT(row_size_quad),
1068  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1069  LL_BOOL(only_order_entry.is_desc),
1070  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1071  LL_BOOL(only_order_entry.nulls_first),
1072  null_key_lv,
1073  order_entry_lv});
1074  } else {
1075  llvm::Value* output_buffer_entry_count_lv{nullptr};
1076  if (ra_exe_unit_.use_bump_allocator) {
1077  output_buffer_entry_count_lv =
1078  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1079  CHECK(output_buffer_entry_count_lv);
1080  }
1081  const auto group_expr_lv =
1082  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1083  std::vector<llvm::Value*> args{
1084  groups_buffer,
1085  output_buffer_entry_count_lv
1086  ? output_buffer_entry_count_lv
1087  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1088  group_expr_lv,
1089  code_generator.posArg(nullptr)};
1090  if (query_mem_desc.didOutputColumnar()) {
1091  const auto columnar_output_offset =
1092  emitCall("get_columnar_scan_output_offset", args);
1093  return columnar_output_offset;
1094  }
1095  args.push_back(LL_INT(row_size_quad));
1096  return emitCall("get_scan_output_slot", args);
1097  }
1098 }
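
In the streaming top-n branch the heap probe is specialized on the order-by key's physical type. A sketch of how fname is assembled above (k_heap_fname is an illustrative name):

#include <cstddef>
#include <string>

std::string k_heap_fname(const bool is_fp, const size_t bit_width) {
  std::string fname = "get_bin_from_k_heap";
  if (is_fp) {
    fname += bit_width == 64 ? "_double" : "_float";
  } else {
    fname += "_int" + std::to_string(bit_width) + "_t";  // 32 or 64
  }
  return fname;  // e.g. "get_bin_from_k_heap_int64_t"
}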

◆ codegenPerfectHashFunction()

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1344 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GT, GroupByAndAggregate::DiamondCodegen::executor_, get_int_type(), getBucketedCardinality(), getExprRangeInfo(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1344  {
1345  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1346  auto ft = llvm::FunctionType::get(
1347  get_int_type(32, LL_CONTEXT),
1348  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1349  false);
1350  auto key_hash_func = llvm::Function::Create(ft,
1351  llvm::Function::ExternalLinkage,
1352  "perfect_key_hash",
1353  executor_->cgen_state_->module_);
1354  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1355  mark_function_always_inline(key_hash_func);
1356  auto& key_buff_arg = *key_hash_func->args().begin();
1357  llvm::Value* key_buff_lv = &key_buff_arg;
1358  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1359  llvm::IRBuilder<> key_hash_func_builder(bb);
1360  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1361  std::vector<int64_t> cardinalities;
1362  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
1363  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1364  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1365  cardinalities.push_back(getBucketedCardinality(col_range_info));
1366  }
1367  size_t dim_idx = 0;
1368  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
1369  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1370  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1371  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1372  auto crt_term_lv =
1373  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1374  if (col_range_info.bucket) {
1375  crt_term_lv =
1376  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1377  }
1378  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1379  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1380  LL_INT(cardinalities[prev_dim_idx]));
1381  }
1382  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1383  ++dim_idx;
1384  }
1385  key_hash_func_builder.CreateRet(
1386  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1387  return key_hash_func;
1388 }
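
The generated perfect_key_hash computes a mixed-radix index: each key component is rebased to its minimum, divided by its bucket size, and scaled by the cardinalities of all previous dimensions. A host-side sketch of the same computation (KeyDim is an illustrative struct summarizing the per-dimension range data, not part of the API):

#include <cstdint>
#include <vector>

struct KeyDim {
  int64_t min;          // ColRangeInfo::min
  int64_t bucket;       // ColRangeInfo::bucket, 0 means no bucketing
  int64_t cardinality;  // getBucketedCardinality(col_range_info)
};

int32_t perfect_key_hash(const std::vector<int64_t>& key,
                         const std::vector<KeyDim>& dims) {
  int64_t hash = 0;
  for (size_t dim_idx = 0; dim_idx < dims.size(); ++dim_idx) {
    int64_t term = key[dim_idx] - dims[dim_idx].min;  // rebase to zero
    if (dims[dim_idx].bucket) {
      term /= dims[dim_idx].bucket;                   // collapse buckets
    }
    for (size_t prev = 0; prev < dim_idx; ++prev) {
      term *= dims[prev].cardinality;                 // mixed-radix scaling
    }
    hash += term;
  }
  return static_cast<int32_t>(hash);                  // truncated like the IR
}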

◆ codegenSingleColumnPerfectHash()

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1218 of file GroupByAndAggregate.cpp.

References CHECK, CompilationOptions::device_type_, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1224  {
1225  CHECK(query_mem_desc.usesGetGroupValueFast());
1226  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1227  ? "get_columnar_group_bin_offset"
1228  : "get_group_value_fast"};
1229  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1230  get_group_fn_name += "_keyless";
1231  }
1232  if (query_mem_desc.interleavedBins(co.device_type_)) {
1233  CHECK(!query_mem_desc.didOutputColumnar());
1234  CHECK(query_mem_desc.hasKeylessHash());
1235  get_group_fn_name += "_semiprivate";
1236  }
1237  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1238  &*group_expr_lv_translated};
1239  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1240  query_mem_desc.mustUseBaselineSort()) {
1241  get_group_fn_name += "_with_original_key";
1242  get_group_fn_args.push_back(group_expr_lv_original);
1243  }
1244  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1245  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1246  if (!query_mem_desc.hasKeylessHash()) {
1247  if (!query_mem_desc.didOutputColumnar()) {
1248  get_group_fn_args.push_back(LL_INT(row_size_quad));
1249  }
1250  } else {
1251  if (!query_mem_desc.didOutputColumnar()) {
1252  get_group_fn_args.push_back(LL_INT(row_size_quad));
1253  }
1254  if (query_mem_desc.interleavedBins(co.device_type_)) {
1255  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1256  get_group_fn_args.push_back(warp_idx);
1257  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1258  }
1259  }
1260  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1261  return std::make_tuple(&*groups_buffer,
1262  emitCall(get_group_fn_name, get_group_fn_args));
1263  }
1264  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1265 }
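
All of the get_group_value_fast variants are built around the same perfect-hash bin arithmetic. A minimal sketch follows, assuming the runtime maps a key to its bin as (key - min_val) / bucket and that row-wise layouts are row_size_quad 64-bit slots wide; this is illustrative only, not the runtime's actual definition:

  #include <cstdint>

  // Sketch of the bin lookup assumed behind "get_group_value_fast":
  // translate the key into a dense bin index, then step into the groups buffer.
  int64_t* get_group_value_fast_sketch(int64_t* groups_buffer,
                                       const int64_t key,
                                       const int64_t min_val,
                                       const int64_t bucket,
                                       const uint32_t row_size_quad) {
    int64_t bin = key - min_val;
    if (bucket) {
      bin /= bucket;
    }
    return groups_buffer + bin * row_size_quad;  // start of this key's row
  }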

◆ codegenWindowRowPointer()

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction *  window_func,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1440 of file GroupByAndAggregate.cpp.

References CHECK, codegenOutputSlot(), COUNT, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), GroupByAndAggregate::DiamondCodegen::executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1444  {
1445  const auto window_func_context =
1446  WindowProjectNodeContext::getActiveWindowFunctionContext();
1447  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1448  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1449  ? 0
1450  : query_mem_desc.getRowSize() / sizeof(int64_t);
1451  auto arg_it = ROW_FUNC->arg_begin();
1452  auto groups_buffer = arg_it++;
1453  CodeGenerator code_generator(executor_);
1454  if (!window_func_context->getRowNumber()) {
1455  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1456  window_func_context->setRowNumber(emitCall(
1457  "row_number_window_func",
1458  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1459  code_generator.posArg(nullptr)}));
1460  }
1461  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1462  get_int_type(32, LL_CONTEXT));
1463  llvm::Value* entry_count_lv =
1464  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1465  std::vector<llvm::Value*> args{
1466  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1467  if (query_mem_desc.didOutputColumnar()) {
1468  const auto columnar_output_offset =
1469  emitCall("get_columnar_scan_output_offset", args);
1470  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1471  }
1472  args.push_back(LL_INT(row_size_quad));
1473  return emitCall("get_scan_output_slot", args);
1474  }
1475  auto arg_it = ROW_FUNC->arg_begin();
1476  auto groups_buffer = arg_it++;
1477  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1478 }

◆ convertNullIfAny()

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo &  arg_type,
const TargetInfo &  agg_info,
llvm::Value *  target 
)
private

Definition at line 1390 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, CHECK, GroupByAndAggregate::DiamondCodegen::executor_, SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegen().

1392  {
1393  const auto& agg_type = agg_info.sql_type;
1394  const size_t chosen_bytes = agg_type.get_size();
1395 
1396  bool need_conversion{false};
1397  llvm::Value* arg_null{nullptr};
1398  llvm::Value* agg_null{nullptr};
1399  llvm::Value* target_to_cast{target};
1400  if (arg_type.is_fp()) {
1401  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1402  if (agg_type.is_fp()) {
1403  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1404  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1405  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1406  need_conversion = true;
1407  }
1408  } else {
1409  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1410  return target;
1411  }
1412  } else {
1413  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1414  if (agg_type.is_fp()) {
1415  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1416  need_conversion = true;
1417  target_to_cast = executor_->castToFP(target);
1418  } else {
1419  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1420  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1421  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1422  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1423  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1424  need_conversion = true;
1425  }
1426  }
1427  }
1428  if (need_conversion) {
1429  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1430  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1431  return LL_BUILDER.CreateSelect(
1432  cmp,
1433  agg_null,
1434  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1435  } else {
1436  return target;
1437  }
1438 }
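
The generated select reduces to a simple scalar pattern: when the argument type's null sentinel differs from the aggregate type's, map the former to the latter before aggregating. A sketch of one concrete case (float argument, double aggregate; the sentinel parameters are placeholders for the engine's inline null values):

  // If target carries the argument type's null sentinel, substitute the
  // aggregate type's sentinel; otherwise just widen the value.
  double convert_null_if_any_sketch(const float target,
                                    const float arg_null,
                                    const double agg_null) {
    return target == arg_null ? agg_null : static_cast<double>(target);
  }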

◆ emitCall()

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value *> &  args 
)
private

Definition at line 1835 of file GroupByAndAggregate.cpp.

References GroupByAndAggregate::DiamondCodegen::executor_.

Referenced by TargetExprCodegen::codegen(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

1836  {
1837  return executor_->cgen_state_->emitCall(fname, args);
1838 }

◆ getAdditionalLiteral()

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1716 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1716  {
1717  CHECK_LT(off, 0);
1718  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1719  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1720  LL_BUILDER.CreateBitCast(lit_buff_lv,
1721  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1722  LL_INT(off)));
1723 }

◆ getBucketedCardinality()

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo &  col_range_info)
staticprivate

Definition at line 217 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and QueryMemoryDescriptor::init().

217  {
218  checked_int64_t crt_col_cardinality =
219  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
220  if (col_range_info.bucket) {
221  crt_col_cardinality /= col_range_info.bucket;
222  }
223  return static_cast<int64_t>(crt_col_cardinality +
224  (1 + (col_range_info.has_nulls ? 1 : 0)));
225 }
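
For example, with min = 10, max = 109, bucket = 10, and has_nulls = true, the bucketed span is (109 - 10) / 10 = 9 and the result is 9 + 1 + 1 = 11: ten buckets for the value range plus one extra slot for NULL.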

◆ getColRangeInfo()

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 118 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, CHECK_GE, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), GPU, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), and anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash().

Referenced by initQueryMemoryDescriptorImpl().

118  {
119  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
120  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
121  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
122  // can expect this to be true anyway for grouped queries since the precise version
123  // uses significantly more memory.
124  const int64_t baseline_threshold =
125  has_count_distinct(ra_exe_unit_)
126  ? (device_type_ == ExecutorDeviceType::GPU ? (Executor::baseline_threshold / 4)
127  : Executor::baseline_threshold)
128  : Executor::baseline_threshold;
129  if (ra_exe_unit_.groupby_exprs.size() != 1) {
130  try {
131  checked_int64_t cardinality{1};
132  bool has_nulls{false};
133  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
134  auto col_range_info = getExprRangeInfo(groupby_expr.get());
135  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
136  // going through baseline hash if a non-integer type is encountered
137  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
138  }
139  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
140  CHECK_GE(crt_col_cardinality, 0);
141  cardinality *= crt_col_cardinality;
142  if (col_range_info.has_nulls) {
143  has_nulls = true;
144  }
145  }
146  // For zero or high cardinalities, use baseline layout.
147  if (!cardinality || cardinality > baseline_threshold) {
148  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
149  }
150  return {QueryDescriptionType::GroupByPerfectHash,
151  0,
152  int64_t(cardinality),
153  0,
154  has_nulls};
155  } catch (...) { // overflow when computing cardinality
156  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
157  }
158  }
159  // For single column groupby on high timestamps, force baseline hash due to wide ranges
160  // we are likely to encounter when applying quals to the expression range
161  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
162  // the range is small enough
163  if (ra_exe_unit_.groupby_exprs.front() &&
164  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
165  ra_exe_unit_.simple_quals.size() > 0) {
166  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
167  }
168  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
169  if (!ra_exe_unit_.groupby_exprs.front()) {
170  return col_range_info;
171  }
172  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
173  const int64_t col_count =
174  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
175  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
176  if (has_count_distinct(ra_exe_unit_)) {
177  max_entry_count = std::min(max_entry_count, baseline_threshold);
178  }
179  if ((!ra_exe_unit_.groupby_exprs.front()->get_type_info().is_string() &&
180  !expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(), *executor_->catalog_)) &&
181  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
182  !col_range_info.bucket) {
183  return {QueryDescriptionType::GroupByBaselineHash,
184  col_range_info.min,
185  col_range_info.max,
186  0,
187  col_range_info.has_nulls};
188  }
189  return col_range_info;
190 }
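
As a worked example: grouping by two integer columns with bucketed cardinalities 1,000 and 100,000 yields a product of 100,000,000. If that exceeds baseline_threshold, the multi-column branch above returns GroupByBaselineHash instead of sizing a perfect-hash buffer with 100 million entries; had the product stayed under the threshold, the perfect-hash layout with max set to the cardinality would be used.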

◆ getExprRangeInfo()

ColRangeInfo GroupByAndAggregate::getExprRangeInfo ( const Analyzer::Expr *  expr) const
private

Definition at line 192 of file GroupByAndAggregate.cpp.

References CHECK, Double, Float, getExpressionRange(), GroupByBaselineHash, GroupByPerfectHash, Integer, Invalid, NonGroupedAggregate, and Projection.

Referenced by codegenGroupBy(), codegenPerfectHashFunction(), gpuCanHandleOrderEntries(), and initCountDistinctDescriptors().

192  {
193  if (!expr) {
194  return {QueryDescriptionType::Projection, 0, 0, 0, false};
195  }
196 
197  const auto expr_range = getExpressionRange(
198  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
199  switch (expr_range.getType()) {
200  case ExpressionRangeType::Integer:
201  return {QueryDescriptionType::GroupByPerfectHash,
202  expr_range.getIntMin(),
203  expr_range.getIntMax(),
204  expr_range.getBucket(),
205  expr_range.hasNulls()};
206  case ExpressionRangeType::Float:
207  case ExpressionRangeType::Double:
208  case ExpressionRangeType::Invalid:
209  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
210  default:
211  CHECK(false);
212  }
213  CHECK(false);
214  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
215 }

◆ getKeylessInfo()

KeylessInfo GroupByAndAggregate::getKeylessInfo ( const std::vector< Analyzer::Expr *> &  target_expr_list,
const bool  is_group_by 
) const
private

Shared memory usage is currently supported only when dealing with a single keyless aggregate operation, and only for up to two target expressions.

Definition at line 588 of file GroupByAndAggregate.cpp.

References agg_arg(), CHECK, constrained_not_null(), Double, executor_, Float, g_bigint_count, get_agg_initial_val(), get_compact_type(), get_target_info(), getExpressionRange(), Integer, Invalid, is_distinct_target(), kAVG, kCOUNT, kENCODING_NONE, keyless, kMAX, kMIN, kSUM, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, supportedTypeForGpuSharedMemUsage(), and takes_float_argument().

Referenced by initQueryMemoryDescriptorImpl().

590  {
591  bool keyless{true}, found{false}, shared_mem_support{false},
592  shared_mem_valid_data_type{true};
593  /* Currently support shared memory usage for a limited subset of possible aggregate
594  * operations. shared_mem_support and
595  * shared_mem_valid_data_type are declared to ensure such support. */
596  int32_t num_agg_expr{0}; // used for shared memory support on the GPU
597  int32_t index{0};
598  for (const auto target_expr : target_expr_list) {
599  const auto agg_info = get_target_info(target_expr, g_bigint_count);
600  const auto chosen_type = get_compact_type(agg_info);
601  // TODO(Saman): should be eventually removed, once I make sure what data types can
602  // be used in this shared memory setting.
603 
604  shared_mem_valid_data_type =
605  shared_mem_valid_data_type && supportedTypeForGpuSharedMemUsage(chosen_type);
606 
607  if (agg_info.is_agg) {
608  num_agg_expr++;
609  }
610  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
611  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
612  CHECK(agg_expr);
613  const auto arg_expr = agg_arg(target_expr);
614  const bool float_argument_input = takes_float_argument(agg_info);
615  switch (agg_info.agg_kind) {
616  case kAVG:
617  ++index;
618  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
619  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
620  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
621  expr_range_info.hasNulls()) {
622  break;
623  }
624  }
625  found = true;
626  break;
627  case kCOUNT:
628  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
629  const auto& arg_ti = arg_expr->get_type_info();
630  if (arg_ti.is_string() && arg_ti.get_compression() == kENCODING_NONE) {
631  break;
632  }
633  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
634  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
635  expr_range_info.hasNulls()) {
636  break;
637  }
638  }
639  found = true;
640  if (!agg_info.skip_null_val) {
641  shared_mem_support = true; // currently just support 8 bytes per group
642  }
643  break;
644  case kSUM: {
645  auto arg_ti = arg_expr->get_type_info();
646  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
647  arg_ti.set_notnull(true);
648  }
649  if (!arg_ti.get_notnull()) {
650  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
651  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
652  !expr_range_info.hasNulls()) {
653  found = true;
654  }
655  } else {
656  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
657  switch (expr_range_info.getType()) {
658  case ExpressionRangeType::Float:
659  case ExpressionRangeType::Double:
660  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
661  found = true;
662  }
663  break;
664  case ExpressionRangeType::Integer:
665  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
666  found = true;
667  }
668  break;
669  default:
670  break;
671  }
672  }
673  break;
674  }
675  case kMIN: {
676  CHECK(agg_expr && agg_expr->get_arg());
677  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
678  if (arg_ti.is_string() || arg_ti.is_array()) {
679  break;
680  }
681  auto expr_range_info =
682  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
683  auto init_max = get_agg_initial_val(agg_info.agg_kind,
684  chosen_type,
685  is_group_by || float_argument_input,
686  float_argument_input ? sizeof(float) : 8);
687  switch (expr_range_info.getType()) {
688  case ExpressionRangeType::Float:
689  case ExpressionRangeType::Double: {
690  auto double_max =
691  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
692  if (expr_range_info.getFpMax() < double_max) {
693  found = true;
694  }
695  break;
696  }
697  case ExpressionRangeType::Integer:
698  if (expr_range_info.getIntMax() < init_max) {
699  found = true;
700  }
701  break;
702  default:
703  break;
704  }
705  break;
706  }
707  case kMAX: {
708  CHECK(agg_expr && agg_expr->get_arg());
709  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
710  if (arg_ti.is_string() || arg_ti.is_array()) {
711  break;
712  }
713  auto expr_range_info =
714  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
715  auto init_min = get_agg_initial_val(agg_info.agg_kind,
716  chosen_type,
717  is_group_by || float_argument_input,
718  float_argument_input ? sizeof(float) : 8);
719  switch (expr_range_info.getType()) {
720  case ExpressionRangeType::Float:
721  case ExpressionRangeType::Double: {
722  auto double_min =
723  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
724  if (expr_range_info.getFpMin() > double_min) {
725  found = true;
726  }
727  break;
728  }
729  case ExpressionRangeType::Integer:
730  if (expr_range_info.getIntMin() > init_min) {
731  found = true;
732  }
733  break;
734  default:
735  break;
736  }
737  break;
738  }
739  default:
740  keyless = false;
741  break;
742  }
743  }
744  if (!keyless) {
745  break;
746  }
747  if (!found) {
748  ++index;
749  }
750  }
751 
752  // shouldn't use keyless for projection only
753  /**
754  * Currently just support shared memory usage when dealing with one keyless
755  * aggregate operation. Currently just support shared memory usage for up to two
756  * target expressions.
757  */
758  return {keyless && found,
759  index,
760  ((num_agg_expr == 1) && (target_expr_list.size() <= 2))
761  ? shared_mem_support && shared_mem_valid_data_type
762  : false};
763 }
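
For example, SELECT key, COUNT(*) FROM t GROUP BY key qualifies as keyless: the COUNT target has no argument to null-check, so found is set unconditionally, and the returned index points at that target's slot, whose initial value doubles as the empty-bin marker. An AVG over a nullable column with an unknown expression range, by contrast, leaves found unset for that target.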

◆ getShardedTopBucket()

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo &  col_range_info,
const size_t  shard_count 
) const
private

Definition at line 263 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

264  {
265  size_t device_count{0};
266  if (device_type_ == ExecutorDeviceType::GPU) {
267  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
268  CHECK_GT(device_count, 0u);
269  }
270 
271  int64_t bucket{col_range_info.bucket};
272 
273  if (shard_count) {
274  CHECK(!col_range_info.bucket);
275  /*
276  when a node has fewer devices than the shard count:
277  a) In a distributed setup, the minimum distance between two keys would be
278  device_count, because shards are stored consecutively across the physical tables,
279  i.e. if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
280  would have values 0,1,2,6,7,8 and node 2 would have values 3,4,5,9. If each leaf
281  node has only 1 device, all the keys from each node are loaded on that node's
282  device.
283 
284  b) In a single node setup, the distance would be the minimum of device_count and
285  shard_count - device_count. For example: if a single node server with 3 devices
286  has a shard column with values 0 to 9 in a table with 4 shards, the device to
287  fragment keys mapping would be: device 1 - 4,8,3,7; device 2 - 1,5,9; device
288  3 - 2,6. The bucket value would be 4 (shards) - 3 (devices) = 1, i.e. the
289  minimum of device_count and the difference.
290 
291  When a node has a device count equal to or greater than the shard count, the
292  minimum distance is always at least shard_count * number of leaf nodes.
293  */
294  if (device_count < shard_count) {
295  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
296  : std::min(device_count, shard_count - device_count);
297  } else {
298  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
299  }
300  }
301 
302  return bucket;
303 }
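
Working through the comment's own single-node example: 4 shards on 3 devices gives bucket = min(3, 4 - 3) = 1. In a distributed setup (g_leaf_count > 0) with fewer devices than shards, bucket = max(device_count, 1); with at least as many devices as shards, bucket = shard_count * max(g_leaf_count, 1).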

◆ gpuCanHandleOrderEntries()

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 804 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, Analyzer::AggExpr::get_arg(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

805  {
806  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
807  return false;
808  }
809  for (const auto order_entry : order_entries) {
810  CHECK_GE(order_entry.tle_no, 1);
811  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
812  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
813  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
814  return false;
815  }
816  // TODO(alex): relax the restrictions
817  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
818  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
819  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
820  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
821  return false;
822  }
823  if (agg_expr->get_arg()) {
824  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
825  if (arg_ti.is_fp()) {
826  return false;
827  }
828  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
829  // TODO(adb): QMD not actually initialized here?
830  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
831  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
832  expr_range_info.has_nulls) &&
833  order_entry.is_desc == order_entry.nulls_first) {
834  return false;
835  }
836  }
837  const auto& target_ti = target_expr->get_type_info();
838  CHECK(!target_ti.is_array());
839  if (!target_ti.is_integer()) {
840  return false;
841  }
842  }
843  return true;
844 }
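
For example, a query ordered by a single COUNT(*) target can keep its sort on the GPU, while ORDER BY AVG(x), ordering by an aggregate over a floating-point argument, ordering by a non-integer or non-aggregate target, or any multi-entry ORDER BY returns false and forces the CPU path.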

◆ initCountDistinctDescriptors()

CountDistinctDescriptors GroupByAndAggregate::initCountDistinctDescriptors ( )
private

Definition at line 502 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK, CHECK_GE, device_type_, g_bigint_count, g_enable_watchdog, g_hll_precision_bits, Analyzer::AggExpr::get_arg(), get_count_distinct_sub_bitmap_count(), get_target_info(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, hll_size_for_rate(), Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, kENCODING_DICT, kINT, Projection, ra_exe_unit_, StdSet, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

502  {
503  CountDistinctDescriptors count_distinct_descriptors;
504  for (const auto target_expr : ra_exe_unit_.target_exprs) {
505  auto agg_info = get_target_info(target_expr, g_bigint_count);
506  if (is_distinct_target(agg_info)) {
507  CHECK(agg_info.is_agg);
508  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
509  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
510  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
511  if (arg_ti.is_string() && arg_ti.get_compression() != kENCODING_DICT) {
512  throw std::runtime_error(
513  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
514  }
515  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_array()) {
516  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
517  }
518  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
519  throw std::runtime_error(
520  "APPROX_COUNT_DISTINCT on geometry columns not supported");
521  }
522  if (agg_info.is_distinct && arg_ti.is_geometry()) {
523  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
524  }
525  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
526  auto arg_range_info =
527  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
528  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
529  int64_t bitmap_sz_bits{0};
530  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
531  const auto error_rate = agg_expr->get_error_rate();
532  if (error_rate) {
533  CHECK(error_rate->get_type_info().get_type() == kINT);
534  CHECK_GE(error_rate->get_constval().intval, 1);
535  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
536  } else {
537  bitmap_sz_bits = g_hll_precision_bits;
538  }
539  }
540  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
541  !(arg_ti.is_array() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
542  // implementation for arrays
543  if (arg_range_info.isEmpty()) {
544  count_distinct_descriptors.emplace_back(
545  CountDistinctDescriptor{CountDistinctImplType::Bitmap,
546  0,
547  64,
548  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
549  device_type_,
550  1});
551  continue;
552  }
553  count_distinct_impl_type = CountDistinctImplType::Bitmap;
554  if (agg_info.agg_kind == kCOUNT) {
555  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
556  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
557  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
558  count_distinct_impl_type = CountDistinctImplType::StdSet;
559  }
560  }
561  }
562  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
563  count_distinct_impl_type == CountDistinctImplType::StdSet &&
564  !(arg_ti.is_array() || arg_ti.is_geometry())) {
565  count_distinct_impl_type = CountDistinctImplType::Bitmap;
566  }
567  if (g_enable_watchdog &&
568  count_distinct_impl_type == CountDistinctImplType::StdSet) {
569  throw WatchdogException("Cannot use a fast path for COUNT distinct");
570  }
571  const auto sub_bitmap_count =
572  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit_, device_type_);
573  count_distinct_descriptors.emplace_back(
574  CountDistinctDescriptor{count_distinct_impl_type,
575  arg_range_info.min,
576  bitmap_sz_bits,
577  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
578  device_type_,
579  sub_bitmap_count});
580  } else {
581  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
582  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
583  }
584  }
585  return count_distinct_descriptors;
586 }
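
For example, COUNT(DISTINCT x) on an integer column with a perfect-hash range of [0, 999999] gets a Bitmap descriptor of 1,000,000 bits, roughly 122 KiB per group. A range wider than MAX_BITMAP_BITS (8 billion bits, about 1 GB per group) falls back to StdSet, which is rejected outright when the watchdog is enabled.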

◆ initQueryMemoryDescriptor()

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo *  render_info,
const bool  output_columnar_hint 
)
private

Definition at line 305 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

Referenced by Executor::compileWorkUnit().

310  {
311  const auto shard_count =
312  device_type_ == ExecutorDeviceType::GPU
313  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
314  : 0;
315  bool sort_on_gpu_hint =
316  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
317  !ra_exe_unit_.sort_info.order_entries.empty() &&
318  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
319  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
320  // but the total output buffer size would be too big or it's a sharded top query.
321  // For the sake of managing risk, use the new result set way very selectively for
322  // this case only (alongside the baseline layout we've enabled for a while now).
323  bool must_use_baseline_sort = shard_count;
324  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
325  while (true) {
326  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
327  max_groups_buffer_entry_count,
328  crt_min_byte_width,
329  sort_on_gpu_hint,
330  render_info,
331  must_use_baseline_sort,
332  output_columnar_hint);
333  CHECK(query_mem_desc);
334  if (query_mem_desc->sortOnGpu() &&
335  (query_mem_desc->getBufferSizeBytes(device_type_) +
336  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
337  2 * 1024 * 1024 * 1024L) {
338  must_use_baseline_sort = true;
339  sort_on_gpu_hint = false;
340  } else {
341  break;
342  }
343  }
344  return query_mem_desc;
345 }
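
The retry loop enforces a 2 GiB ceiling on GPU-sorted output. For example, roughly 20 million entries at 112 bytes per row (~2.24 GB), plus the int64-aligned int32 entry index, exceeds 2 * 1024 * 1024 * 1024 bytes, so the descriptor is rebuilt with sort_on_gpu_hint cleared and must_use_baseline_sort set.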

◆ initQueryMemoryDescriptorImpl()

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo *  render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 347 of file GroupByAndAggregate.cpp.

References addTransientStringLiterals(), device_type_, executor_, g_enable_watchdog, get_col_byte_widths(), getColRangeInfo(), getKeylessInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), initCountDistinctDescriptors(), query_infos_, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

354  {
355  addTransientStringLiterals();
356 
357  const auto count_distinct_descriptors = initCountDistinctDescriptors();
358 
359  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs, {});
360 
361  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
362 
363  auto col_range_info_nosharding = getColRangeInfo();
364 
365  const auto shard_count =
366  device_type_ == ExecutorDeviceType::GPU
367  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
368  : 0;
369 
370  const auto col_range_info =
371  ColRangeInfo{col_range_info_nosharding.hash_type_,
372  col_range_info_nosharding.min,
373  col_range_info_nosharding.max,
374  getShardedTopBucket(col_range_info_nosharding, shard_count),
375  col_range_info_nosharding.has_nulls};
376 
377  // Non-grouped aggregates do not support accessing aggregated ranges
378  const auto keyless_info = !is_group_by
379  ? KeylessInfo{false, -1, false}
380  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
381 
382  if (g_enable_watchdog &&
383  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
384  max_groups_buffer_entry_count > 120000000) ||
385  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
386  ra_exe_unit_.groupby_exprs.size() == 1 &&
387  (col_range_info.max - col_range_info.min) /
388  std::max(col_range_info.bucket, int64_t(1)) >
389  130000000))) {
390  throw WatchdogException("Query would use too much memory");
391  }
392  return QueryMemoryDescriptor::init(executor_,
393  ra_exe_unit_,
394  query_infos_,
395  col_range_info,
396  keyless_info,
397  allow_multifrag,
398  device_type_,
399  crt_min_byte_width,
400  sort_on_gpu_hint,
401  shard_count,
402  max_groups_buffer_entry_count,
403  render_info,
404  count_distinct_descriptors,
405  must_use_baseline_sort,
406  output_columnar_hint);
407 }

◆ needsUnnestDoublePatch()

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions &  co 
) const
private

Definition at line 30 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type_, and executor_.

Referenced by TargetExprCodegen::codegen().

33  {
34  return (executor_->isArchMaxwell(co.device_type_) && threads_share_memory &&
35  llvm::isa<llvm::AllocaInst>(val_ptr) &&
36  val_ptr->getType() ==
37  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
38  "agg_id" == agg_base_name);
39 }

◆ prependForceSync()

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 41 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

41  {
42  executor_->cgen_state_->ir_builder_.CreateCall(
43  executor_->cgen_state_->module_->getFunction("force_sync"));
44 }

◆ shard_count_for_top_groups()

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit &  ra_exe_unit,
const Catalog_Namespace::Catalog &  catalog 
)
static

Definition at line 1847 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

1849  {
1850  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1851  return 0;
1852  }
1853  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1854  const auto grouped_col_expr =
1855  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1856  if (!grouped_col_expr) {
1857  continue;
1858  }
1859  if (grouped_col_expr->get_table_id() <= 0) {
1860  return 0;
1861  }
1862  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1863  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1864  return td->nShards;
1865  }
1866  }
1867  return 0;
1868 }
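
For example, SELECT shard_col, COUNT(*) FROM t GROUP BY shard_col ORDER BY 2 DESC LIMIT 10 on a table with 4 shards returns 4. Without a LIMIT, with more than one ORDER BY entry, or when no group-by column is the table's shard key, it returns 0.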

◆ supportedExprForGpuSharedMemUsage()

bool GroupByAndAggregate::supportedExprForGpuSharedMemUsage ( Analyzer::Expr *  expr)
staticprivate

Definition at line 791 of file GroupByAndAggregate.cpp.

References kUNNEST.

Referenced by QueryMemoryDescriptor::init().

791  {
792  /*
793  UNNEST operations follow a slightly different internal memory layout compared to other
794  keyless aggregates Currently, we opt out of using shared memory if there is any UNNEST
795  operation involved.
796  */
797  if (dynamic_cast<Analyzer::UOper*>(expr) &&
798  static_cast<Analyzer::UOper*>(expr)->get_optype() == kUNNEST) {
799  return false;
800  }
801  return true;
802 }

◆ supportedTypeForGpuSharedMemUsage()

bool GroupByAndAggregate::supportedTypeForGpuSharedMemUsage ( const SQLTypeInfo &  target_type_info) const
private

Supported data types for the current shared memory usage for keyless aggregates with COUNT(*). Currently only applies to single-column group-by queries.

Definition at line 769 of file GroupByAndAggregate.cpp.

References SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), kENCODING_DICT, kINT, kSMALLINT, kTEXT, and kTINYINT.

Referenced by getKeylessInfo().

770  {
771  bool result = false;
772  switch (target_type_info.get_type()) {
773  case SQLTypes::kTINYINT:
774  case SQLTypes::kSMALLINT:
775  case SQLTypes::kINT:
776  result = true;
777  break;
778  case SQLTypes::kTEXT:
779  if (target_type_info.get_compression() == EncodingType::kENCODING_DICT) {
780  result = true;
781  }
782  break;
783  default:
784  break;
785  }
786  return result;
787 }

Friends And Related Function Documentation

◆ Executor

friend class Executor
friend

Definition at line 302 of file GroupByAndAggregate.h.

◆ QueryMemoryDescriptor

friend class QueryMemoryDescriptor
friend

Definition at line 303 of file GroupByAndAggregate.h.

◆ TargetExprCodegen

friend struct TargetExprCodegen
friend

Definition at line 304 of file GroupByAndAggregate.h.

◆ TargetExprCodegenBuilder

friend struct TargetExprCodegenBuilder
friend

Definition at line 305 of file GroupByAndAggregate.h.

Member Data Documentation

◆ device_type_

const ExecutorDeviceType GroupByAndAggregate::device_type_
private

◆ executor_

Executor* GroupByAndAggregate::executor_
private

◆ output_columnar_

bool GroupByAndAggregate::output_columnar_
private

Definition at line 299 of file GroupByAndAggregate.h.

◆ query_infos_

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private

Definition at line 297 of file GroupByAndAggregate.h.

Referenced by getKeylessInfo(), and initQueryMemoryDescriptorImpl().

◆ ra_exe_unit_

const RelAlgExecutionUnit& GroupByAndAggregate::ra_exe_unit_
private

◆ row_set_mem_owner_

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 298 of file GroupByAndAggregate.h.

Referenced by addTransientStringLiterals().


The documentation for this class was generated from the following files:
GroupByAndAggregate.h
GroupByAndAggregate.cpp
MaxwellCodegenPatch.cpp