OmniSciDB  addbbd5075
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Classes

struct  DiamondCodegen
 

Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner >)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
 

Static Public Member Functions

static void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool supportedTypeForGpuSharedMemUsage (const SQLTypeInfo &target_type_info) const
 
bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
void addTransientStringLiterals ()
 
CountDistinctDescriptors initCountDistinctDescriptors ()
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
ColRangeInfo getExprRangeInfo (const Analyzer::Expr *expr) const
 
KeylessInfo getKeylessInfo (const std::vector< Analyzer::Expr * > &target_expr_list, const bool is_group_by) const
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored.
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, GroupByAndAggregate::DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static bool supportedExprForGpuSharedMemUsage (Analyzer::Expr *expr)
 
static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 130 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)

Definition at line 234 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, and ra_exe_unit_.

240  : executor_(executor)
241  , ra_exe_unit_(ra_exe_unit)
242  , query_infos_(query_infos)
243  , row_set_mem_owner_(row_set_mem_owner)
244  , device_type_(device_type) {
245  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
246  if (!groupby_expr) {
247  continue;
248  }
249  const auto& groupby_ti = groupby_expr->get_type_info();
250  if (groupby_ti.is_string() && groupby_ti.get_compression() != kENCODING_DICT) {
251  throw std::runtime_error(
252  "Cannot group by string columns which are not dictionary encoded.");
253  }
254  if (groupby_ti.is_array()) {
255  throw std::runtime_error("Group by array not supported");
256  }
257  if (groupby_ti.is_geometry()) {
258  throw std::runtime_error("Group by geometry not supported");
259  }
260  }
261 }

Member Function Documentation

void GroupByAndAggregate::addTransientStringLiterals ( const RelAlgExecutionUnit &  ra_exe_unit,
Executor *  executor,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)
static

Definition at line 479 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::add_transient_string_literals_for_expression(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, kSINGLE_VALUE, and RelAlgExecutionUnit::target_exprs.

482  {
483  for (const auto group_expr : ra_exe_unit.groupby_exprs) {
484  add_transient_string_literals_for_expression(
485  group_expr.get(), executor, row_set_mem_owner);
486  }
487  for (const auto target_expr : ra_exe_unit.target_exprs) {
488  const auto& target_type = target_expr->get_type_info();
489  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
490  continue;
491  }
492  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
493  if (agg_expr) {
494  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
495  agg_expr->get_aggtype() == kSAMPLE) {
496  add_transient_string_literals_for_expression(
497  agg_expr->get_arg(), executor, row_set_mem_owner);
498  }
499  } else {
500  add_transient_string_literals_for_expression(
501  target_expr, executor, row_set_mem_owner);
502  }
503  }
504  row_set_mem_owner->addLiteralStringDictProxy(executor->lit_str_dict_proxy_);
505 }

void GroupByAndAggregate::addTransientStringLiterals ( )
private

Definition at line 413 of file GroupByAndAggregate.cpp.

References executor_, ra_exe_unit_, and row_set_mem_owner_.

Referenced by RelAlgExecutor::executeSort(), RelAlgExecutor::executeWorkUnit(), and initQueryMemoryDescriptorImpl().

413  {
414  addTransientStringLiterals(ra_exe_unit_, executor_, row_set_mem_owner_);
415 }

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 1863 of file GroupByAndAggregate.cpp.

References executor_.

Referenced by TargetExprCodegen::codegen().

1863  {
1864  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1865  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1866  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1867 
1868  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1869 }


bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)

Definition at line 908 of file GroupByAndAggregate.cpp.

References CHECK(), codegenAggCalls(), codegenEstimator(), codegenGroupBy(), GroupByAndAggregate::DiamondCodegen::cond_false_, CompilationOptions::device_type_, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, use_streaming_top_n(), and QueryMemoryDescriptor::usesGetGroupValueFast().

911  {
912  CHECK(filter_result);
913 
914  bool can_return_error = false;
915  llvm::BasicBlock* filter_false{nullptr};
916 
917  {
918  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
919 
920  if (executor_->isArchMaxwell(co.device_type_)) {
921  prependForceSync();
922  }
923  DiamondCodegen filter_cfg(filter_result,
924  executor_,
925  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
926  "filter",
927  nullptr,
928  false);
929  filter_false = filter_cfg.cond_false_;
930 
931  if (is_group_by) {
932  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
933  !use_streaming_top_n(ra_exe_unit_, query_mem_desc.didOutputColumnar())) {
934  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
935  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
936  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
937  llvm::Value* old_total_matched_val{nullptr};
938  if (query_mem_desc.threadsShareMemory()) {
939  old_total_matched_val =
940  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
941  total_matched_ptr,
942  LL_INT(int32_t(1)),
943  llvm::AtomicOrdering::Monotonic);
944  } else {
945  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
946  LL_BUILDER.CreateStore(
947  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
948  total_matched_ptr);
949  }
950  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
951  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
952  }
953 
954  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
955  if (query_mem_desc.usesGetGroupValueFast() ||
956  query_mem_desc.getQueryDescriptionType() ==
957  QueryDescriptionType::GroupByPerfectHash) {
958  if (query_mem_desc.getGroupbyColCount() > 1) {
959  filter_cfg.setChainToNext();
960  }
961  // Don't generate null checks if the group slot is guaranteed to be non-null,
962  // as it's the case for get_group_value_fast* family.
963  can_return_error =
964  codegenAggCalls(agg_out_ptr_w_idx, {}, query_mem_desc, co, filter_cfg);
965  } else {
966  {
967  llvm::Value* nullcheck_cond{nullptr};
968  if (query_mem_desc.didOutputColumnar()) {
969  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
970  LL_INT(int32_t(0)));
971  } else {
972  nullcheck_cond = LL_BUILDER.CreateICmpNE(
973  std::get<0>(agg_out_ptr_w_idx),
974  llvm::ConstantPointerNull::get(
975  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
976  }
977  DiamondCodegen nullcheck_cfg(
978  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
979  codegenAggCalls(agg_out_ptr_w_idx, {}, query_mem_desc, co, filter_cfg);
980  }
981  can_return_error = true;
982  if (query_mem_desc.getQueryDescriptionType() ==
983  QueryDescriptionType::Projection &&
984  use_streaming_top_n(ra_exe_unit_, query_mem_desc.didOutputColumnar())) {
985  // Ignore rejection on pushing current row to top-K heap.
986  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
987  } else {
988  CodeGenerator code_generator(executor_);
989  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
990  // TODO(alex): remove the trunc once pos is converted to 32 bits
991  code_generator.posArg(nullptr),
992  get_int_type(32, LL_CONTEXT))));
993  }
994  }
995  } else {
996  if (ra_exe_unit_.estimator) {
997  std::stack<llvm::BasicBlock*> array_loops;
998  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
999  } else {
1000  auto arg_it = ROW_FUNC->arg_begin();
1001  std::vector<llvm::Value*> agg_out_vec;
1002  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1003  agg_out_vec.push_back(&*arg_it++);
1004  }
1005  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1006  agg_out_vec,
1007  query_mem_desc,
1008  co,
1009  filter_cfg);
1010  }
1011  }
1012  }
1013 
1014  if (ra_exe_unit_.join_quals.empty()) {
1015  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1016  } else if (sc_false) {
1017  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1018  LL_BUILDER.SetInsertPoint(sc_false);
1019  LL_BUILDER.CreateBr(filter_false);
1020  LL_BUILDER.SetInsertPoint(saved_insert_block);
1021  }
1022 
1023  return can_return_error;
1024 }
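For reference, the projection bookkeeping above can be pictured with a small host-side sketch. This is illustrative only (the helper names are hypothetical, not OmniSciDB code): under shared output buffers the generated CreateAtomicRMW is a relaxed fetch-add on total_matched, whose previous value becomes the row's output slot; the non-shared branch does a plain load, add, store.

  #include <atomic>
  #include <cstdint>

  // Shared-memory branch: equivalent of
  // CreateAtomicRMW(Add, total_matched_ptr, 1, Monotonic).
  int32_t claim_output_slot_shared(std::atomic<int32_t>& total_matched) {
    return total_matched.fetch_add(1, std::memory_order_relaxed);
  }

  // Private-memory branch: plain load, add, store.
  int32_t claim_output_slot_private(int32_t& total_matched) {
    const int32_t old_total_matched = total_matched;
    total_matched = old_total_matched + 1;
    return old_total_matched;  // stored into "old_total_matched" for this row
  }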

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1742 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, CodeGenerator::codegen(), executor_, get_int_type(), Analyzer::Expr::get_type_info(), kARRAY, kPOINT, kSAMPLE, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1744  {
1745  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1746  // TODO(alex): handle arrays uniformly?
1747  CodeGenerator code_generator(executor_);
1748  if (target_expr) {
1749  const auto& target_ti = target_expr->get_type_info();
1750  if (target_ti.is_array() && !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1751  const auto target_lvs =
1752  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1753  : code_generator.codegen(
1754  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1755  if (target_ti.isChunkIteratorPackaging()) {
1756  // Something with the chunk transport is code that was generated from a source
1757  // other than an ARRAY[] expression
1758  CHECK_EQ(size_t(1), target_lvs.size());
1759  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1760  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1761  const auto i8p_ty =
1762  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1763  const auto& elem_ti = target_ti.get_elem_type();
1764  return {
1765  executor_->cgen_state_->emitExternalCall(
1766  "array_buff",
1767  i8p_ty,
1768  {target_lvs.front(), code_generator.posArg(target_expr)}),
1769  executor_->cgen_state_->emitExternalCall(
1770  "array_size",
1771  i32_ty,
1772  {target_lvs.front(),
1773  code_generator.posArg(target_expr),
1774  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1775  } else if (target_ti.isStandardBufferPackaging()) {
1776  if (agg_expr) {
1777  throw std::runtime_error(
1778  "Using array[] operator as argument to an aggregate operator is not "
1779  "supported");
1780  }
1781  return {target_lvs[0], target_lvs[1]};
1782  }
1783  }
1784  if (target_ti.is_geometry() &&
1785  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1786  auto generate_coord_lvs =
1787  [&](auto* selected_target_expr,
1788  bool const fetch_columns) -> std::vector<llvm::Value*> {
1789  const auto target_lvs =
1790  code_generator.codegen(selected_target_expr, fetch_columns, co);
1791  const auto geo_expr = dynamic_cast<const Analyzer::GeoExpr*>(target_expr);
1792  if (geo_expr) {
1793  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1794  target_lvs.size());
1795  return target_lvs;
1796  }
1797  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1798  target_lvs.size());
1799 
1800  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1801  const auto i8p_ty =
1802  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1803  std::vector<llvm::Value*> coords;
1804  size_t ctr = 0;
1805  for (const auto& target_lv : target_lvs) {
1806  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1807  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1808  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1809  // coords array (TINYINT). Subsequent arrays are regular INT.
1810 
1811  const size_t elem_sz = ctr == 0 ? 1 : 4;
1812  ctr++;
1813  int32_t fixlen = -1;
1814  if (target_ti.get_type() == kPOINT) {
1815  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1816  if (col_var) {
1817  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1818  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1819  fixlen = coords_cd->columnType.get_size();
1820  }
1821  }
1822  }
1823  if (fixlen > 0) {
1824  coords.push_back(executor_->cgen_state_->emitExternalCall(
1825  "fast_fixlen_array_buff",
1826  i8p_ty,
1827  {target_lv, code_generator.posArg(selected_target_expr)}));
1828  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1829  continue;
1830  }
1831  coords.push_back(executor_->cgen_state_->emitExternalCall(
1832  "array_buff",
1833  i8p_ty,
1834  {target_lv, code_generator.posArg(selected_target_expr)}));
1835  coords.push_back(executor_->cgen_state_->emitExternalCall(
1836  "array_size",
1837  i32_ty,
1838  {target_lv,
1839  code_generator.posArg(selected_target_expr),
1840  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1841  }
1842  return coords;
1843  };
1844 
1845  if (agg_expr) {
1846  return generate_coord_lvs(agg_expr->get_arg(), true);
1847  } else {
1848  return generate_coord_lvs(target_expr,
1849  !executor_->plan_state_->allow_lazy_fetch_);
1850  }
1851  }
1852  }
1853  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1854  : code_generator.codegen(
1855  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1856 }

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const std::vector< llvm::Value * > &  agg_out_vec,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1497 of file GroupByAndAggregate.cpp.

References CHECK(), TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1502  {
1503  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1504  // TODO(alex): unify the two cases, the output for non-group by queries
1505  // should be a contiguous buffer
1506  const bool is_group_by{std::get<0>(agg_out_ptr_w_idx)};
1507  bool can_return_error = false;
1508  if (is_group_by) {
1509  CHECK(agg_out_vec.empty());
1510  } else {
1511  CHECK(!agg_out_vec.empty());
1512  }
1513 
1514  // output buffer is casted into a byte stream to be able to handle data elements of
1515  // different sizes (only used when actual column width sizes are used)
1516  llvm::Value* output_buffer_byte_stream{nullptr};
1517  llvm::Value* out_row_idx{nullptr};
1518  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1519  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1520  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1521  std::get<0>(agg_out_ptr_w_idx),
1522  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1523  output_buffer_byte_stream->setName("out_buff_b_stream");
1524  CHECK(std::get<1>(agg_out_ptr_w_idx));
1525  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1526  llvm::Type::getInt64Ty(LL_CONTEXT));
1527  out_row_idx->setName("out_row_idx");
1528  }
1529 
1530  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1531  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1532  ++target_idx) {
1533  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1534  CHECK(target_expr);
1535 
1536  target_builder(target_expr, executor_, co);
1537  }
1538 
1539  target_builder.codegen(this,
1540  executor_,
1541  query_mem_desc,
1542  co,
1543  agg_out_ptr_w_idx,
1544  agg_out_vec,
1545  output_buffer_byte_stream,
1546  out_row_idx,
1547  diamond_codegen);
1548 
1549  for (auto target_expr : ra_exe_unit_.target_exprs) {
1550  CHECK(target_expr);
1551  executor_->plan_state_->isLazyFetchColumn(target_expr);
1552  }
1553 
1554  return can_return_error;
1555 }

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1560 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1567  {
1568  llvm::Value* agg_col_ptr{nullptr};
1569  if (query_mem_desc.didOutputColumnar()) {
1570  // TODO(Saman): remove the second columnar branch, and support all query description
1571  // types through the first branch. Then, input arguments should also be cleaned up
1572  if (!g_cluster &&
1573  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1574  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1575  chosen_bytes == 8);
1576  CHECK(output_buffer_byte_stream);
1577  CHECK(out_row_idx);
1578  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1579  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1580  auto out_per_col_byte_idx =
1581  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1582  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1583  LL_INT(static_cast<int64_t>(col_off)));
1584  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1585  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1586  agg_col_ptr = LL_BUILDER.CreateBitCast(
1587  output_ptr,
1588  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1589  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1590  } else {
1591  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1592  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1593  col_off /= chosen_bytes;
1594  CHECK(std::get<1>(agg_out_ptr_w_idx));
1595  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1596  agg_col_ptr = LL_BUILDER.CreateGEP(
1597  LL_BUILDER.CreateBitCast(
1598  std::get<0>(agg_out_ptr_w_idx),
1599  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1600  offset);
1601  }
1602  } else {
1603  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1604  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1605  col_off /= chosen_bytes;
1606  agg_col_ptr = LL_BUILDER.CreateGEP(
1607  LL_BUILDER.CreateBitCast(
1608  std::get<0>(agg_out_ptr_w_idx),
1609  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1610  LL_INT(col_off));
1611  }
1612  CHECK(agg_col_ptr);
1613  return agg_col_ptr;
1614 }
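The columnar branch's address arithmetic is easier to see outside IR form. A minimal sketch, using a hypothetical host-side helper (not OmniSciDB code): each target column is a contiguous slab starting col_off bytes into the byte stream, and the shift by log2(chosen_bytes) in the generated code is just the multiplication below.

  #include <cstdint>

  int8_t* agg_column_ptr(int8_t* output_buffer_byte_stream,  // out_buff_b_stream
                         uint64_t out_row_idx,               // row within the buffer
                         uint32_t col_off,                   // column slab offset in bytes
                         size_t chosen_bytes) {              // slot width: 1, 2, 4 or 8
    // CreateShl(out_row_idx, log2(chosen_bytes)) == out_row_idx * chosen_bytes
    return output_buffer_byte_stream + out_row_idx * chosen_bytes + col_off;
  }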

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1665 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK(), CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegen().

1670  {
1671  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1672  const auto& arg_ti =
1673  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1674  if (arg_ti.is_fp()) {
1675  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1676  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1677  }
1678  const auto& count_distinct_descriptor =
1679  query_mem_desc.getCountDistinctDescriptor(target_idx);
1680  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1681  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1682  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1683  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1684  if (device_type == ExecutorDeviceType::GPU) {
1685  const auto base_dev_addr = getAdditionalLiteral(-1);
1686  const auto base_host_addr = getAdditionalLiteral(-2);
1687  agg_args.push_back(base_dev_addr);
1688  agg_args.push_back(base_host_addr);
1689  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1690  } else {
1691  emitCall("agg_approximate_count_distinct", agg_args);
1692  }
1693  return;
1694  }
1695  std::string agg_fname{"agg_count_distinct"};
1696  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1697  agg_fname += "_bitmap";
1698  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1699  }
1700  if (agg_info.skip_null_val) {
1701  auto null_lv = executor_->cgen_state_->castToTypeIn(
1702  (arg_ti.is_fp()
1703  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1704  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1705  64);
1706  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1707  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1708  agg_fname += "_skip_val";
1709  agg_args.push_back(null_lv);
1710  }
1711  if (device_type == ExecutorDeviceType::GPU) {
1712  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1713  agg_fname += "_gpu";
1714  const auto base_dev_addr = getAdditionalLiteral(-1);
1715  const auto base_host_addr = getAdditionalLiteral(-2);
1716  agg_args.push_back(base_dev_addr);
1717  agg_args.push_back(base_host_addr);
1718  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1719  CHECK_EQ(size_t(0),
1720  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1721  count_distinct_descriptor.sub_bitmap_count);
1722  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1723  count_distinct_descriptor.sub_bitmap_count)));
1724  }
1725  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1726  emitCall(agg_fname, agg_args);
1727  } else {
1728  executor_->cgen_state_->emitExternalCall(
1729  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1730  }
1731 }
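The bitmap path's runtime behavior, reduced to a sketch (a simplification; the real agg_count_distinct_bitmap runtime also has the _skip_val and GPU sub-bitmap variants emitted above): each value sets one bit at offset (val - min_val) in the group's bitmap, so the distinct count is a population count over the bitmap at the end.

  #include <cstdint>

  void agg_count_distinct_bitmap_sketch(int8_t* bitmap, int64_t val, int64_t min_val) {
    const uint64_t bitmap_idx = static_cast<uint64_t>(val - min_val);
    bitmap[bitmap_idx >> 3] |= static_cast<int8_t>(1 << (bitmap_idx & 7));
  }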

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
GroupByAndAggregate::DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1616 of file GroupByAndAggregate.cpp.

References CHECK(), emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1620  {
1621  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1622  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1623  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1624  estimator_comp_count_lv);
1625  int32_t subkey_idx = 0;
1626  for (const auto estimator_arg_comp : estimator_arg) {
1627  const auto estimator_arg_comp_lvs =
1628  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1629  query_mem_desc.getEffectiveKeyWidth(),
1630  co,
1631  false,
1632  0,
1633  diamond_codegen,
1634  array_loops,
1635  true);
1636  CHECK(!estimator_arg_comp_lvs.original_value);
1637  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1638  // store the sub-key to the buffer
1639  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1640  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1641  }
1642  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1643  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1644  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1645  const auto estimator_comp_bytes_lv =
1646  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1647  const auto bitmap_size_lv =
1648  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1649  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1650  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1651 }
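The generated code allocates one 64-bit slot per estimator component, stores each translated component at its subkey index, and hands the buffer to the estimator's runtime function as raw bytes. A host-side sketch of that packing (hypothetical helper, for illustration):

  #include <cstdint>
  #include <cstring>
  #include <vector>

  std::vector<int8_t> pack_estimator_key(const std::vector<int64_t>& components) {
    std::vector<int8_t> key_bytes(components.size() * sizeof(int64_t));
    std::memcpy(key_bytes.data(), components.data(), key_bytes.size());
    return key_bytes;  // plays the role of key_bytes in the emitCall above
  }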

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1117 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), getExprRangeInfo(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, groups_buffer, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1120  {
1121  auto arg_it = ROW_FUNC->arg_begin();
1122  auto groups_buffer = arg_it++;
1123 
1124  std::stack<llvm::BasicBlock*> array_loops;
1125 
1126  // TODO(Saman): move this logic outside of this function.
1127  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1128  if (query_mem_desc.didOutputColumnar()) {
1129  return std::make_tuple(
1130  &*groups_buffer,
1131  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1132  } else {
1133  return std::make_tuple(
1134  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1135  nullptr);
1136  }
1137  }
1138 
1139  CHECK(query_mem_desc.getQueryDescriptionType() ==
1140  QueryDescriptionType::GroupByBaselineHash ||
1141  query_mem_desc.getQueryDescriptionType() ==
1142  QueryDescriptionType::GroupByPerfectHash);
1143 
1144  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1145  ? 0
1146  : query_mem_desc.getRowSize() / sizeof(int64_t);
1147 
1148  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1149  ? sizeof(int64_t)
1150  : query_mem_desc.getEffectiveKeyWidth();
1151  // for multi-column group by
1152  llvm::Value* group_key = nullptr;
1153  llvm::Value* key_size_lv = nullptr;
1154 
1155  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1156  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1157  if (query_mem_desc.getQueryDescriptionType() ==
1158  QueryDescriptionType::GroupByPerfectHash) {
1159  group_key =
1160  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1161  } else if (query_mem_desc.getQueryDescriptionType() ==
1162  QueryDescriptionType::GroupByBaselineHash) {
1163  group_key =
1164  col_width_size == sizeof(int32_t)
1165  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1166  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1167  }
1168  CHECK(group_key);
1169  CHECK(key_size_lv);
1170  }
1171 
1172  int32_t subkey_idx = 0;
1173  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1174  for (const auto group_expr : ra_exe_unit_.groupby_exprs) {
1175  const auto col_range_info = getExprRangeInfo(group_expr.get());
1176  const auto translated_null_value = static_cast<int64_t>(
1177  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1178  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1179  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1180  : checked_int64_t(col_range_info.max) +
1181  (col_range_info.bucket ? col_range_info.bucket : 1));
1182 
1183  const bool col_has_nulls =
1184  query_mem_desc.getQueryDescriptionType() ==
1185  QueryDescriptionType::GroupByPerfectHash
1186  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1187  ? query_mem_desc.hasNulls()
1188  : col_range_info.has_nulls)
1189  : false;
1190 
1191  const auto group_expr_lvs =
1192  executor_->groupByColumnCodegen(group_expr.get(),
1193  col_width_size,
1194  co,
1195  col_has_nulls,
1196  translated_null_value,
1197  diamond_codegen,
1198  array_loops,
1199  query_mem_desc.threadsShareMemory());
1200  const auto group_expr_lv = group_expr_lvs.translated_value;
1201  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1202  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1203  return codegenSingleColumnPerfectHash(query_mem_desc,
1204  co,
1205  &*groups_buffer,
1206  group_expr_lv,
1207  group_expr_lvs.original_value,
1208  row_size_quad);
1209  } else {
1210  // store the sub-key to the buffer
1211  LL_BUILDER.CreateStore(group_expr_lv,
1212  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1213  }
1214  }
1215  if (query_mem_desc.getQueryDescriptionType() ==
1216  QueryDescriptionType::GroupByPerfectHash) {
1217  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1218  return codegenMultiColumnPerfectHash(
1219  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1220  } else if (query_mem_desc.getQueryDescriptionType() ==
1221  QueryDescriptionType::GroupByBaselineHash) {
1222  return codegenMultiColumnBaselineHash(co,
1223  &*groups_buffer,
1224  group_key,
1225  key_size_lv,
1226  query_mem_desc,
1227  col_width_size,
1228  row_size_quad);
1229  }
1230  CHECK(false);
1231  return std::make_tuple(nullptr, nullptr);
1232 }
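Note how NULL group keys are handled above: the translated null value sits one bucket past the column's maximum, so NULL occupies a dedicated, collision-free slot in the perfect-hash range. As a worked example (hypothetical helper, mirroring the checked_int64_t expression above):

  #include <cstdint>

  int64_t translated_null_value(int64_t max_val, int64_t bucket) {
    // e.g. a column with range [10, 50] and bucket 0 maps NULL to 51
    return max_val + (bucket ? bucket : 1);
  }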

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1318 of file GroupByAndAggregate.cpp.

References CHECK(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), QueryMemoryDescriptor::getEntryCount(), groups_buffer, LL_BUILDER, LL_CONTEXT, LL_INT, ROW_FUNC, and CompilationOptions::with_dynamic_watchdog_.

Referenced by codegenGroupBy().

1325  {
1326  auto arg_it = ROW_FUNC->arg_begin(); // groups_buffer
1327  ++arg_it; // current match count
1328  ++arg_it; // total match count
1329  ++arg_it; // old match count
1330  ++arg_it; // output buffer slots count
1331  ++arg_it; // aggregate init values
1332  CHECK(arg_it->getName() == "agg_init_val");
1333  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1334  CHECK(key_width == sizeof(int32_t));
1335  group_key =
1336  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1337  }
1338  std::vector<llvm::Value*> func_args{
1339  groups_buffer,
1340  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1341  &*group_key,
1342  &*key_size_lv,
1343  LL_INT(static_cast<int32_t>(key_width))};
1344  std::string func_name{"get_group_value"};
1345  if (query_mem_desc.didOutputColumnar()) {
1346  func_name += "_columnar_slot";
1347  } else {
1348  func_args.push_back(LL_INT(row_size_quad));
1349  func_args.push_back(&*arg_it);
1350  }
1351  if (co.with_dynamic_watchdog_) {
1352  func_name += "_with_watchdog";
1353  }
1354  if (query_mem_desc.didOutputColumnar()) {
1355  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1356  } else {
1357  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1358  }
1359 }
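A toy model of the emitted get_group_value call may help; this is a deliberate simplification (the real runtime hashes the whole key, uses per-width empty-key sentinels, and claims slots atomically), but the open-addressing probe-and-claim shape is the same:

  #include <cstdint>
  #include <cstring>

  constexpr int64_t EMPTY_KEY_64 = 0x7FFFFFFFFFFFFFFFLL;  // assumed sentinel

  int64_t* get_group_value_sketch(int64_t* groups_buffer,
                                  uint32_t entry_count,
                                  const int64_t* key,
                                  uint32_t key_count,
                                  uint32_t row_size_quad) {
    uint32_t h = static_cast<uint32_t>(key[0]) % entry_count;  // toy hash
    for (uint32_t probe = 0; probe < entry_count; ++probe) {
      int64_t* row = groups_buffer +
                     static_cast<size_t>((h + probe) % entry_count) * row_size_quad;
      if (row[0] == EMPTY_KEY_64) {  // free slot: claim it for this key
        std::memcpy(row, key, key_count * sizeof(int64_t));
        return row + key_count;      // aggregate slots follow the key
      }
      if (std::memcmp(row, key, key_count * sizeof(int64_t)) == 0) {
        return row + key_count;      // existing group
      }
    }
    return nullptr;                  // table full; the kernel reports an error code
  }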

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1284 of file GroupByAndAggregate.cpp.

References CHECK(), codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, groups_buffer, LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1289  {
1290  CHECK(query_mem_desc.getQueryDescriptionType() ==
1291  QueryDescriptionType::GroupByPerfectHash);
1292  // compute the index (perfect hash)
1293  auto perfect_hash_func = codegenPerfectHashFunction();
1294  auto hash_lv =
1295  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1296 
1297  if (query_mem_desc.didOutputColumnar()) {
1298  const std::string set_matching_func_name{
1299  "set_matching_group_value_perfect_hash_columnar"};
1300  const std::vector<llvm::Value*> set_matching_func_arg{
1301  groups_buffer,
1302  hash_lv,
1303  group_key,
1304  key_size_lv,
1305  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1306  query_mem_desc.getEntryCount())};
1307  emitCall(set_matching_func_name, set_matching_func_arg);
1308  return std::make_tuple(groups_buffer, hash_lv);
1309  } else {
1310  return std::make_tuple(
1311  emitCall("get_matching_group_value_perfect_hash",
1312  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1313  nullptr);
1314  }
1315 }

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1026 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, CHECK(), CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, groups_buffer, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), RelAlgExecutionUnit::use_bump_allocator, and use_streaming_top_n().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1030  {
1031  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1032  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1033  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1034  CHECK(!group_expr);
1035  if (!query_mem_desc.didOutputColumnar()) {
1036  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1037  }
1038  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1039  ? 0
1040  : query_mem_desc.getRowSize() / sizeof(int64_t);
1041  CodeGenerator code_generator(executor_);
1042  if (use_streaming_top_n(ra_exe_unit_, query_mem_desc.didOutputColumnar())) {
1043  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1044  CHECK_GE(only_order_entry.tle_no, int(1));
1045  const size_t target_idx = only_order_entry.tle_no - 1;
1046  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1047  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1048  const auto chosen_bytes =
1049  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1050  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1051  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1052  const size_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1053  std::string fname = "get_bin_from_k_heap";
1054  const auto& oe_ti = order_entry_expr->get_type_info();
1055  llvm::Value* null_key_lv = nullptr;
1056  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1057  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1058  switch (bit_width) {
1059  case 32:
1060  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1061  break;
1062  case 64:
1063  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1064  break;
1065  default:
1066  CHECK(false);
1067  }
1068  fname += "_int" + std::to_string(bit_width) + "_t";
1069  } else {
1070  CHECK(oe_ti.is_fp());
1071  if (order_entry_lv->getType()->isDoubleTy()) {
1072  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1073  } else {
1074  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1075  }
1076  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1077  }
1078  const auto key_slot_idx =
1079  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1080  return emitCall(
1081  fname,
1082  {groups_buffer,
1083  LL_INT(n),
1084  LL_INT(row_size_quad),
1085  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1086  LL_BOOL(only_order_entry.is_desc),
1087  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1088  LL_BOOL(only_order_entry.nulls_first),
1089  null_key_lv,
1090  order_entry_lv});
1091  } else {
1092  llvm::Value* output_buffer_entry_count_lv{nullptr};
1093  if (ra_exe_unit_.use_bump_allocator) {
1094  output_buffer_entry_count_lv =
1095  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1096  CHECK(output_buffer_entry_count_lv);
1097  }
1098  const auto group_expr_lv =
1099  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1100  std::vector<llvm::Value*> args{
1101  groups_buffer,
1102  output_buffer_entry_count_lv
1103  ? output_buffer_entry_count_lv
1104  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1105  group_expr_lv,
1106  code_generator.posArg(nullptr)};
1107  if (query_mem_desc.didOutputColumnar()) {
1108  const auto columnar_output_offset =
1109  emitCall("get_columnar_scan_output_offset", args);
1110  return columnar_output_offset;
1111  }
1112  args.push_back(LL_INT(row_size_quad));
1113  return emitCall("get_scan_output_slot", args);
1114  }
1115 }
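For the non-top-n, row-wise path, the emitted get_scan_output_slot call reduces to simple slot addressing. A simplified stand-in (hypothetical; it ignores the columnar and bump-allocator variants shown above): the old_total_matched value claimed in codegen() indexes a row of row_size_quad 64-bit slots, bounds-checked against the buffer's entry count.

  #include <cstdint>

  int64_t* get_scan_output_slot_sketch(int64_t* output_buffer,
                                       uint32_t output_buffer_entry_count,
                                       uint32_t output_row_idx,  // old_total_matched
                                       uint32_t row_size_quad) {
    if (output_row_idx >= output_buffer_entry_count) {
      return nullptr;  // buffer exhausted; the row function returns an error
    }
    return output_buffer + static_cast<size_t>(output_row_idx) * row_size_quad;
  }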

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1361 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_GT, executor_, get_int_type(), getBucketedCardinality(), getExprRangeInfo(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1361  {
1362  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1363  auto ft = llvm::FunctionType::get(
1364  get_int_type(32, LL_CONTEXT),
1365  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1366  false);
1367  auto key_hash_func = llvm::Function::Create(ft,
1368  llvm::Function::ExternalLinkage,
1369  "perfect_key_hash",
1370  executor_->cgen_state_->module_);
1371  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1372  mark_function_always_inline(key_hash_func);
1373  auto& key_buff_arg = *key_hash_func->args().begin();
1374  llvm::Value* key_buff_lv = &key_buff_arg;
1375  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1376  llvm::IRBuilder<> key_hash_func_builder(bb);
1377  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1378  std::vector<int64_t> cardinalities;
1379  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
1380  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1381  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1382  cardinalities.push_back(getBucketedCardinality(col_range_info));
1383  }
1384  size_t dim_idx = 0;
1385  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
1386  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1387  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1388  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1389  auto crt_term_lv =
1390  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1391  if (col_range_info.bucket) {
1392  crt_term_lv =
1393  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1394  }
1395  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1396  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1397  LL_INT(cardinalities[prev_dim_idx]));
1398  }
1399  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1400  ++dim_idx;
1401  }
1402  key_hash_func_builder.CreateRet(
1403  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1404  return key_hash_func;
1405 }
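The hash the generated perfect_key_hash function computes is a mixed-radix (row-major) index over the bucketed key components. A host-side equivalent, written as a sketch with hypothetical types:

  #include <cstdint>
  #include <vector>

  struct ColRange {
    int64_t min;
    int64_t bucket;       // 0 means unbucketed
    int64_t cardinality;  // bucketed cardinality of the dimension
  };

  int32_t perfect_key_hash(const std::vector<int64_t>& key,
                           const std::vector<ColRange>& ranges) {
    int64_t hash = 0;
    for (size_t dim = 0; dim < key.size(); ++dim) {
      int64_t term = key[dim] - ranges[dim].min;  // rebase to zero
      if (ranges[dim].bucket) {
        term /= ranges[dim].bucket;               // collapse bucketed values
      }
      for (size_t prev = 0; prev < dim; ++prev) {
        term *= ranges[prev].cardinality;         // scale by radix of prior dims
      }
      hash += term;
    }
    return static_cast<int32_t>(hash);            // mirrors the final CreateTrunc
  }

With two columns of cardinalities 10 and 20, rebased components c0 and c1 map to c0 + 10 * c1, a unique bin in [0, 200).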

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1235 of file GroupByAndAggregate.cpp.

References CHECK(), CompilationOptions::device_type_, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), groups_buffer, QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1241  {
1242  CHECK(query_mem_desc.usesGetGroupValueFast());
1243  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1244  ? "get_columnar_group_bin_offset"
1245  : "get_group_value_fast"};
1246  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1247  get_group_fn_name += "_keyless";
1248  }
1249  if (query_mem_desc.interleavedBins(co.device_type_)) {
1250  CHECK(!query_mem_desc.didOutputColumnar());
1251  CHECK(query_mem_desc.hasKeylessHash());
1252  get_group_fn_name += "_semiprivate";
1253  }
1254  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1255  &*group_expr_lv_translated};
1256  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1257  query_mem_desc.mustUseBaselineSort()) {
1258  get_group_fn_name += "_with_original_key";
1259  get_group_fn_args.push_back(group_expr_lv_original);
1260  }
1261  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1262  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1263  if (!query_mem_desc.hasKeylessHash()) {
1264  if (!query_mem_desc.didOutputColumnar()) {
1265  get_group_fn_args.push_back(LL_INT(row_size_quad));
1266  }
1267  } else {
1268  if (!query_mem_desc.didOutputColumnar()) {
1269  get_group_fn_args.push_back(LL_INT(row_size_quad));
1270  }
1271  if (query_mem_desc.interleavedBins(co.device_type_)) {
1272  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1273  get_group_fn_args.push_back(warp_idx);
1274  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1275  }
1276  }
1277  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1278  return std::make_tuple(&*groups_buffer,
1279  emitCall(get_group_fn_name, get_group_fn_args));
1280  }
1281  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1282 }
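For the plain "get_group_value_fast" path, the output slot is derived directly from the key. A simplified model of that addressing, assuming the non-keyless, non-interleaved variant; the real runtime helper also writes the key into the slot and handles empty-slot initialization, which this sketch omits:

#include <cstdint>

// Bin index is (key - min_key) / bucket; row-wise rows are row_size_quad
// 64-bit quadwords apart. Addressing only; key write-back is omitted.
int64_t* get_group_value_fast_sketch(int64_t* groups_buffer,
                                     const int64_t key,
                                     const int64_t min_key,
                                     const int64_t bucket,
                                     const uint32_t row_size_quad) {
  int64_t bin = key - min_key;
  if (bucket) {
    bin /= bucket;
  }
  return groups_buffer + bin * row_size_quad;
}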

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction window_func,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
DiamondCodegen diamond_codegen 
)
private

Definition at line 1457 of file GroupByAndAggregate.cpp.

References CHECK(), codegenOutputSlot(), COUNT, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), groups_buffer, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1461  {
1462  const auto window_func_context =
1463  WindowProjectNodeContext::getActiveWindowFunctionContext();
1464  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1465  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1466  ? 0
1467  : query_mem_desc.getRowSize() / sizeof(int64_t);
1468  auto arg_it = ROW_FUNC->arg_begin();
1469  auto groups_buffer = arg_it++;
1470  CodeGenerator code_generator(executor_);
1471  if (!window_func_context->getRowNumber()) {
1472  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1473  window_func_context->setRowNumber(emitCall(
1474  "row_number_window_func",
1475  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1476  code_generator.posArg(nullptr)}));
1477  }
1478  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1479  get_int_type(32, LL_CONTEXT));
1480  llvm::Value* entry_count_lv =
1481  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1482  std::vector<llvm::Value*> args{
1483  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1484  if (query_mem_desc.didOutputColumnar()) {
1485  const auto columnar_output_offset =
1486  emitCall("get_columnar_scan_output_offset", args);
1487  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1488  }
1489  args.push_back(LL_INT(row_size_quad));
1490  return emitCall("get_scan_output_slot", args);
1491  }
1492  auto arg_it = ROW_FUNC->arg_begin();
1493  auto groups_buffer = arg_it++;
1494  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1495 }
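Two output layouts are handled above: columnar output resolves to a 64-bit offset into per-column arrays, while row-wise output resolves to a slot pointer advanced by row_size_quad quadwords per row. A hypothetical model of the two addressing schemes; the actual runtime calls (get_columnar_scan_output_offset, get_scan_output_slot) take additional arguments and bounds logic elided here:

#include <cstdint>

// Columnar: each column is a contiguous array, so the entry index itself
// is the offset (scaled per column elsewhere).
int64_t columnar_offset_sketch(const int64_t pos_in_window) {
  return pos_in_window;
}

// Row-wise: rows are laid out back to back, row_size_quad quadwords each.
int64_t* row_wise_slot_sketch(int64_t* groups_buffer,
                              const int64_t pos_in_window,
                              const int32_t row_size_quad) {
  return groups_buffer + pos_in_window * row_size_quad;
}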

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo arg_type,
const TargetInfo agg_info,
llvm::Value *  target 
)
private

Definition at line 1407 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, CHECK(), executor_, SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegen().

1409  {
1410  const auto& agg_type = agg_info.sql_type;
1411  const size_t chosen_bytes = agg_type.get_size();
1412 
1413  bool need_conversion{false};
1414  llvm::Value* arg_null{nullptr};
1415  llvm::Value* agg_null{nullptr};
1416  llvm::Value* target_to_cast{target};
1417  if (arg_type.is_fp()) {
1418  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1419  if (agg_type.is_fp()) {
1420  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1421  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1422  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1423  need_conversion = true;
1424  }
1425  } else {
1426  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1427  return target;
1428  }
1429  } else {
1430  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1431  if (agg_type.is_fp()) {
1432  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1433  need_conversion = true;
1434  target_to_cast = executor_->castToFP(target);
1435  } else {
1436  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1437  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1438  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1439  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1440  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1441  need_conversion = true;
1442  }
1443  }
1444  }
1445  if (need_conversion) {
1446  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1447  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1448  return LL_BUILDER.CreateSelect(
1449  cmp,
1450  agg_null,
1451  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1452  } else {
1453  return target;
1454  }
1455 }
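Conversion is only needed when the argument and aggregate types use different NULL sentinels, e.g. a narrow integer argument feeding a 64-bit accumulator. A scalar C++ analogue of the emitted compare-and-select; the minimum-value sentinel convention is an assumption of this sketch:

#include <cstdint>
#include <limits>

// If the incoming value is the argument type's NULL sentinel, substitute
// the aggregate type's sentinel; otherwise just widen the value.
int64_t convert_null_if_any_sketch(const int16_t arg_val) {
  constexpr int16_t arg_null = std::numeric_limits<int16_t>::min();
  constexpr int64_t agg_null = std::numeric_limits<int64_t>::min();
  return arg_val == arg_null ? agg_null : static_cast<int64_t>(arg_val);
}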

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 1858 of file GroupByAndAggregate.cpp.

References executor_.

Referenced by TargetExprCodegen::codegen(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

1859  {
1860  return executor_->cgen_state_->emitCall(fname, args);
1861 }

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1733 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1733  {
1734  CHECK_LT(off, 0);
1735  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1736  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1737  LL_BUILDER.CreateBitCast(lit_buff_lv,
1738  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1739  LL_INT(off)));
1740 }
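A sketch of the load performed above: additional literals are stored before the "literals" buffer pointer, at negative 64-bit offsets, hence the CHECK_LT(off, 0) guard. The cast and pointer arithmetic mirror the bitcast plus GEP in the IR (function name is illustrative):

#include <cstdint>

// Reinterpret the literal buffer as 64-bit slots and index backwards.
int64_t load_additional_literal(const int8_t* lit_buff, const int32_t off) {
  return reinterpret_cast<const int64_t*>(lit_buff)[off];  // off < 0
}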

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo col_range_info)
staticprivate

Definition at line 217 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

217  {
218  checked_int64_t crt_col_cardinality =
219  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
220  if (col_range_info.bucket) {
221  crt_col_cardinality /= col_range_info.bucket;
222  }
223  return static_cast<int64_t>(crt_col_cardinality +
224  (1 + (col_range_info.has_nulls ? 1 : 0)));
225 }
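As a worked example: a column with range [0, 99], bucket size 10, and nulls occupies (99 - 0) / 10 + 1 + 1 = 11 entries, one extra slot for the maximum value and one for the null sentinel. A plain restatement of the computation above, without the checked arithmetic (helper name is illustrative):

#include <cstdint>

// Unchecked restatement of getBucketedCardinality, for illustration only.
int64_t bucketed_cardinality_sketch(const int64_t min,
                                    const int64_t max,
                                    const int64_t bucket,
                                    const bool has_nulls) {
  int64_t cardinality = max - min;
  if (bucket) {
    cardinality /= bucket;
  }
  return cardinality + 1 + (has_nulls ? 1 : 0);
}

// e.g. bucketed_cardinality_sketch(0, 99, 10, true) == 11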

+ Here is the caller graph for this function:

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 118 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), getBucketedCardinality(), getExprRangeInfo(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), ra_exe_unit_, RelAlgExecutionUnit::simple_quals, and RelAlgExecutionUnit::target_exprs.

118  {
119  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
120  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
121  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
122  // can expect this to be true anyway for grouped queries since the precise version
123  // uses significantly more memory.
124  const int64_t baseline_threshold =
125  has_count_distinct(ra_exe_unit_)
126  ? (device_type_ == ExecutorDeviceType::GPU ? (Executor::baseline_threshold / 4)
127  : Executor::baseline_threshold)
128  : Executor::baseline_threshold;
129  if (ra_exe_unit_.groupby_exprs.size() != 1) {
130  try {
131  checked_int64_t cardinality{1};
132  bool has_nulls{false};
133  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
134  auto col_range_info = getExprRangeInfo(groupby_expr.get());
135  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
136  // going through baseline hash if a non-integer type is encountered
137  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
138  }
139  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
140  CHECK_GE(crt_col_cardinality, 0);
141  cardinality *= crt_col_cardinality;
142  if (col_range_info.has_nulls) {
143  has_nulls = true;
144  }
145  }
146  // For zero or high cardinalities, use baseline layout.
147  if (!cardinality || cardinality > baseline_threshold) {
148  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
149  }
150  return {QueryDescriptionType::GroupByPerfectHash,
151  0,
152  int64_t(cardinality),
153  0,
154  has_nulls};
155  } catch (...) { // overflow when computing cardinality
156  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
157  }
158  }
159  // For single column groupby on high timestamps, force baseline hash due to wide ranges
160  // we are likely to encounter when applying quals to the expression range
161  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
162  // the range is small enough
163  if (ra_exe_unit_.groupby_exprs.front() &&
164  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
165  ra_exe_unit_.simple_quals.size() > 0) {
166  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
167  }
168  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
169  if (!ra_exe_unit_.groupby_exprs.front()) {
170  return col_range_info;
171  }
172  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
173  const int64_t col_count =
174  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
175  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
176  if (has_count_distinct(ra_exe_unit_)) {
177  max_entry_count = std::min(max_entry_count, baseline_threshold);
178  }
179  if ((!ra_exe_unit_.groupby_exprs.front()->get_type_info().is_string() &&
180  !expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(), *executor_->catalog_)) &&
181  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
182  !col_range_info.bucket) {
183  return {QueryDescriptionType::GroupByBaselineHash,
184  col_range_info.min,
185  col_range_info.max,
186  0,
187  col_range_info.has_nulls};
188  }
189  return col_range_info;
190 }
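For the multi-column case, the layout decision reduces to: use perfect hash only if every column is itself perfect-hash-able and the product of the bucketed cardinalities stays at or below the baseline threshold, with zero or overflowing products treated as baseline. A compact sketch of that rule, using __int128 as a stand-in for checked_int64_t (a compiler extension, assumed available):

#include <cstdint>
#include <vector>

// Returns true when the cumulative cardinality permits a perfect hash layout.
bool use_perfect_hash_sketch(const std::vector<int64_t>& col_cardinalities,
                             const int64_t baseline_threshold) {
  __int128 cardinality = 1;
  for (const auto crt_col_cardinality : col_cardinalities) {
    cardinality *= crt_col_cardinality;
    if (cardinality <= 0 || cardinality > baseline_threshold) {
      return false;  // zero, overflowing, or too-high cardinality -> baseline
    }
  }
  return true;
}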

+ Here is the call graph for this function:

ColRangeInfo GroupByAndAggregate::getExprRangeInfo ( const Analyzer::Expr expr) const
private

Definition at line 192 of file GroupByAndAggregate.cpp.

References CHECK(), Double, executor_, Float, getExpressionRange(), GroupByBaselineHash, GroupByPerfectHash, Integer, Invalid, NonGroupedAggregate, Projection, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::simple_quals.

Referenced by codegenGroupBy(), codegenPerfectHashFunction(), getColRangeInfo(), gpuCanHandleOrderEntries(), and initCountDistinctDescriptors().

192  {
193  if (!expr) {
194  return {QueryDescriptionType::Projection, 0, 0, 0, false};
195  }
196 
197  const auto expr_range = getExpressionRange(
198  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
199  switch (expr_range.getType()) {
200  case ExpressionRangeType::Integer:
201  return {QueryDescriptionType::GroupByPerfectHash,
202  expr_range.getIntMin(),
203  expr_range.getIntMax(),
204  expr_range.getBucket(),
205  expr_range.hasNulls()};
206  case ExpressionRangeType::Float:
207  case ExpressionRangeType::Double:
208  case ExpressionRangeType::Invalid:
209  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
210  default:
211  CHECK(false);
212  }
213  CHECK(false);
214  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
215 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

KeylessInfo GroupByAndAggregate::getKeylessInfo ( const std::vector< Analyzer::Expr * > &  target_expr_list,
const bool  is_group_by 
) const
private

This function goes through all target expressions and answers two questions:

  1. Is it possible to have keyless hash?
  2. If yes to 1, then what aggregate expression should be considered to represent the key's presence, if needed (e.g., in detecting empty entries in the result set).

NOTE: Keyless hash is only valid with single-column group by at the moment.

TODO(Saman): remove the shared memory discussion out of this function.

Shared memory usage is currently supported only for a single keyless aggregate operation, and for at most two target expressions.

Definition at line 603 of file GroupByAndAggregate.cpp.

References agg_arg(), CHECK(), constrained_not_null(), Double, executor_, Float, g_bigint_count, get_agg_initial_val(), get_compact_type(), get_target_info(), getExpressionRange(), Integer, Invalid, is_distinct_target(), kAVG, kCOUNT, keyless, kMAX, kMIN, kSUM, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, supportedTypeForGpuSharedMemUsage(), and takes_float_argument().

605  {
606  bool keyless{true}, found{false}, shared_mem_support{false},
607  shared_mem_valid_data_type{true};
608  /* Shared memory usage is currently supported for a limited subset of possible
609  * aggregate operations; shared_mem_support and
610  * shared_mem_valid_data_type track whether that support applies. */
611  int32_t num_agg_expr{0}; // used for shared memory support on the GPU
612  int32_t index{0};
613  for (const auto target_expr : target_expr_list) {
614  const auto agg_info = get_target_info(target_expr, g_bigint_count);
615  const auto chosen_type = get_compact_type(agg_info);
616  // TODO(Saman): should be eventually removed, once I make sure what data types can
617  // be used in this shared memory setting.
618 
619  shared_mem_valid_data_type =
620  shared_mem_valid_data_type && supportedTypeForGpuSharedMemUsage(chosen_type);
621 
622  if (agg_info.is_agg) {
623  num_agg_expr++;
624  }
625  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
626  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
627  CHECK(agg_expr);
628  const auto arg_expr = agg_arg(target_expr);
629  const bool float_argument_input = takes_float_argument(agg_info);
630  switch (agg_info.agg_kind) {
631  case kAVG:
632  ++index;
633  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
634  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
635  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
636  expr_range_info.hasNulls()) {
637  break;
638  }
639  }
640  found = true;
641  break;
642  case kCOUNT:
643  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
644  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
645  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
646  expr_range_info.hasNulls()) {
647  break;
648  }
649  }
650  found = true;
651  if (!agg_info.skip_null_val) {
652  shared_mem_support = true; // currently just support 8 bytes per group
653  }
654  break;
655  case kSUM: {
656  auto arg_ti = arg_expr->get_type_info();
657  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
658  arg_ti.set_notnull(true);
659  }
660  if (!arg_ti.get_notnull()) {
661  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
662  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
663  !expr_range_info.hasNulls()) {
664  found = true;
665  }
666  } else {
667  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
668  switch (expr_range_info.getType()) {
669  case ExpressionRangeType::Float:
670  case ExpressionRangeType::Double:
671  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
672  found = true;
673  }
674  break;
675  case ExpressionRangeType::Integer:
676  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
677  found = true;
678  }
679  break;
680  default:
681  break;
682  }
683  }
684  break;
685  }
686  case kMIN: {
687  CHECK(agg_expr && agg_expr->get_arg());
688  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
689  if (arg_ti.is_string() || arg_ti.is_array()) {
690  break;
691  }
692  auto expr_range_info =
693  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
694  auto init_max = get_agg_initial_val(agg_info.agg_kind,
695  chosen_type,
696  is_group_by || float_argument_input,
697  float_argument_input ? sizeof(float) : 8);
698  switch (expr_range_info.getType()) {
699  case ExpressionRangeType::Float:
700  case ExpressionRangeType::Double: {
701  auto double_max =
702  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
703  if (expr_range_info.getFpMax() < double_max) {
704  found = true;
705  }
706  break;
707  }
708  case ExpressionRangeType::Integer:
709  if (expr_range_info.getIntMax() < init_max) {
710  found = true;
711  }
712  break;
713  default:
714  break;
715  }
716  break;
717  }
718  case kMAX: {
719  CHECK(agg_expr && agg_expr->get_arg());
720  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
721  if (arg_ti.is_string() || arg_ti.is_array()) {
722  break;
723  }
724  auto expr_range_info =
725  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
726  // NULL sentinel and init value for kMAX are identical, which results in
727  // ambiguity in detecting empty keys in presence of nulls.
728  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
729  expr_range_info.hasNulls()) {
730  break;
731  }
732  auto init_min = get_agg_initial_val(agg_info.agg_kind,
733  chosen_type,
734  is_group_by || float_argument_input,
735  float_argument_input ? sizeof(float) : 8);
736  switch (expr_range_info.getType()) {
737  case ExpressionRangeType::Float:
738  case ExpressionRangeType::Double: {
739  auto double_min =
740  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
741  if (expr_range_info.getFpMin() > double_min) {
742  found = true;
743  }
744  break;
745  }
746  case ExpressionRangeType::Integer:
747  if (expr_range_info.getIntMin() > init_min) {
748  found = true;
749  }
750  break;
751  default:
752  break;
753  }
754  break;
755  }
756  default:
757  keyless = false;
758  break;
759  }
760  }
761  if (!keyless) {
762  break;
763  }
764  if (!found) {
765  ++index;
766  }
767  }
768 
769  // shouldn't use keyless for projection only
770  /**
771  * Currently just support shared memory usage when dealing with one keyless
772  * aggregate operation. Currently just support shared memory usage for up to two
773  * target expressions.
774  */
775  return {keyless && found,
776  index,
777  ((num_agg_expr == 1) && (target_expr_list.size() <= 2))
778  ? shared_mem_support && shared_mem_valid_data_type
779  : false};
780 }
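The reason a representative aggregate must be found: with keyless hash no group key is materialized, so an entry can only be recognized as empty when the chosen target column still holds its initialization value (get_agg_initial_val above). A minimal sketch of that test, with the layout simplified to a single aggregate column (an assumption of this sketch):

#include <cstddef>
#include <cstdint>
#include <vector>

// An entry is empty exactly when its aggregate slot was never touched, i.e.
// still equals the initialization value chosen for that aggregate.
bool entry_is_empty_sketch(const std::vector<int64_t>& agg_column,
                           const std::size_t entry_idx,
                           const int64_t init_val) {
  return agg_column[entry_idx] == init_val;
}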

+ Here is the call graph for this function:

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo col_range_info,
const size_t  shard_count 
) const
private

Definition at line 263 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK(), CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

264  {
265  size_t device_count{0};
266  if (device_type_ == ExecutorDeviceType::GPU) {
267  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
268  CHECK_GT(device_count, 0u);
269  }
270 
271  int64_t bucket{col_range_info.bucket};
272 
273  if (shard_count) {
274  CHECK(!col_range_info.bucket);
275  /*
276  When a node has fewer devices than the shard count:
277  a) In a distributed setup, the minimum distance between two keys would be
278  device_count, because shards are stored consecutively across the physical tables,
279  i.e. if a shard column has values 0 to 9 and there are 3 shards on each leaf, node 1
280  would hold values 0,1,2,6,7,8 and node 2 would hold values 3,4,5,9. If each leaf
281  node has only 1 device, all of a node's keys end up loaded on that single
282  device.
283 
284  b) In a single-node setup, the distance would be the minimum of device_count and
285  shard_count - device_count. For example, for a single-node server
286  running on 3 devices and a shard column with values 0 to 9 in a table with 4 shards,
287  the device-to-fragment-keys mapping would be: device 1 - 4,8,3,7; device 2 - 1,5,9;
288  device 3 - 2,6. The bucket value would be 4 (shards) - 3 (devices) = 1, i.e. the
289  minimum of device_count and the difference.
290 
291  When a node has a device count equal to or greater than the shard count, the
292  minimum distance is always at least shard_count * the number of leaf nodes.
293  */
294  if (device_count < shard_count) {
295  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
296  : std::min(device_count, shard_count - device_count);
297  } else {
298  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
299  }
300  }
301 
302  return bucket;
303 }
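A condensed restatement of the bucket selection above, useful for checking the comment's examples; for the single-node case (leaf_count == 0, 3 devices, 4 shards) it yields min(3, 4 - 3) == 1 (helper name is illustrative):

#include <algorithm>
#include <cstddef>
#include <cstdint>

// In a distributed setup (leaf_count > 0) the bucket is the device count
// (at least 1); otherwise take the minimum of device_count and
// shard_count - device_count, as the code above does.
int64_t sharded_top_bucket_sketch(const std::size_t device_count,
                                  const std::size_t shard_count,
                                  const std::size_t leaf_count) {
  if (device_count < shard_count) {
    return leaf_count ? std::max(device_count, std::size_t(1))
                      : std::min(device_count, shard_count - device_count);
  }
  return shard_count * std::max(leaf_count, std::size_t(1));
}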

+ Here is the call graph for this function:

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 821 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_GE, CHECK_LE, Analyzer::AggExpr::get_arg(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

822  {
823  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
824  return false;
825  }
826  for (const auto order_entry : order_entries) {
827  CHECK_GE(order_entry.tle_no, 1);
828  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
829  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
830  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
831  return false;
832  }
833  // TODO(alex): relax the restrictions
834  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
835  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
836  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
837  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
838  return false;
839  }
840  if (agg_expr->get_arg()) {
841  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
842  if (arg_ti.is_fp()) {
843  return false;
844  }
845  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
846  // TODO(adb): QMD not actually initialized here?
847  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
848  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
849  expr_range_info.has_nulls) &&
850  order_entry.is_desc == order_entry.nulls_first) {
851  return false;
852  }
853  }
854  const auto& target_ti = target_expr->get_type_info();
855  CHECK(!target_ti.is_array());
856  if (!target_ti.is_integer()) {
857  return false;
858  }
859  }
860  return true;
861 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

CountDistinctDescriptors GroupByAndAggregate::initCountDistinctDescriptors ( )
private

Definition at line 507 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK(), CHECK_GE, device_type_, g_bigint_count, g_enable_watchdog, g_hll_precision_bits, Analyzer::AggExpr::get_arg(), get_count_distinct_sub_bitmap_count(), get_target_info(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, hll_size_for_rate(), Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, kENCODING_DICT, kINT, Projection, ra_exe_unit_, StdSet, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

507  {
508  CountDistinctDescriptors count_distinct_descriptors;
509  for (const auto target_expr : ra_exe_unit_.target_exprs) {
510  auto agg_info = get_target_info(target_expr, g_bigint_count);
511  if (is_distinct_target(agg_info)) {
512  CHECK(agg_info.is_agg);
513  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
514  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
515  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
516  if (arg_ti.is_string() && arg_ti.get_compression() != kENCODING_DICT) {
517  throw std::runtime_error(
518  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
519  }
520  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_array()) {
521  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
522  }
523  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
524  throw std::runtime_error(
525  "APPROX_COUNT_DISTINCT on geometry columns not supported");
526  }
527  if (agg_info.is_distinct && arg_ti.is_geometry()) {
528  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
529  }
530  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
531  auto arg_range_info =
532  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
533  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
534  int64_t bitmap_sz_bits{0};
535  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
536  const auto error_rate = agg_expr->get_error_rate();
537  if (error_rate) {
538  CHECK(error_rate->get_type_info().get_type() == kINT);
539  CHECK_GE(error_rate->get_constval().intval, 1);
540  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
541  } else {
542  bitmap_sz_bits = g_hll_precision_bits;
543  }
544  }
545  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
546  !(arg_ti.is_array() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
547  // implementation for arrays
548  if (arg_range_info.isEmpty()) {
549  count_distinct_descriptors.emplace_back(
550  CountDistinctDescriptor{CountDistinctImplType::Bitmap,
551  0,
552  64,
553  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
554  device_type_,
555  1});
556  continue;
557  }
558  count_distinct_impl_type = CountDistinctImplType::Bitmap;
559  if (agg_info.agg_kind == kCOUNT) {
560  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
561  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
562  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
563  count_distinct_impl_type = CountDistinctImplType::StdSet;
564  }
565  }
566  }
567  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
568  count_distinct_impl_type == CountDistinctImplType::StdSet &&
569  !(arg_ti.is_array() || arg_ti.is_geometry())) {
570  count_distinct_impl_type = CountDistinctImplType::Bitmap;
571  }
572  if (g_enable_watchdog &&
573  count_distinct_impl_type == CountDistinctImplType::StdSet) {
574  throw WatchdogException("Cannot use a fast path for COUNT distinct");
575  }
576  const auto sub_bitmap_count =
577  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit_, device_type_);
578  count_distinct_descriptors.emplace_back(
579  CountDistinctDescriptor{count_distinct_impl_type,
580  arg_range_info.min,
581  bitmap_sz_bits,
582  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
583  device_type_,
584  sub_bitmap_count});
585  } else {
586  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
587  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
588  }
589  }
590  return count_distinct_descriptors;
591 }
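In short: exact COUNT(DISTINCT) over a perfect-hash-able integer range sizes the bitmap at one bit per possible value, falling back to the std::set based implementation when the range is non-positive or exceeds the 8-billion-bit cap, while APPROX_COUNT_DISTINCT sizes by HLL precision instead. A condensed sketch of the exact-count branch only (simplified; ignores the empty-range, HLL, and watchdog cases):

#include <cstdint>

enum class CountDistinctImplSketch { Bitmap, StdSet };

// One bit per possible value in [min, max]; empty or oversized ranges fall
// back to the std::set implementation, matching the logic above.
CountDistinctImplSketch choose_count_distinct_impl(const int64_t min,
                                                   const int64_t max,
                                                   int64_t& bitmap_sz_bits) {
  constexpr int64_t kMaxBitmapBits{8 * 1000 * 1000 * 1000LL};
  bitmap_sz_bits = max - min + 1;
  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > kMaxBitmapBits) {
    return CountDistinctImplSketch::StdSet;
  }
  return CountDistinctImplSketch::Bitmap;
}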

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo render_info,
const bool  output_columnar_hint 
)
private

Definition at line 305 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK(), device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

310  {
311  const auto shard_count =
312  device_type_ == ExecutorDeviceType::GPU
313  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
314  : 0;
315  bool sort_on_gpu_hint =
316  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
317  !ra_exe_unit_.sort_info.order_entries.empty() &&
318  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries);
319  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
320  // but the total output buffer size would be too big or it's a sharded top query.
321  // For the sake of managing risk, use the new result set way very selectively for
322  // this case only (alongside the baseline layout we've enabled for a while now).
323  bool must_use_baseline_sort = shard_count;
324  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
325  while (true) {
326  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
327  max_groups_buffer_entry_count,
328  crt_min_byte_width,
329  sort_on_gpu_hint,
330  render_info,
331  must_use_baseline_sort,
332  output_columnar_hint);
333  CHECK(query_mem_desc);
334  if (query_mem_desc->sortOnGpu() &&
335  (query_mem_desc->getBufferSizeBytes(device_type_) +
336  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
337  2 * 1024 * 1024 * 1024L) {
338  must_use_baseline_sort = true;
339  sort_on_gpu_hint = false;
340  } else {
341  break;
342  }
343  }
344  return query_mem_desc;
345 }
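The retry loop above abandons the GPU sort hint once the output buffer plus the int32 entry permutation vector would exceed 2 GiB, switching to baseline sort instead. A small sketch of that budget check (helper names are illustrative):

#include <cstddef>
#include <cstdint>

// Round up to the next multiple of 8 bytes, as align_to_int64 does.
inline std::size_t align_to_int64_sketch(const std::size_t n) {
  return (n + 7) & ~std::size_t(7);
}

// True when buffer size + aligned int32 index vector passes the 2 GiB cap.
bool exceeds_gpu_sort_budget(const std::size_t buffer_bytes,
                             const std::size_t entry_count) {
  return buffer_bytes + align_to_int64_sketch(entry_count * sizeof(int32_t)) >
         2ULL * 1024 * 1024 * 1024;
}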

+ Here is the call graph for this function:

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 347 of file GroupByAndAggregate.cpp.

References addTransientStringLiterals(), get_col_byte_widths(), RelAlgExecutionUnit::groupby_exprs, initCountDistinctDescriptors(), and ra_exe_unit_.

Referenced by initQueryMemoryDescriptor().

354  {
355  addTransientStringLiterals();
356 
357  const auto count_distinct_descriptors = initCountDistinctDescriptors();
358 
359  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs, {});
360 
361  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
362 
363  auto col_range_info_nosharding = getColRangeInfo();
364 
365  const auto shard_count =
366  device_type_ == ExecutorDeviceType::GPU
367  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
368  : 0;
369 
370  const auto col_range_info =
371  ColRangeInfo{col_range_info_nosharding.hash_type_,
372  col_range_info_nosharding.min,
373  col_range_info_nosharding.max,
374  getShardedTopBucket(col_range_info_nosharding, shard_count),
375  col_range_info_nosharding.has_nulls};
376 
377  // Non-grouped aggregates do not support accessing aggregated ranges
378  // Keyless hash is currently only supported with single-column perfect hash
379  const auto keyless_info =
380  !(is_group_by &&
381  col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
382  ra_exe_unit_.groupby_exprs.size() == 1)
383  ? KeylessInfo{false, -1, false}
384  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
385 
386  if (g_enable_watchdog &&
387  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
388  max_groups_buffer_entry_count > 120000000) ||
389  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
390  ra_exe_unit_.groupby_exprs.size() == 1 &&
391  (col_range_info.max - col_range_info.min) /
392  std::max(col_range_info.bucket, int64_t(1)) >
393  130000000))) {
394  throw WatchdogException("Query would use too much memory");
395  }
396  return QueryMemoryDescriptor::init(executor_,
397  ra_exe_unit_,
398  query_infos_,
399  col_range_info,
400  keyless_info,
401  allow_multifrag,
402  device_type_,
403  crt_min_byte_width,
404  sort_on_gpu_hint,
405  shard_count,
406  max_groups_buffer_entry_count,
407  render_info,
408  count_distinct_descriptors,
409  must_use_baseline_sort,
410  output_columnar_hint);
411 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions co 
) const
private

Definition at line 30 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type_, and executor_.

Referenced by TargetExprCodegen::codegen().

33  {
34  return (executor_->isArchMaxwell(co.device_type_) && threads_share_memory &&
35  llvm::isa<llvm::AllocaInst>(val_ptr) &&
36  val_ptr->getType() ==
37  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
38  "agg_id" == agg_base_name);
39 }

+ Here is the caller graph for this function:

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 41 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

41  {
42  executor_->cgen_state_->ir_builder_.CreateCall(
43  executor_->cgen_state_->module_->getFunction("force_sync"));
44 }

+ Here is the caller graph for this function:

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog catalog 
)
static

Definition at line 1878 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), and initQueryMemoryDescriptor().

1880  {
1881  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1882  return 0;
1883  }
1884  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1885  const auto grouped_col_expr =
1886  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1887  if (!grouped_col_expr) {
1888  continue;
1889  }
1890  if (grouped_col_expr->get_table_id() <= 0) {
1891  return 0;
1892  }
1893  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1894  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1895  return td->nShards;
1896  }
1897  }
1898  return 0;
1899 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool GroupByAndAggregate::supportedExprForGpuSharedMemUsage ( Analyzer::Expr expr)
staticprivate

Definition at line 808 of file GroupByAndAggregate.cpp.

References kUNNEST.

808  {
809  /*
810  UNNEST operations follow a slightly different internal memory layout compared to
811  other keyless aggregates. Currently, we opt out of using shared memory if there is
812  any UNNEST operation involved.
813  */
814  if (dynamic_cast<Analyzer::UOper*>(expr) &&
815  static_cast<Analyzer::UOper*>(expr)->get_optype() == kUNNEST) {
816  return false;
817  }
818  return true;
819 }
bool GroupByAndAggregate::supportedTypeForGpuSharedMemUsage ( const SQLTypeInfo target_type_info) const
private

Supported data types for the current shared memory usage for keyless aggregates with COUNT(*). Currently this applies only to single-column group-by queries.

Definition at line 786 of file GroupByAndAggregate.cpp.

References SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), kENCODING_DICT, kINT, kSMALLINT, kTEXT, and kTINYINT.

Referenced by getKeylessInfo().

787  {
788  bool result = false;
789  switch (target_type_info.get_type()) {
790  case SQLTypes::kTINYINT:
791  case SQLTypes::kSMALLINT:
792  case SQLTypes::kINT:
793  result = true;
794  break;
795  case SQLTypes::kTEXT:
796  if (target_type_info.get_compression() == EncodingType::kENCODING_DICT) {
797  result = true;
798  }
799  break;
800  default:
801  break;
802  }
803  return result;
804 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class Executor
friend

Definition at line 304 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 305 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 306 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 307 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private
bool GroupByAndAggregate::output_columnar_
private

Definition at line 301 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private

Definition at line 299 of file GroupByAndAggregate.h.

Referenced by getExprRangeInfo(), and getKeylessInfo().

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 300 of file GroupByAndAggregate.h.

Referenced by addTransientStringLiterals().


The documentation for this class was generated from the following files:

GroupByAndAggregate.h
GroupByAndAggregate.cpp
MaxwellCodegenPatch.cpp