OmniSciDB  1dac507f6e
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>

Classes

struct  DiamondCodegen
 

Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner >)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
 

Static Public Member Functions

static void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool supportedTypeForGpuSharedMemUsage (const SQLTypeInfo &target_type_info) const
 
bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
void addTransientStringLiterals ()
 
CountDistinctDescriptors initCountDistinctDescriptors ()
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
ColRangeInfo getExprRangeInfo (const Analyzer::Expr *expr) const
 
KeylessInfo getKeylessInfo (const std::vector< Analyzer::Expr * > &target_expr_list, const bool is_group_by) const
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored. More...
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, GroupByAndAggregate::DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
bool needsUnnestDoublePatch (llvm::Value *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static bool supportedExprForGpuSharedMemUsage (Analyzer::Expr *expr)
 
static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 130 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)

Definition at line 234 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, and ra_exe_unit_.

240  : executor_(executor)
241  , ra_exe_unit_(ra_exe_unit)
242  , query_infos_(query_infos)
243  , row_set_mem_owner_(row_set_mem_owner)
244  , device_type_(device_type) {
245  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
246  if (!groupby_expr) {
247  continue;
248  }
249  const auto& groupby_ti = groupby_expr->get_type_info();
250  if (groupby_ti.is_string() && groupby_ti.get_compression() != kENCODING_DICT) {
251  throw std::runtime_error(
252  "Cannot group by string columns which are not dictionary encoded.");
253  }
254  if (groupby_ti.is_array()) {
255  throw std::runtime_error("Group by array not supported");
256  }
257  if (groupby_ti.is_geometry()) {
258  throw std::runtime_error("Group by geometry not supported");
259  }
260  }
261 }
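The constructor's only work is validation: group-by keys must be dictionary-encoded if they are strings, and may not be arrays or geometry. A minimal standalone sketch of the same checks, with a hypothetical TypeInfoLike standing in for the parts of SQLTypeInfo the constructor queries:

#include <stdexcept>

// Hypothetical stand-in for the SQLTypeInfo queries used above.
struct TypeInfoLike {
  bool is_string;
  bool is_dict_encoded;
  bool is_array;
  bool is_geometry;
};

// Mirrors the validation order in the constructor body above.
void validate_groupby_type(const TypeInfoLike& ti) {
  if (ti.is_string && !ti.is_dict_encoded) {
    throw std::runtime_error(
        "Cannot group by string columns which are not dictionary encoded.");
  }
  if (ti.is_array) {
    throw std::runtime_error("Group by array not supported");
  }
  if (ti.is_geometry) {
    throw std::runtime_error("Group by geometry not supported");
  }
}

int main() {
  validate_groupby_type({false, false, false, false});  // integer-like key: passes
}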

Member Function Documentation

void GroupByAndAggregate::addTransientStringLiterals ( const RelAlgExecutionUnit &  ra_exe_unit,
Executor *  executor,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)
static

Definition at line 479 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::add_transient_string_literals_for_expression(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, and RelAlgExecutionUnit::target_exprs.

482  {
483  for (const auto group_expr : ra_exe_unit.groupby_exprs) {
484  add_transient_string_literals_for_expression(
485  group_expr.get(), executor, row_set_mem_owner);
486  }
487  for (const auto target_expr : ra_exe_unit.target_exprs) {
488  const auto& target_type = target_expr->get_type_info();
489  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
490  continue;
491  }
492  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
493  if (agg_expr) {
494  if (agg_expr->get_aggtype() == kSAMPLE) {
495  add_transient_string_literals_for_expression(
496  agg_expr->get_arg(), executor, row_set_mem_owner);
497  }
498  } else {
499  add_transient_string_literals_for_expression(
500  target_expr, executor, row_set_mem_owner);
501  }
502  }
503  row_set_mem_owner->addLiteralStringDictProxy(executor->lit_str_dict_proxy_);
504 }

void GroupByAndAggregate::addTransientStringLiterals ( )
private

Definition at line 413 of file GroupByAndAggregate.cpp.

References executor_, ra_exe_unit_, and row_set_mem_owner_.

Referenced by RelAlgExecutor::executeSort(), RelAlgExecutor::executeWorkUnit(), and initQueryMemoryDescriptorImpl().

413  {
414  addTransientStringLiterals(ra_exe_unit_, executor_, row_set_mem_owner_);
415 }

bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)

Definition at line 907 of file GroupByAndAggregate.cpp.

References CHECK(), codegenAggCalls(), codegenEstimator(), codegenGroupBy(), GroupByAndAggregate::DiamondCodegen::cond_false_, CompilationOptions::device_type_, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, use_streaming_top_n(), and QueryMemoryDescriptor::usesGetGroupValueFast().

910  {
911  CHECK(filter_result);
912 
913  bool can_return_error = false;
914  llvm::BasicBlock* filter_false{nullptr};
915 
916  {
917  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
918 
919  if (executor_->isArchMaxwell(co.device_type_)) {
920  prependForceSync();
921  }
922  DiamondCodegen filter_cfg(filter_result,
923  executor_,
924  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
925  "filter",
926  nullptr,
927  false);
928  filter_false = filter_cfg.cond_false_;
929 
930  if (is_group_by) {
931  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
932  !use_streaming_top_n(ra_exe_unit_, query_mem_desc.didOutputColumnar())) {
933  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
934  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
935  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
936  llvm::Value* old_total_matched_val{nullptr};
937  if (co.device_type_ == ExecutorDeviceType::GPU) {
938  old_total_matched_val =
939  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
940  total_matched_ptr,
941  LL_INT(int32_t(1)),
942  llvm::AtomicOrdering::Monotonic);
943  } else {
944  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
945  LL_BUILDER.CreateStore(
946  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
947  total_matched_ptr);
948  }
949  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
950  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
951  }
952 
953  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
954  if (query_mem_desc.usesGetGroupValueFast() ||
955  query_mem_desc.getQueryDescriptionType() ==
956  QueryDescriptionType::GroupByPerfectHash) {
957  if (query_mem_desc.getGroupbyColCount() > 1) {
958  filter_cfg.setChainToNext();
959  }
960  // Don't generate null checks if the group slot is guaranteed to be non-null,
961  // as it's the case for get_group_value_fast* family.
962  can_return_error =
963  codegenAggCalls(agg_out_ptr_w_idx, {}, query_mem_desc, co, filter_cfg);
964  } else {
965  {
966  llvm::Value* nullcheck_cond{nullptr};
967  if (query_mem_desc.didOutputColumnar()) {
968  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
969  LL_INT(int32_t(0)));
970  } else {
971  nullcheck_cond = LL_BUILDER.CreateICmpNE(
972  std::get<0>(agg_out_ptr_w_idx),
973  llvm::ConstantPointerNull::get(
974  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
975  }
976  DiamondCodegen nullcheck_cfg(
977  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
978  codegenAggCalls(agg_out_ptr_w_idx, {}, query_mem_desc, co, filter_cfg);
979  }
980  can_return_error = true;
981  if (query_mem_desc.getQueryDescriptionType() ==
982  QueryDescriptionType::Projection &&
983  use_streaming_top_n(ra_exe_unit_, query_mem_desc.didOutputColumnar())) {
984  // Ignore rejection on pushing current row to top-K heap.
985  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
986  } else {
987  CodeGenerator code_generator(executor_);
988  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
989  // TODO(alex): remove the trunc once pos is converted to 32 bits
990  code_generator.posArg(nullptr),
991  get_int_type(32, LL_CONTEXT))));
992  }
993  }
994  } else {
995  if (ra_exe_unit_.estimator) {
996  std::stack<llvm::BasicBlock*> array_loops;
997  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
998  } else {
999  auto arg_it = ROW_FUNC->arg_begin();
1000  std::vector<llvm::Value*> agg_out_vec;
1001  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1002  agg_out_vec.push_back(&*arg_it++);
1003  }
1004  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1005  agg_out_vec,
1006  query_mem_desc,
1007  co,
1008  filter_cfg);
1009  }
1010  }
1011  }
1012 
1013  if (ra_exe_unit_.join_quals.empty()) {
1014  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1015  } else if (sc_false) {
1016  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1017  LL_BUILDER.SetInsertPoint(sc_false);
1018  LL_BUILDER.CreateBr(filter_false);
1019  LL_BUILDER.SetInsertPoint(saved_insert_block);
1020  }
1021 
1022  return can_return_error;
1023 }
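In the projection branch above, the generated code publishes the old value of total_matched before incrementing it; where threads share the output buffer the update must be an atomic add (AtomicRMW with monotonic ordering), otherwise a plain load/add/store suffices. A host-side sketch of the two variants (function names hypothetical):

#include <atomic>
#include <cstdint>

// Shared path: equivalent of the AtomicRMW Add above; monotonic
// ordering in LLVM corresponds to memory_order_relaxed in C++.
int32_t bump_total_matched_atomic(std::atomic<int32_t>& total_matched) {
  return total_matched.fetch_add(1, std::memory_order_relaxed);  // returns old value
}

// Unshared path: plain load, add, store, as in the else branch above.
int32_t bump_total_matched_plain(int32_t& total_matched) {
  const int32_t old_val = total_matched;
  total_matched = old_val + 1;
  return old_val;
}

int main() {
  std::atomic<int32_t> shared{0};
  int32_t private_count{0};
  bump_total_matched_atomic(shared);
  bump_total_matched_plain(private_count);
}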

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1741 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, CodeGenerator::codegen(), executor_, get_int_type(), Analyzer::Expr::get_type_info(), kARRAY, kPOINT, kSAMPLE, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1743  {
1744  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1745  // TODO(alex): handle arrays uniformly?
1746  CodeGenerator code_generator(executor_);
1747  if (target_expr) {
1748  const auto& target_ti = target_expr->get_type_info();
1749  if (target_ti.is_array() && !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1750  const auto target_lvs =
1751  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1752  : code_generator.codegen(
1753  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1754  if (target_ti.isChunkIteratorPackaging()) {
1755  // Something with the chunk transport is code that was generated from a source
1756  // other than an ARRAY[] expression
1757  CHECK_EQ(size_t(1), target_lvs.size());
1758  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1759  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1760  const auto i8p_ty =
1761  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1762  const auto& elem_ti = target_ti.get_elem_type();
1763  return {
1764  executor_->cgen_state_->emitExternalCall(
1765  "array_buff",
1766  i8p_ty,
1767  {target_lvs.front(), code_generator.posArg(target_expr)}),
1768  executor_->cgen_state_->emitExternalCall(
1769  "array_size",
1770  i32_ty,
1771  {target_lvs.front(),
1772  code_generator.posArg(target_expr),
1773  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1774  } else if (target_ti.isStandardBufferPackaging()) {
1775  if (agg_expr) {
1776  throw std::runtime_error(
1777  "Using array[] operator as argument to an aggregate operator is not "
1778  "supported");
1779  }
1780  return {target_lvs[0], target_lvs[1]};
1781  }
1782  }
1783  if (target_ti.is_geometry() &&
1784  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1785  auto generate_coord_lvs =
1786  [&](auto* selected_target_expr,
1787  bool const fetch_columns) -> std::vector<llvm::Value*> {
1788  const auto target_lvs =
1789  code_generator.codegen(selected_target_expr, fetch_columns, co);
1790  const auto geo_expr = dynamic_cast<const Analyzer::GeoExpr*>(target_expr);
1791  if (geo_expr) {
1792  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1793  target_lvs.size());
1794  return target_lvs;
1795  }
1796  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1797  target_lvs.size());
1798 
1799  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1800  const auto i8p_ty =
1801  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1802  std::vector<llvm::Value*> coords;
1803  size_t ctr = 0;
1804  for (const auto& target_lv : target_lvs) {
1805  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1806  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1807  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1808  // coords array (TINYINT). Subsequent arrays are regular INT.
1809 
1810  const size_t elem_sz = ctr == 0 ? 1 : 4;
1811  ctr++;
1812  int32_t fixlen = -1;
1813  if (target_ti.get_type() == kPOINT) {
1814  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1815  if (col_var) {
1816  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1817  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1818  fixlen = coords_cd->columnType.get_size();
1819  }
1820  }
1821  }
1822  if (fixlen > 0) {
1823  coords.push_back(executor_->cgen_state_->emitExternalCall(
1824  "fast_fixlen_array_buff",
1825  i8p_ty,
1826  {target_lv, code_generator.posArg(selected_target_expr)}));
1827  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1828  continue;
1829  }
1830  coords.push_back(executor_->cgen_state_->emitExternalCall(
1831  "array_buff",
1832  i8p_ty,
1833  {target_lv, code_generator.posArg(selected_target_expr)}));
1834  coords.push_back(executor_->cgen_state_->emitExternalCall(
1835  "array_size",
1836  i32_ty,
1837  {target_lv,
1838  code_generator.posArg(selected_target_expr),
1839  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1840  }
1841  return coords;
1842  };
1843 
1844  if (agg_expr) {
1845  return generate_coord_lvs(agg_expr->get_arg(), true);
1846  } else {
1847  return generate_coord_lvs(target_expr,
1848  !executor_->plan_state_->allow_lazy_fetch_);
1849  }
1850  }
1851  }
1852  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1853  : code_generator.codegen(
1854  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1855 }
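For array targets the function returns a value pair, the buffer pointer from array_buff and the element count from array_size, and passes log2 of the element width so the runtime can shift instead of divide. A sketch of that log2_bytes arithmetic (the test harness is illustrative):

#include <cassert>
#include <cstdint>

// Same arithmetic as the log2_bytes(...) helper used above;
// bytes is assumed to be a power of two.
uint32_t log2_bytes_sketch(uint32_t bytes) {
  uint32_t result = 0;
  while (bytes > 1) {
    bytes >>= 1;
    ++result;
  }
  return result;
}

int main() {
  assert(log2_bytes_sketch(1) == 0);  // TINYINT coords array
  assert(log2_bytes_sketch(4) == 2);  // INT-sized elements
  assert(log2_bytes_sketch(8) == 3);  // 64-bit elements
}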

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const std::vector< llvm::Value * > &  agg_out_vec,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1496 of file GroupByAndAggregate.cpp.

References CHECK(), TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1501  {
1502  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1503  // TODO(alex): unify the two cases, the output for non-group by queries
1504  // should be a contiguous buffer
1505  const bool is_group_by{std::get<0>(agg_out_ptr_w_idx)};
1506  bool can_return_error = false;
1507  if (is_group_by) {
1508  CHECK(agg_out_vec.empty());
1509  } else {
1510  CHECK(!agg_out_vec.empty());
1511  }
1512 
1513  // output buffer is casted into a byte stream to be able to handle data elements of
1514  // different sizes (only used when actual column width sizes are used)
1515  llvm::Value* output_buffer_byte_stream{nullptr};
1516  llvm::Value* out_row_idx{nullptr};
1517  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1518  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1519  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1520  std::get<0>(agg_out_ptr_w_idx),
1521  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1522  output_buffer_byte_stream->setName("out_buff_b_stream");
1523  CHECK(std::get<1>(agg_out_ptr_w_idx));
1524  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1525  llvm::Type::getInt64Ty(LL_CONTEXT));
1526  out_row_idx->setName("out_row_idx");
1527  }
1528 
1529  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1530  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1531  ++target_idx) {
1532  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1533  CHECK(target_expr);
1534 
1535  target_builder(target_expr, executor_, co);
1536  }
1537 
1538  target_builder.codegen(this,
1539  executor_,
1540  query_mem_desc,
1541  co,
1542  agg_out_ptr_w_idx,
1543  agg_out_vec,
1544  output_buffer_byte_stream,
1545  out_row_idx,
1546  diamond_codegen);
1547 
1548  for (auto target_expr : ra_exe_unit_.target_exprs) {
1549  CHECK(target_expr);
1550  executor_->plan_state_->isLazyFetchColumn(target_expr);
1551  }
1552 
1553  return can_return_error;
1554 }

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1559 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1566  {
1567  llvm::Value* agg_col_ptr{nullptr};
1568  if (query_mem_desc.didOutputColumnar()) {
1569  // TODO(Saman): remove the second columnar branch, and support all query description
1570  // types through the first branch. Then, input arguments should also be cleaned up
1571  if (!g_cluster &&
1572  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1573  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1574  chosen_bytes == 8);
1575  CHECK(output_buffer_byte_stream);
1576  CHECK(out_row_idx);
1577  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1578  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1579  auto out_per_col_byte_idx =
1580  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1581  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1582  LL_INT(static_cast<int64_t>(col_off)));
1583  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1584  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1585  agg_col_ptr = LL_BUILDER.CreateBitCast(
1586  output_ptr,
1587  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1588  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1589  } else {
1590  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1591  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1592  col_off /= chosen_bytes;
1593  CHECK(std::get<1>(agg_out_ptr_w_idx));
1594  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1595  agg_col_ptr = LL_BUILDER.CreateGEP(
1596  LL_BUILDER.CreateBitCast(
1597  std::get<0>(agg_out_ptr_w_idx),
1598  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1599  offset);
1600  }
1601  } else {
1602  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1603  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1604  col_off /= chosen_bytes;
1605  agg_col_ptr = LL_BUILDER.CreateGEP(
1606  LL_BUILDER.CreateBitCast(
1607  std::get<0>(agg_out_ptr_w_idx),
1608  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1609  LL_INT(col_off));
1610  }
1611  CHECK(agg_col_ptr);
1612  return agg_col_ptr;
1613 }
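In the columnar fast path above, the byte offset of a slot is col_off + (out_row_idx << log2(chosen_bytes)), with the shift amount obtained as __builtin_ffs(chosen_bytes) - 1. The same addressing math on the host (buffer layout and values illustrative):

#include <cassert>
#include <cstddef>

// Byte offset of row `row_idx` in a column whose slot starts at `col_off`
// bytes and stores `chosen_bytes`-wide values, as in the first branch above.
size_t columnar_byte_offset(size_t col_off, size_t row_idx, size_t chosen_bytes) {
  // __builtin_ffs(x) - 1 == log2(x) for powers of two (GCC/Clang builtin).
  const int shift = __builtin_ffs(static_cast<int>(chosen_bytes)) - 1;
  return col_off + (row_idx << shift);
}

int main() {
  // A 4-byte column starting 128 bytes into the output buffer.
  assert(columnar_byte_offset(128, 0, 4) == 128);
  assert(columnar_byte_offset(128, 3, 4) == 140);
}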

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1664 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK(), CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegen().

1669  {
1670  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1671  const auto& arg_ti =
1672  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1673  if (arg_ti.is_fp()) {
1674  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1675  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1676  }
1677  const auto& count_distinct_descriptor =
1678  query_mem_desc.getCountDistinctDescriptor(target_idx);
1679  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1680  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1681  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1682  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1683  if (device_type == ExecutorDeviceType::GPU) {
1684  const auto base_dev_addr = getAdditionalLiteral(-1);
1685  const auto base_host_addr = getAdditionalLiteral(-2);
1686  agg_args.push_back(base_dev_addr);
1687  agg_args.push_back(base_host_addr);
1688  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1689  } else {
1690  emitCall("agg_approximate_count_distinct", agg_args);
1691  }
1692  return;
1693  }
1694  std::string agg_fname{"agg_count_distinct"};
1695  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1696  agg_fname += "_bitmap";
1697  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1698  }
1699  if (agg_info.skip_null_val) {
1700  auto null_lv = executor_->cgen_state_->castToTypeIn(
1701  (arg_ti.is_fp()
1702  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1703  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1704  64);
1705  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1706  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1707  agg_fname += "_skip_val";
1708  agg_args.push_back(null_lv);
1709  }
1710  if (device_type == ExecutorDeviceType::GPU) {
1711  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1712  agg_fname += "_gpu";
1713  const auto base_dev_addr = getAdditionalLiteral(-1);
1714  const auto base_host_addr = getAdditionalLiteral(-2);
1715  agg_args.push_back(base_dev_addr);
1716  agg_args.push_back(base_host_addr);
1717  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1718  CHECK_EQ(size_t(0),
1719  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1720  count_distinct_descriptor.sub_bitmap_count);
1721  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1722  count_distinct_descriptor.sub_bitmap_count)));
1723  }
1724  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1725  emitCall(agg_fname, agg_args);
1726  } else {
1727  executor_->cgen_state_->emitExternalCall(
1728  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1729  }
1730 }
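On GPU the bitmap is split into sub_bitmap_count equal slices, and the code above checks that the padded bitmap size divides evenly before passing the per-slice size to the runtime. The arithmetic, sketched on the host:

#include <cassert>
#include <cstdint>

// Mirrors the GPU argument setup above: the padded bitmap is split into
// sub_bitmap_count equal slices, and the per-slice size is passed along.
int64_t sub_bitmap_size_bytes(int64_t bitmap_padded_size_bytes,
                              int64_t sub_bitmap_count) {
  assert(bitmap_padded_size_bytes % sub_bitmap_count == 0);  // the CHECK_EQ above
  return bitmap_padded_size_bytes / sub_bitmap_count;
}

int main() {
  assert(sub_bitmap_size_bytes(4096, 4) == 1024);
}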

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
GroupByAndAggregate::DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1615 of file GroupByAndAggregate.cpp.

References CHECK(), emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1619  {
1620  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1621  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1622  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1623  estimator_comp_count_lv);
1624  int32_t subkey_idx = 0;
1625  for (const auto estimator_arg_comp : estimator_arg) {
1626  const auto estimator_arg_comp_lvs =
1627  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1628  query_mem_desc.getEffectiveKeyWidth(),
1629  co,
1630  false,
1631  0,
1632  diamond_codegen,
1633  array_loops,
1634  true);
1635  CHECK(!estimator_arg_comp_lvs.original_value);
1636  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1637  // store the sub-key to the buffer
1638  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1639  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1640  }
1641  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1642  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1643  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1644  const auto estimator_comp_bytes_lv =
1645  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1646  const auto bitmap_size_lv =
1647  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1648  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1649  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1650 }
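The estimator key is a stack buffer with one 64-bit slot per estimator component; the runtime call receives it as raw bytes plus the byte count estimator_arg.size() * sizeof(int64_t). A host-side sketch of that layout (values hypothetical):

#include <cstdint>
#include <vector>

int main() {
  // One int64 slot per estimator argument component, as alloca'd above.
  std::vector<int64_t> estimator_key{42, -7, 1000};
  // The runtime function receives the key as bytes plus its byte size,
  // mirroring estimator_arg.size() * sizeof(int64_t) above.
  const auto* key_bytes = reinterpret_cast<const int8_t*>(estimator_key.data());
  const int32_t key_byte_count =
      static_cast<int32_t>(estimator_key.size() * sizeof(int64_t));
  (void)key_bytes;
  (void)key_byte_count;
}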

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1116 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), getExprRangeInfo(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, groups_buffer, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1119  {
1120  auto arg_it = ROW_FUNC->arg_begin();
1121  auto groups_buffer = arg_it++;
1122 
1123  std::stack<llvm::BasicBlock*> array_loops;
1124 
1125  // TODO(Saman): move this logic outside of this function.
1126  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1127  if (query_mem_desc.didOutputColumnar()) {
1128  return std::make_tuple(
1129  &*groups_buffer,
1130  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1131  } else {
1132  return std::make_tuple(
1133  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1134  nullptr);
1135  }
1136  }
1137 
1138  CHECK(query_mem_desc.getQueryDescriptionType() ==
1139  QueryDescriptionType::GroupByBaselineHash ||
1140  query_mem_desc.getQueryDescriptionType() ==
1141  QueryDescriptionType::GroupByPerfectHash);
1142 
1143  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1144  ? 0
1145  : query_mem_desc.getRowSize() / sizeof(int64_t);
1146 
1147  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1148  ? sizeof(int64_t)
1149  : query_mem_desc.getEffectiveKeyWidth();
1150  // for multi-column group by
1151  llvm::Value* group_key = nullptr;
1152  llvm::Value* key_size_lv = nullptr;
1153 
1154  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1155  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1156  if (query_mem_desc.getQueryDescriptionType() ==
1157  QueryDescriptionType::GroupByPerfectHash) {
1158  group_key =
1159  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1160  } else if (query_mem_desc.getQueryDescriptionType() ==
1161  QueryDescriptionType::GroupByBaselineHash) {
1162  group_key =
1163  col_width_size == sizeof(int32_t)
1164  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1165  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1166  }
1167  CHECK(group_key);
1168  CHECK(key_size_lv);
1169  }
1170 
1171  int32_t subkey_idx = 0;
1172  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1173  for (const auto group_expr : ra_exe_unit_.groupby_exprs) {
1174  const auto col_range_info = getExprRangeInfo(group_expr.get());
1175  const auto translated_null_value = static_cast<int64_t>(
1176  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1177  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1178  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1179  : checked_int64_t(col_range_info.max) +
1180  (col_range_info.bucket ? col_range_info.bucket : 1));
1181 
1182  const bool col_has_nulls =
1183  query_mem_desc.getQueryDescriptionType() ==
1184  QueryDescriptionType::GroupByPerfectHash
1185  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1186  ? query_mem_desc.hasNulls()
1187  : col_range_info.has_nulls)
1188  : false;
1189 
1190  const auto group_expr_lvs =
1191  executor_->groupByColumnCodegen(group_expr.get(),
1192  col_width_size,
1193  co,
1194  col_has_nulls,
1195  translated_null_value,
1196  diamond_codegen,
1197  array_loops,
1198  query_mem_desc.threadsShareMemory());
1199  const auto group_expr_lv = group_expr_lvs.translated_value;
1200  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1201  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1202  return codegenSingleColumnPerfectHash(query_mem_desc,
1203  co,
1204  &*groups_buffer,
1205  group_expr_lv,
1206  group_expr_lvs.original_value,
1207  row_size_quad);
1208  } else {
1209  // store the sub-key to the buffer
1210  LL_BUILDER.CreateStore(group_expr_lv,
1211  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1212  }
1213  }
1214  if (query_mem_desc.getQueryDescriptionType() ==
1215  QueryDescriptionType::GroupByPerfectHash) {
1216  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1217  return codegenMultiColumnPerfectHash(
1218  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1219  } else if (query_mem_desc.getQueryDescriptionType() ==
1220  QueryDescriptionType::GroupByBaselineHash) {
1221  return codegenMultiColumnBaselineHash(co,
1222  &*groups_buffer,
1223  group_key,
1224  key_size_lv,
1225  query_mem_desc,
1226  col_width_size,
1227  row_size_quad);
1228  }
1229  CHECK(false);
1230  return std::make_tuple(nullptr, nullptr);
1231 }
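Each group key's NULL is translated to the first value past the column's (bucketed) range: max + (bucket ? bucket : 1), computed above with checked_int64_t to catch overflow. A plain sketch of the mapping:

#include <cassert>
#include <cstdint>

// NULL keys are mapped just past the column range, as in the
// translated_null_value computation above (overflow checking elided here;
// the real code uses checked_int64_t).
int64_t translated_null_value(int64_t col_max, int64_t bucket) {
  return col_max + (bucket ? bucket : 1);
}

int main() {
  assert(translated_null_value(100, 0) == 101);   // unbucketed range
  assert(translated_null_value(100, 10) == 110);  // bucketed range
}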

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1317 of file GroupByAndAggregate.cpp.

References CHECK(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), QueryMemoryDescriptor::getEntryCount(), groups_buffer, LL_BUILDER, LL_CONTEXT, LL_INT, ROW_FUNC, and CompilationOptions::with_dynamic_watchdog_.

Referenced by codegenGroupBy().

1324  {
1325  auto arg_it = ROW_FUNC->arg_begin(); // groups_buffer
1326  ++arg_it; // current match count
1327  ++arg_it; // total match count
1328  ++arg_it; // old match count
1329  ++arg_it; // output buffer slots count
1330  ++arg_it; // aggregate init values
1331  CHECK(arg_it->getName() == "agg_init_val");
1332  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1333  CHECK(key_width == sizeof(int32_t));
1334  group_key =
1335  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1336  }
1337  std::vector<llvm::Value*> func_args{
1338  groups_buffer,
1339  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1340  &*group_key,
1341  &*key_size_lv,
1342  LL_INT(static_cast<int32_t>(key_width))};
1343  std::string func_name{"get_group_value"};
1344  if (query_mem_desc.didOutputColumnar()) {
1345  func_name += "_columnar_slot";
1346  } else {
1347  func_args.push_back(LL_INT(row_size_quad));
1348  func_args.push_back(&*arg_it);
1349  }
1350  if (co.with_dynamic_watchdog_) {
1351  func_name += "_with_watchdog";
1352  }
1353  if (query_mem_desc.didOutputColumnar()) {
1354  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1355  } else {
1356  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1357  }
1358 }
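The runtime entry point is assembled from suffixes: get_group_value, plus _columnar_slot for columnar output and _with_watchdog when the dynamic watchdog is enabled. A sketch of the selection logic:

#include <cassert>
#include <string>

// Same suffix composition as func_name above.
std::string baseline_hash_func_name(bool output_columnar, bool with_dynamic_watchdog) {
  std::string func_name{"get_group_value"};
  if (output_columnar) {
    func_name += "_columnar_slot";
  }
  if (with_dynamic_watchdog) {
    func_name += "_with_watchdog";
  }
  return func_name;
}

int main() {
  assert(baseline_hash_func_name(false, true) == "get_group_value_with_watchdog");
  assert(baseline_hash_func_name(true, false) == "get_group_value_columnar_slot");
}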

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1283 of file GroupByAndAggregate.cpp.

References CHECK(), codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, groups_buffer, LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1288  {
1289  CHECK(query_mem_desc.getQueryDescriptionType() ==
1290  QueryDescriptionType::GroupByPerfectHash);
1291  // compute the index (perfect hash)
1292  auto perfect_hash_func = codegenPerfectHashFunction();
1293  auto hash_lv =
1294  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1295 
1296  if (query_mem_desc.didOutputColumnar()) {
1297  const std::string set_matching_func_name{
1298  "set_matching_group_value_perfect_hash_columnar"};
1299  const std::vector<llvm::Value*> set_matching_func_arg{
1300  groups_buffer,
1301  hash_lv,
1302  group_key,
1303  key_size_lv,
1304  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1305  query_mem_desc.getEntryCount())};
1306  emitCall(set_matching_func_name, set_matching_func_arg);
1307  return std::make_tuple(groups_buffer, hash_lv);
1308  } else {
1309  return std::make_tuple(
1310  emitCall("get_matching_group_value_perfect_hash",
1311  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1312  nullptr);
1313  }
1314 }

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1025 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, CHECK(), CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, groups_buffer, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), RelAlgExecutionUnit::use_bump_allocator, and use_streaming_top_n().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1029  {
1030  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1031  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1032  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1033  CHECK(!group_expr);
1034  if (!query_mem_desc.didOutputColumnar()) {
1035  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1036  }
1037  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1038  ? 0
1039  : query_mem_desc.getRowSize() / sizeof(int64_t);
1040  CodeGenerator code_generator(executor_);
1041  if (use_streaming_top_n(ra_exe_unit_, query_mem_desc.didOutputColumnar())) {
1042  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1043  CHECK_GE(only_order_entry.tle_no, int(1));
1044  const size_t target_idx = only_order_entry.tle_no - 1;
1045  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1046  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1047  const auto chosen_bytes =
1048  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1049  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1050  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1051  const size_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1052  std::string fname = "get_bin_from_k_heap";
1053  const auto& oe_ti = order_entry_expr->get_type_info();
1054  llvm::Value* null_key_lv = nullptr;
1055  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1056  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1057  switch (bit_width) {
1058  case 32:
1059  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1060  break;
1061  case 64:
1062  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1063  break;
1064  default:
1065  CHECK(false);
1066  }
1067  fname += "_int" + std::to_string(bit_width) + "_t";
1068  } else {
1069  CHECK(oe_ti.is_fp());
1070  if (order_entry_lv->getType()->isDoubleTy()) {
1071  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1072  } else {
1073  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1074  }
1075  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1076  }
1077  const auto key_slot_idx =
1078  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1079  return emitCall(
1080  fname,
1081  {groups_buffer,
1082  LL_INT(n),
1083  LL_INT(row_size_quad),
1084  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1085  LL_BOOL(only_order_entry.is_desc),
1086  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1087  LL_BOOL(only_order_entry.nulls_first),
1088  null_key_lv,
1089  order_entry_lv});
1090  } else {
1091  llvm::Value* output_buffer_entry_count_lv{nullptr};
1092  if (ra_exe_unit_.use_bump_allocator) {
1093  output_buffer_entry_count_lv =
1094  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1095  CHECK(output_buffer_entry_count_lv);
1096  }
1097  const auto group_expr_lv =
1098  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1099  std::vector<llvm::Value*> args{
1100  groups_buffer,
1101  output_buffer_entry_count_lv
1102  ? output_buffer_entry_count_lv
1103  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1104  group_expr_lv,
1105  code_generator.posArg(nullptr)};
1106  if (query_mem_desc.didOutputColumnar()) {
1107  const auto columnar_output_offset =
1108  emitCall("get_columnar_scan_output_offset", args);
1109  return columnar_output_offset;
1110  }
1111  args.push_back(LL_INT(row_size_quad));
1112  return emitCall("get_scan_output_slot", args);
1113  }
1114 }
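For the streaming top-n path, the heap probe's name encodes the order key's type (_int32_t/_int64_t for integer-like keys, _float/_double for floating point), and the heap size n is sort_info.offset + sort_info.limit. A sketch of the name selection (is_fp and bit_width stand in for the order entry's type info):

#include <cassert>
#include <string>

// Mirrors the fname construction above for the streaming top-n path.
std::string k_heap_func_name(bool is_fp, size_t bit_width) {
  std::string fname = "get_bin_from_k_heap";
  if (!is_fp) {
    fname += "_int" + std::to_string(bit_width) + "_t";
  } else {
    fname += (bit_width == 64) ? "_double" : "_float";
  }
  return fname;
}

int main() {
  assert(k_heap_func_name(false, 32) == "get_bin_from_k_heap_int32_t");
  assert(k_heap_func_name(true, 64) == "get_bin_from_k_heap_double");
}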

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1360 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_GT, executor_, get_int_type(), getBucketedCardinality(), getExprRangeInfo(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1360  {
1361  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1362  auto ft = llvm::FunctionType::get(
1363  get_int_type(32, LL_CONTEXT),
1364  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1365  false);
1366  auto key_hash_func = llvm::Function::Create(ft,
1367  llvm::Function::ExternalLinkage,
1368  "perfect_key_hash",
1369  executor_->cgen_state_->module_);
1370  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1371  mark_function_always_inline(key_hash_func);
1372  auto& key_buff_arg = *key_hash_func->args().begin();
1373  llvm::Value* key_buff_lv = &key_buff_arg;
1374  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1375  llvm::IRBuilder<> key_hash_func_builder(bb);
1376  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1377  std::vector<int64_t> cardinalities;
1378  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
1379  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1380  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1381  cardinalities.push_back(getBucketedCardinality(col_range_info));
1382  }
1383  size_t dim_idx = 0;
1384  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
1385  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1386  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1387  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1388  auto crt_term_lv =
1389  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1390  if (col_range_info.bucket) {
1391  crt_term_lv =
1392  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1393  }
1394  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1395  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1396  LL_INT(cardinalities[prev_dim_idx]));
1397  }
1398  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1399  ++dim_idx;
1400  }
1401  key_hash_func_builder.CreateRet(
1402  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1403  return key_hash_func;
1404 }
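The generated hash function computes a mixed-radix index: each key component is rebased by its column minimum, divided by the bucket size if bucketed, and scaled by the product of the bucketed cardinalities of all previous dimensions. An equivalent host-side computation:

#include <cassert>
#include <cstdint>
#include <vector>

struct ColRange {
  int64_t min;
  int64_t bucket;       // 0 means unbucketed
  int64_t cardinality;  // bucketed cardinality of the dimension
};

// Host-side mirror of the IR emitted by codegenPerfectHashFunction above.
int32_t perfect_key_hash(const std::vector<int64_t>& key,
                         const std::vector<ColRange>& dims) {
  int64_t hash = 0;
  for (size_t dim_idx = 0; dim_idx < key.size(); ++dim_idx) {
    int64_t term = key[dim_idx] - dims[dim_idx].min;
    if (dims[dim_idx].bucket) {
      term /= dims[dim_idx].bucket;
    }
    // Scale by the cardinalities of all previous dimensions.
    for (size_t prev = 0; prev < dim_idx; ++prev) {
      term *= dims[prev].cardinality;
    }
    hash += term;
  }
  return static_cast<int32_t>(hash);
}

int main() {
  // Two dimensions: first in [10, 19] (cardinality 10), second in [0, 4].
  const std::vector<ColRange> dims{{10, 0, 10}, {0, 0, 5}};
  assert(perfect_key_hash({10, 0}, dims) == 0);
  assert(perfect_key_hash({13, 2}, dims) == 3 + 2 * 10);
}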

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1234 of file GroupByAndAggregate.cpp.

References CHECK(), CompilationOptions::device_type_, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), groups_buffer, QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1240  {
1241  CHECK(query_mem_desc.usesGetGroupValueFast());
1242  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1243  ? "get_columnar_group_bin_offset"
1244  : "get_group_value_fast"};
1245  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1246  get_group_fn_name += "_keyless";
1247  }
1248  if (query_mem_desc.interleavedBins(co.device_type_)) {
1249  CHECK(!query_mem_desc.didOutputColumnar());
1250  CHECK(query_mem_desc.hasKeylessHash());
1251  get_group_fn_name += "_semiprivate";
1252  }
1253  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1254  &*group_expr_lv_translated};
1255  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1256  query_mem_desc.mustUseBaselineSort()) {
1257  get_group_fn_name += "_with_original_key";
1258  get_group_fn_args.push_back(group_expr_lv_original);
1259  }
1260  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1261  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1262  if (!query_mem_desc.hasKeylessHash()) {
1263  if (!query_mem_desc.didOutputColumnar()) {
1264  get_group_fn_args.push_back(LL_INT(row_size_quad));
1265  }
1266  } else {
1267  if (!query_mem_desc.didOutputColumnar()) {
1268  get_group_fn_args.push_back(LL_INT(row_size_quad));
1269  }
1270  if (query_mem_desc.interleavedBins(co.device_type_)) {
1271  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1272  get_group_fn_args.push_back(warp_idx);
1273  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1274  }
1275  }
1276  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1277  return std::make_tuple(&*groups_buffer,
1278  emitCall(get_group_fn_name, get_group_fn_args));
1279  }
1280  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1281 }
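
All of the "get_group_value_fast" variants resolve a single-column perfect-hash key to a slot in the groups buffer. A simplified sketch of the row-wise case (EMPTY_KEY_64 is assumed to be the sentinel for an unused bin; this is not the actual runtime source):

// Sketch: perfect-hash bin lookup for one group-by column. Each bin holds
// the key followed by its aggregate slots, row_size_quad int64 values wide.
#include <cstdint>

int64_t* get_group_value_fast_sketch(int64_t* groups_buffer,
                                     const int64_t key,
                                     const int64_t min_key,
                                     const int64_t bucket,
                                     const uint32_t row_size_quad) {
  int64_t bin = key - min_key;
  if (bucket) {
    bin /= bucket;
  }
  const int64_t off = bin * row_size_quad;
  if (groups_buffer[off] == EMPTY_KEY_64) {
    groups_buffer[off] = key;  // claim the bin on first use
  }
  return groups_buffer + off + 1;  // aggregate slots start right after the key
}

In the columnar case, "get_columnar_group_bin_offset" yields a bin offset rather than a row pointer, which is why the returned tuple keeps the groups buffer itself as its first element there.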

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction *  window_func,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1456 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, CHECK(), codegenOutputSlot(), COUNT, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), groups_buffer, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1460  {
1461  const auto window_func_context =
1462  WindowProjectNodeContext::getActiveWindowFunctionContext();
1463  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1464  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1465  ? 0
1466  : query_mem_desc.getRowSize() / sizeof(int64_t);
1467  auto arg_it = ROW_FUNC->arg_begin();
1468  auto groups_buffer = arg_it++;
1469  CodeGenerator code_generator(executor_);
1470  if (!window_func_context->getRowNumber()) {
1471  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1472  window_func_context->setRowNumber(emitCall(
1473  "row_number_window_func",
1474  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1475  code_generator.posArg(nullptr)}));
1476  }
1477  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1478  get_int_type(32, LL_CONTEXT));
1479  llvm::Value* entry_count_lv =
1480  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1481  std::vector<llvm::Value*> args{
1482  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1483  if (query_mem_desc.didOutputColumnar()) {
1484  const auto columnar_output_offset =
1485  emitCall("get_columnar_scan_output_offset", args);
1486  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1487  }
1488  args.push_back(LL_INT(row_size_quad));
1489  return emitCall("get_scan_output_slot", args);
1490  }
1491  auto arg_it = ROW_FUNC->arg_begin();
1492  auto groups_buffer = arg_it++;
1493  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1494 }

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo &  arg_type,
const TargetInfo &  agg_info,
llvm::Value *  target 
)
private

Definition at line 1406 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, CHECK(), executor_, SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegen().

1408  {
1409  const auto& agg_type = agg_info.sql_type;
1410  const size_t chosen_bytes = agg_type.get_size();
1411 
1412  bool need_conversion{false};
1413  llvm::Value* arg_null{nullptr};
1414  llvm::Value* agg_null{nullptr};
1415  llvm::Value* target_to_cast{target};
1416  if (arg_type.is_fp()) {
1417  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1418  if (agg_type.is_fp()) {
1419  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1420  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1421  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1422  need_conversion = true;
1423  }
1424  } else {
1425  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1426  return target;
1427  }
1428  } else {
1429  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1430  if (agg_type.is_fp()) {
1431  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1432  need_conversion = true;
1433  target_to_cast = executor_->castToFP(target);
1434  } else {
1435  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1436  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1437  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1438  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1439  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1440  need_conversion = true;
1441  }
1442  }
1443  }
1444  if (need_conversion) {
1445  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1446  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1447  return LL_BUILDER.CreateSelect(
1448  cmp,
1449  agg_null,
1450  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1451  } else {
1452  return target;
1453  }
1454 }
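
For example, when a nullable SMALLINT column is summed into a 64-bit aggregate slot, the 16-bit and 64-bit integer null sentinels differ, so need_conversion is set and the generated code amounts to select(target == arg_null, agg_null, cast_to_64(target)); without this remapping, the narrow null sentinel would flow into the aggregate as an ordinary value.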

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 1857 of file GroupByAndAggregate.cpp.

References executor_.

Referenced by TargetExprCodegen::codegen(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

1858  {
1859  return executor_->cgen_state_->emitCall(fname, args);
1860 }

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1732 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1732  {
1733  CHECK_LT(off, 0);
1734  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1735  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1736  LL_BUILDER.CreateBitCast(lit_buff_lv,
1737  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1738  LL_INT(off)));
1739 }
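
The offset must be negative (CHECK_LT(off, 0)): the function bitcasts the row function's "literals" argument to an int64 pointer and loads the word at that index, stepping backwards from the pointer. This is how codegenCountDistinct() fetches the extra 64-bit literals registered for count-distinct targets.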

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo &  col_range_info)
static private

Definition at line 217 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

217  {
218  checked_int64_t crt_col_cardinality =
219  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
220  if (col_range_info.bucket) {
221  crt_col_cardinality /= col_range_info.bucket;
222  }
223  return static_cast<int64_t>(crt_col_cardinality +
224  (1 + (col_range_info.has_nulls ? 1 : 0)));
225 }
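
Worked example: a column range of [10, 50] with bucket size 5 gives (50 - 10) / 5 = 8, plus one entry so the maximum value itself is addressable, plus one more when the range has nulls, for 10 entries in total. Because the arithmetic runs on checked_int64_t, an overflowing range surfaces as an exception rather than a silently wrapped cardinality, which getColRangeInfo() catches and answers with the baseline hash layout.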

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 118 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), getBucketedCardinality(), getExprRangeInfo(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), ra_exe_unit_, RelAlgExecutionUnit::simple_quals, and RelAlgExecutionUnit::target_exprs.

118  {
119  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
120  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
121  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
122  // can expect this to be true anyway for grouped queries since the precise version
123  // uses significantly more memory.
124  const int64_t baseline_threshold =
125  has_count_distinct(ra_exe_unit_)
126  ? (device_type_ == ExecutorDeviceType::GPU ? (Executor::baseline_threshold / 4)
127  : Executor::baseline_threshold)
128  : Executor::baseline_threshold;
129  if (ra_exe_unit_.groupby_exprs.size() != 1) {
130  try {
131  checked_int64_t cardinality{1};
132  bool has_nulls{false};
133  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
134  auto col_range_info = getExprRangeInfo(groupby_expr.get());
135  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
136  // going through baseline hash if a non-integer type is encountered
137  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
138  }
139  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
140  CHECK_GE(crt_col_cardinality, 0);
141  cardinality *= crt_col_cardinality;
142  if (col_range_info.has_nulls) {
143  has_nulls = true;
144  }
145  }
146  // For zero or high cardinalities, use baseline layout.
147  if (!cardinality || cardinality > baseline_threshold) {
148  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
149  }
150  return {QueryDescriptionType::GroupByPerfectHash,
151  0,
152  int64_t(cardinality),
153  0,
154  has_nulls};
155  } catch (...) { // overflow when computing cardinality
156  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
157  }
158  }
159  // For single column groupby on high timestamps, force baseline hash due to wide ranges
160  // we are likely to encounter when applying quals to the expression range
161  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
162  // the range is small enough
163  if (ra_exe_unit_.groupby_exprs.front() &&
164  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
165  ra_exe_unit_.simple_quals.size() > 0) {
166  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
167  }
168  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
169  if (!ra_exe_unit_.groupby_exprs.front()) {
170  return col_range_info;
171  }
172  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
173  const int64_t col_count =
174  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
175  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
176  if (has_count_distinct(ra_exe_unit_)) {
177  max_entry_count = std::min(max_entry_count, baseline_threshold);
178  }
179  if ((!ra_exe_unit_.groupby_exprs.front()->get_type_info().is_string() &&
180  !expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(), *executor_->catalog_)) &&
181  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
182  !col_range_info.bucket) {
183  return {QueryDescriptionType::GroupByBaselineHash,
184  col_range_info.min,
185  col_range_info.max,
186  0,
187  col_range_info.has_nulls};
188  }
189  return col_range_info;
190 }
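
Worked example for the multi-column path: grouping by two integer columns with bucketed cardinalities 100 and 2,000 yields a product of 200,000; as long as that stays at or below baseline_threshold, the perfect-hash layout is kept with the product as the synthetic range [0, cardinality]. A non-integer key column, a zero or overflowing product, or a product above the threshold all route the query to the baseline hash layout instead.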

ColRangeInfo GroupByAndAggregate::getExprRangeInfo ( const Analyzer::Expr *  expr) const
private

Definition at line 192 of file GroupByAndAggregate.cpp.

References CHECK(), Double, executor_, Float, getExpressionRange(), GroupByBaselineHash, GroupByPerfectHash, Integer, Invalid, NonGroupedAggregate, Projection, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::simple_quals.

Referenced by codegenGroupBy(), codegenPerfectHashFunction(), getColRangeInfo(), gpuCanHandleOrderEntries(), and initCountDistinctDescriptors().

192  {
193  if (!expr) {
194  return {QueryDescriptionType::Projection, 0, 0, 0, false};
195  }
196 
197  const auto expr_range = getExpressionRange(
198  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
199  switch (expr_range.getType()) {
200  case ExpressionRangeType::Integer:
201  return {QueryDescriptionType::GroupByPerfectHash,
202  expr_range.getIntMin(),
203  expr_range.getIntMax(),
204  expr_range.getBucket(),
205  expr_range.hasNulls()};
206  case ExpressionRangeType::Float:
207  case ExpressionRangeType::Double:
208  case ExpressionRangeType::Invalid:
209  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
210  default:
211  CHECK(false);
212  }
213  CHECK(false);
214  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
215 }

KeylessInfo GroupByAndAggregate::getKeylessInfo ( const std::vector< Analyzer::Expr * > &  target_expr_list,
const bool  is_group_by 
) const
private

This function goes through all target expressions and answers two questions:

  1. Is it possible to use a keyless hash?
  2. If so, which aggregate expression should be taken to represent the key's presence, if needed (e.g., for detecting empty entries in the result set)?

NOTE: Keyless hash is only valid with single-column group by at the moment.

TODO(Saman): move the shared memory discussion out of this function.

Shared memory usage is currently supported only for a single keyless aggregate operation, and only for up to two target expressions.

Definition at line 602 of file GroupByAndAggregate.cpp.

References agg_arg(), CHECK(), constrained_not_null(), Double, executor_, Float, g_bigint_count, get_agg_initial_val(), get_compact_type(), get_target_info(), getExpressionRange(), Integer, Invalid, is_distinct_target(), kAVG, kCOUNT, keyless, kMAX, kMIN, kSUM, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, supportedTypeForGpuSharedMemUsage(), and takes_float_argument().

604  {
605  bool keyless{true}, found{false}, shared_mem_support{false},
606  shared_mem_valid_data_type{true};
607  /* Currently support shared memory usage for a limited subset of possible aggregate
608  * operations. shared_mem_support and
609  * shared_mem_valid_data_type are declared to ensure such support. */
610  int32_t num_agg_expr{0}; // used for shared memory support on the GPU
611  int32_t index{0};
612  for (const auto target_expr : target_expr_list) {
613  const auto agg_info = get_target_info(target_expr, g_bigint_count);
614  const auto chosen_type = get_compact_type(agg_info);
615  // TODO(Saman): should be eventually removed, once I make sure what data types can
616  // be used in this shared memory setting.
617 
618  shared_mem_valid_data_type =
619  shared_mem_valid_data_type && supportedTypeForGpuSharedMemUsage(chosen_type);
620 
621  if (agg_info.is_agg) {
622  num_agg_expr++;
623  }
624  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
625  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
626  CHECK(agg_expr);
627  const auto arg_expr = agg_arg(target_expr);
628  const bool float_argument_input = takes_float_argument(agg_info);
629  switch (agg_info.agg_kind) {
630  case kAVG:
631  ++index;
632  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
633  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
634  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
635  expr_range_info.hasNulls()) {
636  break;
637  }
638  }
639  found = true;
640  break;
641  case kCOUNT:
642  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
643  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
644  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
645  expr_range_info.hasNulls()) {
646  break;
647  }
648  }
649  found = true;
650  if (!agg_info.skip_null_val) {
651  shared_mem_support = true; // currently just support 8 bytes per group
652  }
653  break;
654  case kSUM: {
655  auto arg_ti = arg_expr->get_type_info();
656  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
657  arg_ti.set_notnull(true);
658  }
659  if (!arg_ti.get_notnull()) {
660  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
661  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
662  !expr_range_info.hasNulls()) {
663  found = true;
664  }
665  } else {
666  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
667  switch (expr_range_info.getType()) {
668  case ExpressionRangeType::Float:
669  case ExpressionRangeType::Double:
670  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
671  found = true;
672  }
673  break;
674  case ExpressionRangeType::Integer:
675  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
676  found = true;
677  }
678  break;
679  default:
680  break;
681  }
682  }
683  break;
684  }
685  case kMIN: {
686  CHECK(agg_expr && agg_expr->get_arg());
687  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
688  if (arg_ti.is_string() || arg_ti.is_array()) {
689  break;
690  }
691  auto expr_range_info =
692  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
693  auto init_max = get_agg_initial_val(agg_info.agg_kind,
694  chosen_type,
695  is_group_by || float_argument_input,
696  float_argument_input ? sizeof(float) : 8);
697  switch (expr_range_info.getType()) {
698  case ExpressionRangeType::Float:
699  case ExpressionRangeType::Double: {
700  auto double_max =
701  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
702  if (expr_range_info.getFpMax() < double_max) {
703  found = true;
704  }
705  break;
706  }
707  case ExpressionRangeType::Integer:
708  if (expr_range_info.getIntMax() < init_max) {
709  found = true;
710  }
711  break;
712  default:
713  break;
714  }
715  break;
716  }
717  case kMAX: {
718  CHECK(agg_expr && agg_expr->get_arg());
719  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
720  if (arg_ti.is_string() || arg_ti.is_array()) {
721  break;
722  }
723  auto expr_range_info =
724  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
725  // NULL sentinel and init value for kMAX are identical, which results in
726  // ambiguity in detecting empty keys in presence of nulls.
727  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
728  expr_range_info.hasNulls()) {
729  break;
730  }
731  auto init_min = get_agg_initial_val(agg_info.agg_kind,
732  chosen_type,
733  is_group_by || float_argument_input,
734  float_argument_input ? sizeof(float) : 8);
735  switch (expr_range_info.getType()) {
736  case ExpressionRangeType::Float:
737  case ExpressionRangeType::Double: {
738  auto double_min =
739  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
740  if (expr_range_info.getFpMin() > double_min) {
741  found = true;
742  }
743  break;
744  }
745  case ExpressionRangeType::Integer:
746  if (expr_range_info.getIntMin() > init_min) {
747  found = true;
748  }
749  break;
750  default:
751  break;
752  }
753  break;
754  }
755  default:
756  keyless = false;
757  break;
758  }
759  }
760  if (!keyless) {
761  break;
762  }
763  if (!found) {
764  ++index;
765  }
766  }
767 
768  // shouldn't use keyless for projection only
769  /**
770  * Currently just support shared memory usage when dealing with one keyless
771  * aggregate operation. Currently just support shared memory usage for up to two
772  * target expressions.
773  */
774  return {keyless && found,
775  index,
776  ((num_agg_expr == 1) && (target_expr_list.size() <= 2))
777  ? shared_mem_support && shared_mem_valid_data_type
778  : false};
779 }
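
Example: for SELECT x, COUNT(*) FROM t GROUP BY x, the COUNT slot itself can signal a bin's presence, since an untouched bin still holds the aggregate's initial value; the returned index records which target plays that role, and no key column needs to be materialized. The kMAX handling above shows the failure mode: when nulls (or an invalid range) make the initial value indistinguishable from real input, that aggregate cannot serve as the presence indicator.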

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo &  col_range_info,
const size_t  shard_count 
) const
private

Definition at line 263 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK(), CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

264  {
265  size_t device_count{0};
266  if (device_type_ == ExecutorDeviceType::GPU) {
267  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
268  CHECK_GT(device_count, 0u);
269  }
270 
271  int64_t bucket{col_range_info.bucket};
272 
273  if (shard_count) {
274  CHECK(!col_range_info.bucket);
275  /*
276  when a node has fewer devices than shard count,
277  a) In a distributed setup, the minimum distance between two keys would be
278  device_count, because shards are stored consecutively across the physical tables,
279  i.e. if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
280  would have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf
281  node has only 1 device, then all of a node's keys are loaded onto that single
282  device.
283 
284  b) In a single node setup, the distance would be the minimum of device_count and
285  (shard_count - device_count). For example: if a single node server running on 3
286  devices has a shard column with values 0 to 9 in a table with 4 shards, the
287  device-to-fragment-keys mapping would be: device 1 - 4,8,3,7; device 2 - 1,5,9;
288  device 3 - 2,6. The bucket value would be 4 (shards) - 3 (devices) = 1, i.e. the
289  minimum of device_count and the difference.
290 
291  When a node has device count equal to or more than shard count then the
292  minimum distance is always at least shard_count * no of leaf nodes.
293  */
294  if (device_count < shard_count) {
295  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
296  : std::min(device_count, shard_count - device_count);
297  } else {
298  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
299  }
300  }
301 
302  return bucket;
303 }

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 820 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_GE, CHECK_LE, Analyzer::AggExpr::get_arg(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

821  {
822  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
823  return false;
824  }
825  for (const auto order_entry : order_entries) {
826  CHECK_GE(order_entry.tle_no, 1);
827  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
828  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
829  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
830  return false;
831  }
832  // TODO(alex): relax the restrictions
833  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
834  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
835  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
836  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
837  return false;
838  }
839  if (agg_expr->get_arg()) {
840  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
841  if (arg_ti.is_fp()) {
842  return false;
843  }
844  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
845  // TODO(adb): QMD not actually initialized here?
846  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
847  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
848  expr_range_info.has_nulls) &&
849  order_entry.is_desc == order_entry.nulls_first) {
850  return false;
851  }
852  }
853  const auto& target_ti = target_expr->get_type_info();
854  CHECK(!target_ti.is_array());
855  if (!target_ti.is_integer()) {
856  return false;
857  }
858  }
859  return true;
860 }

CountDistinctDescriptors GroupByAndAggregate::initCountDistinctDescriptors ( )
private

Definition at line 506 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK(), CHECK_GE, device_type_, g_bigint_count, g_enable_watchdog, g_hll_precision_bits, Analyzer::AggExpr::get_arg(), get_count_distinct_sub_bitmap_count(), get_target_info(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, hll_size_for_rate(), Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, kENCODING_DICT, kINT, Projection, ra_exe_unit_, StdSet, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

506  {
507  CountDistinctDescriptors count_distinct_descriptors;
508  for (const auto target_expr : ra_exe_unit_.target_exprs) {
509  auto agg_info = get_target_info(target_expr, g_bigint_count);
510  if (is_distinct_target(agg_info)) {
511  CHECK(agg_info.is_agg);
512  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
513  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
514  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
515  if (arg_ti.is_string() && arg_ti.get_compression() != kENCODING_DICT) {
516  throw std::runtime_error(
517  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
518  }
519  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_array()) {
520  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
521  }
522  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
523  throw std::runtime_error(
524  "APPROX_COUNT_DISTINCT on geometry columns not supported");
525  }
526  if (agg_info.is_distinct && arg_ti.is_geometry()) {
527  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
528  }
529  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
530  auto arg_range_info =
531  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
532  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
533  int64_t bitmap_sz_bits{0};
534  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
535  const auto error_rate = agg_expr->get_error_rate();
536  if (error_rate) {
537  CHECK(error_rate->get_type_info().get_type() == kINT);
538  CHECK_GE(error_rate->get_constval().intval, 1);
539  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
540  } else {
541  bitmap_sz_bits = g_hll_precision_bits;
542  }
543  }
544  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
545  !(arg_ti.is_array() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
546  // implementation for arrays
547  if (arg_range_info.isEmpty()) {
548  count_distinct_descriptors.emplace_back(
549  CountDistinctDescriptor{CountDistinctImplType::Bitmap,
550  0,
551  64,
552  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
553  device_type_,
554  1});
555  continue;
556  }
557  count_distinct_impl_type = CountDistinctImplType::Bitmap;
558  if (agg_info.agg_kind == kCOUNT) {
559  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
560  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
561  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
562  count_distinct_impl_type = CountDistinctImplType::StdSet;
563  }
564  }
565  }
566  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
567  count_distinct_impl_type == CountDistinctImplType::StdSet &&
568  !(arg_ti.is_array() || arg_ti.is_geometry())) {
569  count_distinct_impl_type = CountDistinctImplType::Bitmap;
570  }
571  if (g_enable_watchdog &&
572  count_distinct_impl_type == CountDistinctImplType::StdSet) {
573  throw WatchdogException("Cannot use a fast path for COUNT distinct");
574  }
575  const auto sub_bitmap_count =
576  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit_, device_type_);
577  count_distinct_descriptors.emplace_back(
578  CountDistinctDescriptor{count_distinct_impl_type,
579  arg_range_info.min,
580  bitmap_sz_bits,
581  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
582  device_type_,
583  sub_bitmap_count});
584  } else {
585  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
586  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
587  }
588  }
589  return count_distinct_descriptors;
590 }
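
Example: COUNT(DISTINCT i) on an integer column with range [0, 999999] gets a Bitmap descriptor of 1,000,000 bits per group, while a floating-point argument carries no usable range and falls back to StdSet, which the watchdog rejects outright. APPROX_COUNT_DISTINCT ends up on a bitmap sized either from the user-supplied error rate via hll_size_for_rate() or from the g_hll_precision_bits default.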

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo *  render_info,
const bool  output_columnar_hint 
)
private

Definition at line 305 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK(), device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

310  {
311  const auto shard_count =
312  device_type_ == ExecutorDeviceType::GPU
313  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
314  : 0;
315  bool sort_on_gpu_hint =
316  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
317  !ra_exe_unit_.sort_info.order_entries.empty() &&
318  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
319  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
320  // but the total output buffer size would be too big or it's a sharded top query.
321  // For the sake of managing risk, use the new result set way very selectively for
322  // this case only (alongside the baseline layout we've enabled for a while now).
323  bool must_use_baseline_sort = shard_count;
324  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
325  while (true) {
326  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
327  max_groups_buffer_entry_count,
328  crt_min_byte_width,
329  sort_on_gpu_hint,
330  render_info,
331  must_use_baseline_sort,
332  output_columnar_hint);
333  CHECK(query_mem_desc);
334  if (query_mem_desc->sortOnGpu() &&
335  (query_mem_desc->getBufferSizeBytes(device_type_) +
336  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
337  2 * 1024 * 1024 * 1024L) {
338  must_use_baseline_sort = true;
339  sort_on_gpu_hint = false;
340  } else {
341  break;
342  }
343  }
344  return query_mem_desc;
345 }
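
The loop retries because a GPU sort needs the output buffer plus a 32-bit index per entry resident at once: when that footprint would exceed 2 GiB, the descriptor is rebuilt with the GPU-sort hint withdrawn and must_use_baseline_sort set, and the next iteration breaks out with the safer layout.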

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo *  render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 347 of file GroupByAndAggregate.cpp.

References addTransientStringLiterals(), get_col_byte_widths(), RelAlgExecutionUnit::groupby_exprs, initCountDistinctDescriptors(), and ra_exe_unit_.

Referenced by initQueryMemoryDescriptor().

354  {
355  addTransientStringLiterals();
356 
357  const auto count_distinct_descriptors = initCountDistinctDescriptors();
358 
359  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs, {});
360 
361  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
362 
363  auto col_range_info_nosharding = getColRangeInfo();
364 
365  const auto shard_count =
366  device_type_ == ExecutorDeviceType::GPU
367  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
368  : 0;
369 
370  const auto col_range_info =
371  ColRangeInfo{col_range_info_nosharding.hash_type_,
372  col_range_info_nosharding.min,
373  col_range_info_nosharding.max,
374  getShardedTopBucket(col_range_info_nosharding, shard_count),
375  col_range_info_nosharding.has_nulls};
376 
377  // Non-grouped aggregates do not support accessing aggregated ranges
378  // Keyless hash is currently only supported with single-column perfect hash
379  const auto keyless_info =
380  !(is_group_by &&
381  col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
382  ra_exe_unit_.groupby_exprs.size() == 1)
383  ? KeylessInfo{false, -1, false}
384  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
385 
386  if (g_enable_watchdog &&
387  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
388  max_groups_buffer_entry_count > 120000000) ||
389  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
390  ra_exe_unit_.groupby_exprs.size() == 1 &&
391  (col_range_info.max - col_range_info.min) /
392  std::max(col_range_info.bucket, int64_t(1)) >
393  130000000))) {
394  throw WatchdogException("Query would use too much memory");
395  }
396  return QueryMemoryDescriptor::init(executor_,
397  ra_exe_unit_,
398  query_infos_,
399  col_range_info,
400  keyless_info,
401  allow_multifrag,
402  device_type_,
403  crt_min_byte_width,
404  sort_on_gpu_hint,
405  shard_count,
406  max_groups_buffer_entry_count,
407  render_info,
408  count_distinct_descriptors,
409  must_use_baseline_sort,
410  output_columnar_hint);
411 }

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions &  co 
) const
private

Definition at line 30 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type_, and executor_.

Referenced by TargetExprCodegen::codegen().

33  {
34  return (executor_->isArchMaxwell(co.device_type_) && threads_share_memory &&
35  llvm::isa<llvm::AllocaInst>(val_ptr) &&
36  val_ptr->getType() ==
37  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
38  "agg_id" == agg_base_name);
39 }

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 41 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

41  {
42  executor_->cgen_state_->ir_builder_.CreateCall(
43  executor_->cgen_state_->module_->getFunction("force_sync"));
44 }

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit &  ra_exe_unit,
const Catalog_Namespace::Catalog &  catalog 
)
static

Definition at line 1869 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), and initQueryMemoryDescriptor().

1871  {
1872  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1873  return 0;
1874  }
1875  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1876  const auto grouped_col_expr =
1877  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1878  if (!grouped_col_expr) {
1879  continue;
1880  }
1881  if (grouped_col_expr->get_table_id() <= 0) {
1882  return 0;
1883  }
1884  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1885  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1886  return td->nShards;
1887  }
1888  }
1889  return 0;
1890 }
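
Example: SELECT shard_key, COUNT(*) FROM t GROUP BY shard_key ORDER BY 2 DESC LIMIT 10 on a table with 8 shards returns 8, letting the top-k path partition its work by shard; a query without exactly one order entry plus a limit, or one not grouped on the sharded column, returns 0 and takes the general path.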

bool GroupByAndAggregate::supportedExprForGpuSharedMemUsage ( Analyzer::Expr *  expr)
static private

Definition at line 807 of file GroupByAndAggregate.cpp.

References kUNNEST.

807  {
808  /*
809  UNNEST operations follow a slightly different internal memory layout compared to other
810  keyless aggregates. Currently, we opt out of using shared memory if there is any UNNEST
811  operation involved.
812  */
813  if (dynamic_cast<Analyzer::UOper*>(expr) &&
814  static_cast<Analyzer::UOper*>(expr)->get_optype() == kUNNEST) {
815  return false;
816  }
817  return true;
818 }
bool GroupByAndAggregate::supportedTypeForGpuSharedMemUsage ( const SQLTypeInfo &  target_type_info) const
private

Supported data types for the current shared memory usage for keyless aggregates with COUNT(*). Currently only supported for single-column group-by queries.

Definition at line 785 of file GroupByAndAggregate.cpp.

References SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), kENCODING_DICT, kINT, kSMALLINT, kTEXT, kTINYINT, and run_benchmark_import::result.

Referenced by getKeylessInfo().

786  {
787  bool result = false;
788  switch (target_type_info.get_type()) {
789  case SQLTypes::kTINYINT:
790  case SQLTypes::kSMALLINT:
791  case SQLTypes::kINT:
792  result = true;
793  break;
794  case SQLTypes::kTEXT:
795  if (target_type_info.get_compression() == EncodingType::kENCODING_DICT) {
796  result = true;
797  }
798  break;
799  default:
800  break;
801  }
802  return result;
803 }

Friends And Related Function Documentation

friend class Executor
friend

Definition at line 302 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 303 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 304 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 305 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private
bool GroupByAndAggregate::output_columnar_
private

Definition at line 299 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private

Definition at line 297 of file GroupByAndAggregate.h.

Referenced by getExprRangeInfo(), and getKeylessInfo().

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 298 of file GroupByAndAggregate.h.

Referenced by addTransientStringLiterals().


The documentation for this class was generated from the following files: