OmniSciDB  0fdbebe030
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Classes

struct  DiamondCodegen
 

Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner >)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co)
 

Static Public Member Functions

static void addTransientStringLiterals (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
 
static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &catalog)
 

Private Member Functions

bool supportedTypeForGpuSharedMemUsage (const SQLTypeInfo &target_type_info) const
 
bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
void addTransientStringLiterals ()
 
CountDistinctDescriptors initCountDistinctDescriptors ()
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
ColRangeInfo getExprRangeInfo (const Analyzer::Expr *expr) const
 
KeylessInfo getKeylessInfo (const std::vector< Analyzer::Expr * > &target_expr_list, const bool is_group_by) const
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored. More...
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, GroupByAndAggregate::DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static bool supportedExprForGpuSharedMemUsage (Analyzer::Expr *expr)
 
static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 129 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)

Definition at line 234 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, and ra_exe_unit_.

240  : executor_(executor)
241  , ra_exe_unit_(ra_exe_unit)
242  , query_infos_(query_infos)
243  , row_set_mem_owner_(row_set_mem_owner)
244  , device_type_(device_type) {
245  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
246  if (!groupby_expr) {
247  continue;
248  }
249  const auto& groupby_ti = groupby_expr->get_type_info();
250  if (groupby_ti.is_string() && groupby_ti.get_compression() != kENCODING_DICT) {
251  throw std::runtime_error(
252  "Cannot group by string columns which are not dictionary encoded.");
253  }
254  if (groupby_ti.is_array()) {
255  throw std::runtime_error("Group by array not supported");
256  }
257  if (groupby_ti.is_geometry()) {
258  throw std::runtime_error("Group by geometry not supported");
259  }
260  }
261 }
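
For orientation, a call site might look like the following sketch. This is illustrative only, not engine code: executor, ra_exe_unit, query_infos, and row_set_mem_owner are assumed to already be in scope in the executor's compilation path.

// Hypothetical call site (a sketch); all variables are assumed to be in scope.
GroupByAndAggregate group_by_and_agg(executor,            // Executor*
                                     ExecutorDeviceType::CPU,
                                     ra_exe_unit,         // unit being compiled
                                     query_infos,         // per-table metadata
                                     row_set_mem_owner);  // shared result memory
// Per the checks above, the constructor throws std::runtime_error when a
// group-by key is a non-dictionary-encoded string, an array, or a geometry.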

Member Function Documentation

void GroupByAndAggregate::addTransientStringLiterals ( const RelAlgExecutionUnit &  ra_exe_unit,
Executor *  executor,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner 
)
static

Definition at line 498 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::add_transient_string_literals_for_expression(), RelAlgExecutionUnit::groupby_exprs, kENCODING_DICT, kSAMPLE, kSINGLE_VALUE, and RelAlgExecutionUnit::target_exprs.

501  {
502  for (const auto group_expr : ra_exe_unit.groupby_exprs) {
503  add_transient_string_literals_for_expression(
504  group_expr.get(), executor, row_set_mem_owner);
505  }
506  for (const auto target_expr : ra_exe_unit.target_exprs) {
507  const auto& target_type = target_expr->get_type_info();
508  if (target_type.is_string() && target_type.get_compression() != kENCODING_DICT) {
509  continue;
510  }
511  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
512  if (agg_expr) {
513  if (agg_expr->get_aggtype() == kSINGLE_VALUE ||
514  agg_expr->get_aggtype() == kSAMPLE) {
515  add_transient_string_literals_for_expression(
516  agg_expr->get_arg(), executor, row_set_mem_owner);
517  }
518  } else {
519  add_transient_string_literals_for_expression(
520  target_expr, executor, row_set_mem_owner);
521  }
522  }
523  row_set_mem_owner->addLiteralStringDictProxy(executor->lit_str_dict_proxy_);
524 }

void GroupByAndAggregate::addTransientStringLiterals ( )
private

Definition at line 432 of file GroupByAndAggregate.cpp.

References executor_, ra_exe_unit_, and row_set_mem_owner_.

Referenced by RelAlgExecutor::executeSort(), RelAlgExecutor::executeWorkUnit(), and initQueryMemoryDescriptorImpl().

432  {
433  addTransientStringLiterals(ra_exe_unit_, executor_, row_set_mem_owner_);
434 }

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 1945 of file GroupByAndAggregate.cpp.

References executor_.

Referenced by TargetExprCodegen::codegen().

1945  {
1946  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
1947  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
1948  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
1949 
1950  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
1951 }


bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)

Definition at line 927 of file GroupByAndAggregate.cpp.

References CHECK(), codegenAggCalls(), codegenEstimator(), codegenGroupBy(), GroupByAndAggregate::DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), prependForceSync(), Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

930  {
931  CHECK(filter_result);
932 
933  bool can_return_error = false;
934  llvm::BasicBlock* filter_false{nullptr};
935 
936  {
937  const bool is_group_by = !ra_exe_unit_.groupby_exprs.empty();
938 
939  if (executor_->isArchMaxwell(co.device_type)) {
940  prependForceSync();
941  }
942  DiamondCodegen filter_cfg(filter_result,
943  executor_,
944  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
945  "filter",
946  nullptr,
947  false);
948  filter_false = filter_cfg.cond_false_;
949 
950  if (is_group_by) {
951  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
952  !query_mem_desc.useStreamingTopN()) {
953  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
954  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
955  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
956  llvm::Value* old_total_matched_val{nullptr};
957  if (co.device_type == ExecutorDeviceType::GPU) {
958  old_total_matched_val =
959  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
960  total_matched_ptr,
961  LL_INT(int32_t(1)),
962  llvm::AtomicOrdering::Monotonic);
963  } else {
964  old_total_matched_val = LL_BUILDER.CreateLoad(total_matched_ptr);
965  LL_BUILDER.CreateStore(
966  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
967  total_matched_ptr);
968  }
969  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
970  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
971  }
972 
973  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
974  if (query_mem_desc.usesGetGroupValueFast() ||
975  query_mem_desc.getQueryDescriptionType() ==
976  QueryDescriptionType::Projection) {
977  if (query_mem_desc.getGroupbyColCount() > 1) {
978  filter_cfg.setChainToNext();
979  }
980  // Don't generate null checks if the group slot is guaranteed to be non-null,
981  // as it's the case for get_group_value_fast* family.
982  can_return_error =
983  codegenAggCalls(agg_out_ptr_w_idx, {}, query_mem_desc, co, filter_cfg);
984  } else {
985  {
986  llvm::Value* nullcheck_cond{nullptr};
987  if (query_mem_desc.didOutputColumnar()) {
988  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
989  LL_INT(int32_t(0)));
990  } else {
991  nullcheck_cond = LL_BUILDER.CreateICmpNE(
992  std::get<0>(agg_out_ptr_w_idx),
993  llvm::ConstantPointerNull::get(
994  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
995  }
996  DiamondCodegen nullcheck_cfg(
997  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
998  codegenAggCalls(agg_out_ptr_w_idx, {}, query_mem_desc, co, filter_cfg);
999  }
1000  can_return_error = true;
1001  if (query_mem_desc.getQueryDescriptionType() ==
1002  QueryDescriptionType::Projection &&
1003  query_mem_desc.useStreamingTopN()) {
1004  // Ignore rejection on pushing current row to top-K heap.
1005  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
1006  } else {
1007  CodeGenerator code_generator(executor_);
1008  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
1009  // TODO(alex): remove the trunc once pos is converted to 32 bits
1010  code_generator.posArg(nullptr),
1011  get_int_type(32, LL_CONTEXT))));
1012  }
1013  }
1014  } else {
1015  if (ra_exe_unit_.estimator) {
1016  std::stack<llvm::BasicBlock*> array_loops;
1017  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
1018  } else {
1019  auto arg_it = ROW_FUNC->arg_begin();
1020  std::vector<llvm::Value*> agg_out_vec;
1021  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1022  agg_out_vec.push_back(&*arg_it++);
1023  }
1024  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1025  agg_out_vec,
1026  query_mem_desc,
1027  co,
1028  filter_cfg);
1029  }
1030  }
1031  }
1032 
1033  if (ra_exe_unit_.join_quals.empty()) {
1034  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1035  } else if (sc_false) {
1036  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1037  LL_BUILDER.SetInsertPoint(sc_false);
1038  LL_BUILDER.CreateBr(filter_false);
1039  LL_BUILDER.SetInsertPoint(saved_insert_block);
1040  }
1041 
1042  return can_return_error;
1043 }
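
As a reading aid, here is a minimal host-language sketch, assuming the group-by path without an estimator, of the control flow this function stitches together. Names and the aggregate update are illustrative only.

#include <cstdint>

// Sketch of the emitted row function: a filter diamond, the group-slot
// lookup, a null check on the slot, then the aggregate updates.
int32_t row_func_sketch(const bool filter_result,
                        int64_t* group_slot,  // from the codegenGroupBy() lookup
                        const int64_t pos) {  // current row position
  if (!filter_result) {  // DiamondCodegen "filter": skip rejected rows
    return 0;
  }
  if (!group_slot) {  // "groupby_nullcheck": slot acquisition failed
    return static_cast<int32_t>(-pos);  // negative pos reports the error
  }
  group_slot[0] += 1;  // stand-in for the codegenAggCalls() updates
  return 0;
}

int main() {
  int64_t slot[1] = {0};
  return row_func_sketch(true, slot, 7);
}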

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 1770 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, CodeGenerator::codegen(), executor_, get_int_type(), Analyzer::Expr::get_type_info(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1772  {
1773  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
1774  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
1775  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
1776 
1777  // TODO(alex): handle arrays uniformly?
1778  CodeGenerator code_generator(executor_);
1779  if (target_expr) {
1780  const auto& target_ti = target_expr->get_type_info();
1781  if (target_ti.is_array() && !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1782  const auto target_lvs =
1783  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1784  : code_generator.codegen(
1785  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1786  if (!func_expr && !arr_expr) {
1787  // Something with the chunk transport is code that was generated from a source
1788  // other than an ARRAY[] expression
1789  CHECK_EQ(size_t(1), target_lvs.size());
1790  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
1791  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1792  const auto i8p_ty =
1793  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1794  const auto& elem_ti = target_ti.get_elem_type();
1795  return {
1796  executor_->cgen_state_->emitExternalCall(
1797  "array_buff",
1798  i8p_ty,
1799  {target_lvs.front(), code_generator.posArg(target_expr)}),
1800  executor_->cgen_state_->emitExternalCall(
1801  "array_size",
1802  i32_ty,
1803  {target_lvs.front(),
1804  code_generator.posArg(target_expr),
1805  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
1806  } else {
1807  if (agg_expr) {
1808  throw std::runtime_error(
1809  "Using array[] operator as argument to an aggregate operator is not "
1810  "supported");
1811  }
1812  CHECK(func_expr || arr_expr);
1813  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
1814  CHECK_EQ(size_t(1), target_lvs.size());
1815 
1816  const auto target_lv = LL_BUILDER.CreateLoad(target_lvs[0]);
1817 
1818  // const auto target_lv_type = target_lvs[0]->getType();
1819  // CHECK(target_lv_type->isStructTy());
1820  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
1821  const auto i8p_ty = llvm::PointerType::get(
1822  get_int_type(8, executor_->cgen_state_->context_), 0);
1823  const auto ptr = LL_BUILDER.CreatePointerCast(
1824  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
1825  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
1826  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
1827 
1828  const auto nullcheck_ok_bb = llvm::BasicBlock::Create(
1829  LL_CONTEXT, "arr_nullcheck_ok_bb", executor_->cgen_state_->row_func_);
1830  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
1831  LL_CONTEXT, "arr_nullcheck_fail_bb", executor_->cgen_state_->row_func_);
1832 
1833  // TODO(adb): probably better to zext the bool
1834  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
1835  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
1836  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
1837 
1838  const auto ret_bb = llvm::BasicBlock::Create(
1839  LL_CONTEXT, "arr_return", executor_->cgen_state_->row_func_);
1840  LL_BUILDER.SetInsertPoint(ret_bb);
1841  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, "array_ptr_return");
1842  result_phi->addIncoming(ptr, nullcheck_ok_bb);
1843 
1844  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
1845  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
1846  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
1847 
1848  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
1849  executor_->cgen_state_->emitExternalCall(
1850  "register_buffer_with_executor_rsm",
1851  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
1852  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
1853  LL_BUILDER.CreateBr(ret_bb);
1854 
1855  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
1856  LL_BUILDER.CreateBr(ret_bb);
1857 
1858  LL_BUILDER.SetInsertPoint(ret_bb);
1859 
1860  return {result_phi, size};
1861  }
1862  CHECK_EQ(size_t(2), target_lvs.size());
1863  return {target_lvs[0], target_lvs[1]};
1864  }
1865  }
1866  if (target_ti.is_geometry() &&
1867  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
1868  auto generate_coord_lvs =
1869  [&](auto* selected_target_expr,
1870  bool const fetch_columns) -> std::vector<llvm::Value*> {
1871  const auto target_lvs =
1872  code_generator.codegen(selected_target_expr, fetch_columns, co);
1873  const auto geo_expr = dynamic_cast<const Analyzer::GeoExpr*>(target_expr);
1874  if (geo_expr) {
1875  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
1876  target_lvs.size());
1877  return target_lvs;
1878  }
1879  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
1880  target_lvs.size());
1881 
1882  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
1883  const auto i8p_ty =
1884  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
1885  std::vector<llvm::Value*> coords;
1886  size_t ctr = 0;
1887  for (const auto& target_lv : target_lvs) {
1888  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
1889  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
1890  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
1891  // coords array (TINYINT). Subsequent arrays are regular INT.
1892 
1893  const size_t elem_sz = ctr == 0 ? 1 : 4;
1894  ctr++;
1895  int32_t fixlen = -1;
1896  if (target_ti.get_type() == kPOINT) {
1897  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
1898  if (col_var) {
1899  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
1900  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
1901  fixlen = coords_cd->columnType.get_size();
1902  }
1903  }
1904  }
1905  if (fixlen > 0) {
1906  coords.push_back(executor_->cgen_state_->emitExternalCall(
1907  "fast_fixlen_array_buff",
1908  i8p_ty,
1909  {target_lv, code_generator.posArg(selected_target_expr)}));
1910  coords.push_back(executor_->cgen_state_->llInt(int64_t(fixlen)));
1911  continue;
1912  }
1913  coords.push_back(executor_->cgen_state_->emitExternalCall(
1914  "array_buff",
1915  i8p_ty,
1916  {target_lv, code_generator.posArg(selected_target_expr)}));
1917  coords.push_back(executor_->cgen_state_->emitExternalCall(
1918  "array_size",
1919  i32_ty,
1920  {target_lv,
1921  code_generator.posArg(selected_target_expr),
1922  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
1923  }
1924  return coords;
1925  };
1926 
1927  if (agg_expr) {
1928  return generate_coord_lvs(agg_expr->get_arg(), true);
1929  } else {
1930  return generate_coord_lvs(target_expr,
1931  !executor_->plan_state_->allow_lazy_fetch_);
1932  }
1933  }
1934  }
1935  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
1936  : code_generator.codegen(
1937  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
1938 }

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const std::vector< llvm::Value * > &  agg_out_vec,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1525 of file GroupByAndAggregate.cpp.

References CHECK(), TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1530  {
1531  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1532  // TODO(alex): unify the two cases, the output for non-group by queries
1533  // should be a contiguous buffer
1534  const bool is_group_by{std::get<0>(agg_out_ptr_w_idx)};
1535  bool can_return_error = false;
1536  if (is_group_by) {
1537  CHECK(agg_out_vec.empty());
1538  } else {
1539  CHECK(!agg_out_vec.empty());
1540  }
1541 
1542  // output buffer is casted into a byte stream to be able to handle data elements of
1543  // different sizes (only used when actual column width sizes are used)
1544  llvm::Value* output_buffer_byte_stream{nullptr};
1545  llvm::Value* out_row_idx{nullptr};
1546  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1547  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1548  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1549  std::get<0>(agg_out_ptr_w_idx),
1550  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1551  output_buffer_byte_stream->setName("out_buff_b_stream");
1552  CHECK(std::get<1>(agg_out_ptr_w_idx));
1553  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1554  llvm::Type::getInt64Ty(LL_CONTEXT));
1555  out_row_idx->setName("out_row_idx");
1556  }
1557 
1558  TargetExprCodegenBuilder target_builder(query_mem_desc, ra_exe_unit_, is_group_by);
1559  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1560  ++target_idx) {
1561  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1562  CHECK(target_expr);
1563 
1564  target_builder(target_expr, executor_, co);
1565  }
1566 
1567  target_builder.codegen(this,
1568  executor_,
1569  query_mem_desc,
1570  co,
1571  agg_out_ptr_w_idx,
1572  agg_out_vec,
1573  output_buffer_byte_stream,
1574  out_row_idx,
1575  diamond_codegen);
1576 
1577  for (auto target_expr : ra_exe_unit_.target_exprs) {
1578  CHECK(target_expr);
1579  executor_->plan_state_->isLazyFetchColumn(target_expr);
1580  }
1581 
1582  return can_return_error;
1583 }

llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1588 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, and to_string().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1595  {
1596  llvm::Value* agg_col_ptr{nullptr};
1597  if (query_mem_desc.didOutputColumnar()) {
1598  // TODO(Saman): remove the second columnar branch, and support all query description
1599  // types through the first branch. Then, input arguments should also be cleaned up
1600  if (!g_cluster &&
1601  query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1602  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1603  chosen_bytes == 8);
1604  CHECK(output_buffer_byte_stream);
1605  CHECK(out_row_idx);
1606  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1607  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1608  auto out_per_col_byte_idx =
1609  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1610  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1611  LL_INT(static_cast<int64_t>(col_off)));
1612  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1613  auto output_ptr = LL_BUILDER.CreateGEP(output_buffer_byte_stream, byte_offset);
1614  agg_col_ptr = LL_BUILDER.CreateBitCast(
1615  output_ptr,
1616  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1617  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1618  } else {
1619  uint32_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1620  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1621  col_off /= chosen_bytes;
1622  CHECK(std::get<1>(agg_out_ptr_w_idx));
1623  auto offset = LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
1624  agg_col_ptr = LL_BUILDER.CreateGEP(
1625  LL_BUILDER.CreateBitCast(
1626  std::get<0>(agg_out_ptr_w_idx),
1627  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1628  offset);
1629  }
1630  } else {
1631  uint32_t col_off = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1632  CHECK_EQ(size_t(0), col_off % chosen_bytes);
1633  col_off /= chosen_bytes;
1634  agg_col_ptr = LL_BUILDER.CreateGEP(
1635  LL_BUILDER.CreateBitCast(
1636  std::get<0>(agg_out_ptr_w_idx),
1637  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
1638  LL_INT(col_off));
1639  }
1640  CHECK(agg_col_ptr);
1641  return agg_col_ptr;
1642 }
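
In the columnar fast path above, the address arithmetic is byte_offset = out_row_idx * chosen_bytes + col_off, with the multiplication done as a left shift by log2(chosen_bytes) via __builtin_ffs. A standalone sketch with assumed values (GCC/Clang builtin):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t out_row_idx = 42;  // row index within the columnar buffer
  const uint32_t col_off = 4096;    // column start, as from getColOffInBytes()
  const int chosen_bytes = 4;       // compact width of this aggregate slot
  // __builtin_ffs(4) == 3, so the shift amount is log2(4) == 2.
  const uint64_t byte_offset =
      (out_row_idx << (__builtin_ffs(chosen_bytes) - 1)) + col_off;
  std::printf("agg_col_ptr = out_buff_b_stream + %llu\n",
              static_cast<unsigned long long>(byte_offset));  // ... + 4264
}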

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1693 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK(), CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegen().

1698  {
1699  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1700  const auto& arg_ti =
1701  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1702  if (arg_ti.is_fp()) {
1703  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1704  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1705  }
1706  const auto& count_distinct_descriptor =
1707  query_mem_desc.getCountDistinctDescriptor(target_idx);
1708  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1709  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1710  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1711  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1712  if (device_type == ExecutorDeviceType::GPU) {
1713  const auto base_dev_addr = getAdditionalLiteral(-1);
1714  const auto base_host_addr = getAdditionalLiteral(-2);
1715  agg_args.push_back(base_dev_addr);
1716  agg_args.push_back(base_host_addr);
1717  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1718  } else {
1719  emitCall("agg_approximate_count_distinct", agg_args);
1720  }
1721  return;
1722  }
1723  std::string agg_fname{"agg_count_distinct"};
1724  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1725  agg_fname += "_bitmap";
1726  agg_args.push_back(LL_INT(static_cast<int64_t>(count_distinct_descriptor.min_val)));
1727  }
1728  if (agg_info.skip_null_val) {
1729  auto null_lv = executor_->cgen_state_->castToTypeIn(
1730  (arg_ti.is_fp()
1731  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1732  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1733  64);
1734  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1735  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1736  agg_fname += "_skip_val";
1737  agg_args.push_back(null_lv);
1738  }
1739  if (device_type == ExecutorDeviceType::GPU) {
1740  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1741  agg_fname += "_gpu";
1742  const auto base_dev_addr = getAdditionalLiteral(-1);
1743  const auto base_host_addr = getAdditionalLiteral(-2);
1744  agg_args.push_back(base_dev_addr);
1745  agg_args.push_back(base_host_addr);
1746  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1747  CHECK_EQ(size_t(0),
1748  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1749  count_distinct_descriptor.sub_bitmap_count);
1750  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1751  count_distinct_descriptor.sub_bitmap_count)));
1752  }
1753  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1754  emitCall(agg_fname, agg_args);
1755  } else {
1756  executor_->cgen_state_->emitExternalCall(
1757  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1758  }
1759 }
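
For the exact (bitmap) implementation, the runtime function name is assembled from suffixes as the code above shows. A standalone sketch with assumed flag values:

#include <iostream>
#include <string>

int main() {
  const bool bitmap_impl = true;    // CountDistinctImplType::Bitmap
  const bool skip_null_val = true;  // aggregate must skip NULL inputs
  const bool on_gpu = true;         // ExecutorDeviceType::GPU
  std::string agg_fname{"agg_count_distinct"};
  if (bitmap_impl) {
    agg_fname += "_bitmap";  // bitmap path also passes min_val
  }
  if (skip_null_val) {
    agg_fname += "_skip_val";  // also passes the 64-bit null sentinel
  }
  if (on_gpu) {
    agg_fname += "_gpu";  // also passes device/host addresses and sizes
  }
  std::cout << agg_fname << '\n';  // agg_count_distinct_bitmap_skip_val_gpu
}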

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
GroupByAndAggregate::DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1644 of file GroupByAndAggregate.cpp.

References CHECK(), emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1648  {
1649  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1650  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1651  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1652  estimator_comp_count_lv);
1653  int32_t subkey_idx = 0;
1654  for (const auto estimator_arg_comp : estimator_arg) {
1655  const auto estimator_arg_comp_lvs =
1656  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1657  query_mem_desc.getEffectiveKeyWidth(),
1658  co,
1659  false,
1660  0,
1661  diamond_codegen,
1662  array_loops,
1663  true);
1664  CHECK(!estimator_arg_comp_lvs.original_value);
1665  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1666  // store the sub-key to the buffer
1667  LL_BUILDER.CreateStore(estimator_arg_comp_lv,
1668  LL_BUILDER.CreateGEP(estimator_key_lv, LL_INT(subkey_idx++)));
1669  }
1670  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1671  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1672  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1673  const auto estimator_comp_bytes_lv =
1674  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1675  const auto bitmap_size_lv =
1676  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1677  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1678  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1679 }
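
The estimator key is packed as consecutive int64_t subkeys, then handed to the estimator's runtime function as a raw byte buffer together with the bitmap and both buffer sizes. A minimal layout sketch with illustrative values:

#include <cstdint>
#include <cstring>
#include <vector>

int main() {
  // Translated group-by component values, one int64_t subkey per component.
  const std::vector<int64_t> estimator_key{7, 42};
  std::vector<int8_t> key_bytes(estimator_key.size() * sizeof(int64_t));
  std::memcpy(key_bytes.data(), estimator_key.data(), key_bytes.size());
  // The generated code then performs, in effect:
  //   emitCall(estimator->getRuntimeFunctionName(),
  //            {bitmap, bitmap_size, key_bytes.data(), key_bytes.size()});
  return 0;
}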

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1136 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), getExprRangeInfo(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, groups_buffer, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, Projection, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1139  {
1140  auto arg_it = ROW_FUNC->arg_begin();
1141  auto groups_buffer = arg_it++;
1142 
1143  std::stack<llvm::BasicBlock*> array_loops;
1144 
1145  // TODO(Saman): move this logic outside of this function.
1146  if (query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection) {
1147  if (query_mem_desc.didOutputColumnar()) {
1148  return std::make_tuple(
1149  &*groups_buffer,
1150  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1151  } else {
1152  return std::make_tuple(
1153  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1154  nullptr);
1155  }
1156  }
1157 
1158  CHECK(query_mem_desc.getQueryDescriptionType() ==
1159  QueryDescriptionType::GroupByBaselineHash ||
1160  query_mem_desc.getQueryDescriptionType() ==
1161  QueryDescriptionType::GroupByPerfectHash);
1162 
1163  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1164  ? 0
1165  : query_mem_desc.getRowSize() / sizeof(int64_t);
1166 
1167  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1168  ? sizeof(int64_t)
1169  : query_mem_desc.getEffectiveKeyWidth();
1170  // for multi-column group by
1171  llvm::Value* group_key = nullptr;
1172  llvm::Value* key_size_lv = nullptr;
1173 
1174  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1175  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1176  if (query_mem_desc.getQueryDescriptionType() ==
1177  QueryDescriptionType::GroupByPerfectHash) {
1178  group_key =
1179  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1180  } else if (query_mem_desc.getQueryDescriptionType() ==
1181  QueryDescriptionType::GroupByBaselineHash) {
1182  group_key =
1183  col_width_size == sizeof(int32_t)
1184  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1185  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1186  }
1187  CHECK(group_key);
1188  CHECK(key_size_lv);
1189  }
1190 
1191  int32_t subkey_idx = 0;
1192  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1193  for (const auto group_expr : ra_exe_unit_.groupby_exprs) {
1194  const auto col_range_info = getExprRangeInfo(group_expr.get());
1195  const auto translated_null_value = static_cast<int64_t>(
1196  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1197  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1198  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1199  : checked_int64_t(col_range_info.max) +
1200  (col_range_info.bucket ? col_range_info.bucket : 1));
1201 
1202  const bool col_has_nulls =
1203  query_mem_desc.getQueryDescriptionType() ==
1204  QueryDescriptionType::GroupByPerfectHash
1205  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1206  ? query_mem_desc.hasNulls()
1207  : col_range_info.has_nulls)
1208  : false;
1209 
1210  const auto group_expr_lvs =
1211  executor_->groupByColumnCodegen(group_expr.get(),
1212  col_width_size,
1213  co,
1214  col_has_nulls,
1215  translated_null_value,
1216  diamond_codegen,
1217  array_loops,
1218  query_mem_desc.threadsShareMemory());
1219  const auto group_expr_lv = group_expr_lvs.translated_value;
1220  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1221  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1222  return codegenSingleColumnPerfectHash(query_mem_desc,
1223  co,
1224  &*groups_buffer,
1225  group_expr_lv,
1226  group_expr_lvs.original_value,
1227  row_size_quad);
1228  } else {
1229  // store the sub-key to the buffer
1230  LL_BUILDER.CreateStore(group_expr_lv,
1231  LL_BUILDER.CreateGEP(group_key, LL_INT(subkey_idx++)));
1232  }
1233  }
1234  if (query_mem_desc.getQueryDescriptionType() ==
1235  QueryDescriptionType::GroupByPerfectHash) {
1236  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1237  return codegenMultiColumnPerfectHash(
1238  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1239  } else if (query_mem_desc.getQueryDescriptionType() ==
1240  QueryDescriptionType::GroupByBaselineHash) {
1241  return codegenMultiColumnBaselineHash(co,
1242  &*groups_buffer,
1243  group_key,
1244  key_size_lv,
1245  query_mem_desc,
1246  col_width_size,
1247  row_size_quad);
1248  }
1249  CHECK(false);
1250  return std::make_tuple(nullptr, nullptr);
1251 }
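
Note how translated_null_value maps a NULL key to one bucket past the column maximum, giving NULLs a group slot of their own. A worked example with assumed range values:

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t max_val = 1000;  // getMaxVal() / col_range_info.max
  const int64_t bucket = 10;     // bucket size; 0 means unbucketed
  const int64_t translated_null = max_val + (bucket ? bucket : 1);
  std::printf("NULL group key translates to %lld\n",
              static_cast<long long>(translated_null));  // 1010
}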

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1346 of file GroupByAndAggregate.cpp.

References CHECK(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), QueryMemoryDescriptor::getEntryCount(), groups_buffer, LL_BUILDER, LL_CONTEXT, LL_INT, ROW_FUNC, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1353  {
1354  auto arg_it = ROW_FUNC->arg_begin(); // groups_buffer
1355  ++arg_it; // current match count
1356  ++arg_it; // total match count
1357  ++arg_it; // old match count
1358  ++arg_it; // output buffer slots count
1359  ++arg_it; // aggregate init values
1360  CHECK(arg_it->getName() == "agg_init_val");
1361  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1362  CHECK(key_width == sizeof(int32_t));
1363  group_key =
1364  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1365  }
1366  std::vector<llvm::Value*> func_args{
1367  groups_buffer,
1368  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1369  &*group_key,
1370  &*key_size_lv,
1371  LL_INT(static_cast<int32_t>(key_width))};
1372  std::string func_name{"get_group_value"};
1373  if (query_mem_desc.didOutputColumnar()) {
1374  func_name += "_columnar_slot";
1375  } else {
1376  func_args.push_back(LL_INT(row_size_quad));
1377  func_args.push_back(&*arg_it);
1378  }
1379  if (co.with_dynamic_watchdog) {
1380  func_name += "_with_watchdog";
1381  }
1382  if (query_mem_desc.didOutputColumnar()) {
1383  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1384  } else {
1385  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1386  }
1387 }

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1303 of file GroupByAndAggregate.cpp.

References CHECK(), codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GroupByPerfectHash, groups_buffer, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1308  {
1309  CHECK(query_mem_desc.getQueryDescriptionType() ==
1310  QueryDescriptionType::GroupByPerfectHash);
1311  // compute the index (perfect hash)
1312  auto perfect_hash_func = codegenPerfectHashFunction();
1313  auto hash_lv =
1314  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1315 
1316  if (query_mem_desc.didOutputColumnar()) {
1317  if (!query_mem_desc.hasKeylessHash()) {
1318  const std::string set_matching_func_name{
1319  "set_matching_group_value_perfect_hash_columnar"};
1320  const std::vector<llvm::Value*> set_matching_func_arg{
1321  groups_buffer,
1322  hash_lv,
1323  group_key,
1324  key_size_lv,
1325  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1326  query_mem_desc.getEntryCount())};
1327  emitCall(set_matching_func_name, set_matching_func_arg);
1328  }
1329  return std::make_tuple(groups_buffer, hash_lv);
1330  } else {
1331  if (query_mem_desc.hasKeylessHash()) {
1332  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1333  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1334  nullptr);
1335  } else {
1336  return std::make_tuple(
1337  emitCall(
1338  "get_matching_group_value_perfect_hash",
1339  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1340  nullptr);
1341  }
1342  }
1343 }

llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1045 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, CHECK(), CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, groups_buffer, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), RelAlgExecutionUnit::use_bump_allocator, and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1049  {
1050  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1051  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1052  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1053  CHECK(!group_expr);
1054  if (!query_mem_desc.didOutputColumnar()) {
1055  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1056  }
1057  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1058  ? 0
1059  : query_mem_desc.getRowSize() / sizeof(int64_t);
1060  CodeGenerator code_generator(executor_);
1061  if (query_mem_desc.useStreamingTopN()) {
1062  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1063  CHECK_GE(only_order_entry.tle_no, int(1));
1064  const size_t target_idx = only_order_entry.tle_no - 1;
1065  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1066  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1067  const auto chosen_bytes =
1068  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1069  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1070  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1071  const uint32_t n = ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit;
1072  std::string fname = "get_bin_from_k_heap";
1073  const auto& oe_ti = order_entry_expr->get_type_info();
1074  llvm::Value* null_key_lv = nullptr;
1075  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1076  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1077  switch (bit_width) {
1078  case 32:
1079  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1080  break;
1081  case 64:
1082  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1083  break;
1084  default:
1085  CHECK(false);
1086  }
1087  fname += "_int" + std::to_string(bit_width) + "_t";
1088  } else {
1089  CHECK(oe_ti.is_fp());
1090  if (order_entry_lv->getType()->isDoubleTy()) {
1091  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1092  } else {
1093  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1094  }
1095  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1096  }
1097  const auto key_slot_idx =
1098  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1099  return emitCall(
1100  fname,
1101  {groups_buffer,
1102  LL_INT(n),
1103  LL_INT(row_size_quad),
1104  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1105  LL_BOOL(only_order_entry.is_desc),
1106  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1107  LL_BOOL(only_order_entry.nulls_first),
1108  null_key_lv,
1109  order_entry_lv});
1110  } else {
1111  llvm::Value* output_buffer_entry_count_lv{nullptr};
1112  if (ra_exe_unit_.use_bump_allocator) {
1113  output_buffer_entry_count_lv =
1114  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "max_matched"));
1115  CHECK(output_buffer_entry_count_lv);
1116  }
1117  const auto group_expr_lv =
1118  LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched"));
1119  std::vector<llvm::Value*> args{
1120  groups_buffer,
1121  output_buffer_entry_count_lv
1122  ? output_buffer_entry_count_lv
1123  : LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1124  group_expr_lv,
1125  code_generator.posArg(nullptr)};
1126  if (query_mem_desc.didOutputColumnar()) {
1127  const auto columnar_output_offset =
1128  emitCall("get_columnar_scan_output_offset", args);
1129  return columnar_output_offset;
1130  }
1131  args.push_back(LL_INT(row_size_quad));
1132  return emitCall("get_scan_output_slot", args);
1133  }
1134 }
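
In the streaming top-N branch, the k-heap is sized to hold offset + limit entries so that the OFFSET rows can be discarded after the scan, and the runtime function name is specialized on the order key's type. A sketch with assumed sort parameters:

#include <cstdint>
#include <cstdio>

int main() {
  const size_t limit = 10;  // SortInfo::limit
  const size_t offset = 5;  // SortInfo::offset
  const uint32_t n = static_cast<uint32_t>(offset + limit);
  // A 64-bit integer order key would select get_bin_from_k_heap_int64_t.
  std::printf("k-heap entries: %u\n", n);  // 15
}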

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1389 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_GT, executor_, get_int_type(), getBucketedCardinality(), getExprRangeInfo(), RelAlgExecutionUnit::groupby_exprs, GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1389  {
1390  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1391  auto ft = llvm::FunctionType::get(
1392  get_int_type(32, LL_CONTEXT),
1393  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1394  false);
1395  auto key_hash_func = llvm::Function::Create(ft,
1396  llvm::Function::ExternalLinkage,
1397  "perfect_key_hash",
1398  executor_->cgen_state_->module_);
1399  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1400  mark_function_always_inline(key_hash_func);
1401  auto& key_buff_arg = *key_hash_func->args().begin();
1402  llvm::Value* key_buff_lv = &key_buff_arg;
1403  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1404  llvm::IRBuilder<> key_hash_func_builder(bb);
1405  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1406  std::vector<int64_t> cardinalities;
1407  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
1408  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1409  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1410  cardinalities.push_back(getBucketedCardinality(col_range_info));
1411  }
1412  size_t dim_idx = 0;
1413  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
1414  auto key_comp_lv = key_hash_func_builder.CreateLoad(
1415  key_hash_func_builder.CreateGEP(key_buff_lv, LL_INT(dim_idx)));
1416  auto col_range_info = getExprRangeInfo(groupby_expr.get());
1417  auto crt_term_lv =
1418  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1419  if (col_range_info.bucket) {
1420  crt_term_lv =
1421  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1422  }
1423  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1424  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1425  LL_INT(cardinalities[prev_dim_idx]));
1426  }
1427  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1428  ++dim_idx;
1429  }
1430  key_hash_func_builder.CreateRet(
1431  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1432  return key_hash_func;
1433 }
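
The generated function computes a mixed-radix (row-major) index over the cross product of the group-by dimensions: each key component is normalized by its minimum and bucket size, then scaled by the cardinalities of all preceding dimensions. A minimal host-side sketch of the same arithmetic, assuming an illustrative DimInfo struct and function name that are not part of the sources:

    #include <cstdint>
    #include <vector>

    // Host-side sketch of the hash the generated IR computes; DimInfo and
    // perfect_key_hash_sketch are illustrative names, not part of the sources.
    struct DimInfo {
      int64_t min;          // lowest value of this group-by dimension
      int64_t bucket;       // bucket size; 0 means no bucketing
      int64_t cardinality;  // bucketed cardinality of this dimension
    };

    int32_t perfect_key_hash_sketch(const int64_t* key,
                                    const std::vector<DimInfo>& dims) {
      int64_t hash = 0;
      for (size_t i = 0; i < dims.size(); ++i) {
        int64_t term = key[i] - dims[i].min;
        if (dims[i].bucket) {
          term /= dims[i].bucket;
        }
        // Scale by the cardinalities of all previous dimensions, producing a
        // row-major index into the cross product of the dimension ranges.
        for (size_t prev = 0; prev < i; ++prev) {
          term *= dims[prev].cardinality;
        }
        hash += term;
      }
      return static_cast<int32_t>(hash);  // mirrors the final trunc to i32
    }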

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1254 of file GroupByAndAggregate.cpp.

References CHECK(), CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), groups_buffer, QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1260  {
1261  CHECK(query_mem_desc.usesGetGroupValueFast());
1262  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1263  ? "get_columnar_group_bin_offset"
1264  : "get_group_value_fast"};
1265  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1266  get_group_fn_name += "_keyless";
1267  }
1268  if (query_mem_desc.interleavedBins(co.device_type)) {
1269  CHECK(!query_mem_desc.didOutputColumnar());
1270  CHECK(query_mem_desc.hasKeylessHash());
1271  get_group_fn_name += "_semiprivate";
1272  }
1273  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1274  &*group_expr_lv_translated};
1275  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1276  query_mem_desc.mustUseBaselineSort()) {
1277  get_group_fn_name += "_with_original_key";
1278  get_group_fn_args.push_back(group_expr_lv_original);
1279  }
1280  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1281  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1282  if (!query_mem_desc.hasKeylessHash()) {
1283  if (!query_mem_desc.didOutputColumnar()) {
1284  get_group_fn_args.push_back(LL_INT(row_size_quad));
1285  }
1286  } else {
1287  if (!query_mem_desc.didOutputColumnar()) {
1288  get_group_fn_args.push_back(LL_INT(row_size_quad));
1289  }
1290  if (query_mem_desc.interleavedBins(co.device_type)) {
1291  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1292  get_group_fn_args.push_back(warp_idx);
1293  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1294  }
1295  }
1296  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1297  return std::make_tuple(&*groups_buffer,
1298  emitCall(get_group_fn_name, get_group_fn_args));
1299  }
1300  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1301 }
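
In the common row-wise case, the emitted call reduces to simple pointer arithmetic on the groups buffer. A simplified sketch of what "get_group_value_fast" computes, with an illustrative _sketch suffix and sentinel constant, ignoring the keyless, interleaved, and with-original-key variants:

    #include <cstdint>
    #include <limits>

    // Illustrative stand-in for the runtime's empty-key sentinel.
    constexpr int64_t kEmptyKey64Sketch = std::numeric_limits<int64_t>::max();

    int64_t* get_group_value_fast_sketch(int64_t* groups_buffer,
                                         const int64_t key,
                                         const int64_t min_key,
                                         const int64_t bucket,
                                         const uint32_t row_size_quad) {
      int64_t key_diff = key - min_key;
      if (bucket) {
        key_diff /= bucket;  // bucketed ranges collapse to one bin per bucket
      }
      const int64_t off = key_diff * row_size_quad;
      if (groups_buffer[off] == kEmptyKey64Sketch) {
        groups_buffer[off] = key;  // first visit: materialize the group key
      }
      return groups_buffer + off + 1;  // aggregate slots follow the key
    }

The columnar variant ("get_columnar_group_bin_offset") returns only the bin index, since per-column offsets are applied by the caller; that is why the columnar branch in the listing above returns the groups buffer together with the offset rather than a slot pointer.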

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction window_func,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
DiamondCodegen diamond_codegen 
)
private

Definition at line 1485 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, CHECK(), codegenOutputSlot(), COUNT, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), groups_buffer, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1489  {
1490  const auto window_func_context =
1491  WindowProjectNodeContext::getActiveWindowFunctionContext();
1492  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1493  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1494  ? 0
1495  : query_mem_desc.getRowSize() / sizeof(int64_t);
1496  auto arg_it = ROW_FUNC->arg_begin();
1497  auto groups_buffer = arg_it++;
1498  CodeGenerator code_generator(executor_);
1499  if (!window_func_context->getRowNumber()) {
1500  CHECK(window_func->getKind() == SqlWindowFunctionKind::COUNT);
1501  window_func_context->setRowNumber(emitCall(
1502  "row_number_window_func",
1503  {LL_INT(reinterpret_cast<const int64_t>(window_func_context->output())),
1504  code_generator.posArg(nullptr)}));
1505  }
1506  const auto pos_in_window = LL_BUILDER.CreateTrunc(window_func_context->getRowNumber(),
1507  get_int_type(32, LL_CONTEXT));
1508  llvm::Value* entry_count_lv =
1509  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1510  std::vector<llvm::Value*> args{
1511  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1512  if (query_mem_desc.didOutputColumnar()) {
1513  const auto columnar_output_offset =
1514  emitCall("get_columnar_scan_output_offset", args);
1515  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1516  }
1517  args.push_back(LL_INT(row_size_quad));
1518  return emitCall("get_scan_output_slot", args);
1519  }
1520  auto arg_it = ROW_FUNC->arg_begin();
1521  auto groups_buffer = arg_it++;
1522  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1523 }

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo arg_type,
const TargetInfo agg_info,
llvm::Value *  target 
)
private

Definition at line 1435 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, CHECK(), executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegen().

1437  {
1438  const auto& agg_type = agg_info.sql_type;
1439  const size_t chosen_bytes = agg_type.get_size();
1440 
1441  bool need_conversion{false};
1442  llvm::Value* arg_null{nullptr};
1443  llvm::Value* agg_null{nullptr};
1444  llvm::Value* target_to_cast{target};
1445  if (arg_type.is_fp()) {
1446  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1447  if (agg_type.is_fp()) {
1448  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1449  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1450  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1451  need_conversion = true;
1452  }
1453  } else {
1454  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1455  return target;
1456  }
1457  } else {
1458  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1459  if (agg_type.is_fp()) {
1460  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1461  need_conversion = true;
1462  target_to_cast = executor_->castToFP(target);
1463  } else {
1464  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1465  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1466  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1467  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1468  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1469  need_conversion = true;
1470  }
1471  }
1472  }
1473  if (need_conversion) {
1474  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1475  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1476  return LL_BUILDER.CreateSelect(
1477  cmp,
1478  agg_null,
1479  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1480  } else {
1481  return target;
1482  }
1483 }
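
The generated compare-and-select can be sketched in scalar form: a value equal to the argument type's NULL sentinel is rewritten to the aggregate type's sentinel, and any other value is passed through after being cast to the aggregate's byte width. The helper below is illustrative, not part of the sources, and shows only the integer case:

    #include <cstdint>

    int64_t convert_null_if_any_sketch(const int64_t target,
                                       const int64_t arg_null,
                                       const int64_t agg_null) {
      // Mirrors the emitted icmp + select; the real code also widens the
      // non-null value to the aggregate's width via castToTypeIn().
      return target == arg_null ? agg_null : target;
    }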

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 1940 of file GroupByAndAggregate.cpp.

References executor_.

Referenced by TargetExprCodegen::codegen(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

1941  {
1942  return executor_->cgen_state_->emitCall(fname, args);
1943 }


llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 1761 of file GroupByAndAggregate.cpp.

References CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

1761  {
1762  CHECK_LT(off, 0);
1763  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
1764  return LL_BUILDER.CreateLoad(LL_BUILDER.CreateGEP(
1765  LL_BUILDER.CreateBitCast(lit_buff_lv,
1766  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)),
1767  LL_INT(off)));
1768 }
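
In other words, the emitted IR reinterprets the row function's "literals" argument as an array of 64-bit words and loads the word at a negative index. A scalar sketch with an illustrative name:

    #include <cstdint>

    int64_t get_additional_literal_sketch(const int8_t* literal_buff,
                                          const int32_t off) {
      // off < 0, as the CHECK_LT above enforces: additional literals are
      // addressed backwards from the buffer pointer, in 64-bit word units.
      return reinterpret_cast<const int64_t*>(literal_buff)[off];
    }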

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo col_range_info)
staticprivate

Definition at line 217 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

217  {
218  checked_int64_t crt_col_cardinality =
219  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
220  if (col_range_info.bucket) {
221  crt_col_cardinality /= col_range_info.bucket;
222  }
223  return static_cast<int64_t>(crt_col_cardinality +
224  (1 + (col_range_info.has_nulls ? 1 : 0)));
225 }
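
As a worked example: a column range [10, 109] with bucket size 10 and nulls present yields (109 - 10) / 10 + (1 + 1) = 11 entries, i.e. ten buckets for the value range plus one slot reserved for NULL. The checked 64-bit arithmetic turns a pathological range into an exception instead of a silent overflow.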

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 118 of file GroupByAndAggregate.cpp.

References Executor::baseline_threshold, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), getBucketedCardinality(), getExprRangeInfo(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), ra_exe_unit_, RelAlgExecutionUnit::simple_quals, and RelAlgExecutionUnit::target_exprs.

118  {
119  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
120  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
121  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
122  // can expect this to be true anyway for grouped queries since the precise version
123  // uses significantly more memory.
124  const int64_t baseline_threshold =
125  has_count_distinct(ra_exe_unit_) && device_type_ == ExecutorDeviceType::GPU
126  ? (Executor::baseline_threshold / 4)
127  : Executor::baseline_threshold;
128 
129  if (ra_exe_unit_.groupby_exprs.size() != 1) {
130  try {
131  checked_int64_t cardinality{1};
132  bool has_nulls{false};
133  for (const auto groupby_expr : ra_exe_unit_.groupby_exprs) {
134  auto col_range_info = getExprRangeInfo(groupby_expr.get());
135  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
136  // going through baseline hash if a non-integer type is encountered
137  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
138  }
139  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
140  CHECK_GE(crt_col_cardinality, 0);
141  cardinality *= crt_col_cardinality;
142  if (col_range_info.has_nulls) {
143  has_nulls = true;
144  }
145  }
146  // For zero or high cardinalities, use baseline layout.
147  if (!cardinality || cardinality > baseline_threshold) {
148  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
149  }
150  return {QueryDescriptionType::GroupByPerfectHash,
151  0,
152  int64_t(cardinality),
153  0,
154  has_nulls};
155  } catch (...) { // overflow when computing cardinality
156  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
157  }
158  }
159  // For single column groupby on high timestamps, force baseline hash due to wide ranges
160  // we are likely to encounter when applying quals to the expression range
161  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
162  // the range is small enough
163  if (ra_exe_unit_.groupby_exprs.front() &&
164  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
165  ra_exe_unit_.simple_quals.size() > 0) {
166  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
167  }
168  const auto col_range_info = getExprRangeInfo(ra_exe_unit_.groupby_exprs.front().get());
169  if (!ra_exe_unit_.groupby_exprs.front()) {
170  return col_range_info;
171  }
172  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
173  const int64_t col_count =
174  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
175  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
176  if (has_count_distinct(ra_exe_unit_)) {
177  max_entry_count = std::min(max_entry_count, baseline_threshold);
178  }
179  if ((!ra_exe_unit_.groupby_exprs.front()->get_type_info().is_string() &&
180  !expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get(), *executor_->catalog_)) &&
181  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
182  !col_range_info.bucket) {
183  return {QueryDescriptionType::GroupByBaselineHash,
184  col_range_info.min,
185  col_range_info.max,
186  0,
187  col_range_info.has_nulls};
188  }
189  return col_range_info;
190 }
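
As a worked example for the multi-column branch: grouping by two columns with bucketed cardinalities 100 and 50 gives a cross-product cardinality of 5,000. If that stays at or below baseline_threshold, the query can use perfect hash over the combined key space (max = 5000); a zero cardinality, an overflow, or a product above the threshold all fall back to baseline hash.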

ColRangeInfo GroupByAndAggregate::getExprRangeInfo ( const Analyzer::Expr expr) const
private

Definition at line 192 of file GroupByAndAggregate.cpp.

References CHECK(), Double, executor_, Float, getExpressionRange(), GroupByBaselineHash, GroupByPerfectHash, Integer, Invalid, NonGroupedAggregate, Projection, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::simple_quals.

Referenced by codegenGroupBy(), codegenPerfectHashFunction(), getColRangeInfo(), gpuCanHandleOrderEntries(), and initCountDistinctDescriptors().

192  {
193  if (!expr) {
194  return {QueryDescriptionType::Projection, 0, 0, 0, false};
195  }
196 
197  const auto expr_range = getExpressionRange(
198  expr, query_infos_, executor_, boost::make_optional(ra_exe_unit_.simple_quals));
199  switch (expr_range.getType()) {
200  case ExpressionRangeType::Integer:
201  return {QueryDescriptionType::GroupByPerfectHash,
202  expr_range.getIntMin(),
203  expr_range.getIntMax(),
204  expr_range.getBucket(),
205  expr_range.hasNulls()};
206  case ExpressionRangeType::Float:
207  case ExpressionRangeType::Double:
208  case ExpressionRangeType::Invalid:
209  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
210  default:
211  CHECK(false);
212  }
213  CHECK(false);
214  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
215 }
CHECK(cgen_state)
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
const std::vector< InputTableInfo > & query_infos_
const RelAlgExecutionUnit & ra_exe_unit_
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

KeylessInfo GroupByAndAggregate::getKeylessInfo ( const std::vector< Analyzer::Expr * > &  target_expr_list,
const bool  is_group_by 
) const
private

This function goes through all target expressions and answers two questions:

  1. Is it possible to have keyless hash?
  2. If so, which aggregate expression should be used to represent the key's presence when needed (e.g., for detecting empty entries in the result set)?

NOTE: Keyless hash is only valid with single-column group by at the moment.

TODO(Saman): move the shared memory discussion out of this function.

Shared memory usage is currently supported only when there is a single keyless aggregate operation and at most two target expressions.
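
For example, a query shaped like SELECT x, COUNT(*) FROM t GROUP BY x; can qualify for keyless hash: the COUNT(*) slot starts at zero and is always updated for a matching row, so it doubles as the presence indicator for its group and the key column never has to be materialized in the output buffer.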

Definition at line 622 of file GroupByAndAggregate.cpp.

References agg_arg(), CHECK(), constrained_not_null(), Double, executor_, Float, g_bigint_count, get_agg_initial_val(), get_compact_type(), get_target_info(), getExpressionRange(), Integer, Invalid, is_distinct_target(), kAVG, kCOUNT, keyless, kMAX, kMIN, kSUM, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, supportedTypeForGpuSharedMemUsage(), and takes_float_argument().

624  {
625  bool keyless{true}, found{false}, shared_mem_support{false},
626  shared_mem_valid_data_type{true};
627  /* Currently we support shared memory usage for a limited subset of possible
628  * aggregate operations; shared_mem_support and
629  * shared_mem_valid_data_type are declared to track such support. */
630  int32_t num_agg_expr{0}; // used for shared memory support on the GPU
631  int32_t index{0};
632  for (const auto target_expr : target_expr_list) {
633  const auto agg_info = get_target_info(target_expr, g_bigint_count);
634  const auto chosen_type = get_compact_type(agg_info);
635  // TODO(Saman): should be eventually removed, once I make sure what data types can
636  // be used in this shared memory setting.
637 
638  shared_mem_valid_data_type =
639  shared_mem_valid_data_type && supportedTypeForGpuSharedMemUsage(chosen_type);
640 
641  if (agg_info.is_agg) {
642  num_agg_expr++;
643  }
644  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
645  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
646  CHECK(agg_expr);
647  const auto arg_expr = agg_arg(target_expr);
648  const bool float_argument_input = takes_float_argument(agg_info);
649  switch (agg_info.agg_kind) {
650  case kAVG:
651  ++index;
652  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
653  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
654  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
655  expr_range_info.hasNulls()) {
656  break;
657  }
658  }
659  found = true;
660  break;
661  case kCOUNT:
662  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
663  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
664  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
665  expr_range_info.hasNulls()) {
666  break;
667  }
668  }
669  found = true;
670  if (!agg_info.skip_null_val) {
671  shared_mem_support = true; // currently just support 8 bytes per group
672  }
673  break;
674  case kSUM: {
675  auto arg_ti = arg_expr->get_type_info();
676  if (constrained_not_null(arg_expr, ra_exe_unit_.quals)) {
677  arg_ti.set_notnull(true);
678  }
679  if (!arg_ti.get_notnull()) {
680  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
681  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
682  !expr_range_info.hasNulls()) {
683  found = true;
684  }
685  } else {
686  auto expr_range_info = getExpressionRange(arg_expr, query_infos_, executor_);
687  switch (expr_range_info.getType()) {
688  case ExpressionRangeType::Float:
689  case ExpressionRangeType::Double:
690  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
691  found = true;
692  }
693  break;
694  case ExpressionRangeType::Integer:
695  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
696  found = true;
697  }
698  break;
699  default:
700  break;
701  }
702  }
703  break;
704  }
705  case kMIN: {
706  CHECK(agg_expr && agg_expr->get_arg());
707  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
708  if (arg_ti.is_string() || arg_ti.is_array()) {
709  break;
710  }
711  auto expr_range_info =
712  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
713  auto init_max = get_agg_initial_val(agg_info.agg_kind,
714  chosen_type,
715  is_group_by || float_argument_input,
716  float_argument_input ? sizeof(float) : 8);
717  switch (expr_range_info.getType()) {
718  case ExpressionRangeType::Float:
719  case ExpressionRangeType::Double: {
720  auto double_max =
721  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
722  if (expr_range_info.getFpMax() < double_max) {
723  found = true;
724  }
725  break;
726  }
727  case ExpressionRangeType::Integer:
728  if (expr_range_info.getIntMax() < init_max) {
729  found = true;
730  }
731  break;
732  default:
733  break;
734  }
735  break;
736  }
737  case kMAX: {
738  CHECK(agg_expr && agg_expr->get_arg());
739  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
740  if (arg_ti.is_string() || arg_ti.is_array()) {
741  break;
742  }
743  auto expr_range_info =
744  getExpressionRange(agg_expr->get_arg(), query_infos_, executor_);
745  // NULL sentinel and init value for kMAX are identical, which results in
746  // ambiguity in detecting empty keys in presence of nulls.
747  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
748  expr_range_info.hasNulls()) {
749  break;
750  }
751  auto init_min = get_agg_initial_val(agg_info.agg_kind,
752  chosen_type,
753  is_group_by || float_argument_input,
754  float_argument_input ? sizeof(float) : 8);
755  switch (expr_range_info.getType()) {
756  case ExpressionRangeType::Float:
757  case ExpressionRangeType::Double: {
758  auto double_min =
759  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
760  if (expr_range_info.getFpMin() > double_min) {
761  found = true;
762  }
763  break;
764  }
765  case ExpressionRangeType::Integer:
766  if (expr_range_info.getIntMin() > init_min) {
767  found = true;
768  }
769  break;
770  default:
771  break;
772  }
773  break;
774  }
775  default:
776  keyless = false;
777  break;
778  }
779  }
780  if (!keyless) {
781  break;
782  }
783  if (!found) {
784  ++index;
785  }
786  }
787 
788  // shouldn't use keyless for projection only
789  /**
790  * Currently just support shared memory usage when dealing with one keyless
791  * aggregate operation. Currently just support shared memory usage for up to
792  * two target expressions.
793  */
794  return {keyless && found,
795  index,
796  ((num_agg_expr == 1) && (target_expr_list.size() <= 2))
797  ? shared_mem_support && shared_mem_valid_data_type
798  : false};
799 }

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo col_range_info,
const size_t  shard_count 
) const
private

Definition at line 263 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK(), CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

264  {
265  size_t device_count{0};
266  if (device_type_ == ExecutorDeviceType::GPU) {
267  device_count = executor_->getCatalog()->getDataMgr().getCudaMgr()->getDeviceCount();
268  CHECK_GT(device_count, 0u);
269  }
270 
271  int64_t bucket{col_range_info.bucket};
272 
273  if (shard_count) {
274  CHECK(!col_range_info.bucket);
275  /*
276  When a node has fewer devices than the shard count:
277  a) In a distributed setup, the minimum distance between two keys would be
278  device_count, because shards are stored consecutively across the physical tables,
279  i.e. if a shard column has values 0 to 9 and 3 shards on each leaf, then node 1
280  would have values 0,1,2,6,7,8 and node 2 would have values 3,4,5,9. If each leaf
281  node has only 1 device, all the keys from each node end up on that single
282  device.
283 
284  b) In a single node setup, the distance would be the minimum of device_count and
285  shard_count - device_count. For example, if a single node server running on 3
286  devices has a shard column with values 0 to 9 in a table with 4 shards, the
287  device to fragment keys mapping would be: device 1 - 4,8,3,7; device 2 - 1,5,9;
288  device 3 - 2,6. The bucket value would be 4 (shards) - 3 (devices) = 1, i.e. the
289  minimum of device_count and the difference.
290 
291  When a node has a device count equal to or greater than the shard count, the
292  minimum distance is always at least shard_count * number of leaf nodes.
293  */
294  if (device_count < shard_count) {
295  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
296  : std::min(device_count, shard_count - device_count);
297  } else {
298  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
299  }
300  }
301 
302  return bucket;
303 }

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 840 of file GroupByAndAggregate.cpp.

References CHECK(), CHECK_GE, CHECK_LE, Analyzer::AggExpr::get_arg(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

841  {
842  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
843  return false;
844  }
845  for (const auto order_entry : order_entries) {
846  CHECK_GE(order_entry.tle_no, 1);
847  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
848  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
849  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
850  return false;
851  }
852  // TODO(alex): relax the restrictions
853  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
854  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
855  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
856  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
857  return false;
858  }
859  if (agg_expr->get_arg()) {
860  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
861  if (arg_ti.is_fp()) {
862  return false;
863  }
864  auto expr_range_info = getExprRangeInfo(agg_expr->get_arg());
865  // TODO(adb): QMD not actually initialized here?
866  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
867  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
868  expr_range_info.has_nulls) &&
869  order_entry.is_desc == order_entry.nulls_first) {
870  return false;
871  }
872  }
873  const auto& target_ti = target_expr->get_type_info();
874  CHECK(!target_ti.is_array());
875  if (!target_ti.is_integer()) {
876  return false;
877  }
878  }
879  return true;
880 }

CountDistinctDescriptors GroupByAndAggregate::initCountDistinctDescriptors ( )
private

Definition at line 526 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK(), CHECK_GE, device_type_, g_bigint_count, g_enable_watchdog, g_hll_precision_bits, Analyzer::AggExpr::get_arg(), get_count_distinct_sub_bitmap_count(), get_target_info(), Analyzer::Expr::get_type_info(), getExprRangeInfo(), GroupByPerfectHash, hll_size_for_rate(), Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, kENCODING_DICT, kINT, Projection, ra_exe_unit_, StdSet, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

526  {
527  CountDistinctDescriptors count_distinct_descriptors;
528  for (const auto target_expr : ra_exe_unit_.target_exprs) {
529  auto agg_info = get_target_info(target_expr, g_bigint_count);
530  if (is_distinct_target(agg_info)) {
531  CHECK(agg_info.is_agg);
532  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
533  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
534  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
535  if (arg_ti.is_string() && arg_ti.get_compression() != kENCODING_DICT) {
536  throw std::runtime_error(
537  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
538  }
539  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_array()) {
540  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
541  }
542  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
543  throw std::runtime_error(
544  "APPROX_COUNT_DISTINCT on geometry columns not supported");
545  }
546  if (agg_info.is_distinct && arg_ti.is_geometry()) {
547  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
548  }
549  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
550  auto arg_range_info =
551  arg_ti.is_fp() ? no_range_info : getExprRangeInfo(agg_expr->get_arg());
552  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
553  int64_t bitmap_sz_bits{0};
554  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
555  const auto error_rate = agg_expr->get_error_rate();
556  if (error_rate) {
557  CHECK(error_rate->get_type_info().get_type() == kINT);
558  CHECK_GE(error_rate->get_constval().intval, 1);
559  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
560  } else {
561  bitmap_sz_bits = g_hll_precision_bits;
562  }
563  }
564  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
565  !(arg_ti.is_array() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
566  // implementation for arrays
567  if (arg_range_info.isEmpty()) {
568  count_distinct_descriptors.emplace_back(
569  CountDistinctDescriptor{CountDistinctImplType::Bitmap,
570  0,
571  64,
572  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
573  device_type_,
574  1});
575  continue;
576  }
577  count_distinct_impl_type = CountDistinctImplType::Bitmap;
578  if (agg_info.agg_kind == kCOUNT) {
579  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
580  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000L};
581  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
582  count_distinct_impl_type = CountDistinctImplType::StdSet;
583  }
584  }
585  }
586  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
587  count_distinct_impl_type == CountDistinctImplType::StdSet &&
588  !(arg_ti.is_array() || arg_ti.is_geometry())) {
589  count_distinct_impl_type = CountDistinctImplType::Bitmap;
590  }
591  if (g_enable_watchdog &&
592  count_distinct_impl_type == CountDistinctImplType::StdSet) {
593  throw WatchdogException("Cannot use a fast path for COUNT distinct");
594  }
595  const auto sub_bitmap_count =
596  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit_, device_type_);
597  count_distinct_descriptors.emplace_back(
598  CountDistinctDescriptor{count_distinct_impl_type,
599  arg_range_info.min,
600  bitmap_sz_bits,
601  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
602  device_type_,
603  sub_bitmap_count});
604  } else {
605  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
606  CountDistinctImplType::Invalid, 0, 0, false, device_type_, 0});
607  }
608  }
609  return count_distinct_descriptors;
610 }
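
The choice between the exact bitmap and the std::set fallback hinges on whether the argument has a known integer range small enough to afford one bit per possible value. A condensed sketch of that decision, with illustrative names; the real code also handles APPROX_COUNT_DISTINCT, which sizes the bitmap from the HLL precision bits instead:

    #include <cstdint>

    enum class CountDistinctImplSketch { Bitmap, StdSet };

    CountDistinctImplSketch choose_impl_sketch(const int64_t range_min,
                                               const int64_t range_max,
                                               const bool range_is_perfect_hash) {
      // ~1 GB worth of bits, mirroring the MAX_BITMAP_BITS cap in the listing.
      constexpr int64_t kMaxBitmapBits{int64_t{8} * 1000 * 1000 * 1000};
      if (range_is_perfect_hash) {
        const int64_t bitmap_sz_bits = range_max - range_min + 1;
        if (bitmap_sz_bits > 0 && bitmap_sz_bits <= kMaxBitmapBits) {
          return CountDistinctImplSketch::Bitmap;  // one bit per possible value
        }
      }
      return CountDistinctImplSketch::StdSet;  // exact, but a slow per-group set
    }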

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo render_info,
const bool  output_columnar_hint 
)
private

Definition at line 305 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK(), device_type_, executor_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

310  {
311  const auto shard_count =
312  device_type_ == ExecutorDeviceType::GPU
313  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
314  : 0;
315  bool sort_on_gpu_hint =
316  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
317  !ra_exe_unit_.sort_info.order_entries.empty() &&
318  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
319  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
320  // but the total output buffer size would be too big or it's a sharded top query.
321  // For the sake of managing risk, use the new result set way very selectively for
322  // this case only (alongside the baseline layout we've enabled for a while now).
323  bool must_use_baseline_sort = shard_count;
324  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
325  while (true) {
326  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
327  max_groups_buffer_entry_count,
328  crt_min_byte_width,
329  sort_on_gpu_hint,
330  render_info,
331  must_use_baseline_sort,
332  output_columnar_hint);
333  CHECK(query_mem_desc);
334  if (query_mem_desc->sortOnGpu() &&
335  (query_mem_desc->getBufferSizeBytes(device_type_) +
336  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
337  2 * 1024 * 1024 * 1024L) {
338  must_use_baseline_sort = true;
339  sort_on_gpu_hint = false;
340  } else {
341  break;
342  }
343  }
344  return query_mem_desc;
345 }
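
As a worked example of the retry loop: a GPU-sorted layout with 256M entries and 8-byte rows needs 2 GiB for the output buffer plus a further 1 GiB for the int32 entries added by align_to_int64(getEntryCount() * sizeof(int32_t)); the total exceeds the 2 GB cap, so the loop sets must_use_baseline_sort, clears sort_on_gpu_hint, and builds the descriptor once more.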

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 347 of file GroupByAndAggregate.cpp.

References addTransientStringLiterals(), get_col_byte_widths(), RelAlgExecutionUnit::groupby_exprs, initCountDistinctDescriptors(), and ra_exe_unit_.

Referenced by initQueryMemoryDescriptor().

354  {
355  addTransientStringLiterals();
356 
357  const auto count_distinct_descriptors = initCountDistinctDescriptors();
358 
359  auto group_col_widths = get_col_byte_widths(ra_exe_unit_.groupby_exprs, {});
360 
361  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
362 
363  auto col_range_info_nosharding = getColRangeInfo();
364 
365  const auto shard_count =
366  device_type_ == ExecutorDeviceType::GPU
367  ? shard_count_for_top_groups(ra_exe_unit_, *executor_->getCatalog())
368  : 0;
369 
370  const auto col_range_info =
371  ColRangeInfo{col_range_info_nosharding.hash_type_,
372  col_range_info_nosharding.min,
373  col_range_info_nosharding.max,
374  getShardedTopBucket(col_range_info_nosharding, shard_count),
375  col_range_info_nosharding.has_nulls};
376 
377  // Non-grouped aggregates do not support accessing aggregated ranges
378  // Keyless hash is currently only supported with single-column perfect hash
379  const auto keyless_info = !(is_group_by && col_range_info.hash_type_ ==
380  QueryDescriptionType::GroupByPerfectHash)
381  ? KeylessInfo{false, -1, false}
382  : getKeylessInfo(ra_exe_unit_.target_exprs, is_group_by);
383 
384  if (g_enable_watchdog &&
385  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
386  max_groups_buffer_entry_count > 120000000) ||
387  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
388  ra_exe_unit_.groupby_exprs.size() == 1 &&
389  (col_range_info.max - col_range_info.min) /
390  std::max(col_range_info.bucket, int64_t(1)) >
391  130000000))) {
392  throw WatchdogException("Query would use too much memory");
393  }
394  try {
395  return QueryMemoryDescriptor::init(executor_,
396  ra_exe_unit_,
397  query_infos_,
398  col_range_info,
399  keyless_info,
400  allow_multifrag,
401  device_type_,
402  crt_min_byte_width,
403  sort_on_gpu_hint,
404  shard_count,
405  max_groups_buffer_entry_count,
406  render_info,
407  count_distinct_descriptors,
408  must_use_baseline_sort,
409  output_columnar_hint,
410  /*streaming_top_n_hint=*/true);
411  } catch (const StreamingTopNOOM& e) {
412  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
413  return QueryMemoryDescriptor::init(executor_,
414  ra_exe_unit_,
415  query_infos_,
416  col_range_info,
417  keyless_info,
418  allow_multifrag,
419  device_type_,
420  crt_min_byte_width,
421  sort_on_gpu_hint,
422  shard_count,
423  max_groups_buffer_entry_count,
424  render_info,
425  count_distinct_descriptors,
426  must_use_baseline_sort,
427  output_columnar_hint,
428  /*streaming_top_n_hint=*/false);
429  }
430 }

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions co 
) const
private

Definition at line 30 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegen().

33  {
34  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
35  llvm::isa<llvm::AllocaInst>(val_ptr) &&
36  val_ptr->getType() ==
37  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
38  "agg_id" == agg_base_name);
39 }

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 41 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

41  {
42  executor_->cgen_state_->ir_builder_.CreateCall(
43  executor_->cgen_state_->module_->getFunction("force_sync"));
44 }


size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit,
const Catalog_Namespace::Catalog catalog 
)
static

Definition at line 1960 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::Catalog::getMetadataForTable(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), and initQueryMemoryDescriptor().

1962  {
1963  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
1964  return 0;
1965  }
1966  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
1967  const auto grouped_col_expr =
1968  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
1969  if (!grouped_col_expr) {
1970  continue;
1971  }
1972  if (grouped_col_expr->get_table_id() <= 0) {
1973  return 0;
1974  }
1975  const auto td = catalog.getMetadataForTable(grouped_col_expr->get_table_id());
1976  if (td->shardedColumnId == grouped_col_expr->get_column_id()) {
1977  return td->nShards;
1978  }
1979  }
1980  return 0;
1981 }
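
The non-zero case corresponds to sharded top-k shapes, e.g. (with an illustrative schema) SELECT shard_key, COUNT(*) FROM t GROUP BY shard_key ORDER BY 2 DESC LIMIT 10 on a table sharded on shard_key: with exactly one order entry, a limit, and the shard column in the group-by list, the function returns the table's nShards so the descriptor can exploit per-shard key placement.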

bool GroupByAndAggregate::supportedExprForGpuSharedMemUsage ( Analyzer::Expr expr)
staticprivate

Definition at line 827 of file GroupByAndAggregate.cpp.

References kUNNEST.

827  {
828  /*
829  UNNEST operations follow a slightly different internal memory layout compared to
830  other keyless aggregates. Currently, we opt out of using shared memory if there is
831  any UNNEST operation involved.
832  */
833  if (dynamic_cast<Analyzer::UOper*>(expr) &&
834  static_cast<Analyzer::UOper*>(expr)->get_optype() == kUNNEST) {
835  return false;
836  }
837  return true;
838 }
bool GroupByAndAggregate::supportedTypeForGpuSharedMemUsage ( const SQLTypeInfo target_type_info) const
private

Supported data types for the current shared memory usage for keyless aggregates with COUNT(*). Currently this only applies to single-column group-by queries.

Definition at line 805 of file GroupByAndAggregate.cpp.

References SQLTypeInfo::get_compression(), SQLTypeInfo::get_type(), kENCODING_DICT, kINT, kSMALLINT, kTEXT, kTINYINT, and run_benchmark_import::result.

Referenced by getKeylessInfo().

806  {
807  bool result = false;
808  switch (target_type_info.get_type()) {
809  case SQLTypes::kTINYINT:
810  case SQLTypes::kSMALLINT:
811  case SQLTypes::kINT:
812  result = true;
813  break;
814  case SQLTypes::kTEXT:
815  if (target_type_info.get_compression() == EncodingType::kENCODING_DICT) {
816  result = true;
817  }
818  break;
819  default:
820  break;
821  }
822  return result;
823 }

Friends And Related Function Documentation

friend class Executor
friend

Definition at line 303 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 304 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 305 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 306 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private

bool GroupByAndAggregate::output_columnar_
private

Definition at line 300 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private

Definition at line 298 of file GroupByAndAggregate.h.

Referenced by getExprRangeInfo(), and getKeylessInfo().

std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 299 of file GroupByAndAggregate.h.

Referenced by addTransientStringLiterals().


The documentation for this class was generated from the following files:

GroupByAndAggregate.h
GroupByAndAggregate.cpp
MaxwellCodegenPatch.cpp