OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{GroupByAndAggregate.cpp} Namespace Reference

Functions

int32_t get_agg_count (const std::vector< Analyzer::Expr * > &target_exprs)
 
bool expr_is_rowid (const Analyzer::Expr *expr, const Catalog_Namespace::Catalog &cat)
 
bool has_count_distinct (const RelAlgExecutionUnit &ra_exe_unit)
 
bool is_column_range_too_big_for_perfect_hash (const ColRangeInfo &col_range_info, const int64_t max_entry_count)
 
bool cardinality_estimate_less_than_column_range (const int64_t cardinality_estimate, const ColRangeInfo &col_range_info)
 
ColRangeInfo get_expr_range_info (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
 
KeylessInfo get_keyless_info (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool is_group_by, Executor *executor)
 
CountDistinctDescriptors init_count_distinct_descriptors (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ExecutorDeviceType device_type, Executor *executor)
 

Function Documentation

bool anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range ( const int64_t  cardinality_estimate,
const ColRangeInfo col_range_info 
)

Definition at line 118 of file GroupByAndAggregate.cpp.

References ColRangeInfo::max, and ColRangeInfo::min.

Referenced by GroupByAndAggregate::getColRangeInfo().

119  {
120  try {
121  // the cardinality estimate is the size of the baseline hash table. further penalize
122  // the baseline hash table by a factor of 2x due to overhead in computing baseline
123  // hash. This has the overall effect of penalizing baseline hash over perfect hash by
124  // 4x; i.e. if the cardinality of the filtered data is less than 25% of the entry
125  // count of the column, we use baseline hash on the filtered set
126  return checked_int64_t(cardinality_estimate) * 2 <
127  static_cast<int64_t>(checked_int64_t(col_range_info.max) -
128  checked_int64_t(col_range_info.min));
129  } catch (...) {
130  return false;
131  }
132 }
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t

+ Here is the caller graph for this function:

bool anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid ( const Analyzer::Expr expr,
const Catalog_Namespace::Catalog cat 
)

Definition at line 84 of file GroupByAndAggregate.cpp.

References cat(), CHECK_EQ, and get_column_descriptor_maybe().

Referenced by GroupByAndAggregate::getColRangeInfo().

84  {
85  const auto col = dynamic_cast<const Analyzer::ColumnVar*>(expr);
86  if (!col) {
87  return false;
88  }
89  const auto cd =
90  get_column_descriptor_maybe(col->get_column_id(), col->get_table_id(), cat);
91  if (!cd || !cd->isVirtualCol) {
92  return false;
93  }
94  CHECK_EQ("rowid", cd->columnName);
95  return true;
96 }
#define CHECK_EQ(x, y)
Definition: Logger.h:217
std::string cat(Ts &&...args)
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:218

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count ( const std::vector< Analyzer::Expr * > &  target_exprs)

Definition at line 59 of file GroupByAndAggregate.cpp.

References agg_count(), CHECK, Analyzer::Expr::get_type_info(), kAVG, and kSAMPLE.

Referenced by GroupByAndAggregate::codegen().

59  {
60  int32_t agg_count{0};
61  for (auto target_expr : target_exprs) {
62  CHECK(target_expr);
63  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
64  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
65  const auto& ti = target_expr->get_type_info();
66  if (ti.is_buffer()) {
67  agg_count += 2;
68  } else if (ti.is_geometry()) {
69  agg_count += ti.get_physical_coord_cols() * 2;
70  } else {
71  ++agg_count;
72  }
73  continue;
74  }
75  if (agg_expr && agg_expr->get_aggtype() == kAVG) {
76  agg_count += 2;
77  } else {
78  ++agg_count;
79  }
80  }
81  return agg_count;
82 }
ALWAYS_INLINE uint64_t agg_count(uint64_t *agg, const int64_t)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:77
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqldefs.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ColRangeInfo anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const Analyzer::Expr expr,
Executor executor 
)

Definition at line 134 of file GroupByAndAggregate.cpp.

References CHECK, Double, Float, getExpressionRange(), GroupByBaselineHash, GroupByPerfectHash, Integer, Invalid, NonGroupedAggregate, Projection, and RelAlgExecutionUnit::simple_quals.

Referenced by GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenPerfectHashFunction(), GroupByAndAggregate::getColRangeInfo(), GroupByAndAggregate::gpuCanHandleOrderEntries(), and init_count_distinct_descriptors().

137  {
138  if (!expr) {
139  return {QueryDescriptionType::Projection, 0, 0, 0, false};
140  }
141 
142  const auto expr_range = getExpressionRange(
143  expr, query_infos, executor, boost::make_optional(ra_exe_unit.simple_quals));
144  switch (expr_range.getType()) {
146  if (expr_range.getIntMin() > expr_range.getIntMax()) {
147  return {
148  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
149  }
151  expr_range.getIntMin(),
152  expr_range.getIntMax(),
153  expr_range.getBucket(),
154  expr_range.hasNulls()};
155  }
158  if (expr_range.getFpMin() > expr_range.getFpMax()) {
159  return {
160  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
161  }
162  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
163  }
165  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
166  default:
167  CHECK(false);
168  }
169  CHECK(false);
170  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
171 }
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
#define CHECK(condition)
Definition: Logger.h:209
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

KeylessInfo anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const bool  is_group_by,
Executor executor 
)

This function goes through all target expressions and answers two questions:

  1. Is it possible to have keyless hash?
  2. If yes to 1, then what aggregate expression should be considered to represent the key's presence, if needed (e.g., in detecting empty entries in the result set).

NOTE: Keyless hash is only valid with single-column group by at the moment.

Definition at line 400 of file GroupByAndAggregate.cpp.

References agg_arg(), CHECK, constrained_not_null(), Double, Float, g_bigint_count, get_agg_initial_val(), get_compact_type(), get_target_info(), getExpressionRange(), Integer, Invalid, is_distinct_target(), kAVG, kCOUNT, kMAX, kMIN, kSUM, RelAlgExecutionUnit::quals, takes_float_argument(), and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().

403  {
404  bool keyless{true}, found{false};
405  int32_t num_agg_expr{0};
406  int32_t index{0};
407  for (const auto target_expr : ra_exe_unit.target_exprs) {
408  const auto agg_info = get_target_info(target_expr, g_bigint_count);
409  const auto chosen_type = get_compact_type(agg_info);
410  if (agg_info.is_agg) {
411  num_agg_expr++;
412  }
413  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
414  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
415  CHECK(agg_expr);
416  const auto arg_expr = agg_arg(target_expr);
417  const bool float_argument_input = takes_float_argument(agg_info);
418  switch (agg_info.agg_kind) {
419  case kAVG:
420  ++index;
421  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
422  auto expr_range_info = getExpressionRange(arg_expr, query_infos, executor);
423  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
424  expr_range_info.hasNulls()) {
425  break;
426  }
427  }
428  found = true;
429  break;
430  case kCOUNT:
431  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
432  auto expr_range_info = getExpressionRange(arg_expr, query_infos, executor);
433  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
434  expr_range_info.hasNulls()) {
435  break;
436  }
437  }
438  found = true;
439  break;
440  case kSUM: {
441  auto arg_ti = arg_expr->get_type_info();
442  if (constrained_not_null(arg_expr, ra_exe_unit.quals)) {
443  arg_ti.set_notnull(true);
444  }
445  if (!arg_ti.get_notnull()) {
446  auto expr_range_info = getExpressionRange(arg_expr, query_infos, executor);
447  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
448  !expr_range_info.hasNulls()) {
449  found = true;
450  }
451  } else {
452  auto expr_range_info = getExpressionRange(arg_expr, query_infos, executor);
453  switch (expr_range_info.getType()) {
456  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
457  found = true;
458  }
459  break;
461  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
462  found = true;
463  }
464  break;
465  default:
466  break;
467  }
468  }
469  break;
470  }
471  case kMIN: {
472  CHECK(agg_expr && agg_expr->get_arg());
473  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
474  if (arg_ti.is_string() || arg_ti.is_buffer()) {
475  break;
476  }
477  auto expr_range_info =
478  getExpressionRange(agg_expr->get_arg(), query_infos, executor);
479  auto init_max = get_agg_initial_val(agg_info.agg_kind,
480  chosen_type,
481  is_group_by || float_argument_input,
482  float_argument_input ? sizeof(float) : 8);
483  switch (expr_range_info.getType()) {
486  auto double_max =
487  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
488  if (expr_range_info.getFpMax() < double_max) {
489  found = true;
490  }
491  break;
492  }
494  if (expr_range_info.getIntMax() < init_max) {
495  found = true;
496  }
497  break;
498  default:
499  break;
500  }
501  break;
502  }
503  case kMAX: {
504  CHECK(agg_expr && agg_expr->get_arg());
505  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
506  if (arg_ti.is_string() || arg_ti.is_buffer()) {
507  break;
508  }
509  auto expr_range_info =
510  getExpressionRange(agg_expr->get_arg(), query_infos, executor);
511  // NULL sentinel and init value for kMAX are identical, which results in
512  // ambiguity in detecting empty keys in presence of nulls.
513  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
514  expr_range_info.hasNulls()) {
515  break;
516  }
517  auto init_min = get_agg_initial_val(agg_info.agg_kind,
518  chosen_type,
519  is_group_by || float_argument_input,
520  float_argument_input ? sizeof(float) : 8);
521  switch (expr_range_info.getType()) {
524  auto double_min =
525  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
526  if (expr_range_info.getFpMin() > double_min) {
527  found = true;
528  }
529  break;
530  }
532  if (expr_range_info.getIntMin() > init_min) {
533  found = true;
534  }
535  break;
536  default:
537  break;
538  }
539  break;
540  }
541  default:
542  keyless = false;
543  break;
544  }
545  }
546  if (!keyless) {
547  break;
548  }
549  if (!found) {
550  ++index;
551  }
552  }
553 
554  // shouldn't use keyless for projection only
555  return {
556  keyless && found,
557  index,
558  };
559 }
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
std::vector< Analyzer::Expr * > target_exprs
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:92
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:157
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
bool g_bigint_count
Definition: sqldefs.h:75
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:153
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
Definition: sqldefs.h:76
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqldefs.h:74
Definition: sqldefs.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct ( const RelAlgExecutionUnit ra_exe_unit)

Definition at line 98 of file GroupByAndAggregate.cpp.

References g_bigint_count, get_target_info(), is_distinct_target(), and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::getColRangeInfo().

98  {
99  for (const auto& target_expr : ra_exe_unit.target_exprs) {
100  const auto agg_info = get_target_info(target_expr, g_bigint_count);
101  if (agg_info.is_agg && is_distinct_target(agg_info)) {
102  return true;
103  }
104  }
105  return false;
106 }
std::vector< Analyzer::Expr * > target_exprs
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:92
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:153

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

CountDistinctDescriptors anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const ExecutorDeviceType  device_type,
Executor executor 
)

Definition at line 561 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK, CHECK_GE, g_bigint_count, g_enable_watchdog, g_hll_precision_bits, Analyzer::AggExpr::get_arg(), get_count_distinct_sub_bitmap_count(), get_expr_range_info(), get_target_info(), Analyzer::Expr::get_type_info(), GroupByPerfectHash, hll_size_for_rate(), Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, kINT, Projection, StdSet, and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().

565  {
566  CountDistinctDescriptors count_distinct_descriptors;
567  for (const auto target_expr : ra_exe_unit.target_exprs) {
568  auto agg_info = get_target_info(target_expr, g_bigint_count);
569  if (is_distinct_target(agg_info)) {
570  CHECK(agg_info.is_agg);
571  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
572  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
573  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
574  if (arg_ti.is_bytes()) {
575  throw std::runtime_error(
576  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
577  }
578  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_buffer()) {
579  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
580  }
581  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
582  throw std::runtime_error(
583  "APPROX_COUNT_DISTINCT on geometry columns not supported");
584  }
585  if (agg_info.is_distinct && arg_ti.is_geometry()) {
586  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
587  }
588  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
589  auto arg_range_info =
590  arg_ti.is_fp() ? no_range_info
592  ra_exe_unit, query_infos, agg_expr->get_arg(), executor);
593  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
594  int64_t bitmap_sz_bits{0};
595  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
596  const auto error_rate = agg_expr->get_arg1();
597  if (error_rate) {
598  CHECK(error_rate->get_type_info().get_type() == kINT);
599  CHECK_GE(error_rate->get_constval().intval, 1);
600  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
601  } else {
602  bitmap_sz_bits = g_hll_precision_bits;
603  }
604  }
605  if (arg_range_info.isEmpty()) {
606  count_distinct_descriptors.emplace_back(
608  0,
609  64,
610  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
611  device_type,
612  1});
613  continue;
614  }
615  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
616  !(arg_ti.is_buffer() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
617  // implementation for arrays
618  count_distinct_impl_type = CountDistinctImplType::Bitmap;
619  if (agg_info.agg_kind == kCOUNT) {
620  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
621  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000LL};
622  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
623  count_distinct_impl_type = CountDistinctImplType::StdSet;
624  }
625  }
626  }
627  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
628  count_distinct_impl_type == CountDistinctImplType::StdSet &&
629  !(arg_ti.is_array() || arg_ti.is_geometry())) {
630  count_distinct_impl_type = CountDistinctImplType::Bitmap;
631  }
632 
633  if (g_enable_watchdog && !(arg_range_info.isEmpty()) &&
634  count_distinct_impl_type == CountDistinctImplType::StdSet) {
635  throw WatchdogException("Cannot use a fast path for COUNT distinct");
636  }
637  const auto sub_bitmap_count =
638  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit, device_type);
639  count_distinct_descriptors.emplace_back(
640  CountDistinctDescriptor{count_distinct_impl_type,
641  arg_range_info.min,
642  bitmap_sz_bits,
643  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
644  device_type,
645  sub_bitmap_count});
646  } else {
647  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
648  CountDistinctImplType::Invalid, 0, 0, false, device_type, 0});
649  }
650  }
651  return count_distinct_descriptors;
652 }
std::vector< Analyzer::Expr * > target_exprs
ColRangeInfo get_expr_range_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
bool g_enable_watchdog
int hll_size_for_rate(const int err_percent)
Definition: HyperLogLog.h:115
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:92
#define CHECK_GE(x, y)
Definition: Logger.h:222
Expr * get_arg() const
Definition: Analyzer.h:1250
int g_hll_precision_bits
size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
std::vector< CountDistinctDescriptor > CountDistinctDescriptors
Definition: CountDistinct.h:35
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:153
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:77
Definition: sqldefs.h:76
CountDistinctImplType
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqltypes.h:45

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash ( const ColRangeInfo col_range_info,
const int64_t  max_entry_count 
)

Definition at line 108 of file GroupByAndAggregate.cpp.

References ColRangeInfo::max, and ColRangeInfo::min.

Referenced by GroupByAndAggregate::getColRangeInfo().

109  {
110  try {
111  return static_cast<int64_t>(checked_int64_t(col_range_info.max) -
112  checked_int64_t(col_range_info.min)) >= max_entry_count;
113  } catch (...) {
114  return true;
115  }
116 }
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t

+ Here is the caller graph for this function: