OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
anonymous_namespace{GroupByAndAggregate.cpp} Namespace Reference

Functions

int32_t get_agg_count (const std::vector< Analyzer::Expr * > &target_exprs)
 
bool expr_is_rowid (const Analyzer::Expr *expr, const Catalog_Namespace::Catalog &cat)
 
bool has_count_distinct (const RelAlgExecutionUnit &ra_exe_unit)
 
bool is_column_range_too_big_for_perfect_hash (const ColRangeInfo &col_range_info, const int64_t max_entry_count)
 
bool cardinality_estimate_less_than_column_range (const int64_t cardinality_estimate, const ColRangeInfo &col_range_info)
 
ColRangeInfo get_expr_range_info (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
 
KeylessInfo get_keyless_info (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool is_group_by, Executor *executor)
 
CountDistinctDescriptors init_count_distinct_descriptors (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ExecutorDeviceType device_type, Executor *executor)
 

Function Documentation

bool anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range ( const int64_t  cardinality_estimate,
const ColRangeInfo col_range_info 
)

Definition at line 118 of file GroupByAndAggregate.cpp.

References ColRangeInfo::max, and ColRangeInfo::min.

Referenced by GroupByAndAggregate::getColRangeInfo().

119  {
120  try {
121  // the cardinality estimate is the size of the baseline hash table. further penalize
122  // the baseline hash table by a factor of 2x due to overhead in computing baseline
123  // hash. This has the overall effect of penalizing baseline hash over perfect hash by
124  // 4x; i.e. if the cardinality of the filtered data is less than 25% of the entry
125  // count of the column, we use baseline hash on the filtered set
126  return checked_int64_t(cardinality_estimate) * 2 <
127  static_cast<int64_t>(checked_int64_t(col_range_info.max) -
128  checked_int64_t(col_range_info.min));
129  } catch (...) {
130  return false;
131  }
132 }
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t

+ Here is the caller graph for this function:

bool anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid ( const Analyzer::Expr expr,
const Catalog_Namespace::Catalog cat 
)

Definition at line 84 of file GroupByAndAggregate.cpp.

References cat(), CHECK_EQ, and get_column_descriptor_maybe().

Referenced by GroupByAndAggregate::getColRangeInfo().

84  {
85  const auto col = dynamic_cast<const Analyzer::ColumnVar*>(expr);
86  if (!col) {
87  return false;
88  }
89  const auto cd =
90  get_column_descriptor_maybe(col->get_column_id(), col->get_table_id(), cat);
91  if (!cd || !cd->isVirtualCol) {
92  return false;
93  }
94  CHECK_EQ("rowid", cd->columnName);
95  return true;
96 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
std::string cat(Ts &&...args)
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:221

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count ( const std::vector< Analyzer::Expr * > &  target_exprs)

Definition at line 58 of file GroupByAndAggregate.cpp.

References agg_count(), CHECK, Analyzer::Expr::get_type_info(), kAVG, and kSAMPLE.

Referenced by GroupByAndAggregate::codegen().

58  {
59  int32_t agg_count{0};
60  for (auto target_expr : target_exprs) {
61  CHECK(target_expr);
62  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
63  if (!agg_expr || agg_expr->get_aggtype() == kSAMPLE) {
64  const auto& ti = target_expr->get_type_info();
65  // TODO(pavan): or if is_geometry()
66  if (ti.is_buffer()) {
67  agg_count += 2;
68  } else if (ti.is_geometry()) {
69  agg_count += ti.get_physical_coord_cols() * 2;
70  } else {
71  ++agg_count;
72  }
73  continue;
74  }
75  if (agg_expr && agg_expr->get_aggtype() == kAVG) {
76  agg_count += 2;
77  } else {
78  ++agg_count;
79  }
80  }
81  return agg_count;
82 }
ALWAYS_INLINE uint64_t agg_count(uint64_t *agg, const int64_t)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
#define CHECK(condition)
Definition: Logger.h:203
Definition: sqldefs.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ColRangeInfo anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const Analyzer::Expr expr,
Executor executor 
)

Definition at line 134 of file GroupByAndAggregate.cpp.

References CHECK, Double, Float, getExpressionRange(), GroupByBaselineHash, GroupByPerfectHash, Integer, Invalid, NonGroupedAggregate, Projection, and RelAlgExecutionUnit::simple_quals.

Referenced by GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenPerfectHashFunction(), GroupByAndAggregate::getColRangeInfo(), GroupByAndAggregate::gpuCanHandleOrderEntries(), and init_count_distinct_descriptors().

137  {
138  if (!expr) {
139  return {QueryDescriptionType::Projection, 0, 0, 0, false};
140  }
141 
142  const auto expr_range = getExpressionRange(
143  expr, query_infos, executor, boost::make_optional(ra_exe_unit.simple_quals));
144  switch (expr_range.getType()) {
146  if (expr_range.getIntMin() > expr_range.getIntMax()) {
147  return {
148  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
149  }
151  expr_range.getIntMin(),
152  expr_range.getIntMax(),
153  expr_range.getBucket(),
154  expr_range.hasNulls()};
155  }
158  if (expr_range.getFpMin() > expr_range.getFpMax()) {
159  return {
160  QueryDescriptionType::GroupByBaselineHash, 0, -1, 0, expr_range.hasNulls()};
161  }
162  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
163  }
165  return {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
166  default:
167  CHECK(false);
168  }
169  CHECK(false);
170  return {QueryDescriptionType::NonGroupedAggregate, 0, 0, 0, false};
171 }
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
#define CHECK(condition)
Definition: Logger.h:203
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

KeylessInfo anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const bool  is_group_by,
Executor executor 
)

This function goes through all target expressions and answers two questions:

  1. Is it possible to have keyless hash?
  2. If yes to 1, then what aggregate expression should be considered to represent the key's presence, if needed (e.g., in detecting empty entries in the result set).

NOTE: Keyless hash is only valid with single-column group by at the moment.

Definition at line 402 of file GroupByAndAggregate.cpp.

References agg_arg(), CHECK, constrained_not_null(), Double, Float, g_bigint_count, get_agg_initial_val(), get_compact_type(), get_target_info(), getExpressionRange(), Integer, Invalid, is_distinct_target(), kAVG, kCOUNT, kMAX, kMIN, kSUM, RelAlgExecutionUnit::quals, takes_float_argument(), and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().

405  {
406  bool keyless{true}, found{false};
407  int32_t num_agg_expr{0};
408  int32_t index{0};
409  for (const auto target_expr : ra_exe_unit.target_exprs) {
410  const auto agg_info = get_target_info(target_expr, g_bigint_count);
411  const auto chosen_type = get_compact_type(agg_info);
412  if (agg_info.is_agg) {
413  num_agg_expr++;
414  }
415  if (!found && agg_info.is_agg && !is_distinct_target(agg_info)) {
416  auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
417  CHECK(agg_expr);
418  const auto arg_expr = agg_arg(target_expr);
419  const bool float_argument_input = takes_float_argument(agg_info);
420  switch (agg_info.agg_kind) {
421  case kAVG:
422  ++index;
423  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
424  auto expr_range_info = getExpressionRange(arg_expr, query_infos, executor);
425  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
426  expr_range_info.hasNulls()) {
427  break;
428  }
429  }
430  found = true;
431  break;
432  case kCOUNT:
433  if (arg_expr && !arg_expr->get_type_info().get_notnull()) {
434  auto expr_range_info = getExpressionRange(arg_expr, query_infos, executor);
435  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
436  expr_range_info.hasNulls()) {
437  break;
438  }
439  }
440  found = true;
441  break;
442  case kSUM: {
443  auto arg_ti = arg_expr->get_type_info();
444  if (constrained_not_null(arg_expr, ra_exe_unit.quals)) {
445  arg_ti.set_notnull(true);
446  }
447  if (!arg_ti.get_notnull()) {
448  auto expr_range_info = getExpressionRange(arg_expr, query_infos, executor);
449  if (expr_range_info.getType() != ExpressionRangeType::Invalid &&
450  !expr_range_info.hasNulls()) {
451  found = true;
452  }
453  } else {
454  auto expr_range_info = getExpressionRange(arg_expr, query_infos, executor);
455  switch (expr_range_info.getType()) {
458  if (expr_range_info.getFpMax() < 0 || expr_range_info.getFpMin() > 0) {
459  found = true;
460  }
461  break;
463  if (expr_range_info.getIntMax() < 0 || expr_range_info.getIntMin() > 0) {
464  found = true;
465  }
466  break;
467  default:
468  break;
469  }
470  }
471  break;
472  }
473  case kMIN: {
474  CHECK(agg_expr && agg_expr->get_arg());
475  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
476  if (arg_ti.is_string() || arg_ti.is_buffer()) {
477  break;
478  }
479  auto expr_range_info =
480  getExpressionRange(agg_expr->get_arg(), query_infos, executor);
481  auto init_max = get_agg_initial_val(agg_info.agg_kind,
482  chosen_type,
483  is_group_by || float_argument_input,
484  float_argument_input ? sizeof(float) : 8);
485  switch (expr_range_info.getType()) {
488  auto double_max =
489  *reinterpret_cast<const double*>(may_alias_ptr(&init_max));
490  if (expr_range_info.getFpMax() < double_max) {
491  found = true;
492  }
493  break;
494  }
496  if (expr_range_info.getIntMax() < init_max) {
497  found = true;
498  }
499  break;
500  default:
501  break;
502  }
503  break;
504  }
505  case kMAX: {
506  CHECK(agg_expr && agg_expr->get_arg());
507  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
508  if (arg_ti.is_string() || arg_ti.is_buffer()) {
509  break;
510  }
511  auto expr_range_info =
512  getExpressionRange(agg_expr->get_arg(), query_infos, executor);
513  // NULL sentinel and init value for kMAX are identical, which results in
514  // ambiguity in detecting empty keys in presence of nulls.
515  if (expr_range_info.getType() == ExpressionRangeType::Invalid ||
516  expr_range_info.hasNulls()) {
517  break;
518  }
519  auto init_min = get_agg_initial_val(agg_info.agg_kind,
520  chosen_type,
521  is_group_by || float_argument_input,
522  float_argument_input ? sizeof(float) : 8);
523  switch (expr_range_info.getType()) {
526  auto double_min =
527  *reinterpret_cast<const double*>(may_alias_ptr(&init_min));
528  if (expr_range_info.getFpMin() > double_min) {
529  found = true;
530  }
531  break;
532  }
534  if (expr_range_info.getIntMin() > init_min) {
535  found = true;
536  }
537  break;
538  default:
539  break;
540  }
541  break;
542  }
543  default:
544  keyless = false;
545  break;
546  }
547  }
548  if (!keyless) {
549  break;
550  }
551  if (!found) {
552  ++index;
553  }
554  }
555 
556  // shouldn't use keyless for projection only
557  return {
558  keyless && found,
559  index,
560  };
561 }
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
std::vector< Analyzer::Expr * > target_exprs
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:79
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:134
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
bool g_bigint_count
Definition: sqldefs.h:75
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:130
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
Definition: sqldefs.h:76
std::list< std::shared_ptr< Analyzer::Expr > > quals
#define CHECK(condition)
Definition: Logger.h:203
Definition: sqldefs.h:74
Definition: sqldefs.h:72

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct ( const RelAlgExecutionUnit ra_exe_unit)

Definition at line 98 of file GroupByAndAggregate.cpp.

References g_bigint_count, get_target_info(), is_distinct_target(), and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::getColRangeInfo().

98  {
99  for (const auto& target_expr : ra_exe_unit.target_exprs) {
100  const auto agg_info = get_target_info(target_expr, g_bigint_count);
101  if (agg_info.is_agg && is_distinct_target(agg_info)) {
102  return true;
103  }
104  }
105  return false;
106 }
std::vector< Analyzer::Expr * > target_exprs
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:79
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:130

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

CountDistinctDescriptors anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const ExecutorDeviceType  device_type,
Executor executor 
)

Definition at line 563 of file GroupByAndAggregate.cpp.

References Bitmap, CHECK, CHECK_GE, g_bigint_count, g_enable_watchdog, g_hll_precision_bits, Analyzer::AggExpr::get_arg(), get_count_distinct_sub_bitmap_count(), get_expr_range_info(), get_target_info(), Analyzer::Expr::get_type_info(), GroupByPerfectHash, hll_size_for_rate(), Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, kINT, Projection, StdSet, and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().

567  {
568  CountDistinctDescriptors count_distinct_descriptors;
569  for (const auto target_expr : ra_exe_unit.target_exprs) {
570  auto agg_info = get_target_info(target_expr, g_bigint_count);
571  if (is_distinct_target(agg_info)) {
572  CHECK(agg_info.is_agg);
573  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
574  const auto agg_expr = static_cast<const Analyzer::AggExpr*>(target_expr);
575  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
576  if (arg_ti.is_bytes()) {
577  throw std::runtime_error(
578  "Strings must be dictionary-encoded for COUNT(DISTINCT).");
579  }
580  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_buffer()) {
581  throw std::runtime_error("APPROX_COUNT_DISTINCT on arrays not supported yet");
582  }
583  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT && arg_ti.is_geometry()) {
584  throw std::runtime_error(
585  "APPROX_COUNT_DISTINCT on geometry columns not supported");
586  }
587  if (agg_info.is_distinct && arg_ti.is_geometry()) {
588  throw std::runtime_error("COUNT DISTINCT on geometry columns not supported");
589  }
590  ColRangeInfo no_range_info{QueryDescriptionType::Projection, 0, 0, 0, false};
591  auto arg_range_info =
592  arg_ti.is_fp() ? no_range_info
594  ra_exe_unit, query_infos, agg_expr->get_arg(), executor);
595  CountDistinctImplType count_distinct_impl_type{CountDistinctImplType::StdSet};
596  int64_t bitmap_sz_bits{0};
597  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
598  const auto error_rate = agg_expr->get_error_rate();
599  if (error_rate) {
600  CHECK(error_rate->get_type_info().get_type() == kINT);
601  CHECK_GE(error_rate->get_constval().intval, 1);
602  bitmap_sz_bits = hll_size_for_rate(error_rate->get_constval().smallintval);
603  } else {
604  bitmap_sz_bits = g_hll_precision_bits;
605  }
606  }
607  if (arg_range_info.isEmpty()) {
608  count_distinct_descriptors.emplace_back(
610  0,
611  64,
612  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
613  device_type,
614  1});
615  continue;
616  }
617  if (arg_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
618  !(arg_ti.is_buffer() || arg_ti.is_geometry())) { // TODO(alex): allow bitmap
619  // implementation for arrays
620  count_distinct_impl_type = CountDistinctImplType::Bitmap;
621  if (agg_info.agg_kind == kCOUNT) {
622  bitmap_sz_bits = arg_range_info.max - arg_range_info.min + 1;
623  const int64_t MAX_BITMAP_BITS{8 * 1000 * 1000 * 1000LL};
624  if (bitmap_sz_bits <= 0 || bitmap_sz_bits > MAX_BITMAP_BITS) {
625  count_distinct_impl_type = CountDistinctImplType::StdSet;
626  }
627  }
628  }
629  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT &&
630  count_distinct_impl_type == CountDistinctImplType::StdSet &&
631  !(arg_ti.is_array() || arg_ti.is_geometry())) {
632  count_distinct_impl_type = CountDistinctImplType::Bitmap;
633  }
634 
635  if (g_enable_watchdog && !(arg_range_info.isEmpty()) &&
636  count_distinct_impl_type == CountDistinctImplType::StdSet) {
637  throw WatchdogException("Cannot use a fast path for COUNT distinct");
638  }
639  const auto sub_bitmap_count =
640  get_count_distinct_sub_bitmap_count(bitmap_sz_bits, ra_exe_unit, device_type);
641  count_distinct_descriptors.emplace_back(
642  CountDistinctDescriptor{count_distinct_impl_type,
643  arg_range_info.min,
644  bitmap_sz_bits,
645  agg_info.agg_kind == kAPPROX_COUNT_DISTINCT,
646  device_type,
647  sub_bitmap_count});
648  } else {
649  count_distinct_descriptors.emplace_back(CountDistinctDescriptor{
650  CountDistinctImplType::Invalid, 0, 0, false, device_type, 0});
651  }
652  }
653  return count_distinct_descriptors;
654 }
std::vector< Analyzer::Expr * > target_exprs
ColRangeInfo get_expr_range_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
bool g_enable_watchdog
int hll_size_for_rate(const int err_percent)
Definition: HyperLogLog.h:115
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:79
#define CHECK_GE(x, y)
Definition: Logger.h:216
Expr * get_arg() const
Definition: Analyzer.h:1096
int g_hll_precision_bits
size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
std::vector< CountDistinctDescriptor > CountDistinctDescriptors
Definition: CountDistinct.h:35
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:130
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
Definition: sqldefs.h:76
CountDistinctImplType
#define CHECK(condition)
Definition: Logger.h:203
Definition: sqltypes.h:44

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash ( const ColRangeInfo col_range_info,
const int64_t  max_entry_count 
)

Definition at line 108 of file GroupByAndAggregate.cpp.

References ColRangeInfo::max, and ColRangeInfo::min.

Referenced by GroupByAndAggregate::getColRangeInfo().

109  {
110  try {
111  return static_cast<int64_t>(checked_int64_t(col_range_info.max) -
112  checked_int64_t(col_range_info.min)) >= max_entry_count;
113  } catch (...) {
114  return true;
115  }
116 }
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t

+ Here is the caller graph for this function: