OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CardinalityEstimator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2018 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CardinalityEstimator.h"
18 #include "ErrorHandling.h"
19 #include "ExpressionRewrite.h"
20 #include "RelAlgExecutor.h"
21 
22 int64_t g_large_ndv_threshold = 10000000;
24 
25 namespace Analyzer {
26 
28  return 1024 * 1024 * g_large_ndv_multiplier;
29 }
30 
31 } // namespace Analyzer
32 
34  CHECK(dynamic_cast<const Analyzer::NDVEstimator*>(estimator_.get()));
35  CHECK(host_estimator_buffer_);
36  auto bits_set = bitmap_set_size(host_estimator_buffer_, estimator_->getBufferSize());
37  if (bits_set == 0) {
38  // empty result set, return 1 for a groups buffer size of 1
39  return 1;
40  }
41  const auto total_bits = estimator_->getBufferSize() * 8;
42  CHECK_LE(bits_set, total_bits);
43  const auto unset_bits = total_bits - bits_set;
44  const auto ratio = static_cast<double>(unset_bits) / total_bits;
45  if (ratio == 0.) {
46  LOG(WARNING)
47  << "Failed to get a high quality cardinality estimation, falling back to "
48  "approximate group by buffer size guess.";
49  return 0;
50  }
51  return -static_cast<double>(total_bits) * log(ratio);
52 }
53 
55  const int64_t range,
56  const bool is_agg,
57  const CompilationOptions& co,
58  const ExecutionOptions& eo) {
59  const auto estimator_exe_unit = create_ndv_execution_unit(work_unit.exe_unit, range);
60  size_t one{1};
61  ColumnCacheMap column_cache;
62  try {
63  const auto estimator_result =
64  executor_->executeWorkUnit(one,
65  is_agg,
67  estimator_exe_unit,
68  co,
69  eo,
70  cat_,
71  nullptr,
72  false,
73  column_cache);
74  if (!estimator_result) {
75  return 1; // empty row set, only needs one slot
76  }
77  return estimator_result->getNDVEstimator();
78  } catch (const QueryExecutionError& e) {
80  throw std::runtime_error("Cardinality estimation query ran out of time");
81  }
83  throw std::runtime_error("Cardinality estimation query has been interrupted");
84  }
85  throw std::runtime_error("Failed to run the cardinality estimation query: " +
87  }
88  UNREACHABLE();
89  return 0;
90 }
91 
93  const int64_t range) {
94  const bool use_large_estimator =
95  range > g_large_ndv_threshold || ra_exe_unit.groupby_exprs.size() > 1;
96  return {ra_exe_unit.input_descs,
97  ra_exe_unit.input_col_descs,
98  ra_exe_unit.simple_quals,
99  ra_exe_unit.quals,
100  ra_exe_unit.join_quals,
101  {},
102  {},
103  use_large_estimator
104  ? makeExpr<Analyzer::LargeNDVEstimator>(ra_exe_unit.groupby_exprs)
105  : makeExpr<Analyzer::NDVEstimator>(ra_exe_unit.groupby_exprs),
106  SortInfo{{}, SortAlgorithm::Default, 0, 0},
107  0,
108  ra_exe_unit.query_hint,
109  ra_exe_unit.query_plan_dag,
110  ra_exe_unit.hash_table_build_plan_dag,
111  ra_exe_unit.table_id_to_node_map,
112  false,
113  ra_exe_unit.union_all,
114  ra_exe_unit.query_state};
115 }
116 
118  const RelAlgExecutionUnit& ra_exe_unit,
119  std::shared_ptr<Analyzer::Expr> replacement_target) {
120  return {ra_exe_unit.input_descs,
121  ra_exe_unit.input_col_descs,
122  ra_exe_unit.simple_quals,
123  strip_join_covered_filter_quals(ra_exe_unit.quals, ra_exe_unit.join_quals),
124  ra_exe_unit.join_quals,
125  {},
126  {replacement_target.get()},
127  nullptr,
128  SortInfo{{}, SortAlgorithm::Default, 0, 0},
129  0,
130  ra_exe_unit.query_hint,
131  ra_exe_unit.query_plan_dag,
132  ra_exe_unit.hash_table_build_plan_dag,
133  ra_exe_unit.table_id_to_node_map,
134  false,
135  ra_exe_unit.union_all,
136  ra_exe_unit.query_state};
137 }
138 
140  const RelAlgExecutionUnit& ra_exe_unit,
141  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& results_per_device) {
142  if (results_per_device.empty()) {
143  return nullptr;
144  }
145  CHECK(dynamic_cast<const Analyzer::NDVEstimator*>(ra_exe_unit.estimator.get()));
146  const auto& result_set = results_per_device.front().first;
147  CHECK(result_set);
148  auto estimator_buffer = result_set->getHostEstimatorBuffer();
149  CHECK(estimator_buffer);
150  for (size_t i = 1; i < results_per_device.size(); ++i) {
151  const auto& next_result_set = results_per_device[i].first;
152  const auto other_estimator_buffer = next_result_set->getHostEstimatorBuffer();
153  for (size_t off = 0; off < ra_exe_unit.estimator->getBufferSize(); ++off) {
154  estimator_buffer[off] |= other_estimator_buffer[off];
155  }
156  }
157  return std::move(result_set);
158 }
size_t getBufferSize() const final
bool is_agg(const Analyzer::Expr *expr)
int64_t g_large_ndv_threshold
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1163
const std::optional< bool > union_all
#define LOG(tag)
Definition: Logger.h:203
RelAlgExecutionUnit exe_unit
std::vector< InputDescriptor > input_descs
#define UNREACHABLE()
Definition: Logger.h:253
std::shared_ptr< ResultSet > ResultSetPtr
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::list< std::shared_ptr< Analyzer::Expr > > strip_join_covered_filter_quals(const std::list< std::shared_ptr< Analyzer::Expr >> &quals, const JoinQualsPerNestingLevel &join_quals)
RelAlgExecutionUnit create_count_all_execution_unit(const RelAlgExecutionUnit &ra_exe_unit, std::shared_ptr< Analyzer::Expr > replacement_target)
const JoinQualsPerNestingLevel join_quals
const Catalog_Namespace::Catalog & cat_
TableIdToNodeMap table_id_to_node_map
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1162
size_t getNDVEstimation(const WorkUnit &work_unit, const int64_t range, const bool is_agg, const CompilationOptions &co, const ExecutionOptions &eo)
const std::shared_ptr< Analyzer::Estimator > estimator
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
#define CHECK_LE(x, y)
Definition: Logger.h:220
size_t getNDVEstimator() const
std::list< std::shared_ptr< Analyzer::Expr > > quals
RelAlgExecutionUnit create_ndv_execution_unit(const RelAlgExecutionUnit &ra_exe_unit, const int64_t range)
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:209
std::vector< InputTableInfo > get_table_infos(const std::vector< InputDescriptor > &input_descs, Executor *executor)
Estimators to be used when precise cardinality isn&#39;t useful.
static std::string getErrorMessageFromCode(const int32_t error_code)
std::shared_ptr< const query_state::QueryState > query_state
ResultSetPtr reduce_estimator_results(const RelAlgExecutionUnit &ra_exe_unit, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device)
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
Executor * executor_
size_t bitmap_set_size(const int8_t *bitmap, const size_t bitmap_byte_sz)
Definition: CountDistinct.h:37
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
size_t g_large_ndv_multiplier
HashTableBuildDagMap hash_table_build_plan_dag