OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CardinalityEstimator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2018 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CardinalityEstimator.h"
18 #include "ErrorHandling.h"
19 #include "ExpressionRewrite.h"
20 #include "RelAlgExecutor.h"
21 
23  CHECK(dynamic_cast<const Analyzer::NDVEstimator*>(estimator_.get()));
24  CHECK(host_estimator_buffer_);
25  auto bits_set = bitmap_set_size(host_estimator_buffer_, estimator_->getBufferSize());
26  const auto total_bits = estimator_->getBufferSize() * 8;
27  CHECK_LE(bits_set, total_bits);
28  const auto unset_bits = total_bits - bits_set;
29  const auto ratio = static_cast<double>(unset_bits) / total_bits;
30  if (ratio == 0.) {
31  throw std::runtime_error("Failed to get a high quality cardinality estimation");
32  }
33  return -static_cast<double>(total_bits) * log(ratio);
34 }
35 
37  const bool is_agg,
38  const CompilationOptions& co,
39  const ExecutionOptions& eo) {
40  const auto estimator_exe_unit = create_ndv_execution_unit(work_unit.exe_unit);
41  size_t one{1};
42  ColumnCacheMap column_cache;
43  try {
44  const auto estimator_result =
45  executor_->executeWorkUnit(one,
46  is_agg,
48  estimator_exe_unit,
49  co,
50  eo,
51  cat_,
52  executor_->row_set_mem_owner_,
53  nullptr,
54  false,
55  column_cache);
56  if (!estimator_result) {
57  return 1;
58  }
59  return std::max(estimator_result->getNDVEstimator(), size_t(1));
60  } catch (const QueryExecutionError& e) {
62  throw std::runtime_error("Cardinality estimation query ran out of time");
63  }
65  throw std::runtime_error("Cardinality estimation query has been interrupted");
66  }
67  throw std::runtime_error("Failed to run the cardinality estimation query: " +
69  }
70  UNREACHABLE();
71  return 1;
72 }
73 
75  return {ra_exe_unit.input_descs,
76  ra_exe_unit.input_col_descs,
77  ra_exe_unit.simple_quals,
78  ra_exe_unit.quals,
79  ra_exe_unit.join_quals,
80  {},
81  {},
82  makeExpr<Analyzer::NDVEstimator>(ra_exe_unit.groupby_exprs),
84  0};
85 }
86 
88  const RelAlgExecutionUnit& ra_exe_unit,
89  std::shared_ptr<Analyzer::Expr> replacement_target) {
90  return {ra_exe_unit.input_descs,
91  ra_exe_unit.input_col_descs,
92  ra_exe_unit.simple_quals,
93  strip_join_covered_filter_quals(ra_exe_unit.quals, ra_exe_unit.join_quals),
94  ra_exe_unit.join_quals,
95  {},
96  {replacement_target.get()},
97  nullptr,
99  0};
100 }
101 
103  const RelAlgExecutionUnit& ra_exe_unit,
104  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& results_per_device) {
105  if (results_per_device.empty()) {
106  return nullptr;
107  }
108  CHECK(dynamic_cast<const Analyzer::NDVEstimator*>(ra_exe_unit.estimator.get()));
109  const auto& result_set = results_per_device.front().first;
110  CHECK(result_set);
111  auto estimator_buffer = result_set->getHostEstimatorBuffer();
112  CHECK(estimator_buffer);
113  for (size_t i = 1; i < results_per_device.size(); ++i) {
114  const auto& next_result_set = results_per_device[i].first;
115  const auto other_estimator_buffer = next_result_set->getHostEstimatorBuffer();
116  for (size_t off = 0; off < ra_exe_unit.estimator->getBufferSize(); ++off) {
117  estimator_buffer[off] |= other_estimator_buffer[off];
118  }
119  }
120  return std::move(result_set);
121 }
bool is_agg(const Analyzer::Expr *expr)
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1042
RelAlgExecutionUnit exe_unit
#define UNREACHABLE()
Definition: Logger.h:234
std::shared_ptr< ResultSet > ResultSetPtr
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
RelAlgExecutionUnit create_ndv_execution_unit(const RelAlgExecutionUnit &ra_exe_unit)
size_t getNDVEstimation(const WorkUnit &work_unit, const bool is_agg, const CompilationOptions &co, const ExecutionOptions &eo)
const std::vector< InputDescriptor > input_descs
std::list< std::shared_ptr< Analyzer::Expr > > strip_join_covered_filter_quals(const std::list< std::shared_ptr< Analyzer::Expr >> &quals, const JoinQualsPerNestingLevel &join_quals)
RelAlgExecutionUnit create_count_all_execution_unit(const RelAlgExecutionUnit &ra_exe_unit, std::shared_ptr< Analyzer::Expr > replacement_target)
CHECK(cgen_state)
const JoinQualsPerNestingLevel join_quals
const Catalog_Namespace::Catalog & cat_
static const int32_t ERR_OUT_OF_TIME
Definition: Execute.h:1041
const std::shared_ptr< Analyzer::Estimator > estimator
#define CHECK_LE(x, y)
Definition: Logger.h:201
size_t getNDVEstimator() const
std::list< std::shared_ptr< Analyzer::Expr > > quals
std::vector< InputTableInfo > get_table_infos(const std::vector< InputDescriptor > &input_descs, Executor *executor)
Estimators to be used when precise cardinality isn&#39;t useful.
static std::string getErrorMessageFromCode(const int32_t error_code)
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > > > ColumnCacheMap
ResultSetPtr reduce_estimator_results(const RelAlgExecutionUnit &ra_exe_unit, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device)
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
Executor * executor_
size_t bitmap_set_size(const int8_t *bitmap, const size_t bitmap_byte_sz)
Definition: CountDistinct.h:37
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals