OmniSciDB  2e3a973ef4
CardinalityEstimator.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #ifndef QUERYENGINE_CARDINALITYESTIMATOR_H
26 #define QUERYENGINE_CARDINALITYESTIMATOR_H
27 
28 #include "RelAlgExecutionUnit.h"
29 
30 #include "../Analyzer/Analyzer.h"
31 #include "Logger/Logger.h"
32 
/*
 * @type CardinalityEstimationRequired
 * @brief Exception that carries an integer range value. Its what() message is
 * always the fixed string "CardinalityEstimationRequired".
 * NOTE(review): presumably thrown to ask the caller to run a cardinality
 * estimation pass first -- confirm at the throw/catch sites.
 */
class CardinalityEstimationRequired : public std::runtime_error {
 public:
  // Stores `range` so that the handler can read it back through range().
  // Intentionally left non-explicit to keep the existing interface unchanged.
  CardinalityEstimationRequired(const int64_t range)
      : std::runtime_error("CardinalityEstimationRequired"), range_(range) {}

  // The range value supplied at construction.
  int64_t range() const { return range_; }

 private:
  const int64_t range_;
};
43 
44 namespace Analyzer {
45 
46 /*
47  * @type Estimator
48  * @brief Infrastructure to define estimators which take an expression tuple, are called
49  * for every row and need a buffer to track state.
50  */
51 class Estimator : public Analyzer::Expr {
52  public:
53  Estimator() : Expr(SQLTypeInfo(kINT, true)){};
54 
55  // The tuple argument received by the estimator for every row.
56  virtual const std::list<std::shared_ptr<Analyzer::Expr>>& getArgument() const = 0;
57 
58  // The size of the working buffer used by the estimator.
59  virtual size_t getBufferSize() const = 0;
60 
61  // The name for the estimator runtime function which is called for every row.
62  // The runtime function will receive four arguments:
63  // uint8_t* the pointer to the beginning of the estimator buffer
64  // uint32_t the size of the estimator buffer, in bytes
65  // uint8_t* the concatenated bytes for the argument tuple
66  // uint32_t the size of the argument tuple, in bytes
67  virtual std::string getRuntimeFunctionName() const = 0;
68 
69  std::shared_ptr<Analyzer::Expr> deep_copy() const override {
70  CHECK(false);
71  return nullptr;
72  }
73 
74  bool operator==(const Expr& rhs) const override {
75  CHECK(false);
76  return false;
77  }
78 
79  std::string toString() const override {
80  CHECK(false);
81  return "";
82  }
83 };
84 
85 /*
86  * @type NDVEstimator
87  * @brief Provides an estimate for the number of distinct tuples. Not a real
88  * Analyzer expression, it's only used in RelAlgExecutionUnit synthesized
89  * for the cardinality estimation before running a user-provided query.
90  */
92  public:
93  NDVEstimator(const std::list<std::shared_ptr<Analyzer::Expr>>& expr_tuple)
94  : expr_tuple_(expr_tuple) {}
95 
96  const std::list<std::shared_ptr<Analyzer::Expr>>& getArgument() const override {
97  return expr_tuple_;
98  }
99 
100  size_t getBufferSize() const override { return 1024 * 1024; }
101 
102  std::string getRuntimeFunctionName() const override {
103  return "linear_probabilistic_count";
104  }
105 
106  private:
107  const std::list<std::shared_ptr<Analyzer::Expr>> expr_tuple_;
108 };
109 
111  public:
112  LargeNDVEstimator(const std::list<std::shared_ptr<Analyzer::Expr>>& expr_tuple)
113  : NDVEstimator(expr_tuple) {}
114 
115  size_t getBufferSize() const final;
116 };
117 
118 } // namespace Analyzer
119 
121  const int64_t range);
122 
124  const RelAlgExecutionUnit& ra_exe_unit,
125  std::shared_ptr<Analyzer::Expr> replacement_target);
126 
128  const RelAlgExecutionUnit& ra_exe_unit,
129  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& results_per_device);
130 
131 #endif // QUERYENGINE_CARDINALITYESTIMATOR_H
const std::list< std::shared_ptr< Analyzer::Expr > > expr_tuple_
const std::list< std::shared_ptr< Analyzer::Expr > > & getArgument() const override
LargeNDVEstimator(const std::list< std::shared_ptr< Analyzer::Expr >> &expr_tuple)
std::shared_ptr< Analyzer::Expr > deep_copy() const override
CardinalityEstimationRequired(const int64_t range)
std::string getRuntimeFunctionName() const override
bool operator==(const Expr &rhs) const override
std::shared_ptr< ResultSet > ResultSetPtr
ResultSetPtr reduce_estimator_results(const RelAlgExecutionUnit &ra_exe_unit, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device)
NDVEstimator(const std::list< std::shared_ptr< Analyzer::Expr >> &expr_tuple)
RelAlgExecutionUnit create_count_all_execution_unit(const RelAlgExecutionUnit &ra_exe_unit, std::shared_ptr< Analyzer::Expr > replacement_target)
size_t getBufferSize() const override
#define CHECK(condition)
Definition: Logger.h:197
RelAlgExecutionUnit create_ndv_execution_unit(const RelAlgExecutionUnit &ra_exe_unit, const int64_t range)
Definition: sqltypes.h:47
std::string toString() const override
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...