OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CardinalityEstimator.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #ifndef QUERYENGINE_CARDINALITYESTIMATOR_H
24 #define QUERYENGINE_CARDINALITYESTIMATOR_H
25 
26 #include "RelAlgExecutionUnit.h"
27 
28 #include "../Analyzer/Analyzer.h"
29 #include "Logger/Logger.h"
30 
/*
 * @type CardinalityEstimationRequired
 * @brief Exception that carries a 64-bit range value. Its what() message is the
 * fixed string "CardinalityEstimationRequired".
 */
class CardinalityEstimationRequired : public std::runtime_error {
 public:
  // Stores `range` so that whoever catches the exception can read it back via range().
  CardinalityEstimationRequired(const int64_t range)
      : std::runtime_error("CardinalityEstimationRequired"), range_(range) {}

  // Returns the value supplied at construction.
  int64_t range() const { return range_; }

 private:
  const int64_t range_;
};
41 
42 namespace Analyzer {
43 
44 /*
45  * @type Estimator
46  * @brief Infrastructure to define estimators which take an expression tuple, are called
47  * for every row and need a buffer to track state.
48  */
49 class Estimator : public Analyzer::Expr {
50  public:
52 
53  // The tuple argument received by the estimator for every row.
54  virtual const std::list<std::shared_ptr<Analyzer::Expr>>& getArgument() const = 0;
55 
56  // The size of the working buffer used by the estimator.
57  virtual size_t getBufferSize() const = 0;
58 
59  // The name for the estimator runtime function which is called for every row.
60  // The runtime function will receive four arguments:
61  // uint8_t* the pointer to the beginning of the estimator buffer
62  // uint32_t the size of the estimator buffer, in bytes
63  // uint8_t* the concatenated bytes for the argument tuple
64  // uint32_t the size of the argument tuple, in bytes
65  virtual std::string getRuntimeFunctionName() const = 0;
66 
67  std::shared_ptr<Analyzer::Expr> deep_copy() const override {
68  CHECK(false);
69  return nullptr;
70  }
71 
72  bool operator==(const Expr& rhs) const override {
73  CHECK(false);
74  return false;
75  }
76 
77  std::string toString() const override {
78  CHECK(false);
79  return "";
80  }
81 };
82 
83 /*
84  * @type NDVEstimator
85  * @brief Provides an estimate for the number of distinct tuples. Not a real
86  * Analyzer expression, it's only used in RelAlgExecutionUnit synthesized
87  * for the cardinality estimation before running an user-provided query.
88  */
90  public:
91  NDVEstimator(const std::list<std::shared_ptr<Analyzer::Expr>>& expr_tuple)
92  : expr_tuple_(expr_tuple) {}
93 
94  const std::list<std::shared_ptr<Analyzer::Expr>>& getArgument() const override {
95  return expr_tuple_;
96  }
97 
98  size_t getBufferSize() const override { return 1024 * 1024; }
99 
100  std::string getRuntimeFunctionName() const override {
101  return "linear_probabilistic_count";
102  }
103 
104  private:
105  const std::list<std::shared_ptr<Analyzer::Expr>> expr_tuple_;
106 };
107 
109  public:
110  LargeNDVEstimator(const std::list<std::shared_ptr<Analyzer::Expr>>& expr_tuple)
111  : NDVEstimator(expr_tuple) {}
112 
113  size_t getBufferSize() const final;
114 };
115 
116 } // namespace Analyzer
117 
119  const RelAlgExecutionUnit& ra_exe_unit,
120  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& results_per_device);
121 
122 #endif // QUERYENGINE_CARDINALITYESTIMATOR_H
size_t getBufferSize() const final
const std::list< std::shared_ptr< Analyzer::Expr > > expr_tuple_
const std::list< std::shared_ptr< Analyzer::Expr > > & getArgument() const override
LargeNDVEstimator(const std::list< std::shared_ptr< Analyzer::Expr >> &expr_tuple)
std::shared_ptr< Analyzer::Expr > deep_copy() const override
CardinalityEstimationRequired(const int64_t range)
virtual std::string getRuntimeFunctionName() const =0
std::string getRuntimeFunctionName() const override
bool operator==(const Expr &rhs) const override
std::shared_ptr< ResultSet > ResultSetPtr
NDVEstimator(const std::list< std::shared_ptr< Analyzer::Expr >> &expr_tuple)
bool g_enable_smem_group_by true
virtual size_t getBufferSize() const =0
size_t getBufferSize() const override
#define CHECK(condition)
Definition: Logger.h:291
ResultSetPtr reduce_estimator_results(const RelAlgExecutionUnit &ra_exe_unit, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device)
Definition: sqltypes.h:72
std::string toString() const override
Execution unit for relational algebra. It&#39;s a low-level description of any relational algebra operati...
virtual const std::list< std::shared_ptr< Analyzer::Expr > > & getArgument() const =0