OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Planner.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #ifndef PLANNER_H
25 #define PLANNER_H
26 
27 #include <cstdint>
28 #include <list>
29 #include <map>
30 #include <string>
31 #include <vector>
32 #include "../Analyzer/Analyzer.h"
33 
34 namespace Planner {
35 /*
36  * @type Plan
37  * @brief super class for all plan nodes
38  */
39 class Plan {
40  public:
41  Plan(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t,
42  const std::list<std::shared_ptr<Analyzer::Expr>>& q,
43  double c,
44  Plan* p)
45  : targetlist(t), quals(q), cost(c), child_plan(p) {}
46  Plan(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t, double c, Plan* p)
47  : targetlist(t), cost(c), child_plan(p) {}
48  Plan() : cost(0.0), child_plan(nullptr) {}
49  Plan(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t)
50  : targetlist(t), cost(0.0), child_plan(nullptr) {}
51  virtual ~Plan() {}
52  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& get_targetlist() const {
53  return targetlist;
54  }
55  const std::list<std::shared_ptr<Analyzer::Expr>>& get_quals() const { return quals; }
56  double get_cost() const { return cost; }
57  const Plan* get_child_plan() const { return child_plan.get(); }
58  void add_tle(std::shared_ptr<Analyzer::TargetEntry> tle) { targetlist.push_back(tle); }
59  void set_targetlist(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t) {
60  targetlist = t;
61  }
62  virtual void print() const;
63 
64  protected:
65  std::vector<std::shared_ptr<Analyzer::TargetEntry>>
66  targetlist; // projection of this plan node
67  std::list<std::shared_ptr<Analyzer::Expr>>
68  quals; // list of boolean expressions, implicitly conjunctive
69  double cost; // Planner assigned cost for optimization purpose
70  std::unique_ptr<Plan> child_plan; // most plan nodes have at least one child, therefore
71  // keep it in super class
72 };
73 
74 /*
75  * @type Result
76  * @brief Result node is for evaluating constant predicates, e.g., 1 < 2 or $1 = 'foo'
77  * It is also used to perform further projections and qualifications
78  * from the child_plan. One use case is to eliminate columns that
79  * are required by the child_plan but not in the final targetlist
80  * , e.g., group by columns that are not selected. Another use case is
81  * to evaluate expressions over group by columns and aggregates in the targetlist.
82  * A 3rd use case is to process the HAVING clause for filtering
83  * rows produced by AggPlan.
84  */
85 class Result : public Plan {
86  public:
87  Result(std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t,
88  const std::list<std::shared_ptr<Analyzer::Expr>>& q,
89  double c,
90  Plan* p,
91  const std::list<std::shared_ptr<Analyzer::Expr>>& cq)
92  : Plan(t, q, c, p), const_quals(cq) {}
93  const std::list<std::shared_ptr<Analyzer::Expr>>& get_constquals() const {
94  return const_quals;
95  }
96  void print() const override;
97 
98  private:
99  std::list<std::shared_ptr<Analyzer::Expr>>
100  const_quals; // constant quals to evaluate only once
101 };
102 
103 /*
104  * @type Scan
105  * @brief Scan node is for scanning a table or rowset.
106  */
107 class Scan : public Plan {
108  public:
109  Scan(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t,
110  const std::list<std::shared_ptr<Analyzer::Expr>>& q,
111  double c,
112  Plan* p,
113  std::list<std::shared_ptr<Analyzer::Expr>>& sq,
114  int r,
115  const std::list<int>& cl)
116  : Plan(t, q, c, p), simple_quals(sq), table_id(r), col_list(cl) {}
117  Scan(const Analyzer::RangeTableEntry& rte);
118  const std::list<std::shared_ptr<Analyzer::Expr>>& get_simple_quals() const {
119  return simple_quals;
120  };
121  int get_table_id() const { return table_id; }
122  const std::list<int>& get_col_list() const { return col_list; }
123  void add_predicate(std::shared_ptr<Analyzer::Expr> pred) { quals.push_back(pred); }
124  void add_simple_predicate(std::shared_ptr<Analyzer::Expr> pred) {
125  simple_quals.push_back(pred);
126  }
127  void print() const override;
128 
129  private:
130  // simple_quals consists of predicates of the form 'ColumnVar BinOper Constant'
131  // it can be used for eliminating fragments and/or partitions from the scan.
132  std::list<std::shared_ptr<Analyzer::Expr>> simple_quals;
133  int table_id; // rangetable entry index for the table to scan
134  std::list<int> col_list; // list of column ids to scan
135 };
136 
137 /*
138  * @type ValuesScan
139  * @brief ValuesScan returns a row from a list of values.
140  * It is used for processing INSERT INTO tab VALUES (...)
141  */
142 class ValuesScan : public Plan {
143  public:
144  ValuesScan(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t) : Plan(t) {}
145  void print() const override;
146 };
147 
148 /*
149  * @type Join
150  * @brief super class for all join nodes.
151  */
152 class Join : public Plan {
153  public:
154  Join(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t,
155  const std::list<std::shared_ptr<Analyzer::Expr>>& q,
156  double c,
157  Plan* p,
158  Plan* cp2)
159  : Plan(t, q, c, p), child_plan2(cp2) {}
160  void print() const override;
161  const Plan* get_outerplan() const { return child_plan.get(); }
162  const Plan* get_innerplan() const { return child_plan2.get(); }
163 
164  private:
165  std::unique_ptr<Plan> child_plan2;
166 };
167 
168 /*
169  * @type AggPlan
170  * @brief AggPlan handles aggregate functions and group by.
171  */
172 class AggPlan : public Plan {
173  public:
174  AggPlan(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t,
175  double c,
176  Plan* p,
177  const std::list<std::shared_ptr<Analyzer::Expr>>& gl)
178  : Plan(t, c, p), groupby_list(gl) {}
179  const std::list<std::shared_ptr<Analyzer::Expr>>& get_groupby_list() const {
180  return groupby_list;
181  }
183  const std::list<std::shared_ptr<Analyzer::Expr>>& new_groupby_list) {
184  groupby_list = new_groupby_list;
185  }
186  void print() const override;
187 
188  private:
189  std::list<std::shared_ptr<Analyzer::Expr>>
190  groupby_list; // list of expressions for group by. only Var nodes are allow now.
191 };
192 
193 /*
194  * @type Append
195  * @brief Append evaluates a list of query plans one by one and simply appends all rows
196  * to result set. It is for processing UNION ALL queries.
197  */
198 class Append : public Plan {
199  public:
200  Append(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t,
201  const std::list<std::shared_ptr<Analyzer::Expr>>& q,
202  double c,
203  Plan* p,
204  std::list<std::unique_ptr<Plan>>& pl)
205  : Plan(t, q, c, p), plan_list(std::move(pl)) {}
206  const std::list<std::unique_ptr<Plan>>& get_plan_list() const { return plan_list; }
207  void print() const override;
208 
209  private:
210  std::list<std::unique_ptr<Plan>> plan_list; // list of plans to union all
211 };
212 
213 /*
214  * @type MergeAppend
215  * @brief MergeAppend merges sorted streams of rows and eliminate duplicates.
216  * It is for processing UNION queries.
217  */
218 class MergeAppend : public Plan {
219  public:
220  MergeAppend(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t,
221  const std::list<std::shared_ptr<Analyzer::Expr>>& q,
222  double c,
223  Plan* p,
224  std::list<std::unique_ptr<Plan>>& pl,
225  const std::list<Analyzer::OrderEntry>& oe)
226  : Plan(t, q, c, p), mergeplan_list(std::move(pl)), order_entries(oe) {}
227  const std::list<std::unique_ptr<Plan>>& get_mergeplan_list() const {
228  return mergeplan_list;
229  }
230  const std::list<Analyzer::OrderEntry>& get_order_entries() const {
231  return order_entries;
232  }
233  void print() const override;
234 
235  private:
236  std::list<std::unique_ptr<Plan>> mergeplan_list; // list of plans to merge
237  std::list<Analyzer::OrderEntry> order_entries; // defines how the mergeplans are sorted
238 };
239 
240 /*
241  * @type Sort
242  * @brief Sort node returns rows from the child_plan sorted by selected columns.
243  * It handles the Order By clause and is used for mergejoin and for remove duplicates.
244  */
245 class Sort : public Plan {
246  public:
247  Sort(const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& t,
248  double c,
249  Plan* p,
250  const std::list<Analyzer::OrderEntry>& oe,
251  bool d)
252  : Plan(t, c, p), order_entries(oe), remove_duplicates(d) {}
253  const std::list<Analyzer::OrderEntry>& get_order_entries() const {
254  return order_entries;
255  }
256  bool get_remove_duplicates() const { return remove_duplicates; }
257  void print() const override;
258 
259  private:
260  std::list<Analyzer::OrderEntry>
261  order_entries; // defines columns to sort on and in what order
263 };
264 
265 /*
266  * @type RootPlan
267  * @brief RootPlan is the end result produced by the Planner for the Executor to execute.
268  * @TODO add support for parametrized queries.
269  */
270 class RootPlan {
271  public:
274  SQLStmtType t,
275  int r,
276  const std::list<int>& c,
277  const Catalog_Namespace::Catalog& cat,
278  int64_t l,
279  int64_t o)
280  : plan(p)
281  , stmt_type(t)
282  , result_table_id(r)
283  , result_col_list(c)
284  , catalog(cat)
285  , limit(l)
286  , offset(o)
287  , render_type("NONE")
288  , plan_dest(kCLIENT) {}
289  const Plan* get_plan() const { return plan.get(); }
290  SQLStmtType get_stmt_type() const { return stmt_type; }
291  int get_result_table_id() const { return result_table_id; }
292  const std::list<int>& get_result_col_list() const { return result_col_list; }
293  const Catalog_Namespace::Catalog& getCatalog() const { return catalog; }
294  virtual void print() const;
295  int64_t get_limit() const { return limit; }
296  int64_t get_offset() const { return offset; }
297  const std::string& get_render_type() const { return render_type; }
298  void set_render_type(std::string t) { render_type = t; }
299  Dest get_plan_dest() const { return plan_dest; }
300  void set_plan_dest(Dest d) { plan_dest = d; }
301  virtual ~RootPlan() {}
302 
303  private:
304  std::unique_ptr<Plan> plan; // query plan
305  SQLStmtType stmt_type; // SELECT, UPDATE, DELETE or INSERT
306  int result_table_id; // For UPDATE, DELETE or INSERT only: table id for the result
307  // table
308  std::list<int>
309  result_col_list; // For UPDATE and INSERT only: list of result column ids.
311  catalog; // include the current catalog here for the executor
312  int64_t limit; // limit from LIMIT clause. 0 means ALL
313  int64_t offset; // offset from OFFSET clause. 0 means no offset.
314  std::string render_type;
316 };
317 
318 /*
319  * @type Optimizer
320  * @brief This is the class for performing query optimizations.
321  */
322 class Optimizer {
323  public:
326  /*
327  * @brief optimize optimize an entire SQL DML statement
328  */
329  RootPlan* optimize();
330 
331  private:
332  /*
333  * @brief optimize_query optimize the query portion of the statement. can be a union
334  * query
335  */
336  Plan* optimize_query();
337  /*
338  * @brief optimize_current_query optimize cur_query and output plan to cur_plan.
339  * must be a non-union query.
340  */
341  void optimize_current_query();
342  void optimize_scans();
343  void optimize_aggs();
344  void optimize_orderby();
345  void process_targetlist();
346  std::vector<Scan*> base_scans;
347  std::list<const Analyzer::Expr*> join_predicates;
348  std::list<const Analyzer::Expr*> const_predicates;
353 };
354 } // namespace Planner
355 
356 #endif // PLANNER_H
void print() const override
Definition: Planner.cpp:439
void set_plan_dest(Dest d)
Definition: Planner.h:300
void add_tle(std::shared_ptr< Analyzer::TargetEntry > tle)
Definition: Planner.h:58
const Plan * get_plan() const
Definition: Planner.h:289
void add_simple_predicate(std::shared_ptr< Analyzer::Expr > pred)
Definition: Planner.h:124
const std::list< int > & get_col_list() const
Definition: Planner.h:122
const Analyzer::Query & query
Definition: Planner.h:351
bool get_remove_duplicates() const
Definition: Planner.h:256
MergeAppend(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, const std::list< std::shared_ptr< Analyzer::Expr >> &q, double c, Plan *p, std::list< std::unique_ptr< Plan >> &pl, const std::list< Analyzer::OrderEntry > &oe)
Definition: Planner.h:220
int get_result_table_id() const
Definition: Planner.h:291
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
double get_cost() const
Definition: Planner.h:56
const Plan * get_child_plan() const
Definition: Planner.h:57
std::list< Analyzer::OrderEntry > order_entries
Definition: Planner.h:237
virtual ~RootPlan()
Definition: Planner.h:301
const Catalog_Namespace::Catalog & catalog
Definition: Planner.h:311
SQLStmtType stmt_type
Definition: Planner.h:305
std::unique_ptr< Plan > child_plan
Definition: Planner.h:70
std::list< std::shared_ptr< Analyzer::Expr > > const_quals
Definition: Planner.h:100
Plan(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, const std::list< std::shared_ptr< Analyzer::Expr >> &q, double c, Plan *p)
Definition: Planner.h:41
int64_t get_offset() const
Definition: Planner.h:296
SQLStmtType get_stmt_type() const
Definition: Planner.h:290
double cost
Definition: Planner.h:69
void add_predicate(std::shared_ptr< Analyzer::Expr > pred)
Definition: Planner.h:123
void process_targetlist()
Definition: Planner.cpp:330
std::list< int > result_col_list
Definition: Planner.h:309
Join(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, const std::list< std::shared_ptr< Analyzer::Expr >> &q, double c, Plan *p, Plan *cp2)
Definition: Planner.h:154
const std::list< std::shared_ptr< Analyzer::Expr > > & get_constquals() const
Definition: Planner.h:93
const Catalog_Namespace::Catalog & getCatalog() const
Definition: Planner.h:293
const Analyzer::Query * cur_query
Definition: Planner.h:349
void print() const override
Definition: Planner.cpp:431
Plan(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t)
Definition: Planner.h:49
std::list< std::unique_ptr< Plan > > mergeplan_list
Definition: Planner.h:236
const std::list< std::unique_ptr< Plan > > & get_mergeplan_list() const
Definition: Planner.h:227
void print() const override
Definition: Planner.cpp:412
void optimize_scans()
Definition: Planner.cpp:135
Sort(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, double c, Plan *p, const std::list< Analyzer::OrderEntry > &oe, bool d)
Definition: Planner.h:247
void optimize_aggs()
Definition: Planner.cpp:215
const std::list< Analyzer::OrderEntry > & get_order_entries() const
Definition: Planner.h:253
virtual void print() const
Definition: Planner.cpp:450
std::vector< std::shared_ptr< Analyzer::TargetEntry > > targetlist
Definition: Planner.h:66
int64_t offset
Definition: Planner.h:313
RootPlan * optimize()
Definition: Planner.cpp:43
const std::vector< std::shared_ptr< Analyzer::TargetEntry > > & get_targetlist() const
Definition: Planner.h:52
void print() const override
Definition: Planner.cpp:380
const std::list< std::unique_ptr< Plan > > & get_plan_list() const
Definition: Planner.h:206
const std::list< int > & get_result_col_list() const
Definition: Planner.h:292
std::list< std::unique_ptr< Plan > > plan_list
Definition: Planner.h:210
std::unique_ptr< Plan > plan
Definition: Planner.h:304
virtual ~Plan()
Definition: Planner.h:51
const int32_t groups_buffer_size return nullptr
void set_targetlist(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t)
Definition: Planner.h:59
Scan(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, const std::list< std::shared_ptr< Analyzer::Expr >> &q, double c, Plan *p, std::list< std::shared_ptr< Analyzer::Expr >> &sq, int r, const std::list< int > &cl)
Definition: Planner.h:109
Result(std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, const std::list< std::shared_ptr< Analyzer::Expr >> &q, double c, Plan *p, const std::list< std::shared_ptr< Analyzer::Expr >> &cq)
Definition: Planner.h:87
int table_id
Definition: Planner.h:133
void set_groupby_list(const std::list< std::shared_ptr< Analyzer::Expr >> &new_groupby_list)
Definition: Planner.h:182
std::list< Analyzer::OrderEntry > order_entries
Definition: Planner.h:261
Plan * optimize_query()
Definition: Planner.cpp:74
const Plan * get_outerplan() const
Definition: Planner.h:161
const std::list< std::shared_ptr< Analyzer::Expr > > & get_quals() const
Definition: Planner.h:55
SQLStmtType
Definition: sqldefs.h:92
void print() const override
Definition: Planner.cpp:423
const std::string & get_render_type() const
Definition: Planner.h:297
int get_table_id() const
Definition: Planner.h:121
std::list< const Analyzer::Expr * > join_predicates
Definition: Planner.h:347
Optimizer(const Analyzer::Query &q, const Catalog_Namespace::Catalog &c)
Definition: Planner.h:324
Plan(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, double c, Plan *p)
Definition: Planner.h:46
const Catalog_Namespace::Catalog & catalog
Definition: Planner.h:352
const std::list< Analyzer::OrderEntry > & get_order_entries() const
Definition: Planner.h:230
std::list< std::shared_ptr< Analyzer::Expr > > groupby_list
Definition: Planner.h:190
int64_t limit
Definition: Planner.h:312
const Plan * get_innerplan() const
Definition: Planner.h:162
std::list< int > col_list
Definition: Planner.h:134
void print() const override
Definition: Planner.cpp:402
Dest get_plan_dest() const
Definition: Planner.h:299
void optimize_orderby()
Definition: Planner.cpp:312
std::string render_type
Definition: Planner.h:314
const std::list< std::shared_ptr< Analyzer::Expr > > & get_groupby_list() const
Definition: Planner.h:179
void set_render_type(std::string t)
Definition: Planner.h:298
std::unique_ptr< Plan > child_plan2
Definition: Planner.h:165
void optimize_current_query()
Definition: Planner.cpp:109
int64_t get_limit() const
Definition: Planner.h:295
ValuesScan(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t)
Definition: Planner.h:144
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
Definition: Planner.h:132
const std::list< std::shared_ptr< Analyzer::Expr > > & get_simple_quals() const
Definition: Planner.h:118
std::list< std::shared_ptr< Analyzer::Expr > > quals
Definition: Planner.h:68
virtual void print() const
Definition: Planner.cpp:356
std::list< const Analyzer::Expr * > const_predicates
Definition: Planner.h:348
RootPlan(Plan *p, SQLStmtType t, int r, const std::list< int > &c, const Catalog_Namespace::Catalog &cat, int64_t l, int64_t o)
Definition: Planner.h:273
bool remove_duplicates
Definition: Planner.h:262
Append(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, const std::list< std::shared_ptr< Analyzer::Expr >> &q, double c, Plan *p, std::list< std::unique_ptr< Plan >> &pl)
Definition: Planner.h:200
std::vector< Scan * > base_scans
Definition: Planner.h:346
AggPlan(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, double c, Plan *p, const std::list< std::shared_ptr< Analyzer::Expr >> &gl)
Definition: Planner.h:174
void print() const override
Definition: Planner.cpp:396
void print() const override
Definition: Planner.cpp:369