OmniSciDB  c07336695a
Planner.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "Planner.h"
26 #include <cassert>
27 #include <iostream>
28 #include <stdexcept>
29 #include "../Analyzer/Analyzer.h"
30 #include "../Analyzer/RangeTableEntry.h"
31 #include "../Catalog/ColumnDescriptor.h"
32 #include "gen-cpp/MapD.h"
33 
34 namespace Planner {
35 
37  table_id = rte.get_table_id();
38  for (auto cd : rte.get_column_descs()) {
39  col_list.push_back(cd->columnId);
40  }
41 }
42 
44  Plan* plan;
45  SQLStmtType stmt_type = query.get_stmt_type();
46  int result_table_id = 0;
47  std::list<int> result_col_list;
48  switch (stmt_type) {
49  case kSELECT:
50  // nothing to do for SELECT for now
51  break;
52  case kINSERT:
53  result_table_id = query.get_result_table_id();
54  result_col_list = query.get_result_col_list();
55  break;
56  case kUPDATE:
57  case kDELETE:
58  // should have been rejected by the Analyzer for now
59  assert(false);
60  break;
61  default:
62  assert(false);
63  }
64  plan = optimize_query();
65  return new RootPlan(plan,
66  stmt_type,
67  result_table_id,
68  result_col_list,
69  catalog,
70  query.get_limit(),
71  query.get_offset());
72 }
73 
75  //@TODO add support for union queries
76  if (query.get_next_query() != nullptr) {
77  throw std::runtime_error("UNION queries are not supported yet.");
78  }
79  cur_query = &query;
80  optimize_scans();
81  cur_plan = nullptr;
82  if (base_scans.size() > 2) {
83  throw std::runtime_error("More than two tables in a join not supported yet.");
84  }
85  for (auto base_scan : base_scans) {
86  if (cur_plan) {
87  std::list<std::shared_ptr<Analyzer::Expr>> shared_join_predicates;
88  for (auto join_pred : join_predicates) {
89  shared_join_predicates.emplace_back(join_pred->deep_copy());
90  }
91  std::vector<std::shared_ptr<Analyzer::TargetEntry>> join_targetlist;
92  for (const auto tle : cur_plan->get_targetlist()) {
93  join_targetlist.emplace_back(new Analyzer::TargetEntry(
94  tle->get_resname(), tle->get_expr()->deep_copy(), tle->get_unnest()));
95  }
96  cur_plan =
97  new Join(join_targetlist, shared_join_predicates, 0, cur_plan, base_scan);
98  } else {
99  cur_plan = base_scan;
100  }
101  }
102  optimize_current_query();
103  if (query.get_order_by() != nullptr) {
104  optimize_orderby();
105  }
106  return cur_plan;
107 }
108 
110  if (cur_query->get_num_aggs() > 0 || cur_query->get_having_predicate() != nullptr ||
111  !cur_query->get_group_by().empty()) {
112  optimize_aggs();
113  } else {
114  process_targetlist();
115  if (!const_predicates.empty()) {
116  // add result node to evaluate constant predicates
117  std::vector<std::shared_ptr<Analyzer::TargetEntry>> tlist;
118  int i = 1;
119  for (auto tle : cur_query->get_targetlist()) {
120  tlist.emplace_back(new Analyzer::TargetEntry(
121  tle->get_resname(),
122  makeExpr<Analyzer::Var>(
123  tle->get_expr()->get_type_info(), Analyzer::Var::kINPUT_OUTER, i++),
124  false));
125  }
126  std::list<std::shared_ptr<Analyzer::Expr>> const_quals;
127  for (auto p : const_predicates) {
128  const_quals.push_back(p->deep_copy());
129  }
130  cur_plan = new Result(tlist, {}, 0.0, cur_plan, const_quals);
131  }
132  }
133 }
134 
136  const std::vector<Analyzer::RangeTableEntry*>& rt = cur_query->get_rangetable();
137  for (auto rte : rt) {
138  base_scans.push_back(new Scan(*rte));
139  }
140  const Analyzer::Expr* where_pred = cur_query->get_where_predicate();
141  std::list<const Analyzer::Expr*> scan_predicates;
142  if (where_pred != nullptr) {
143  where_pred->group_predicates(scan_predicates, join_predicates, const_predicates);
144  }
145  for (auto p : scan_predicates) {
146  int rte_idx;
147  auto simple_pred = p->normalize_simple_predicate(rte_idx);
148  if (simple_pred != nullptr) {
149  base_scans[rte_idx]->add_simple_predicate(simple_pred);
150  } else {
151  std::set<int> rte_idx_set;
152  p->collect_rte_idx(rte_idx_set);
153  for (auto x : rte_idx_set) {
154  rte_idx = x;
155  break; // grab rte_idx out of the singleton set
156  }
157  base_scans[rte_idx]->add_predicate(p->deep_copy());
158  }
159  }
160  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
161  cur_query->get_targetlist();
162  bool (*fn_pt)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*) =
164  std::set<const Analyzer::ColumnVar*,
165  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
166  colvar_set(fn_pt);
167  for (auto tle : tlist) {
168  tle->get_expr()->collect_column_var(colvar_set, true);
169  }
170  for (auto p : join_predicates) {
171  p->collect_column_var(colvar_set, true);
172  }
173  const auto group_by = cur_query->get_group_by();
174  if (!group_by.empty()) {
175  for (auto e : group_by) {
176  e->collect_column_var(colvar_set, true);
177  }
178  }
179  const Analyzer::Expr* having_pred = cur_query->get_having_predicate();
180  if (having_pred != nullptr) {
181  having_pred->collect_column_var(colvar_set, true);
182  }
183  for (auto colvar : colvar_set) {
184  if (dynamic_cast<const Analyzer::Var*>(colvar) == nullptr) {
185  // only run omptimizations on true ColumnVars. Skip over Var exprs.
186  auto tle = std::make_shared<Analyzer::TargetEntry>("", colvar->deep_copy(), false);
187  base_scans[colvar->get_rte_idx()]->add_tle(tle);
188  }
189  };
190 }
191 
192 namespace {
193 
195  const auto agg_plan = dynamic_cast<const Planner::AggPlan*>(plan);
196  return agg_plan ? dynamic_cast<const Planner::Scan*>(plan->get_child_plan())
197  : dynamic_cast<const Planner::Scan*>(plan);
198 }
199 
200 std::vector<std::shared_ptr<Analyzer::TargetEntry>> get_join_target_list(
201  const Planner::Join* join_plan) {
202  const auto outer_plan = get_scan_child(join_plan->get_outerplan());
203  CHECK(outer_plan);
204  auto join_target_list = outer_plan->get_targetlist();
205  const auto inner_plan = get_scan_child(join_plan->get_innerplan());
206  CHECK(inner_plan);
207  const auto inner_target_list = inner_plan->get_targetlist();
208  join_target_list.insert(
209  join_target_list.end(), inner_target_list.begin(), inner_target_list.end());
210  return join_target_list;
211 }
212 
213 } // namespace
214 
216  std::vector<std::shared_ptr<Analyzer::TargetEntry>> agg_tlist;
217  const Analyzer::Expr* having_pred = cur_query->get_having_predicate();
218  bool (*fn_pt)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*) =
220  std::set<const Analyzer::ColumnVar*,
221  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
222  colvar_set(fn_pt);
223  auto is_agg = [](const Analyzer::Expr* e) -> bool {
224  return typeid(*e) == typeid(Analyzer::AggExpr);
225  };
226  std::list<const Analyzer::Expr*> aggexpr_list;
227  // need to determine if the final targetlist involves expressions over
228  // the group by columns and/or aggregates, e.g., sum(x) + sum(y).
229  // see no_expression to false if an expression is found.
230  bool no_expression = true;
231  for (auto tle : cur_query->get_targetlist()) {
232  // collect all the aggregate functions from targetlist
233  tle->get_expr()->find_expr(is_agg, aggexpr_list);
234  if (!dynamic_cast<Analyzer::ColumnVar*>(tle->get_expr()) &&
235  !dynamic_cast<Analyzer::Var*>(tle->get_expr()) &&
236  !dynamic_cast<Analyzer::AggExpr*>(tle->get_expr())) {
237  no_expression = false;
238  }
239  }
240  // collect all the group by columns in having clause
241  if (having_pred != nullptr) {
242  having_pred->find_expr(is_agg, aggexpr_list);
243  }
244 
245  std::list<std::shared_ptr<Analyzer::Expr>> groupby_list;
246  auto target_list = cur_plan->get_targetlist();
247  if (dynamic_cast<const Planner::Join*>(cur_plan)) {
248  target_list = get_join_target_list(static_cast<const Planner::Join*>(cur_plan));
249  }
250  if (!cur_query->get_group_by().empty()) {
251  for (auto e : cur_query->get_group_by()) {
252  groupby_list.push_back(e->rewrite_with_child_targetlist(target_list));
253  }
254  }
255 
256  // form the AggPlan targetlist with the group by columns followed by aggregates
257  int varno = 1;
258  for (auto e : groupby_list) {
259  std::shared_ptr<Analyzer::TargetEntry> new_tle;
260  std::shared_ptr<Analyzer::Expr> expr;
261  auto c = std::dynamic_pointer_cast<Analyzer::ColumnVar>(e);
262  if (c != nullptr) {
263  expr = makeExpr<Analyzer::Var>(c->get_type_info(),
264  c->get_table_id(),
265  c->get_column_id(),
266  c->get_rte_idx(),
268  varno);
269  } else {
270  expr = makeExpr<Analyzer::Var>(e->get_type_info(), Analyzer::Var::kGROUPBY, varno);
271  }
272  new_tle = std::make_shared<Analyzer::TargetEntry>("", expr, false);
273  agg_tlist.push_back(new_tle);
274  varno++;
275  }
276  for (auto e : aggexpr_list) {
277  std::shared_ptr<Analyzer::TargetEntry> new_tle;
278  new_tle = std::make_shared<Analyzer::TargetEntry>(
279  "", e->rewrite_with_child_targetlist(target_list), false);
280  agg_tlist.push_back(new_tle);
281  }
282 
283  std::list<std::shared_ptr<Analyzer::Expr>> having_quals;
284  if (having_pred != nullptr) {
285  std::list<const Analyzer::Expr*> preds;
286  having_pred->group_predicates(preds, preds, preds);
287  for (auto p : preds) {
288  having_quals.push_back(p->rewrite_agg_to_var(agg_tlist));
289  }
290  }
291 
292  cur_plan = new AggPlan(agg_tlist, 0.0, cur_plan, groupby_list);
293  if (no_expression && having_pred == nullptr) {
294  // in this case, no need to add a Result node on top
295  process_targetlist();
296  } else {
297  std::vector<std::shared_ptr<Analyzer::TargetEntry>> result_tlist;
298  for (auto tle : cur_query->get_targetlist()) {
299  result_tlist.emplace_back(
300  new Analyzer::TargetEntry(tle->get_resname(),
301  tle->get_expr()->rewrite_agg_to_var(agg_tlist),
302  tle->get_unnest()));
303  }
304  std::list<std::shared_ptr<Analyzer::Expr>> const_quals;
305  for (auto p : const_predicates) {
306  const_quals.push_back(p->deep_copy());
307  }
308  cur_plan = new Result(result_tlist, having_quals, 0.0, cur_plan, const_quals);
309  }
310 }
311 
313  if (query.get_order_by() == nullptr) {
314  return;
315  }
316  std::vector<std::shared_ptr<Analyzer::TargetEntry>> tlist;
317  int varno = 1;
318  for (auto tle : cur_plan->get_targetlist()) {
319  tlist.emplace_back(new Analyzer::TargetEntry(
320  tle->get_resname(),
321  makeExpr<Analyzer::Var>(
322  tle->get_expr()->get_type_info(), Analyzer::Var::kINPUT_OUTER, varno),
323  false));
324  varno++;
325  }
326  cur_plan =
327  new Sort(tlist, 0.0, cur_plan, *query.get_order_by(), query.get_is_distinct());
328 }
329 
331  std::vector<std::shared_ptr<Analyzer::TargetEntry>> final_tlist;
332  for (auto tle : query.get_targetlist()) {
333  std::shared_ptr<Analyzer::TargetEntry> new_tle;
334  if (cur_plan == nullptr) {
335  new_tle = std::make_shared<Analyzer::TargetEntry>(
336  tle->get_resname(), tle->get_expr()->deep_copy(), tle->get_unnest());
337  } else {
338  auto target_list = cur_plan->get_targetlist();
339  if (dynamic_cast<const Planner::Join*>(cur_plan)) {
340  target_list = get_join_target_list(static_cast<const Planner::Join*>(cur_plan));
341  }
342  new_tle = std::make_shared<Analyzer::TargetEntry>(
343  tle->get_resname(),
344  tle->get_expr()->rewrite_with_targetlist(target_list),
345  tle->get_unnest());
346  }
347  final_tlist.push_back(new_tle);
348  }
349  if (cur_plan == nullptr) {
350  cur_plan = new ValuesScan(final_tlist);
351  } else {
352  cur_plan->set_targetlist(final_tlist);
353  }
354 }
355 
356 void Plan::print() const {
357  std::cout << "targetlist: ";
358  for (auto t : targetlist) {
359  t->print();
360  }
361  std::cout << std::endl;
362  std::cout << "quals: ";
363  for (auto p : quals) {
364  p->print();
365  }
366  std::cout << std::endl;
367 }
368 
369 void Result::print() const {
370  std::cout << "(Result" << std::endl;
371  Plan::print();
372  child_plan->print();
373  std::cout << "const_quals: ";
374  for (auto p : const_quals) {
375  p->print();
376  }
377  std::cout << ")" << std::endl;
378 }
379 
380 void Scan::print() const {
381  std::cout << "(Scan" << std::endl;
382  Plan::print();
383  std::cout << "simple_quals: ";
384  for (auto p : simple_quals) {
385  p->print();
386  }
387  std::cout << std::endl << "table: " << table_id;
388  std::cout << " columns: ";
389  for (auto i : col_list) {
390  std::cout << i;
391  std::cout << " ";
392  }
393  std::cout << ")" << std::endl;
394 }
395 
396 void ValuesScan::print() const {
397  std::cout << "(ValuesScan" << std::endl;
398  Plan::print();
399  std::cout << ")" << std::endl;
400 }
401 
402 void Join::print() const {
403  std::cout << "(Join" << std::endl;
404  Plan::print();
405  std::cout << "Outer Plan: ";
406  get_outerplan()->print();
407  std::cout << "Inner Plan: ";
408  get_innerplan()->print();
409  std::cout << ")" << std::endl;
410 }
411 
412 void AggPlan::print() const {
413  std::cout << "(Agg" << std::endl;
414  Plan::print();
415  child_plan->print();
416  std::cout << "Group By: ";
417  for (auto e : groupby_list) {
418  e->print();
419  }
420  std::cout << ")" << std::endl;
421 }
422 
423 void Append::print() const {
424  std::cout << "(Append" << std::endl;
425  for (const auto& p : plan_list) {
426  p->print();
427  }
428  std::cout << ")" << std::endl;
429 }
430 
431 void MergeAppend::print() const {
432  std::cout << "(MergeAppend" << std::endl;
433  for (const auto& p : mergeplan_list) {
434  p->print();
435  }
436  std::cout << ")" << std::endl;
437 }
438 
439 void Sort::print() const {
440  std::cout << "(Sort" << std::endl;
441  Plan::print();
442  child_plan->print();
443  std::cout << "Order By: ";
444  for (auto o : order_entries) {
445  o.print();
446  }
447  std::cout << ")" << std::endl;
448 }
449 
450 void RootPlan::print() const {
451  std::cout << "(RootPlan ";
452  switch (stmt_type) {
453  case kSELECT:
454  std::cout << "SELECT" << std::endl;
455  break;
456  case kUPDATE:
457  std::cout << "UPDATE "
458  << "result table: " << result_table_id << " columns: ";
459  for (auto i : result_col_list) {
460  std::cout << i;
461  std::cout << " ";
462  }
463  std::cout << std::endl;
464  break;
465  case kINSERT:
466  std::cout << "INSERT "
467  << "result table: " << result_table_id << " columns: ";
468  for (auto i : result_col_list) {
469  std::cout << i;
470  std::cout << " ";
471  }
472  std::cout << std::endl;
473  break;
474  case kDELETE:
475  std::cout << "DELETE "
476  << "result table: " << result_table_id << std::endl;
477  break;
478  default:
479  break;
480  }
481  plan->print();
482  std::cout << "limit: " << limit;
483  std::cout << " offset: " << offset << ")" << std::endl;
484 }
485 } // namespace Planner
void print() const override
Definition: Planner.cpp:439
int32_t get_table_id() const
const Plan * get_child_plan() const
Definition: Planner.h:57
Definition: Analyzer.h:1406
static bool colvar_comp(const ColumnVar *l, const ColumnVar *r)
Definition: Analyzer.h:207
const std::list< const ColumnDescriptor * > & get_column_descs() const
void collect_column_var(std::set< const ColumnVar *, bool(*)(const ColumnVar *, const ColumnVar *)> &colvar_set, bool include_agg) const override
Definition: Analyzer.h:212
virtual void find_expr(bool(*f)(const Expr *), std::list< const Expr *> &expr_list) const
Definition: Analyzer.h:158
std::unique_ptr< Plan > child_plan
Definition: Planner.h:70
void c(const std::string &query_string, const ExecutorDeviceType device_type)
void process_targetlist()
Definition: Planner.cpp:330
const Plan * get_innerplan() const
Definition: Planner.h:162
void print() const override
Definition: Planner.cpp:431
virtual void print() const
Definition: Planner.cpp:450
virtual void print() const
Definition: Planner.cpp:356
void print() const override
Definition: Planner.cpp:412
void optimize_scans()
Definition: Planner.cpp:135
void optimize_aggs()
Definition: Planner.cpp:215
std::vector< std::shared_ptr< Analyzer::TargetEntry > > targetlist
Definition: Planner.h:66
std::vector< std::shared_ptr< Analyzer::TargetEntry > > get_join_target_list(const Planner::Join *join_plan)
Definition: Planner.cpp:200
RootPlan * optimize()
Definition: Planner.cpp:43
void print() const override
Definition: Planner.cpp:380
Scan(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &t, const std::list< std::shared_ptr< Analyzer::Expr >> &q, double c, Plan *p, std::list< std::shared_ptr< Analyzer::Expr >> &sq, int r, const std::list< int > &cl)
Definition: Planner.h:109
int table_id
Definition: Planner.h:133
Defines data structures for query plan nodes.
const Planner::Scan * get_scan_child(const Planner::Plan *plan)
Definition: Planner.cpp:194
Plan * optimize_query()
Definition: Planner.cpp:74
SQLStmtType
Definition: sqldefs.h:92
void print() const override
Definition: Planner.cpp:423
virtual void group_predicates(std::list< const Expr *> &scan_predicates, std::list< const Expr *> &join_predicates, std::list< const Expr *> &const_predicates) const
Definition: Analyzer.h:101
std::list< int > col_list
Definition: Planner.h:134
void print() const override
Definition: Planner.cpp:402
void optimize_orderby()
Definition: Planner.cpp:312
#define CHECK(condition)
Definition: Logger.h:187
void optimize_current_query()
Definition: Planner.cpp:109
const Plan * get_outerplan() const
Definition: Planner.h:161
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
Definition: Planner.h:132
std::list< std::shared_ptr< Analyzer::Expr > > quals
Definition: Planner.h:68
void print() const override
Definition: Planner.cpp:396
void print() const override
Definition: Planner.cpp:369
virtual void collect_column_var(std::set< const ColumnVar *, bool(*)(const ColumnVar *, const ColumnVar *)> &colvar_set, bool include_agg) const
Definition: Analyzer.h:115