OmniSciDB  2e3a973ef4
RelAlgOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgOptimizer.h"
18 #include "Logger/Logger.h"
19 #include "RexVisitor.h"
21 
22 #include <numeric>
23 #include <string>
24 #include <unordered_map>
25 
26 namespace {
27 
29  public:
30  RexProjectInputRedirector(const std::unordered_set<const RelProject*>& crt_inputs)
31  : crt_projects_(crt_inputs) {}
32 
33  RetType visitInput(const RexInput* input) const override {
34  auto source = dynamic_cast<const RelProject*>(input->getSourceNode());
35  if (!source || !crt_projects_.count(source)) {
36  return input->deepCopy();
37  }
38  auto new_source = source->getInput(0);
39  auto new_input =
40  dynamic_cast<const RexInput*>(source->getProjectAt(input->getIndex()));
41  if (!new_input) {
42  return input->deepCopy();
43  }
44  if (auto join = dynamic_cast<const RelJoin*>(new_source)) {
45  CHECK(new_input->getSourceNode() == join->getInput(0) ||
46  new_input->getSourceNode() == join->getInput(1));
47  } else {
48  CHECK_EQ(new_input->getSourceNode(), new_source);
49  }
50  return new_input->deepCopy();
51  }
52 
53  private:
54  const std::unordered_set<const RelProject*>& crt_projects_;
55 };
56 
57 class RexRebindInputsVisitor : public RexVisitor<void*> {
58  public:
59  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
60  : old_input_(old_input), new_input_(new_input) {}
61 
62  void* visitInput(const RexInput* rex_input) const override {
63  const auto old_source = rex_input->getSourceNode();
64  if (old_source == old_input_) {
65  rex_input->setSourceNode(new_input_);
66  }
67  return nullptr;
68  };
69 
70  void visitNode(const RelAlgNode* node) const {
71  if (dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelSort*>(node)) {
72  return;
73  }
74  if (auto join = dynamic_cast<const RelJoin*>(node)) {
75  if (auto condition = join->getCondition()) {
76  visit(condition);
77  }
78  return;
79  }
80  if (auto project = dynamic_cast<const RelProject*>(node)) {
81  for (size_t i = 0; i < project->size(); ++i) {
82  visit(project->getProjectAt(i));
83  }
84  return;
85  }
86  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
87  visit(filter->getCondition());
88  return;
89  }
90  CHECK(false);
91  }
92 
93  private:
96 };
97 
99  const RelProject* curr_project,
100  const std::unordered_set<const RelProject*>& projects_to_remove) {
101  auto source = curr_project->getInput(0);
102  while (auto filter = dynamic_cast<const RelFilter*>(source)) {
103  source = filter->getInput(0);
104  }
105  if (auto src_project = dynamic_cast<const RelProject*>(source)) {
106  if (projects_to_remove.count(src_project)) {
107  return get_actual_source_size(src_project, projects_to_remove);
108  }
109  }
110  return curr_project->getInput(0)->size();
111 }
112 
114  const RelProject* project,
115  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
116  du_web) {
117  if (!project->isSimple()) {
118  return false;
119  }
120  auto usrs_it = du_web.find(project);
121  CHECK(usrs_it != du_web.end());
122  for (auto usr : usrs_it->second) {
123  if (!dynamic_cast<const RelProject*>(usr) && !dynamic_cast<const RelFilter*>(usr)) {
124  return false;
125  }
126  }
127  return true;
128 }
129 
131  const RelProject* project,
132  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
133  du_web,
134  const std::unordered_set<const RelProject*>& projects_to_remove,
135  std::unordered_set<const RelProject*>& permutating_projects) {
136  auto source_size = get_actual_source_size(project, projects_to_remove);
137  if (project->size() > source_size) {
138  return false;
139  }
140 
141  if (project->size() < source_size) {
142  auto usrs_it = du_web.find(project);
143  CHECK(usrs_it != du_web.end());
144  bool guard_found = false;
145  while (usrs_it->second.size() == size_t(1)) {
146  auto only_usr = *usrs_it->second.begin();
147  if (dynamic_cast<const RelProject*>(only_usr)) {
148  guard_found = true;
149  break;
150  }
151  if (dynamic_cast<const RelAggregate*>(only_usr) ||
152  dynamic_cast<const RelSort*>(only_usr) ||
153  dynamic_cast<const RelJoin*>(only_usr) ||
154  dynamic_cast<const RelTableFunction*>(only_usr) ||
155  dynamic_cast<const RelLogicalUnion*>(only_usr)) {
156  return false;
157  }
158  CHECK(dynamic_cast<const RelFilter*>(only_usr))
159  << "only_usr: " << only_usr->toString();
160  usrs_it = du_web.find(only_usr);
161  CHECK(usrs_it != du_web.end());
162  }
163 
164  if (!guard_found) {
165  return false;
166  }
167  }
168 
169  bool identical = true;
170  for (size_t i = 0; i < project->size(); ++i) {
171  auto target = dynamic_cast<const RexInput*>(project->getProjectAt(i));
172  CHECK(target);
173  if (i != target->getIndex()) {
174  identical = false;
175  break;
176  }
177  }
178 
179  if (identical) {
180  return true;
181  }
182 
183  if (safe_to_redirect(project, du_web)) {
184  permutating_projects.insert(project);
185  return true;
186  }
187 
188  return false;
189 }
190 
192  const RelFilter* excluded_root,
193  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
194  du_web) {
195  CHECK(excluded_root);
196  auto src_project = dynamic_cast<const RelProject*>(excluded_root->getInput(0));
197  CHECK(src_project && src_project->isSimple());
198  const auto indirect_join_src = dynamic_cast<const RelJoin*>(src_project->getInput(0));
199  std::unordered_map<size_t, size_t> old_to_new_idx;
200  for (size_t i = 0; i < src_project->size(); ++i) {
201  auto rex_in = dynamic_cast<const RexInput*>(src_project->getProjectAt(i));
202  CHECK(rex_in);
203  size_t src_base = 0;
204  if (indirect_join_src != nullptr &&
205  indirect_join_src->getInput(1) == rex_in->getSourceNode()) {
206  src_base = indirect_join_src->getInput(0)->size();
207  }
208  old_to_new_idx.insert(std::make_pair(i, src_base + rex_in->getIndex()));
209  old_to_new_idx.insert(std::make_pair(i, rex_in->getIndex()));
210  }
211  CHECK(old_to_new_idx.size());
212  RexInputRenumber<false> renumber(old_to_new_idx);
213  auto usrs_it = du_web.find(excluded_root);
214  CHECK(usrs_it != du_web.end());
215  std::vector<const RelAlgNode*> work_set(usrs_it->second.begin(), usrs_it->second.end());
216  while (!work_set.empty()) {
217  auto node = work_set.back();
218  work_set.pop_back();
219  auto modified_node = const_cast<RelAlgNode*>(node);
220  if (auto filter = dynamic_cast<RelFilter*>(modified_node)) {
221  auto new_condition = renumber.visit(filter->getCondition());
222  filter->setCondition(new_condition);
223  auto usrs_it = du_web.find(filter);
224  CHECK(usrs_it != du_web.end() && usrs_it->second.size() == 1);
225  work_set.push_back(*usrs_it->second.begin());
226  continue;
227  }
228  if (auto project = dynamic_cast<RelProject*>(modified_node)) {
229  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
230  for (size_t i = 0; i < project->size(); ++i) {
231  new_exprs.push_back(renumber.visit(project->getProjectAt(i)));
232  }
233  project->setExpressions(new_exprs);
234  continue;
235  }
236  CHECK(false);
237  }
238 }
239 
240 // This function appears to redirect/remove redundant Projection input nodes(?)
242  std::shared_ptr<RelAlgNode> node,
243  const std::unordered_set<const RelProject*>& projects,
244  const std::unordered_set<const RelProject*>& permutating_projects,
245  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
246  du_web) {
247  if (dynamic_cast<RelLogicalUnion*>(node.get())) {
248  return; // UNION keeps all Projection inputs.
249  }
250  std::shared_ptr<const RelProject> src_project = nullptr;
251  for (size_t i = 0; i < node->inputCount(); ++i) {
252  if (auto project =
253  std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(i))) {
254  if (projects.count(project.get())) {
255  src_project = project;
256  break;
257  }
258  }
259  }
260  if (!src_project) {
261  return;
262  }
263  if (auto join = std::dynamic_pointer_cast<RelJoin>(node)) {
264  auto other_project =
265  src_project == node->getAndOwnInput(0)
266  ? std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(1))
267  : std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(0));
268  join->replaceInput(src_project, src_project->getAndOwnInput(0));
269  RexRebindInputsVisitor rebinder(src_project.get(), src_project->getInput(0));
270  auto usrs_it = du_web.find(join.get());
271  CHECK(usrs_it != du_web.end());
272  for (auto usr : usrs_it->second) {
273  rebinder.visitNode(usr);
274  }
275 
276  if (other_project && projects.count(other_project.get())) {
277  join->replaceInput(other_project, other_project->getAndOwnInput(0));
278  RexRebindInputsVisitor other_rebinder(other_project.get(),
279  other_project->getInput(0));
280  for (auto usr : usrs_it->second) {
281  other_rebinder.visitNode(usr);
282  }
283  }
284  return;
285  }
286  if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
287  project->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
288  RexProjectInputRedirector redirector(projects);
289  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
290  for (size_t i = 0; i < project->size(); ++i) {
291  new_exprs.push_back(redirector.visit(project->getProjectAt(i)));
292  }
293  project->setExpressions(new_exprs);
294  return;
295  }
296  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
297  const bool is_permutating_proj = permutating_projects.count(src_project.get());
298  if (is_permutating_proj || dynamic_cast<const RelJoin*>(src_project->getInput(0))) {
299  if (is_permutating_proj) {
300  propagate_rex_input_renumber(filter.get(), du_web);
301  }
302  filter->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
303  RexProjectInputRedirector redirector(projects);
304  auto new_condition = redirector.visit(filter->getCondition());
305  filter->setCondition(new_condition);
306  } else {
307  filter->replaceInput(src_project, src_project->getAndOwnInput(0));
308  }
309  return;
310  }
311  if (std::dynamic_pointer_cast<RelSort>(node)) {
312  auto const src_project_input = src_project->getInput(0);
313  if (dynamic_cast<const RelScan*>(src_project_input) ||
314  dynamic_cast<const RelLogicalValues*>(src_project_input) ||
315  dynamic_cast<const RelLogicalUnion*>(src_project_input)) {
316  return;
317  }
318  }
319  if (std::dynamic_pointer_cast<RelModify>(node)) {
320  return; // NOTE: Review this. Not sure about this.
321  }
322  CHECK(std::dynamic_pointer_cast<RelAggregate>(node) ||
323  std::dynamic_pointer_cast<RelSort>(node));
324  node->replaceInput(src_project, src_project->getAndOwnInput(0));
325 }
326 
327 void cleanup_dead_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
328  for (auto nodeIt = nodes.rbegin(); nodeIt != nodes.rend(); ++nodeIt) {
329  if (nodeIt->use_count() == 1) {
330  LOG(INFO) << "ID=" << (*nodeIt)->getId() << " " << (*nodeIt)->toString()
331  << " deleted!";
332  nodeIt->reset();
333  }
334  }
335 
336  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
337  for (auto node : nodes) {
338  if (!node) {
339  continue;
340  }
341  new_nodes.push_back(node);
342  }
343  nodes.swap(new_nodes);
344 }
345 
346 std::unordered_set<const RelProject*> get_visible_projects(const RelAlgNode* root) {
347  if (auto project = dynamic_cast<const RelProject*>(root)) {
348  return {project};
349  }
350 
351  if (dynamic_cast<const RelAggregate*>(root) || dynamic_cast<const RelScan*>(root) ||
352  dynamic_cast<const RelLogicalValues*>(root) ||
353  dynamic_cast<const RelModify*>(root)) {
354  return std::unordered_set<const RelProject*>{};
355  }
356 
357  if (auto join = dynamic_cast<const RelJoin*>(root)) {
358  auto lhs_projs = get_visible_projects(join->getInput(0));
359  auto rhs_projs = get_visible_projects(join->getInput(1));
360  lhs_projs.insert(rhs_projs.begin(), rhs_projs.end());
361  return lhs_projs;
362  }
363 
364  if (auto logical_union = dynamic_cast<const RelLogicalUnion*>(root)) {
365  auto projections = get_visible_projects(logical_union->getInput(0));
366  for (size_t i = 1; i < logical_union->inputCount(); ++i) {
367  auto next = get_visible_projects(logical_union->getInput(i));
368  projections.insert(next.begin(), next.end());
369  }
370  return projections;
371  }
372 
373  CHECK(dynamic_cast<const RelFilter*>(root) || dynamic_cast<const RelSort*>(root))
374  << "root = " << root->toString();
375  return get_visible_projects(root->getInput(0));
376 }
377 
378 // TODO(miyu): checking this at runtime is more accurate
379 bool is_distinct(const size_t input_idx, const RelAlgNode* node) {
380  if (dynamic_cast<const RelFilter*>(node) || dynamic_cast<const RelSort*>(node)) {
381  CHECK_EQ(size_t(1), node->inputCount());
382  return is_distinct(input_idx, node->getInput(0));
383  }
384  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
385  CHECK_EQ(size_t(1), node->inputCount());
386  if (aggregate->getGroupByCount() == 1 && !input_idx) {
387  return true;
388  }
389  if (input_idx < aggregate->getGroupByCount()) {
390  return is_distinct(input_idx, node->getInput(0));
391  }
392  return false;
393  }
394  if (auto project = dynamic_cast<const RelProject*>(node)) {
395  CHECK_LT(input_idx, project->size());
396  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(input_idx))) {
397  CHECK_EQ(size_t(1), node->inputCount());
398  return is_distinct(input->getIndex(), project->getInput(0));
399  }
400  return false;
401  }
402  CHECK(dynamic_cast<const RelJoin*>(node) || dynamic_cast<const RelScan*>(node));
403  return false;
404 }
405 
406 } // namespace
407 
408 std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> build_du_web(
409  const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
410  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
411  std::unordered_set<const RelAlgNode*> visited;
412  std::vector<const RelAlgNode*> work_set;
413  for (auto node : nodes) {
414  if (std::dynamic_pointer_cast<RelScan>(node) ||
415  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
416  continue;
417  }
418  work_set.push_back(node.get());
419  while (!work_set.empty()) {
420  auto walker = work_set.back();
421  work_set.pop_back();
422  if (visited.count(walker)) {
423  continue;
424  }
425  CHECK(!web.count(walker));
426  auto it_ok =
427  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
428  CHECK(it_ok.second);
429  visited.insert(walker);
430  CHECK(dynamic_cast<const RelJoin*>(walker) ||
431  dynamic_cast<const RelProject*>(walker) ||
432  dynamic_cast<const RelAggregate*>(walker) ||
433  dynamic_cast<const RelFilter*>(walker) ||
434  dynamic_cast<const RelSort*>(walker) ||
435  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
436  dynamic_cast<const RelLogicalValues*>(walker) ||
437  dynamic_cast<const RelTableFunction*>(walker) ||
438  dynamic_cast<const RelLogicalUnion*>(walker));
439  for (size_t i = 0; i < walker->inputCount(); ++i) {
440  auto src = walker->getInput(i);
441  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
442  continue;
443  }
444  if (web.empty() || !web.count(src)) {
445  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
446  }
447  web[src].insert(walker);
448  work_set.push_back(src);
449  }
450  }
451  }
452  return web;
453 }
454 
463 bool project_separates_sort(const RelProject* project, const RelAlgNode* next_node) {
464  CHECK(project);
465  if (!next_node) {
466  return false;
467  }
468 
469  auto sort = dynamic_cast<const RelSort*>(next_node);
470  if (!sort) {
471  return false;
472  }
473  if (!(project->inputCount() == 1)) {
474  return false;
475  }
476 
477  if (dynamic_cast<const RelSort*>(project->getInput(0))) {
478  return true;
479  }
480  return false;
481 }
482 
483 // For now, the only target to eliminate is restricted to project-aggregate pair between
484 // scan/sort and join
485 // TODO(miyu): allow more chance if proved safe
486 void eliminate_identical_copy(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
487  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
488  auto sink = nodes.back();
489  for (auto node : nodes) {
490  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
491  if (!aggregate || aggregate == sink ||
492  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
493  continue;
494  }
495  auto project =
496  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
497  if (project && project->size() == aggregate->size() &&
498  project->getFields() == aggregate->getFields()) {
499  CHECK_EQ(size_t(0), copies.count(aggregate));
500  copies.insert(aggregate);
501  }
502  }
503  for (auto node : nodes) {
504  if (!node->inputCount()) {
505  continue;
506  }
507  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
508  if (!copies.count(last_source)) {
509  continue;
510  }
511  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
512  CHECK(aggregate);
513  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
514  continue;
515  }
516  auto project =
517  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
518  CHECK(project);
519  CHECK_EQ(size_t(1), project->size());
520  if (!is_distinct(size_t(0), project.get())) {
521  continue;
522  }
523  auto new_source = project->getAndOwnInput(0);
524  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
525  std::dynamic_pointer_cast<const RelScan>(new_source)) {
526  node->replaceInput(last_source, new_source);
527  }
528  }
529  decltype(copies)().swap(copies);
530 
531  auto web = build_du_web(nodes);
532 
533  std::unordered_set<const RelProject*> projects;
534  std::unordered_set<const RelProject*> permutating_projects;
535  auto const visible_projs = get_visible_projects(nodes.back().get());
536  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
537  auto node = *node_it;
538  auto project = std::dynamic_pointer_cast<RelProject>(node);
539  auto next_node_it = std::next(node_it);
540  if (project && project->isSimple() &&
541  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
542  is_identical_copy(project.get(), web, projects, permutating_projects) &&
544  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
545  projects.insert(project.get());
546  }
547  }
548 
549  for (auto node : nodes) {
550  redirect_inputs_of(node, projects, permutating_projects, web);
551  }
552 
553  cleanup_dead_nodes(nodes);
554 }
555 
556 namespace {
557 
558 class RexInputCollector : public RexVisitor<std::unordered_set<RexInput>> {
559  private:
561 
562  protected:
563  using RetType = std::unordered_set<RexInput>;
564  RetType aggregateResult(const RetType& aggregate,
565  const RetType& next_result) const override {
566  RetType result(aggregate.begin(), aggregate.end());
567  result.insert(next_result.begin(), next_result.end());
568  return result;
569  }
570 
571  public:
572  RexInputCollector(const RelAlgNode* node) : node_(node) {}
573  RetType visitInput(const RexInput* input) const override {
574  RetType result;
575  if (node_->inputCount() == 1) {
576  auto src = node_->getInput(0);
577  if (auto join = dynamic_cast<const RelJoin*>(src)) {
578  CHECK_EQ(join->inputCount(), size_t(2));
579  const auto src2_in_offset = join->getInput(0)->size();
580  if (input->getSourceNode() == join->getInput(1)) {
581  result.emplace(src, input->getIndex() + src2_in_offset);
582  } else {
583  result.emplace(src, input->getIndex());
584  }
585  return result;
586  }
587  }
588  result.insert(*input);
589  return result;
590  }
591 };
592 
594  CHECK(node->size());
595  RexInputCollector collector(node);
596  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
597  auto rex_ins = collector.visit(filter->getCondition());
598  if (!rex_ins.empty()) {
599  return static_cast<size_t>(rex_ins.begin()->getIndex());
600  }
601  return pick_always_live_col_idx(filter->getInput(0));
602  } else if (auto join = dynamic_cast<const RelJoin*>(node)) {
603  auto rex_ins = collector.visit(join->getCondition());
604  if (!rex_ins.empty()) {
605  return static_cast<size_t>(rex_ins.begin()->getIndex());
606  }
607  if (auto lhs_idx = pick_always_live_col_idx(join->getInput(0))) {
608  return lhs_idx;
609  }
610  if (auto rhs_idx = pick_always_live_col_idx(join->getInput(0))) {
611  return rhs_idx + join->getInput(0)->size();
612  }
613  } else if (auto sort = dynamic_cast<const RelSort*>(node)) {
614  if (sort->collationCount()) {
615  return sort->getCollation(0).getField();
616  }
617  return pick_always_live_col_idx(sort->getInput(0));
618  }
619  return size_t(0);
620 }
621 
622 std::vector<std::unordered_set<size_t>> get_live_ins(
623  const RelAlgNode* node,
624  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs) {
625  if (!node || dynamic_cast<const RelScan*>(node)) {
626  return {};
627  }
628  RexInputCollector collector(node);
629  auto it = live_outs.find(node);
630  CHECK(it != live_outs.end());
631  auto live_out = it->second;
632  if (auto project = dynamic_cast<const RelProject*>(node)) {
633  CHECK_EQ(size_t(1), project->inputCount());
634  std::unordered_set<size_t> live_in;
635  for (const auto& idx : live_out) {
636  CHECK_LT(idx, project->size());
637  auto partial_in = collector.visit(project->getProjectAt(idx));
638  for (auto rex_in : partial_in) {
639  live_in.insert(rex_in.getIndex());
640  }
641  }
642  if (project->size() == 1 &&
643  dynamic_cast<const RexLiteral*>(project->getProjectAt(0))) {
644  CHECK(live_in.empty());
645  live_in.insert(pick_always_live_col_idx(project->getInput(0)));
646  }
647  return {live_in};
648  }
649  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
650  CHECK_EQ(size_t(1), aggregate->inputCount());
651  const auto group_key_count = static_cast<size_t>(aggregate->getGroupByCount());
652  const auto agg_expr_count = static_cast<size_t>(aggregate->getAggExprsCount());
653  std::unordered_set<size_t> live_in;
654  for (size_t i = 0; i < group_key_count; ++i) {
655  live_in.insert(i);
656  }
657  bool has_count_star_only{false};
658  for (const auto& idx : live_out) {
659  if (idx < group_key_count) {
660  continue;
661  }
662  const auto agg_idx = idx - group_key_count;
663  CHECK_LT(agg_idx, agg_expr_count);
664  const auto& cur_agg_expr = aggregate->getAggExprs()[agg_idx];
665  const auto n_operands = cur_agg_expr->size();
666  for (size_t i = 0; i < n_operands; ++i) {
667  live_in.insert(static_cast<size_t>(cur_agg_expr->getOperand(i)));
668  }
669  if (n_operands == 0) {
670  has_count_star_only = true;
671  }
672  }
673  if (has_count_star_only && !group_key_count) {
674  live_in.insert(size_t(0));
675  }
676  return {live_in};
677  }
678  if (auto join = dynamic_cast<const RelJoin*>(node)) {
679  std::unordered_set<size_t> lhs_live_ins;
680  std::unordered_set<size_t> rhs_live_ins;
681  CHECK_EQ(size_t(2), join->inputCount());
682  auto lhs = join->getInput(0);
683  auto rhs = join->getInput(1);
684  const auto rhs_idx_base = lhs->size();
685  for (const auto idx : live_out) {
686  if (idx < rhs_idx_base) {
687  lhs_live_ins.insert(idx);
688  } else {
689  rhs_live_ins.insert(idx - rhs_idx_base);
690  }
691  }
692  auto rex_ins = collector.visit(join->getCondition());
693  for (const auto& rex_in : rex_ins) {
694  const auto in_idx = static_cast<size_t>(rex_in.getIndex());
695  if (rex_in.getSourceNode() == lhs) {
696  lhs_live_ins.insert(in_idx);
697  continue;
698  }
699  if (rex_in.getSourceNode() == rhs) {
700  rhs_live_ins.insert(in_idx);
701  continue;
702  }
703  CHECK(false);
704  }
705  return {lhs_live_ins, rhs_live_ins};
706  }
707  if (auto sort = dynamic_cast<const RelSort*>(node)) {
708  CHECK_EQ(size_t(1), sort->inputCount());
709  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
710  for (size_t i = 0; i < sort->collationCount(); ++i) {
711  live_in.insert(sort->getCollation(i).getField());
712  }
713  return {live_in};
714  }
715  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
716  CHECK_EQ(size_t(1), filter->inputCount());
717  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
718  auto rex_ins = collector.visit(filter->getCondition());
719  for (const auto& rex_in : rex_ins) {
720  live_in.insert(static_cast<size_t>(rex_in.getIndex()));
721  }
722  return {live_in};
723  }
724  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
725  const auto input_count = table_func->size();
726  std::unordered_set<size_t> live_in;
727  for (size_t i = 0; i < input_count; i++) {
728  live_in.insert(i);
729  }
730 
731  std::vector<std::unordered_set<size_t>> result;
732  // Is the computed result correct in general?
733  for (size_t i = table_func->inputCount(); i > 0; i--) {
734  result.push_back(live_in);
735  }
736 
737  return result;
738  }
739  if (auto logical_union = dynamic_cast<const RelLogicalUnion*>(node)) {
740  return std::vector<std::unordered_set<size_t>>(logical_union->inputCount(), live_out);
741  }
742  return {};
743 }
744 
745 bool any_dead_col_in(const RelAlgNode* node,
746  const std::unordered_set<size_t>& live_outs) {
747  CHECK(!dynamic_cast<const RelScan*>(node));
748  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
749  for (size_t i = aggregate->getGroupByCount(); i < aggregate->size(); ++i) {
750  if (!live_outs.count(i)) {
751  return true;
752  }
753  }
754  return false;
755  }
756 
757  return node->size() > live_outs.size();
758 }
759 
760 bool does_redef_cols(const RelAlgNode* node) {
761  return dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelProject*>(node);
762 }
763 
765  public:
767  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
768  liveouts,
769  const std::unordered_set<const RelAlgNode*>& intact_nodes)
770  : liveouts_(liveouts), intact_nodes_(intact_nodes) {}
771 
772  bool hasAllSrcReady(const RelAlgNode* node) const {
773  for (size_t i = 0; i < node->inputCount(); ++i) {
774  auto src = node->getInput(i);
775  if (!dynamic_cast<const RelScan*>(src) && liveouts_.find(src) == liveouts_.end() &&
776  !intact_nodes_.count(src)) {
777  return false;
778  }
779  }
780  return true;
781  }
782 
783  private:
784  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
786  const std::unordered_set<const RelAlgNode*>& intact_nodes_;
787 };
788 
790  const RelAlgNode* node,
791  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
792  new_liveouts,
793  const std::unordered_set<size_t>& old_liveouts,
794  const std::unordered_set<const RelAlgNode*>& intact_nodes,
795  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
796  auto live_fields = old_liveouts;
797  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
798  for (size_t i = 0; i < aggregate->getGroupByCount(); ++i) {
799  live_fields.insert(i);
800  }
801  }
802  auto it_ok =
803  new_liveouts.insert(std::make_pair(node, std::unordered_map<size_t, size_t>{}));
804  CHECK(it_ok.second);
805  auto& new_indices = it_ok.first->second;
806  if (intact_nodes.count(node)) {
807  for (size_t i = 0, e = node->size(); i < e; ++i) {
808  new_indices.insert(std::make_pair(i, i));
809  }
810  return;
811  }
812  if (does_redef_cols(node)) {
813  auto node_sz_it = orig_node_sizes.find(node);
814  CHECK(node_sz_it != orig_node_sizes.end());
815  const auto node_size = node_sz_it->second;
816  CHECK_GT(node_size, live_fields.size());
817  LOG(INFO) << node->toString() << " eliminated " << node_size - live_fields.size()
818  << " columns.";
819  std::vector<size_t> ordered_indices(live_fields.begin(), live_fields.end());
820  std::sort(ordered_indices.begin(), ordered_indices.end());
821  for (size_t i = 0; i < ordered_indices.size(); ++i) {
822  new_indices.insert(std::make_pair(ordered_indices[i], i));
823  }
824  return;
825  }
826  std::vector<size_t> ordered_indices;
827  for (size_t i = 0, old_base = 0, new_base = 0; i < node->inputCount(); ++i) {
828  auto src = node->getInput(i);
829  auto src_renum_it = new_liveouts.find(src);
830  if (src_renum_it != new_liveouts.end()) {
831  for (auto m : src_renum_it->second) {
832  new_indices.insert(std::make_pair(old_base + m.first, new_base + m.second));
833  }
834  new_base += src_renum_it->second.size();
835  } else if (dynamic_cast<const RelScan*>(src) || intact_nodes.count(src)) {
836  for (size_t i = 0; i < src->size(); ++i) {
837  new_indices.insert(std::make_pair(old_base + i, new_base + i));
838  }
839  new_base += src->size();
840  } else {
841  CHECK(false);
842  }
843  auto src_sz_it = orig_node_sizes.find(src);
844  CHECK(src_sz_it != orig_node_sizes.end());
845  old_base += src_sz_it->second;
846  }
847 }
848 
850  public:
852  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
853  new_numbering)
854  : node_to_input_renum_(new_numbering) {}
855  RetType visitInput(const RexInput* input) const override {
856  auto source = input->getSourceNode();
857  auto node_it = node_to_input_renum_.find(source);
858  if (node_it != node_to_input_renum_.end()) {
859  auto old_to_new_num = node_it->second;
860  auto renum_it = old_to_new_num.find(input->getIndex());
861  CHECK(renum_it != old_to_new_num.end());
862  return boost::make_unique<RexInput>(source, renum_it->second);
863  }
864  return input->deepCopy();
865  }
866 
867  private:
868  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
870 };
871 
872 std::vector<std::unique_ptr<const RexAgg>> renumber_rex_aggs(
873  std::vector<std::unique_ptr<const RexAgg>>& agg_exprs,
874  const std::unordered_map<size_t, size_t>& new_numbering) {
875  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
876  for (auto& expr : agg_exprs) {
877  if (expr->size() >= 1) {
878  auto old_idx = expr->getOperand(0);
879  auto idx_it = new_numbering.find(old_idx);
880  if (idx_it != new_numbering.end()) {
881  std::vector<size_t> operands;
882  operands.push_back(idx_it->second);
883  if (expr->size() == 2) {
884  operands.push_back(expr->getOperand(1));
885  }
886  new_exprs.push_back(boost::make_unique<RexAgg>(
887  expr->getKind(), expr->isDistinct(), expr->getType(), operands));
888  continue;
889  }
890  }
891  new_exprs.push_back(std::move(expr));
892  }
893  return new_exprs;
894 }
895 
897  const std::unordered_map<size_t, size_t>& new_numbering) {
898  auto field_idx = old_field.getField();
899  auto idx_it = new_numbering.find(field_idx);
900  if (idx_it != new_numbering.end()) {
901  field_idx = idx_it->second;
902  }
903  return SortField(field_idx, old_field.getSortDir(), old_field.getNullsPosition());
904 }
905 
906 std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> mark_live_columns(
907  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
908  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> live_outs;
909  std::vector<const RelAlgNode*> work_set;
910  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
911  auto node = node_it->get();
912  if (dynamic_cast<const RelScan*>(node) || live_outs.count(node) ||
913  dynamic_cast<const RelModify*>(node) ||
914  dynamic_cast<const RelTableFunction*>(node)) {
915  continue;
916  }
917  std::vector<size_t> all_live(node->size());
918  std::iota(all_live.begin(), all_live.end(), size_t(0));
919  live_outs.insert(std::make_pair(
920  node, std::unordered_set<size_t>(all_live.begin(), all_live.end())));
921 
922  work_set.push_back(node);
923  while (!work_set.empty()) {
924  auto walker = work_set.back();
925  work_set.pop_back();
926  CHECK(!dynamic_cast<const RelScan*>(walker));
927  CHECK(live_outs.count(walker));
928  auto live_ins = get_live_ins(walker, live_outs);
929  CHECK_EQ(live_ins.size(), walker->inputCount());
930  for (size_t i = 0; i < walker->inputCount(); ++i) {
931  auto src = walker->getInput(i);
932  if (dynamic_cast<const RelScan*>(src) ||
933  dynamic_cast<const RelTableFunction*>(src) || live_ins[i].empty()) {
934  continue;
935  }
936  if (!live_outs.count(src)) {
937  live_outs.insert(std::make_pair(src, std::unordered_set<size_t>{}));
938  }
939  auto src_it = live_outs.find(src);
940  CHECK(src_it != live_outs.end());
941  auto& live_out = src_it->second;
942  bool changed = false;
943  if (!live_out.empty()) {
944  live_out.insert(live_ins[i].begin(), live_ins[i].end());
945  changed = true;
946  } else {
947  for (int idx : live_ins[i]) {
948  changed |= live_out.insert(idx).second;
949  }
950  }
951  if (changed) {
952  work_set.push_back(src);
953  }
954  }
955  }
956  }
957  return live_outs;
958 }
959 
960 std::string get_field_name(const RelAlgNode* node, size_t index) {
961  CHECK_LT(index, node->size());
962  if (auto scan = dynamic_cast<const RelScan*>(node)) {
963  return scan->getFieldName(index);
964  }
965  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
966  CHECK_EQ(aggregate->size(), aggregate->getFields().size());
967  return aggregate->getFieldName(index);
968  }
969  if (auto join = dynamic_cast<const RelJoin*>(node)) {
970  const auto lhs_size = join->getInput(0)->size();
971  if (index < lhs_size) {
972  return get_field_name(join->getInput(0), index);
973  }
974  return get_field_name(join->getInput(1), index - lhs_size);
975  }
976  if (auto project = dynamic_cast<const RelProject*>(node)) {
977  return project->getFieldName(index);
978  }
979  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
980  return get_field_name(node->getInput(0), index);
981 }
982 
984  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
985  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& liveouts,
986  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
987  du_web) {
988  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
989  for (auto node : nodes) {
990  new_nodes.push_back(node);
991  if (!std::dynamic_pointer_cast<RelFilter>(node)) {
992  continue;
993  }
994  const auto filter = node.get();
995  auto liveout_it = liveouts.find(filter);
996  CHECK(liveout_it != liveouts.end());
997  auto& outs = liveout_it->second;
998  if (!any_dead_col_in(filter, outs)) {
999  continue;
1000  }
1001  auto usrs_it = du_web.find(filter);
1002  CHECK(usrs_it != du_web.end());
1003  auto& usrs = usrs_it->second;
1004  if (usrs.size() != 1 || does_redef_cols(*usrs.begin())) {
1005  continue;
1006  }
1007  auto only_usr = const_cast<RelAlgNode*>(*usrs.begin());
1008 
1009  std::vector<std::unique_ptr<const RexScalar>> exprs;
1010  std::vector<std::string> fields;
1011  for (size_t i = 0; i < filter->size(); ++i) {
1012  exprs.push_back(boost::make_unique<RexInput>(filter, i));
1013  fields.push_back(get_field_name(filter, i));
1014  }
1015  auto project_owner = std::make_shared<RelProject>(exprs, fields, node);
1016  auto project = project_owner.get();
1017 
1018  only_usr->replaceInput(node, project_owner);
1019  if (dynamic_cast<const RelJoin*>(only_usr)) {
1020  RexRebindInputsVisitor visitor(filter, project);
1021  for (auto usr : du_web[only_usr]) {
1022  visitor.visitNode(usr);
1023  }
1024  }
1025 
1026  liveouts.insert(std::make_pair(project, outs));
1027 
1028  usrs.clear();
1029  usrs.insert(project);
1030  du_web.insert(
1031  std::make_pair(project, std::unordered_set<const RelAlgNode*>{only_usr}));
1032 
1033  new_nodes.push_back(project_owner);
1034  }
1035  if (new_nodes.size() > nodes.size()) {
1036  nodes.swap(new_nodes);
1037  }
1038 }
1039 
1040 std::pair<std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>,
1041  std::vector<const RelAlgNode*>>
1043  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs,
1044  const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1045  const std::unordered_set<const RelAlgNode*>& intact_nodes,
1046  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1047  du_web,
1048  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1049  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1050  liveouts_renumbering;
1051  std::vector<const RelAlgNode*> ready_nodes;
1052  AvailabilityChecker checker(liveouts_renumbering, intact_nodes);
1053  for (auto node : nodes) {
1054  // Ignore empty live_out due to some invalid node
1055  if (!does_redef_cols(node.get()) || intact_nodes.count(node.get())) {
1056  continue;
1057  }
1058  auto live_pair = live_outs.find(node.get());
1059  CHECK(live_pair != live_outs.end());
1060  auto old_live_outs = live_pair->second;
1062  node.get(), liveouts_renumbering, old_live_outs, intact_nodes, orig_node_sizes);
1063  if (auto aggregate = std::dynamic_pointer_cast<RelAggregate>(node)) {
1064  auto old_exprs = aggregate->getAggExprsAndRelease();
1065  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
1066  auto key_name_it = aggregate->getFields().begin();
1067  std::vector<std::string> new_fields(key_name_it,
1068  key_name_it + aggregate->getGroupByCount());
1069  for (size_t i = aggregate->getGroupByCount(), j = 0;
1070  i < aggregate->getFields().size() && j < old_exprs.size();
1071  ++i, ++j) {
1072  if (old_live_outs.count(i)) {
1073  new_exprs.push_back(std::move(old_exprs[j]));
1074  new_fields.push_back(aggregate->getFieldName(i));
1075  }
1076  }
1077  aggregate->setAggExprs(new_exprs);
1078  aggregate->setFields(new_fields);
1079  } else if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
1080  auto old_exprs = project->getExpressionsAndRelease();
1081  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1082  std::vector<std::string> new_fields;
1083  for (size_t i = 0; i < old_exprs.size(); ++i) {
1084  if (old_live_outs.count(i)) {
1085  new_exprs.push_back(std::move(old_exprs[i]));
1086  new_fields.push_back(project->getFieldName(i));
1087  }
1088  }
1089  project->setExpressions(new_exprs);
1090  project->setFields(new_fields);
1091  } else {
1092  CHECK(false);
1093  }
1094  auto usrs_it = du_web.find(node.get());
1095  CHECK(usrs_it != du_web.end());
1096  for (auto usr : usrs_it->second) {
1097  if (checker.hasAllSrcReady(usr)) {
1098  ready_nodes.push_back(usr);
1099  }
1100  }
1101  }
1102  return {liveouts_renumbering, ready_nodes};
1103 }
1104 
1106  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
1107  liveout_renumbering,
1108  const std::vector<const RelAlgNode*>& ready_nodes,
1109  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& old_liveouts,
1110  const std::unordered_set<const RelAlgNode*>& intact_nodes,
1111  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1112  du_web,
1113  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1114  RexInputRenumberVisitor renumberer(liveout_renumbering);
1115  AvailabilityChecker checker(liveout_renumbering, intact_nodes);
1116  std::deque<const RelAlgNode*> work_set(ready_nodes.begin(), ready_nodes.end());
1117  while (!work_set.empty()) {
1118  auto walker = work_set.front();
1119  work_set.pop_front();
1120  CHECK(!dynamic_cast<const RelScan*>(walker));
1121  auto node = const_cast<RelAlgNode*>(walker);
1122  if (auto project = dynamic_cast<RelProject*>(node)) {
1123  auto old_exprs = project->getExpressionsAndRelease();
1124  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1125  for (auto& expr : old_exprs) {
1126  new_exprs.push_back(renumberer.visit(expr.get()));
1127  }
1128  project->setExpressions(new_exprs);
1129  } else if (auto aggregate = dynamic_cast<RelAggregate*>(node)) {
1130  auto src_it = liveout_renumbering.find(node->getInput(0));
1131  CHECK(src_it != liveout_renumbering.end());
1132  auto old_exprs = aggregate->getAggExprsAndRelease();
1133  auto new_exprs = renumber_rex_aggs(old_exprs, src_it->second);
1134  aggregate->setAggExprs(new_exprs);
1135  } else if (auto join = dynamic_cast<RelJoin*>(node)) {
1136  auto new_condition = renumberer.visit(join->getCondition());
1137  join->setCondition(new_condition);
1138  } else if (auto filter = dynamic_cast<RelFilter*>(node)) {
1139  auto new_condition = renumberer.visit(filter->getCondition());
1140  filter->setCondition(new_condition);
1141  } else if (auto sort = dynamic_cast<RelSort*>(node)) {
1142  auto src_it = liveout_renumbering.find(node->getInput(0));
1143  CHECK(src_it != liveout_renumbering.end());
1144  std::vector<SortField> new_collations;
1145  for (size_t i = 0; i < sort->collationCount(); ++i) {
1146  new_collations.push_back(
1147  renumber_sort_field(sort->getCollation(i), src_it->second));
1148  }
1149  sort->setCollation(std::move(new_collations));
1150  } else if (!dynamic_cast<RelLogicalUnion*>(node)) {
1151  LOG(FATAL) << "Unhandled node type: " << node->toString();
1152  }
1153 
1154  // Ignore empty live_out due to some invalid node
1155  if (does_redef_cols(node) || intact_nodes.count(node)) {
1156  continue;
1157  }
1158  auto live_pair = old_liveouts.find(node);
1159  CHECK(live_pair != old_liveouts.end());
1160  auto live_out = live_pair->second;
1162  node, liveout_renumbering, live_out, intact_nodes, orig_node_sizes);
1163  auto usrs_it = du_web.find(walker);
1164  CHECK(usrs_it != du_web.end());
1165  for (auto usr : usrs_it->second) {
1166  if (checker.hasAllSrcReady(usr)) {
1167  work_set.push_back(usr);
1168  }
1169  }
1170  }
1171 }
1172 
1173 } // namespace
1174 
1175 void eliminate_dead_columns(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1176  if (nodes.empty()) {
1177  return;
1178  }
1179  auto root = nodes.back().get();
1180  if (!root) {
1181  return;
1182  }
1183  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1184  // Mark
1185  auto old_liveouts = mark_live_columns(nodes);
1186  std::unordered_set<const RelAlgNode*> intact_nodes;
1187  bool has_dead_cols = false;
1188  for (auto live_pair : old_liveouts) {
1189  auto node = live_pair.first;
1190  const auto& outs = live_pair.second;
1191  if (outs.empty()) {
1192  LOG(WARNING) << "RA node with no used column: " << node->toString();
1193  // Ignore empty live_out due to some invalid node
1194  intact_nodes.insert(node);
1195  }
1196  if (any_dead_col_in(node, outs)) {
1197  has_dead_cols = true;
1198  } else {
1199  intact_nodes.insert(node);
1200  }
1201  }
1202  if (!has_dead_cols) {
1203  return;
1204  }
1205  auto web = build_du_web(nodes);
1206  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1207 
1208  for (auto node : nodes) {
1209  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1210  continue;
1211  }
1212  bool intact = true;
1213  for (size_t i = 0; i < node->inputCount(); ++i) {
1214  auto source = node->getInput(i);
1215  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1216  intact = false;
1217  break;
1218  }
1219  }
1220  if (intact) {
1221  intact_nodes.insert(node.get());
1222  }
1223  }
1224 
1225  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1226  for (auto node : nodes) {
1227  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1228  }
1229  // Sweep
1230  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1231  liveout_renumbering;
1232  std::vector<const RelAlgNode*> ready_nodes;
1233  std::tie(liveout_renumbering, ready_nodes) =
1234  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1235  // Propagate
1237  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1238 }
1239 
1240 void eliminate_dead_subqueries(std::vector<std::shared_ptr<RexSubQuery>>& subqueries,
1241  RelAlgNode const* root) {
1242  if (!subqueries.empty()) {
1243  auto live_ids = RexSubQueryIdCollector::getLiveRexSubQueryIds(root);
1244  auto sort_live_ids_first = [&live_ids](auto& a, auto& b) {
1245  return live_ids.count(a->getId()) && !live_ids.count(b->getId());
1246  };
1247  std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1248  size_t n_dead_subqueries;
1249  if (live_ids.count(subqueries.front()->getId())) {
1250  auto first_dead_itr = std::upper_bound(subqueries.cbegin(),
1251  subqueries.cend(),
1252  subqueries.front(),
1253  sort_live_ids_first);
1254  n_dead_subqueries = subqueries.cend() - first_dead_itr;
1255  } else {
1256  n_dead_subqueries = subqueries.size();
1257  }
1258  if (n_dead_subqueries) {
1259  VLOG(1) << "Eliminating " << n_dead_subqueries
1260  << (n_dead_subqueries == 1 ? " subquery." : " subqueries.");
1261  subqueries.resize(subqueries.size() - n_dead_subqueries);
1262  subqueries.shrink_to_fit();
1263  }
1264  }
1265 }
1266 
1267 namespace {
1268 
1270  public:
1271  RexInputSinker(const std::unordered_map<size_t, size_t>& old_to_new_idx,
1272  const RelAlgNode* new_src)
1273  : old_to_new_in_idx_(old_to_new_idx), target_(new_src) {}
1274 
1275  RetType visitInput(const RexInput* input) const override {
1276  CHECK_EQ(target_->inputCount(), size_t(1));
1277  CHECK_EQ(target_->getInput(0), input->getSourceNode());
1278  auto idx_it = old_to_new_in_idx_.find(input->getIndex());
1279  CHECK(idx_it != old_to_new_in_idx_.end());
1280  return boost::make_unique<RexInput>(target_, idx_it->second);
1281  }
1282 
1283  private:
1284  const std::unordered_map<size_t, size_t>& old_to_new_in_idx_;
1286 };
1287 
1289  public:
1290  SubConditionReplacer(const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>&
1291  idx_to_sub_condition)
1292  : idx_to_subcond_(idx_to_sub_condition) {}
1293  RetType visitInput(const RexInput* input) const override {
1294  auto subcond_it = idx_to_subcond_.find(input->getIndex());
1295  if (subcond_it != idx_to_subcond_.end()) {
1296  return RexDeepCopyVisitor::visit(subcond_it->second.get());
1297  }
1298  return RexDeepCopyVisitor::visitInput(input);
1299  }
1300 
1301  private:
1302  const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>& idx_to_subcond_;
1303 };
1304 
1305 } // namespace
1306 
1308  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1309  auto web = build_du_web(nodes);
1310  auto liveouts = mark_live_columns(nodes);
1311  for (auto node : nodes) {
1312  auto project = std::dynamic_pointer_cast<RelProject>(node);
1313  // TODO(miyu): relax RelScan limitation
1314  if (!project || project->isSimple() ||
1315  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1316  continue;
1317  }
1318  auto usrs_it = web.find(project.get());
1319  CHECK(usrs_it != web.end());
1320  auto& usrs = usrs_it->second;
1321  if (usrs.size() != 1) {
1322  continue;
1323  }
1324  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1325  if (!join) {
1326  continue;
1327  }
1328  auto outs_it = liveouts.find(join);
1329  CHECK(outs_it != liveouts.end());
1330  std::unordered_map<size_t, size_t> in_to_out_index;
1331  std::unordered_set<size_t> boolean_expr_indicies;
1332  bool discarded = false;
1333  for (size_t i = 0; i < project->size(); ++i) {
1334  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1335  if (oper && oper->getType().get_type() == kBOOLEAN) {
1336  boolean_expr_indicies.insert(i);
1337  } else {
1338  // TODO(miyu): relax?
1339  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1340  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1341  } else {
1342  discarded = true;
1343  }
1344  }
1345  }
1346  if (discarded || boolean_expr_indicies.empty()) {
1347  continue;
1348  }
1349  const size_t index_base =
1350  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1351  for (auto i : boolean_expr_indicies) {
1352  auto join_idx = index_base + i;
1353  if (outs_it->second.count(join_idx)) {
1354  discarded = true;
1355  break;
1356  }
1357  }
1358  if (discarded) {
1359  continue;
1360  }
1361  RexInputCollector collector(project.get());
1362  std::vector<size_t> unloaded_input_indices;
1363  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1364  // Given all are dead right after join, safe to sink
1365  for (auto i : boolean_expr_indicies) {
1366  auto rex_ins = collector.visit(project->getProjectAt(i));
1367  for (auto& in : rex_ins) {
1368  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1369  if (!in_to_out_index.count(in.getIndex())) {
1370  auto curr_out_index = project->size() + unloaded_input_indices.size();
1371  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1372  unloaded_input_indices.push_back(in.getIndex());
1373  }
1374  RexInputSinker sinker(in_to_out_index, project.get());
1375  in_idx_to_new_subcond.insert(
1376  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1377  }
1378  }
1379  if (in_idx_to_new_subcond.empty()) {
1380  continue;
1381  }
1382  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1383  for (size_t i = 0; i < project->size(); ++i) {
1384  if (boolean_expr_indicies.count(i)) {
1385  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1386  } else {
1387  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1388  CHECK(rex_input != nullptr);
1389  new_projections.push_back(rex_input->deepCopy());
1390  }
1391  }
1392  for (auto i : unloaded_input_indices) {
1393  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1394  }
1395  project->setExpressions(new_projections);
1396 
1397  SubConditionReplacer replacer(in_idx_to_new_subcond);
1398  auto new_condition = replacer.visit(join->getCondition());
1399  join->setCondition(new_condition);
1400  }
1401 }
1402 
1403 namespace {
1404 
1406  public:
1407  RexInputRedirector(const RelAlgNode* old_src, const RelAlgNode* new_src)
1408  : old_src_(old_src), new_src_(new_src) {}
1409 
1410  RetType visitInput(const RexInput* input) const override {
1411  CHECK_EQ(old_src_, input->getSourceNode());
1412  CHECK_NE(old_src_, new_src_);
1413  auto actual_new_src = new_src_;
1414  if (auto join = dynamic_cast<const RelJoin*>(new_src_)) {
1415  actual_new_src = join->getInput(0);
1416  CHECK_EQ(join->inputCount(), size_t(2));
1417  auto src2_input_base = actual_new_src->size();
1418  if (input->getIndex() >= src2_input_base) {
1419  actual_new_src = join->getInput(1);
1420  return boost::make_unique<RexInput>(actual_new_src,
1421  input->getIndex() - src2_input_base);
1422  }
1423  }
1424 
1425  return boost::make_unique<RexInput>(actual_new_src, input->getIndex());
1426  }
1427 
1428  private:
1431 };
1432 
1434  std::shared_ptr<const RelAlgNode> old_def_node,
1435  std::shared_ptr<const RelAlgNode> new_def_node,
1436  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>>& deconst_mapping,
1437  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1438  du_web) {
1439  auto usrs_it = du_web.find(old_def_node.get());
1440  RexInputRedirector redirector(new_def_node.get(), old_def_node.get());
1441  CHECK(usrs_it != du_web.end());
1442  for (auto usr : usrs_it->second) {
1443  auto usr_it = deconst_mapping.find(usr);
1444  CHECK(usr_it != deconst_mapping.end());
1445  usr_it->second->replaceInput(old_def_node, new_def_node);
1446  }
1447  auto new_usrs_it = du_web.find(new_def_node.get());
1448  CHECK(new_usrs_it != du_web.end());
1449  new_usrs_it->second.insert(usrs_it->second.begin(), usrs_it->second.end());
1450  usrs_it->second.clear();
1451 }
1452 
1453 } // namespace
1454 
1455 void fold_filters(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1456  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1457  for (auto node : nodes) {
1458  deconst_mapping.insert(std::make_pair(node.get(), node));
1459  }
1460 
1461  auto web = build_du_web(nodes);
1462  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1463  auto& node = *node_it;
1464  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1465  CHECK_EQ(filter->inputCount(), size_t(1));
1466  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1467  if (!src_filter) {
1468  continue;
1469  }
1470  auto siblings_it = web.find(src_filter);
1471  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1472  continue;
1473  }
1474  auto src_it = deconst_mapping.find(src_filter);
1475  CHECK(src_it != deconst_mapping.end());
1476  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1477  CHECK(folded_filter);
1478  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1479  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1480  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1481  << " folded into "
1482  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1483  << std::endl;
1484  std::vector<std::unique_ptr<const RexScalar>> operands;
1485  operands.emplace_back(folded_filter->getAndReleaseCondition());
1486  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1487  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1488  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1489  operands.push_back(redirector.visit(rex_operator));
1490  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1491  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1492  const bool notnull = old_condition->getType().get_notnull() &&
1493  other_condition->getType().get_notnull();
1494  auto new_condition = std::unique_ptr<const RexScalar>(
1495  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1496  folded_filter->setCondition(new_condition);
1497  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1498  deconst_mapping.erase(filter.get());
1499  web.erase(filter.get());
1500  web[filter->getInput(0)].erase(filter.get());
1501  node.reset();
1502  }
1503  }
1504  }
1505 
1506  if (!nodes.empty()) {
1507  auto sink = nodes.back();
1508  for (auto node_it = std::next(nodes.rend()); !sink && node_it != nodes.rbegin();
1509  ++node_it) {
1510  sink = *node_it;
1511  }
1512  CHECK(sink);
1513  cleanup_dead_nodes(nodes);
1514  }
1515 }
1516 
1517 std::vector<const RexScalar*> find_hoistable_conditions(const RexScalar* condition,
1518  const RelAlgNode* source,
1519  const size_t first_col_idx,
1520  const size_t last_col_idx) {
1521  if (auto rex_op = dynamic_cast<const RexOperator*>(condition)) {
1522  switch (rex_op->getOperator()) {
1523  case kAND: {
1524  std::vector<const RexScalar*> subconditions;
1525  size_t complete_subcond_count = 0;
1526  for (size_t i = 0; i < rex_op->size(); ++i) {
1527  auto conds = find_hoistable_conditions(
1528  rex_op->getOperand(i), source, first_col_idx, last_col_idx);
1529  if (conds.size() == size_t(1)) {
1530  ++complete_subcond_count;
1531  }
1532  subconditions.insert(subconditions.end(), conds.begin(), conds.end());
1533  }
1534  if (complete_subcond_count == rex_op->size()) {
1535  return {rex_op};
1536  } else {
1537  return {subconditions};
1538  }
1539  break;
1540  }
1541  case kEQ: {
1542  const auto lhs_conds = find_hoistable_conditions(
1543  rex_op->getOperand(0), source, first_col_idx, last_col_idx);
1544  const auto rhs_conds = find_hoistable_conditions(
1545  rex_op->getOperand(1), source, first_col_idx, last_col_idx);
1546  const auto lhs_in = lhs_conds.size() == 1
1547  ? dynamic_cast<const RexInput*>(*lhs_conds.begin())
1548  : nullptr;
1549  const auto rhs_in = rhs_conds.size() == 1
1550  ? dynamic_cast<const RexInput*>(*rhs_conds.begin())
1551  : nullptr;
1552  if (lhs_in && rhs_in) {
1553  return {rex_op};
1554  }
1555  return {};
1556  break;
1557  }
1558  default:
1559  break;
1560  }
1561  return {};
1562  }
1563  if (auto rex_in = dynamic_cast<const RexInput*>(condition)) {
1564  if (rex_in->getSourceNode() == source) {
1565  const auto col_idx = rex_in->getIndex();
1566  return {col_idx >= first_col_idx && col_idx <= last_col_idx ? condition : nullptr};
1567  }
1568  return {};
1569  }
1570  return {};
1571 }
1572 
1574  public:
1575  JoinTargetRebaser(const RelJoin* join, const unsigned old_base)
1576  : join_(join)
1577  , old_base_(old_base)
1578  , src1_base_(join->getInput(0)->size())
1579  , target_count_(join->size()) {}
1580  RetType visitInput(const RexInput* input) const override {
1581  auto curr_idx = input->getIndex();
1582  CHECK_GE(curr_idx, old_base_);
1583  CHECK_LT(static_cast<size_t>(curr_idx), target_count_);
1584  curr_idx -= old_base_;
1585  if (curr_idx >= src1_base_) {
1586  return boost::make_unique<RexInput>(join_->getInput(1), curr_idx - src1_base_);
1587  } else {
1588  return boost::make_unique<RexInput>(join_->getInput(0), curr_idx);
1589  }
1590  }
1591 
1592  private:
1593  const RelJoin* join_;
1594  const unsigned old_base_;
1595  const size_t src1_base_;
1596  const size_t target_count_;
1597 };
1598 
1600  public:
1601  SubConditionRemover(const std::vector<const RexScalar*> sub_conds)
1602  : sub_conditions_(sub_conds.begin(), sub_conds.end()) {}
1603  RetType visitOperator(const RexOperator* rex_operator) const override {
1604  if (sub_conditions_.count(rex_operator)) {
1605  return boost::make_unique<RexLiteral>(
1606  true, kBOOLEAN, kBOOLEAN, unsigned(-2147483648), 1, unsigned(-2147483648), 1);
1607  }
1608  return RexDeepCopyVisitor::visitOperator(rex_operator);
1609  }
1610 
1611  private:
1612  std::unordered_set<const RexScalar*> sub_conditions_;
1613 };
1614 
1616  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1617  std::unordered_set<const RelAlgNode*> visited;
1618  auto web = build_du_web(nodes);
1619  for (auto node : nodes) {
1620  if (visited.count(node.get())) {
1621  continue;
1622  }
1623  visited.insert(node.get());
1624  auto join = dynamic_cast<RelJoin*>(node.get());
1625  if (join && join->getJoinType() == JoinType::INNER) {
1626  // Only allow cross join for now.
1627  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1628  // Assume Calcite always generates an inner join on constant boolean true for
1629  // cross join.
1630  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1631  size_t first_col_idx = 0;
1632  const RelFilter* filter = nullptr;
1633  std::vector<const RelJoin*> join_seq{join};
1634  for (const RelJoin* curr_join = join; !filter;) {
1635  auto usrs_it = web.find(curr_join);
1636  CHECK(usrs_it != web.end());
1637  if (usrs_it->second.size() != size_t(1)) {
1638  break;
1639  }
1640  auto only_usr = *usrs_it->second.begin();
1641  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1642  if (join == usr_join->getInput(1)) {
1643  const auto src1_offset = usr_join->getInput(0)->size();
1644  first_col_idx += src1_offset;
1645  }
1646  join_seq.push_back(usr_join);
1647  curr_join = usr_join;
1648  continue;
1649  }
1650 
1651  filter = dynamic_cast<const RelFilter*>(only_usr);
1652  break;
1653  }
1654  if (!filter) {
1655  visited.insert(join_seq.begin(), join_seq.end());
1656  continue;
1657  }
1658  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1659  CHECK(src_join);
1660  auto modified_filter = const_cast<RelFilter*>(filter);
1661 
1662  if (src_join == join) {
1663  std::unique_ptr<const RexScalar> filter_condition(
1664  modified_filter->getAndReleaseCondition());
1665  std::unique_ptr<const RexScalar> true_condition =
1666  boost::make_unique<RexLiteral>(true,
1667  kBOOLEAN,
1668  kBOOLEAN,
1669  unsigned(-2147483648),
1670  1,
1671  unsigned(-2147483648),
1672  1);
1673  modified_filter->setCondition(true_condition);
1674  join->setCondition(filter_condition);
1675  continue;
1676  }
1677  const auto src1_base = src_join->getInput(0)->size();
1678  auto source =
1679  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1680  first_col_idx =
1681  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1682  auto join_conditions =
1684  source,
1685  first_col_idx,
1686  first_col_idx + join->size() - 1);
1687  if (join_conditions.empty()) {
1688  continue;
1689  }
1690 
1691  JoinTargetRebaser rebaser(join, first_col_idx);
1692  if (join_conditions.size() == 1) {
1693  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1694  join->setCondition(new_join_condition);
1695  } else {
1696  std::vector<std::unique_ptr<const RexScalar>> operands;
1697  bool notnull = true;
1698  for (size_t i = 0; i < join_conditions.size(); ++i) {
1699  operands.emplace_back(rebaser.visit(join_conditions[i]));
1700  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1701  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1702  notnull = notnull && old_subcond->getType().get_notnull();
1703  }
1704  auto new_join_condition = std::unique_ptr<const RexScalar>(
1705  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1706  join->setCondition(new_join_condition);
1707  }
1708 
1709  SubConditionRemover remover(join_conditions);
1710  auto new_filter_condition = remover.visit(filter->getCondition());
1711  modified_filter->setCondition(new_filter_condition);
1712  }
1713  }
1714  }
1715 }
1716 
1717 // For some reason, Calcite generates Sort, Project, Sort sequences where the
1718 // two Sort nodes are identical and the Project is identity. Simplify this
1719 // pattern by re-binding the input of the second sort to the input of the first.
1720 void simplify_sort(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1721  if (nodes.size() < 3) {
1722  return;
1723  }
1724  for (size_t i = 0; i <= nodes.size() - 3;) {
1725  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1726  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1727  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1728  if (first_sort && second_sort && project && project->isIdentity() &&
1729  *first_sort == *second_sort) {
1730  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1731  first_sort->getAndOwnInput(0));
1732  nodes[i].reset();
1733  nodes[i + 1].reset();
1734  i += 3;
1735  } else {
1736  ++i;
1737  }
1738  }
1739 
1740  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1741  for (auto node : nodes) {
1742  if (!node) {
1743  continue;
1744  }
1745  new_nodes.push_back(node);
1746  }
1747  nodes.swap(new_nodes);
1748 }
SubConditionReplacer(const std::unordered_map< size_t, std::unique_ptr< const RexScalar >> &idx_to_sub_condition)
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
#define CHECK_EQ(x, y)
Definition: Logger.h:205
int64_t * src
const size_t target_count_
RexInputRedirector(const RelAlgNode *old_src, const RelAlgNode *new_src)
size_t pick_always_live_col_idx(const RelAlgNode *node)
void setSourceNode(const RelAlgNode *node) const
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & node_to_input_renum_
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexInputSinker(const std::unordered_map< size_t, size_t > &old_to_new_idx, const RelAlgNode *new_src)
size_t size() const override
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
void setCondition(std::unique_ptr< const RexScalar > &condition)
SubConditionRemover(const std::vector< const RexScalar *> sub_conds)
SortDirection getSortDir() const
#define LOG(tag)
Definition: Logger.h:188
const RexScalar * getProjectAt(const size_t idx) const
bool does_redef_cols(const RelAlgNode *node)
RetType visitOperator(const RexOperator *rex_operator) const override
Definition: RexVisitor.h:153
std::string join(T const &container, std::string const &delim)
void * visitInput(const RexInput *rex_input) const override
AvailabilityChecker(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes)
#define CHECK_GE(x, y)
Definition: Logger.h:210
RetType visitOperator(const RexOperator *rex_operator) const override
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject *> &projects_to_remove)
Definition: sqldefs.h:30
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
#define CHECK_GT(x, y)
Definition: Logger.h:209
const std::unordered_set< const RelProject * > & crt_projects_
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
std::unique_ptr< RexInput > deepCopy() const
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & liveouts_
std::vector< std::unordered_set< size_t > > get_live_ins(const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
RetType visitInput(const RexInput *input) const override
bool isSimple() const
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
RetType visitInput(const RexInput *input) const override
const RelAlgNode * getSourceNode() const
#define CHECK_NE(x, y)
Definition: Logger.h:206
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
const std::unordered_map< size_t, size_t > & old_to_new_in_idx_
Definition: sqldefs.h:37
RetType visitInput(const RexInput *input) const override
Definition: RexVisitor.h:141
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
RetType aggregateResult(const RetType &aggregate, const RetType &next_result) const override
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
const std::unordered_set< const RelAlgNode * > & intact_nodes_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
#define CHECK_LT(x, y)
Definition: Logger.h:207
NullSortedPosition getNullsPosition() const
const RexScalar * getCondition() const
unsigned getIndex() const
const size_t inputCount() const
RetType visitInput(const RexInput *input) const override
virtual size_t size() const =0
RetType visitInput(const RexInput *input) const override
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
std::vector< std::unique_ptr< const RexAgg > > renumber_rex_aggs(std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode *> &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
SortField renumber_sort_field(const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
bool safe_to_redirect(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:197
std::unordered_set< const RexScalar * > sub_conditions_
const RelJoin * join_
const RelAlgNode * getInput(const size_t idx) const
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject *> &projects, const std::unordered_set< const RelProject *> &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
std::string get_field_name(const RelAlgNode *node, size_t index)
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const std::unordered_map< size_t, std::unique_ptr< const RexScalar > > & idx_to_subcond_
void propagate_rex_input_renumber(const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_set< const RelProject *> &projects_to_remove, std::unordered_set< const RelProject *> &permutating_projects)
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
const unsigned old_base_
RexInputRenumberVisitor(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_numbering)
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
JoinTargetRebaser(const RelJoin *join, const unsigned old_base)
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:139
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
#define VLOG(n)
Definition: Logger.h:291
RexProjectInputRedirector(const std::unordered_set< const RelProject *> &crt_inputs)
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
size_t getField() const
RetType visitInput(const RexInput *input) const override