OmniSciDB  2c44a3935d
RelAlgOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgOptimizer.h"
18 #include "RexVisitor.h"
19 #include "Shared/Logger.h"
20 
21 #include <numeric>
22 #include <string>
23 #include <unordered_map>
24 
25 namespace {
26 
28  public:
29  RexProjectInputRedirector(const std::unordered_set<const RelProject*>& crt_inputs)
30  : crt_projects_(crt_inputs) {}
31 
32  RetType visitInput(const RexInput* input) const override {
33  auto source = dynamic_cast<const RelProject*>(input->getSourceNode());
34  if (!source || !crt_projects_.count(source)) {
35  return input->deepCopy();
36  }
37  auto new_source = source->getInput(0);
38  auto new_input =
39  dynamic_cast<const RexInput*>(source->getProjectAt(input->getIndex()));
40  if (!new_input) {
41  return input->deepCopy();
42  }
43  if (auto join = dynamic_cast<const RelJoin*>(new_source)) {
44  CHECK(new_input->getSourceNode() == join->getInput(0) ||
45  new_input->getSourceNode() == join->getInput(1));
46  } else {
47  CHECK_EQ(new_input->getSourceNode(), new_source);
48  }
49  return new_input->deepCopy();
50  }
51 
52  private:
53  const std::unordered_set<const RelProject*>& crt_projects_;
54 };
55 
56 class RexRebindInputsVisitor : public RexVisitor<void*> {
57  public:
58  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
59  : old_input_(old_input), new_input_(new_input) {}
60 
61  void* visitInput(const RexInput* rex_input) const override {
62  const auto old_source = rex_input->getSourceNode();
63  if (old_source == old_input_) {
64  rex_input->setSourceNode(new_input_);
65  }
66  return nullptr;
67  };
68 
69  void visitNode(const RelAlgNode* node) const {
70  if (dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelSort*>(node)) {
71  return;
72  }
73  if (auto join = dynamic_cast<const RelJoin*>(node)) {
74  if (auto condition = join->getCondition()) {
75  visit(condition);
76  }
77  return;
78  }
79  if (auto project = dynamic_cast<const RelProject*>(node)) {
80  for (size_t i = 0; i < project->size(); ++i) {
81  visit(project->getProjectAt(i));
82  }
83  return;
84  }
85  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
86  visit(filter->getCondition());
87  return;
88  }
89  CHECK(false);
90  }
91 
92  private:
95 };
96 
98  const RelProject* curr_project,
99  const std::unordered_set<const RelProject*>& projects_to_remove) {
100  auto source = curr_project->getInput(0);
101  while (auto filter = dynamic_cast<const RelFilter*>(source)) {
102  source = filter->getInput(0);
103  }
104  if (auto src_project = dynamic_cast<const RelProject*>(source)) {
105  if (projects_to_remove.count(src_project)) {
106  return get_actual_source_size(src_project, projects_to_remove);
107  }
108  }
109  return curr_project->getInput(0)->size();
110 }
111 
113  const RelProject* project,
114  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
115  du_web) {
116  if (!project->isSimple()) {
117  return false;
118  }
119  auto usrs_it = du_web.find(project);
120  CHECK(usrs_it != du_web.end());
121  for (auto usr : usrs_it->second) {
122  if (!dynamic_cast<const RelProject*>(usr) && !dynamic_cast<const RelFilter*>(usr)) {
123  return false;
124  }
125  }
126  return true;
127 }
128 
130  const RelProject* project,
131  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
132  du_web,
133  const std::unordered_set<const RelProject*>& projects_to_remove,
134  std::unordered_set<const RelProject*>& permutating_projects) {
135  auto source_size = get_actual_source_size(project, projects_to_remove);
136  if (project->size() > source_size) {
137  return false;
138  }
139 
140  if (project->size() < source_size) {
141  auto usrs_it = du_web.find(project);
142  CHECK(usrs_it != du_web.end());
143  bool guard_found = false;
144  while (usrs_it->second.size() == size_t(1)) {
145  auto only_usr = *usrs_it->second.begin();
146  if (dynamic_cast<const RelProject*>(only_usr)) {
147  guard_found = true;
148  break;
149  }
150  if (dynamic_cast<const RelAggregate*>(only_usr) ||
151  dynamic_cast<const RelSort*>(only_usr) ||
152  dynamic_cast<const RelJoin*>(only_usr) ||
153  dynamic_cast<const RelTableFunction*>(only_usr) ||
154  dynamic_cast<const RelLogicalUnion*>(only_usr)) {
155  return false;
156  }
157  CHECK(dynamic_cast<const RelFilter*>(only_usr))
158  << "only_usr: " << only_usr->toString();
159  usrs_it = du_web.find(only_usr);
160  CHECK(usrs_it != du_web.end());
161  }
162 
163  if (!guard_found) {
164  return false;
165  }
166  }
167 
168  bool identical = true;
169  for (size_t i = 0; i < project->size(); ++i) {
170  auto target = dynamic_cast<const RexInput*>(project->getProjectAt(i));
171  CHECK(target);
172  if (i != target->getIndex()) {
173  identical = false;
174  break;
175  }
176  }
177 
178  if (identical) {
179  return true;
180  }
181 
182  if (safe_to_redirect(project, du_web)) {
183  permutating_projects.insert(project);
184  return true;
185  }
186 
187  return false;
188 }
189 
191  const RelFilter* excluded_root,
192  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
193  du_web) {
194  CHECK(excluded_root);
195  auto src_project = dynamic_cast<const RelProject*>(excluded_root->getInput(0));
196  CHECK(src_project && src_project->isSimple());
197  const auto indirect_join_src = dynamic_cast<const RelJoin*>(src_project->getInput(0));
198  std::unordered_map<size_t, size_t> old_to_new_idx;
199  for (size_t i = 0; i < src_project->size(); ++i) {
200  auto rex_in = dynamic_cast<const RexInput*>(src_project->getProjectAt(i));
201  CHECK(rex_in);
202  size_t src_base = 0;
203  if (indirect_join_src != nullptr &&
204  indirect_join_src->getInput(1) == rex_in->getSourceNode()) {
205  src_base = indirect_join_src->getInput(0)->size();
206  }
207  old_to_new_idx.insert(std::make_pair(i, src_base + rex_in->getIndex()));
208  old_to_new_idx.insert(std::make_pair(i, rex_in->getIndex()));
209  }
210  CHECK(old_to_new_idx.size());
211  RexInputRenumber<false> renumber(old_to_new_idx);
212  auto usrs_it = du_web.find(excluded_root);
213  CHECK(usrs_it != du_web.end());
214  std::vector<const RelAlgNode*> work_set(usrs_it->second.begin(), usrs_it->second.end());
215  while (!work_set.empty()) {
216  auto node = work_set.back();
217  work_set.pop_back();
218  auto modified_node = const_cast<RelAlgNode*>(node);
219  if (auto filter = dynamic_cast<RelFilter*>(modified_node)) {
220  auto new_condition = renumber.visit(filter->getCondition());
221  filter->setCondition(new_condition);
222  auto usrs_it = du_web.find(filter);
223  CHECK(usrs_it != du_web.end() && usrs_it->second.size() == 1);
224  work_set.push_back(*usrs_it->second.begin());
225  continue;
226  }
227  if (auto project = dynamic_cast<RelProject*>(modified_node)) {
228  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
229  for (size_t i = 0; i < project->size(); ++i) {
230  new_exprs.push_back(renumber.visit(project->getProjectAt(i)));
231  }
232  project->setExpressions(new_exprs);
233  continue;
234  }
235  CHECK(false);
236  }
237 }
238 
239 // This function appears to redirect/remove redundant Projection input nodes(?)
241  std::shared_ptr<RelAlgNode> node,
242  const std::unordered_set<const RelProject*>& projects,
243  const std::unordered_set<const RelProject*>& permutating_projects,
244  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
245  du_web) {
246  if (dynamic_cast<RelLogicalUnion*>(node.get())) {
247  return; // UNION keeps all Projection inputs.
248  }
249  std::shared_ptr<const RelProject> src_project = nullptr;
250  for (size_t i = 0; i < node->inputCount(); ++i) {
251  if (auto project =
252  std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(i))) {
253  if (projects.count(project.get())) {
254  src_project = project;
255  break;
256  }
257  }
258  }
259  if (!src_project) {
260  return;
261  }
262  if (auto join = std::dynamic_pointer_cast<RelJoin>(node)) {
263  auto other_project =
264  src_project == node->getAndOwnInput(0)
265  ? std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(1))
266  : std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(0));
267  join->replaceInput(src_project, src_project->getAndOwnInput(0));
268  RexRebindInputsVisitor rebinder(src_project.get(), src_project->getInput(0));
269  auto usrs_it = du_web.find(join.get());
270  CHECK(usrs_it != du_web.end());
271  for (auto usr : usrs_it->second) {
272  rebinder.visitNode(usr);
273  }
274 
275  if (other_project && projects.count(other_project.get())) {
276  join->replaceInput(other_project, other_project->getAndOwnInput(0));
277  RexRebindInputsVisitor other_rebinder(other_project.get(),
278  other_project->getInput(0));
279  for (auto usr : usrs_it->second) {
280  other_rebinder.visitNode(usr);
281  }
282  }
283  return;
284  }
285  if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
286  project->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
287  RexProjectInputRedirector redirector(projects);
288  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
289  for (size_t i = 0; i < project->size(); ++i) {
290  new_exprs.push_back(redirector.visit(project->getProjectAt(i)));
291  }
292  project->setExpressions(new_exprs);
293  return;
294  }
295  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
296  const bool is_permutating_proj = permutating_projects.count(src_project.get());
297  if (is_permutating_proj || dynamic_cast<const RelJoin*>(src_project->getInput(0))) {
298  if (is_permutating_proj) {
299  propagate_rex_input_renumber(filter.get(), du_web);
300  }
301  filter->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
302  RexProjectInputRedirector redirector(projects);
303  auto new_condition = redirector.visit(filter->getCondition());
304  filter->setCondition(new_condition);
305  } else {
306  filter->replaceInput(src_project, src_project->getAndOwnInput(0));
307  }
308  return;
309  }
310  if (std::dynamic_pointer_cast<RelSort>(node)) {
311  auto const src_project_input = src_project->getInput(0);
312  if (dynamic_cast<const RelScan*>(src_project_input) ||
313  dynamic_cast<const RelLogicalValues*>(src_project_input) ||
314  dynamic_cast<const RelLogicalUnion*>(src_project_input)) {
315  return;
316  }
317  }
318  if (std::dynamic_pointer_cast<RelModify>(node)) {
319  return; // NOTE: Review this. Not sure about this.
320  }
321  CHECK(std::dynamic_pointer_cast<RelAggregate>(node) ||
322  std::dynamic_pointer_cast<RelSort>(node));
323  node->replaceInput(src_project, src_project->getAndOwnInput(0));
324 }
325 
326 void cleanup_dead_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
327  for (auto nodeIt = nodes.rbegin(); nodeIt != nodes.rend(); ++nodeIt) {
328  if (nodeIt->unique()) {
329  LOG(INFO) << "ID=" << (*nodeIt)->getId() << " " << (*nodeIt)->toString()
330  << " deleted!";
331  nodeIt->reset();
332  }
333  }
334 
335  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
336  for (auto node : nodes) {
337  if (!node) {
338  continue;
339  }
340  new_nodes.push_back(node);
341  }
342  nodes.swap(new_nodes);
343 }
344 
345 std::unordered_set<const RelProject*> get_visible_projects(const RelAlgNode* root) {
346  if (auto project = dynamic_cast<const RelProject*>(root)) {
347  return {project};
348  }
349 
350  if (dynamic_cast<const RelAggregate*>(root) || dynamic_cast<const RelScan*>(root) ||
351  dynamic_cast<const RelLogicalValues*>(root) ||
352  dynamic_cast<const RelModify*>(root)) {
353  return std::unordered_set<const RelProject*>{};
354  }
355 
356  if (auto join = dynamic_cast<const RelJoin*>(root)) {
357  auto lhs_projs = get_visible_projects(join->getInput(0));
358  auto rhs_projs = get_visible_projects(join->getInput(1));
359  lhs_projs.insert(rhs_projs.begin(), rhs_projs.end());
360  return lhs_projs;
361  }
362 
363  if (auto logical_union = dynamic_cast<const RelLogicalUnion*>(root)) {
364  auto projections = get_visible_projects(logical_union->getInput(0));
365  for (size_t i = 1; i < logical_union->inputCount(); ++i) {
366  auto next = get_visible_projects(logical_union->getInput(i));
367  projections.insert(next.begin(), next.end());
368  }
369  return projections;
370  }
371 
372  CHECK(dynamic_cast<const RelFilter*>(root) || dynamic_cast<const RelSort*>(root))
373  << "root = " << root->toString();
374  return get_visible_projects(root->getInput(0));
375 }
376 
377 // TODO(miyu): checking this at runtime is more accurate
378 bool is_distinct(const size_t input_idx, const RelAlgNode* node) {
379  if (dynamic_cast<const RelFilter*>(node) || dynamic_cast<const RelSort*>(node)) {
380  CHECK_EQ(size_t(1), node->inputCount());
381  return is_distinct(input_idx, node->getInput(0));
382  }
383  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
384  CHECK_EQ(size_t(1), node->inputCount());
385  if (aggregate->getGroupByCount() == 1 && !input_idx) {
386  return true;
387  }
388  if (input_idx < aggregate->getGroupByCount()) {
389  return is_distinct(input_idx, node->getInput(0));
390  }
391  return false;
392  }
393  if (auto project = dynamic_cast<const RelProject*>(node)) {
394  CHECK_LT(input_idx, project->size());
395  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(input_idx))) {
396  CHECK_EQ(size_t(1), node->inputCount());
397  return is_distinct(input->getIndex(), project->getInput(0));
398  }
399  return false;
400  }
401  CHECK(dynamic_cast<const RelJoin*>(node) || dynamic_cast<const RelScan*>(node));
402  return false;
403 }
404 
405 } // namespace
406 
407 std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> build_du_web(
408  const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
409  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
410  std::unordered_set<const RelAlgNode*> visited;
411  std::vector<const RelAlgNode*> work_set;
412  for (auto node : nodes) {
413  if (std::dynamic_pointer_cast<RelScan>(node) ||
414  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
415  continue;
416  }
417  work_set.push_back(node.get());
418  while (!work_set.empty()) {
419  auto walker = work_set.back();
420  work_set.pop_back();
421  if (visited.count(walker)) {
422  continue;
423  }
424  CHECK(!web.count(walker));
425  auto it_ok =
426  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
427  CHECK(it_ok.second);
428  visited.insert(walker);
429  CHECK(dynamic_cast<const RelJoin*>(walker) ||
430  dynamic_cast<const RelProject*>(walker) ||
431  dynamic_cast<const RelAggregate*>(walker) ||
432  dynamic_cast<const RelFilter*>(walker) ||
433  dynamic_cast<const RelSort*>(walker) ||
434  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
435  dynamic_cast<const RelLogicalValues*>(walker) ||
436  dynamic_cast<const RelTableFunction*>(walker) ||
437  dynamic_cast<const RelLogicalUnion*>(walker));
438  for (size_t i = 0; i < walker->inputCount(); ++i) {
439  auto src = walker->getInput(i);
440  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
441  continue;
442  }
443  if (web.empty() || !web.count(src)) {
444  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
445  }
446  web[src].insert(walker);
447  work_set.push_back(src);
448  }
449  }
450  }
451  return web;
452 }
453 
462 bool project_separates_sort(const RelProject* project, const RelAlgNode* next_node) {
463  CHECK(project);
464  if (!next_node) {
465  return false;
466  }
467 
468  auto sort = dynamic_cast<const RelSort*>(next_node);
469  if (!sort) {
470  return false;
471  }
472  if (!(project->inputCount() == 1)) {
473  return false;
474  }
475 
476  if (dynamic_cast<const RelSort*>(project->getInput(0))) {
477  return true;
478  }
479  return false;
480 }
481 
482 // For now, the only target to eliminate is restricted to project-aggregate pair between
483 // scan/sort and join
484 // TODO(miyu): allow more chance if proved safe
485 void eliminate_identical_copy(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
486  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
487  auto sink = nodes.back();
488  for (auto node : nodes) {
489  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
490  if (!aggregate || aggregate == sink ||
491  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
492  continue;
493  }
494  auto project =
495  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
496  if (project && project->size() == aggregate->size() &&
497  project->getFields() == aggregate->getFields()) {
498  CHECK_EQ(size_t(0), copies.count(aggregate));
499  copies.insert(aggregate);
500  }
501  }
502  for (auto node : nodes) {
503  if (!node->inputCount()) {
504  continue;
505  }
506  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
507  if (!copies.count(last_source)) {
508  continue;
509  }
510  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
511  CHECK(aggregate);
512  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
513  continue;
514  }
515  auto project =
516  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
517  CHECK(project);
518  CHECK_EQ(size_t(1), project->size());
519  if (!is_distinct(size_t(0), project.get())) {
520  continue;
521  }
522  auto new_source = project->getAndOwnInput(0);
523  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
524  std::dynamic_pointer_cast<const RelScan>(new_source)) {
525  node->replaceInput(last_source, new_source);
526  }
527  }
528  decltype(copies)().swap(copies);
529 
530  auto web = build_du_web(nodes);
531 
532  std::unordered_set<const RelProject*> projects;
533  std::unordered_set<const RelProject*> permutating_projects;
534  auto const visible_projs = get_visible_projects(nodes.back().get());
535  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
536  auto node = *node_it;
537  auto project = std::dynamic_pointer_cast<RelProject>(node);
538  auto next_node_it = std::next(node_it);
539  if (project && project->isSimple() &&
540  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
541  is_identical_copy(project.get(), web, projects, permutating_projects) &&
543  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
544  projects.insert(project.get());
545  }
546  }
547 
548  for (auto node : nodes) {
549  redirect_inputs_of(node, projects, permutating_projects, web);
550  }
551 
552  cleanup_dead_nodes(nodes);
553 }
554 
555 namespace {
556 
557 class RexInputCollector : public RexVisitor<std::unordered_set<RexInput>> {
558  private:
560 
561  protected:
562  using RetType = std::unordered_set<RexInput>;
563  RetType aggregateResult(const RetType& aggregate,
564  const RetType& next_result) const override {
565  RetType result(aggregate.begin(), aggregate.end());
566  result.insert(next_result.begin(), next_result.end());
567  return result;
568  }
569 
570  public:
571  RexInputCollector(const RelAlgNode* node) : node_(node) {}
572  RetType visitInput(const RexInput* input) const override {
573  RetType result;
574  if (node_->inputCount() == 1) {
575  auto src = node_->getInput(0);
576  if (auto join = dynamic_cast<const RelJoin*>(src)) {
577  CHECK_EQ(join->inputCount(), size_t(2));
578  const auto src2_in_offset = join->getInput(0)->size();
579  if (input->getSourceNode() == join->getInput(1)) {
580  result.emplace(src, input->getIndex() + src2_in_offset);
581  } else {
582  result.emplace(src, input->getIndex());
583  }
584  return result;
585  }
586  }
587  result.insert(*input);
588  return result;
589  }
590 };
591 
593  CHECK(node->size());
594  RexInputCollector collector(node);
595  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
596  auto rex_ins = collector.visit(filter->getCondition());
597  if (!rex_ins.empty()) {
598  return static_cast<size_t>(rex_ins.begin()->getIndex());
599  }
600  return pick_always_live_col_idx(filter->getInput(0));
601  } else if (auto join = dynamic_cast<const RelJoin*>(node)) {
602  auto rex_ins = collector.visit(join->getCondition());
603  if (!rex_ins.empty()) {
604  return static_cast<size_t>(rex_ins.begin()->getIndex());
605  }
606  if (auto lhs_idx = pick_always_live_col_idx(join->getInput(0))) {
607  return lhs_idx;
608  }
609  if (auto rhs_idx = pick_always_live_col_idx(join->getInput(0))) {
610  return rhs_idx + join->getInput(0)->size();
611  }
612  } else if (auto sort = dynamic_cast<const RelSort*>(node)) {
613  if (sort->collationCount()) {
614  return sort->getCollation(0).getField();
615  }
616  return pick_always_live_col_idx(sort->getInput(0));
617  }
618  return size_t(0);
619 }
620 
621 std::vector<std::unordered_set<size_t>> get_live_ins(
622  const RelAlgNode* node,
623  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs) {
624  if (!node || dynamic_cast<const RelScan*>(node)) {
625  return {};
626  }
627  RexInputCollector collector(node);
628  auto it = live_outs.find(node);
629  CHECK(it != live_outs.end());
630  auto live_out = it->second;
631  if (auto project = dynamic_cast<const RelProject*>(node)) {
632  CHECK_EQ(size_t(1), project->inputCount());
633  std::unordered_set<size_t> live_in;
634  for (const auto& idx : live_out) {
635  CHECK_LT(idx, project->size());
636  auto partial_in = collector.visit(project->getProjectAt(idx));
637  for (auto rex_in : partial_in) {
638  live_in.insert(rex_in.getIndex());
639  }
640  }
641  if (project->size() == 1 &&
642  dynamic_cast<const RexLiteral*>(project->getProjectAt(0))) {
643  CHECK(live_in.empty());
644  live_in.insert(pick_always_live_col_idx(project->getInput(0)));
645  }
646  return {live_in};
647  }
648  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
649  CHECK_EQ(size_t(1), aggregate->inputCount());
650  const auto group_key_count = static_cast<size_t>(aggregate->getGroupByCount());
651  const auto agg_expr_count = static_cast<size_t>(aggregate->getAggExprsCount());
652  std::unordered_set<size_t> live_in;
653  for (size_t i = 0; i < group_key_count; ++i) {
654  live_in.insert(i);
655  }
656  bool has_count_star_only{false};
657  for (const auto& idx : live_out) {
658  if (idx < group_key_count) {
659  continue;
660  }
661  const auto agg_idx = idx - group_key_count;
662  CHECK_LT(agg_idx, agg_expr_count);
663  const auto& cur_agg_expr = aggregate->getAggExprs()[agg_idx];
664  const auto n_operands = cur_agg_expr->size();
665  for (size_t i = 0; i < n_operands; ++i) {
666  live_in.insert(static_cast<size_t>(cur_agg_expr->getOperand(i)));
667  }
668  if (n_operands == 0) {
669  has_count_star_only = true;
670  }
671  }
672  if (has_count_star_only && !group_key_count) {
673  live_in.insert(size_t(0));
674  }
675  return {live_in};
676  }
677  if (auto join = dynamic_cast<const RelJoin*>(node)) {
678  std::unordered_set<size_t> lhs_live_ins;
679  std::unordered_set<size_t> rhs_live_ins;
680  CHECK_EQ(size_t(2), join->inputCount());
681  auto lhs = join->getInput(0);
682  auto rhs = join->getInput(1);
683  const auto rhs_idx_base = lhs->size();
684  for (const auto idx : live_out) {
685  if (idx < rhs_idx_base) {
686  lhs_live_ins.insert(idx);
687  } else {
688  rhs_live_ins.insert(idx - rhs_idx_base);
689  }
690  }
691  auto rex_ins = collector.visit(join->getCondition());
692  for (const auto& rex_in : rex_ins) {
693  const auto in_idx = static_cast<size_t>(rex_in.getIndex());
694  if (rex_in.getSourceNode() == lhs) {
695  lhs_live_ins.insert(in_idx);
696  continue;
697  }
698  if (rex_in.getSourceNode() == rhs) {
699  rhs_live_ins.insert(in_idx);
700  continue;
701  }
702  CHECK(false);
703  }
704  return {lhs_live_ins, rhs_live_ins};
705  }
706  if (auto sort = dynamic_cast<const RelSort*>(node)) {
707  CHECK_EQ(size_t(1), sort->inputCount());
708  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
709  for (size_t i = 0; i < sort->collationCount(); ++i) {
710  live_in.insert(sort->getCollation(i).getField());
711  }
712  return {live_in};
713  }
714  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
715  CHECK_EQ(size_t(1), filter->inputCount());
716  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
717  auto rex_ins = collector.visit(filter->getCondition());
718  for (const auto& rex_in : rex_ins) {
719  live_in.insert(static_cast<size_t>(rex_in.getIndex()));
720  }
721  return {live_in};
722  }
723  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
724  CHECK_EQ(size_t(1), table_func->inputCount());
725 
726  const auto input_count = table_func->size();
727  std::unordered_set<size_t> live_in;
728  for (size_t i = 0; i < input_count; i++) {
729  live_in.insert(i);
730  }
731  return {live_in};
732  }
733  if (auto logical_union = dynamic_cast<const RelLogicalUnion*>(node)) {
734  return std::vector<std::unordered_set<size_t>>(logical_union->inputCount(), live_out);
735  }
736  return {};
737 }
738 
739 bool any_dead_col_in(const RelAlgNode* node,
740  const std::unordered_set<size_t>& live_outs) {
741  CHECK(!dynamic_cast<const RelScan*>(node));
742  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
743  for (size_t i = aggregate->getGroupByCount(); i < aggregate->size(); ++i) {
744  if (!live_outs.count(i)) {
745  return true;
746  }
747  }
748  return false;
749  }
750 
751  return node->size() > live_outs.size();
752 }
753 
754 bool does_redef_cols(const RelAlgNode* node) {
755  return dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelProject*>(node);
756 }
757 
759  public:
761  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
762  liveouts,
763  const std::unordered_set<const RelAlgNode*>& intact_nodes)
764  : liveouts_(liveouts), intact_nodes_(intact_nodes) {}
765 
766  bool hasAllSrcReady(const RelAlgNode* node) const {
767  for (size_t i = 0; i < node->inputCount(); ++i) {
768  auto src = node->getInput(i);
769  if (!dynamic_cast<const RelScan*>(src) && liveouts_.find(src) == liveouts_.end() &&
770  !intact_nodes_.count(src)) {
771  return false;
772  }
773  }
774  return true;
775  }
776 
777  private:
778  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
780  const std::unordered_set<const RelAlgNode*>& intact_nodes_;
781 };
782 
784  const RelAlgNode* node,
785  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
786  new_liveouts,
787  const std::unordered_set<size_t>& old_liveouts,
788  const std::unordered_set<const RelAlgNode*>& intact_nodes,
789  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
790  auto live_fields = old_liveouts;
791  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
792  for (size_t i = 0; i < aggregate->getGroupByCount(); ++i) {
793  live_fields.insert(i);
794  }
795  }
796  auto it_ok =
797  new_liveouts.insert(std::make_pair(node, std::unordered_map<size_t, size_t>{}));
798  CHECK(it_ok.second);
799  auto& new_indices = it_ok.first->second;
800  if (intact_nodes.count(node)) {
801  for (size_t i = 0, e = node->size(); i < e; ++i) {
802  new_indices.insert(std::make_pair(i, i));
803  }
804  return;
805  }
806  if (does_redef_cols(node)) {
807  auto node_sz_it = orig_node_sizes.find(node);
808  CHECK(node_sz_it != orig_node_sizes.end());
809  const auto node_size = node_sz_it->second;
810  CHECK_GT(node_size, live_fields.size());
811  LOG(INFO) << node->toString() << " eliminated " << node_size - live_fields.size()
812  << " columns.";
813  std::vector<size_t> ordered_indices(live_fields.begin(), live_fields.end());
814  std::sort(ordered_indices.begin(), ordered_indices.end());
815  for (size_t i = 0; i < ordered_indices.size(); ++i) {
816  new_indices.insert(std::make_pair(ordered_indices[i], i));
817  }
818  return;
819  }
820  std::vector<size_t> ordered_indices;
821  for (size_t i = 0, old_base = 0, new_base = 0; i < node->inputCount(); ++i) {
822  auto src = node->getInput(i);
823  auto src_renum_it = new_liveouts.find(src);
824  if (src_renum_it != new_liveouts.end()) {
825  for (auto m : src_renum_it->second) {
826  new_indices.insert(std::make_pair(old_base + m.first, new_base + m.second));
827  }
828  new_base += src_renum_it->second.size();
829  } else if (dynamic_cast<const RelScan*>(src) || intact_nodes.count(src)) {
830  for (size_t i = 0; i < src->size(); ++i) {
831  new_indices.insert(std::make_pair(old_base + i, new_base + i));
832  }
833  new_base += src->size();
834  } else {
835  CHECK(false);
836  }
837  auto src_sz_it = orig_node_sizes.find(src);
838  CHECK(src_sz_it != orig_node_sizes.end());
839  old_base += src_sz_it->second;
840  }
841 }
842 
844  public:
846  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
847  new_numbering)
848  : node_to_input_renum_(new_numbering) {}
849  RetType visitInput(const RexInput* input) const override {
850  auto source = input->getSourceNode();
851  auto node_it = node_to_input_renum_.find(source);
852  if (node_it != node_to_input_renum_.end()) {
853  auto old_to_new_num = node_it->second;
854  auto renum_it = old_to_new_num.find(input->getIndex());
855  CHECK(renum_it != old_to_new_num.end());
856  return boost::make_unique<RexInput>(source, renum_it->second);
857  }
858  return input->deepCopy();
859  }
860 
861  private:
862  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
864 };
865 
866 std::vector<std::unique_ptr<const RexAgg>> renumber_rex_aggs(
867  std::vector<std::unique_ptr<const RexAgg>>& agg_exprs,
868  const std::unordered_map<size_t, size_t>& new_numbering) {
869  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
870  for (auto& expr : agg_exprs) {
871  if (expr->size() >= 1) {
872  auto old_idx = expr->getOperand(0);
873  auto idx_it = new_numbering.find(old_idx);
874  if (idx_it != new_numbering.end()) {
875  std::vector<size_t> operands;
876  operands.push_back(idx_it->second);
877  if (expr->size() == 2) {
878  operands.push_back(expr->getOperand(1));
879  }
880  new_exprs.push_back(boost::make_unique<RexAgg>(
881  expr->getKind(), expr->isDistinct(), expr->getType(), operands));
882  continue;
883  }
884  }
885  new_exprs.push_back(std::move(expr));
886  }
887  return new_exprs;
888 }
889 
891  const std::unordered_map<size_t, size_t>& new_numbering) {
892  auto field_idx = old_field.getField();
893  auto idx_it = new_numbering.find(field_idx);
894  if (idx_it != new_numbering.end()) {
895  field_idx = idx_it->second;
896  }
897  return SortField(field_idx, old_field.getSortDir(), old_field.getNullsPosition());
898 }
899 
900 std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> mark_live_columns(
901  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
902  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> live_outs;
903  std::vector<const RelAlgNode*> work_set;
904  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
905  auto node = node_it->get();
906  if (dynamic_cast<const RelScan*>(node) || live_outs.count(node) ||
907  dynamic_cast<const RelModify*>(node)) {
908  continue;
909  }
910  std::vector<size_t> all_live(node->size());
911  std::iota(all_live.begin(), all_live.end(), size_t(0));
912  live_outs.insert(std::make_pair(
913  node, std::unordered_set<size_t>(all_live.begin(), all_live.end())));
914  work_set.push_back(node);
915  while (!work_set.empty()) {
916  auto walker = work_set.back();
917  work_set.pop_back();
918  CHECK(!dynamic_cast<const RelScan*>(walker));
919  CHECK(live_outs.count(walker));
920  auto live_ins = get_live_ins(walker, live_outs);
921  CHECK_EQ(live_ins.size(), walker->inputCount());
922  for (size_t i = 0; i < walker->inputCount(); ++i) {
923  auto src = walker->getInput(i);
924  if (dynamic_cast<const RelScan*>(src) || live_ins[i].empty()) {
925  continue;
926  }
927  if (!live_outs.count(src)) {
928  live_outs.insert(std::make_pair(src, std::unordered_set<size_t>{}));
929  }
930  auto src_it = live_outs.find(src);
931  CHECK(src_it != live_outs.end());
932  auto& live_out = src_it->second;
933  bool changed = false;
934  if (!live_out.empty()) {
935  live_out.insert(live_ins[i].begin(), live_ins[i].end());
936  changed = true;
937  } else {
938  for (int idx : live_ins[i]) {
939  changed |= live_out.insert(idx).second;
940  }
941  }
942  if (changed) {
943  work_set.push_back(src);
944  }
945  }
946  }
947  }
948  return live_outs;
949 }
950 
951 std::string get_field_name(const RelAlgNode* node, size_t index) {
952  CHECK_LT(index, node->size());
953  if (auto scan = dynamic_cast<const RelScan*>(node)) {
954  return scan->getFieldName(index);
955  }
956  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
957  CHECK_EQ(aggregate->size(), aggregate->getFields().size());
958  return aggregate->getFieldName(index);
959  }
960  if (auto join = dynamic_cast<const RelJoin*>(node)) {
961  const auto lhs_size = join->getInput(0)->size();
962  if (index < lhs_size) {
963  return get_field_name(join->getInput(0), index);
964  }
965  return get_field_name(join->getInput(1), index - lhs_size);
966  }
967  if (auto project = dynamic_cast<const RelProject*>(node)) {
968  return project->getFieldName(index);
969  }
970  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
971  return get_field_name(node->getInput(0), index);
972 }
973 
975  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
976  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& liveouts,
977  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
978  du_web) {
979  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
980  for (auto node : nodes) {
981  new_nodes.push_back(node);
982  if (!std::dynamic_pointer_cast<RelFilter>(node)) {
983  continue;
984  }
985  const auto filter = node.get();
986  auto liveout_it = liveouts.find(filter);
987  CHECK(liveout_it != liveouts.end());
988  auto& outs = liveout_it->second;
989  if (!any_dead_col_in(filter, outs)) {
990  continue;
991  }
992  auto usrs_it = du_web.find(filter);
993  CHECK(usrs_it != du_web.end());
994  auto& usrs = usrs_it->second;
995  if (usrs.size() != 1 || does_redef_cols(*usrs.begin())) {
996  continue;
997  }
998  auto only_usr = const_cast<RelAlgNode*>(*usrs.begin());
999 
1000  std::vector<std::unique_ptr<const RexScalar>> exprs;
1001  std::vector<std::string> fields;
1002  for (size_t i = 0; i < filter->size(); ++i) {
1003  exprs.push_back(boost::make_unique<RexInput>(filter, i));
1004  fields.push_back(get_field_name(filter, i));
1005  }
1006  auto project_owner = std::make_shared<RelProject>(exprs, fields, node);
1007  auto project = project_owner.get();
1008 
1009  only_usr->replaceInput(node, project_owner);
1010  if (dynamic_cast<const RelJoin*>(only_usr)) {
1011  RexRebindInputsVisitor visitor(filter, project);
1012  for (auto usr : du_web[only_usr]) {
1013  visitor.visitNode(usr);
1014  }
1015  }
1016 
1017  liveouts.insert(std::make_pair(project, outs));
1018 
1019  usrs.clear();
1020  usrs.insert(project);
1021  du_web.insert(
1022  std::make_pair(project, std::unordered_set<const RelAlgNode*>{only_usr}));
1023 
1024  new_nodes.push_back(project_owner);
1025  }
1026  if (new_nodes.size() > nodes.size()) {
1027  nodes.swap(new_nodes);
1028  }
1029 }
1030 
1031 std::pair<std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>,
1032  std::vector<const RelAlgNode*>>
1034  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs,
1035  const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1036  const std::unordered_set<const RelAlgNode*>& intact_nodes,
1037  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1038  du_web,
1039  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1040  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1041  liveouts_renumbering;
1042  std::vector<const RelAlgNode*> ready_nodes;
1043  AvailabilityChecker checker(liveouts_renumbering, intact_nodes);
1044  for (auto node : nodes) {
1045  // Ignore empty live_out due to some invalid node
1046  if (!does_redef_cols(node.get()) || intact_nodes.count(node.get())) {
1047  continue;
1048  }
1049  auto live_pair = live_outs.find(node.get());
1050  CHECK(live_pair != live_outs.end());
1051  auto old_live_outs = live_pair->second;
1053  node.get(), liveouts_renumbering, old_live_outs, intact_nodes, orig_node_sizes);
1054  if (auto aggregate = std::dynamic_pointer_cast<RelAggregate>(node)) {
1055  auto old_exprs = aggregate->getAggExprsAndRelease();
1056  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
1057  auto key_name_it = aggregate->getFields().begin();
1058  std::vector<std::string> new_fields(key_name_it,
1059  key_name_it + aggregate->getGroupByCount());
1060  for (size_t i = aggregate->getGroupByCount(), j = 0;
1061  i < aggregate->getFields().size() && j < old_exprs.size();
1062  ++i, ++j) {
1063  if (old_live_outs.count(i)) {
1064  new_exprs.push_back(std::move(old_exprs[j]));
1065  new_fields.push_back(aggregate->getFieldName(i));
1066  }
1067  }
1068  aggregate->setAggExprs(new_exprs);
1069  aggregate->setFields(new_fields);
1070  } else if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
1071  auto old_exprs = project->getExpressionsAndRelease();
1072  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1073  std::vector<std::string> new_fields;
1074  for (size_t i = 0; i < old_exprs.size(); ++i) {
1075  if (old_live_outs.count(i)) {
1076  new_exprs.push_back(std::move(old_exprs[i]));
1077  new_fields.push_back(project->getFieldName(i));
1078  }
1079  }
1080  project->setExpressions(new_exprs);
1081  project->setFields(new_fields);
1082  } else {
1083  CHECK(false);
1084  }
1085  auto usrs_it = du_web.find(node.get());
1086  CHECK(usrs_it != du_web.end());
1087  for (auto usr : usrs_it->second) {
1088  if (checker.hasAllSrcReady(usr)) {
1089  ready_nodes.push_back(usr);
1090  }
1091  }
1092  }
1093  return {liveouts_renumbering, ready_nodes};
1094 }
1095 
1097  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
1098  liveout_renumbering,
1099  const std::vector<const RelAlgNode*>& ready_nodes,
1100  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& old_liveouts,
1101  const std::unordered_set<const RelAlgNode*>& intact_nodes,
1102  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1103  du_web,
1104  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1105  RexInputRenumberVisitor renumberer(liveout_renumbering);
1106  AvailabilityChecker checker(liveout_renumbering, intact_nodes);
1107  std::deque<const RelAlgNode*> work_set(ready_nodes.begin(), ready_nodes.end());
1108  while (!work_set.empty()) {
1109  auto walker = work_set.front();
1110  work_set.pop_front();
1111  CHECK(!dynamic_cast<const RelScan*>(walker));
1112  auto node = const_cast<RelAlgNode*>(walker);
1113  if (auto project = dynamic_cast<RelProject*>(node)) {
1114  auto old_exprs = project->getExpressionsAndRelease();
1115  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1116  for (auto& expr : old_exprs) {
1117  new_exprs.push_back(renumberer.visit(expr.get()));
1118  }
1119  project->setExpressions(new_exprs);
1120  } else if (auto aggregate = dynamic_cast<RelAggregate*>(node)) {
1121  auto src_it = liveout_renumbering.find(node->getInput(0));
1122  CHECK(src_it != liveout_renumbering.end());
1123  auto old_exprs = aggregate->getAggExprsAndRelease();
1124  auto new_exprs = renumber_rex_aggs(old_exprs, src_it->second);
1125  aggregate->setAggExprs(new_exprs);
1126  } else if (auto join = dynamic_cast<RelJoin*>(node)) {
1127  auto new_condition = renumberer.visit(join->getCondition());
1128  join->setCondition(new_condition);
1129  } else if (auto filter = dynamic_cast<RelFilter*>(node)) {
1130  auto new_condition = renumberer.visit(filter->getCondition());
1131  filter->setCondition(new_condition);
1132  } else if (auto sort = dynamic_cast<RelSort*>(node)) {
1133  auto src_it = liveout_renumbering.find(node->getInput(0));
1134  CHECK(src_it != liveout_renumbering.end());
1135  std::vector<SortField> new_collations;
1136  for (size_t i = 0; i < sort->collationCount(); ++i) {
1137  new_collations.push_back(
1138  renumber_sort_field(sort->getCollation(i), src_it->second));
1139  }
1140  sort->setCollation(std::move(new_collations));
1141  } else if (!dynamic_cast<RelLogicalUnion*>(node)) {
1142  LOG(FATAL) << "Unhandled node type: " << node->toString();
1143  }
1144 
1145  // Ignore empty live_out due to some invalid node
1146  if (does_redef_cols(node) || intact_nodes.count(node)) {
1147  continue;
1148  }
1149  auto live_pair = old_liveouts.find(node);
1150  CHECK(live_pair != old_liveouts.end());
1151  auto live_out = live_pair->second;
1153  node, liveout_renumbering, live_out, intact_nodes, orig_node_sizes);
1154  auto usrs_it = du_web.find(walker);
1155  CHECK(usrs_it != du_web.end());
1156  for (auto usr : usrs_it->second) {
1157  if (checker.hasAllSrcReady(usr)) {
1158  work_set.push_back(usr);
1159  }
1160  }
1161  }
1162 }
1163 
1164 } // namespace
1165 
1166 void eliminate_dead_columns(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1167  if (nodes.empty()) {
1168  return;
1169  }
1170  auto root = nodes.back().get();
1171  if (!root) {
1172  return;
1173  }
1174  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1175  // Mark
1176  auto old_liveouts = mark_live_columns(nodes);
1177  std::unordered_set<const RelAlgNode*> intact_nodes;
1178  bool has_dead_cols = false;
1179  for (auto live_pair : old_liveouts) {
1180  auto node = live_pair.first;
1181  const auto& outs = live_pair.second;
1182  if (outs.empty()) {
1183  LOG(WARNING) << "RA node with no used column: " << node->toString();
1184  // Ignore empty live_out due to some invalid node
1185  intact_nodes.insert(node);
1186  }
1187  if (any_dead_col_in(node, outs)) {
1188  has_dead_cols = true;
1189  } else {
1190  intact_nodes.insert(node);
1191  }
1192  }
1193  if (!has_dead_cols) {
1194  return;
1195  }
1196  auto web = build_du_web(nodes);
1197  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1198 
1199  for (auto node : nodes) {
1200  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1201  continue;
1202  }
1203  bool intact = true;
1204  for (size_t i = 0; i < node->inputCount(); ++i) {
1205  auto source = node->getInput(i);
1206  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1207  intact = false;
1208  break;
1209  }
1210  }
1211  if (intact) {
1212  intact_nodes.insert(node.get());
1213  }
1214  }
1215 
1216  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1217  for (auto node : nodes) {
1218  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1219  }
1220  // Sweep
1221  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1222  liveout_renumbering;
1223  std::vector<const RelAlgNode*> ready_nodes;
1224  std::tie(liveout_renumbering, ready_nodes) =
1225  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1226  // Propagate
1228  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1229 }
1230 
1231 namespace {
1232 
1234  public:
1235  RexInputSinker(const std::unordered_map<size_t, size_t>& old_to_new_idx,
1236  const RelAlgNode* new_src)
1237  : old_to_new_in_idx_(old_to_new_idx), target_(new_src) {}
1238 
1239  RetType visitInput(const RexInput* input) const override {
1240  CHECK_EQ(target_->inputCount(), size_t(1));
1241  CHECK_EQ(target_->getInput(0), input->getSourceNode());
1242  auto idx_it = old_to_new_in_idx_.find(input->getIndex());
1243  CHECK(idx_it != old_to_new_in_idx_.end());
1244  return boost::make_unique<RexInput>(target_, idx_it->second);
1245  }
1246 
1247  private:
1248  const std::unordered_map<size_t, size_t>& old_to_new_in_idx_;
1250 };
1251 
1253  public:
1254  SubConditionReplacer(const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>&
1255  idx_to_sub_condition)
1256  : idx_to_subcond_(idx_to_sub_condition) {}
1257  RetType visitInput(const RexInput* input) const override {
1258  auto subcond_it = idx_to_subcond_.find(input->getIndex());
1259  if (subcond_it != idx_to_subcond_.end()) {
1260  return RexDeepCopyVisitor::visit(subcond_it->second.get());
1261  }
1262  return RexDeepCopyVisitor::visitInput(input);
1263  }
1264 
1265  private:
1266  const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>& idx_to_subcond_;
1267 };
1268 
1269 } // namespace
1270 
1272  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1273  auto web = build_du_web(nodes);
1274  auto liveouts = mark_live_columns(nodes);
1275  for (auto node : nodes) {
1276  auto project = std::dynamic_pointer_cast<RelProject>(node);
1277  // TODO(miyu): relax RelScan limitation
1278  if (!project || project->isSimple() ||
1279  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1280  continue;
1281  }
1282  auto usrs_it = web.find(project.get());
1283  CHECK(usrs_it != web.end());
1284  auto& usrs = usrs_it->second;
1285  if (usrs.size() != 1) {
1286  continue;
1287  }
1288  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1289  if (!join) {
1290  continue;
1291  }
1292  auto outs_it = liveouts.find(join);
1293  CHECK(outs_it != liveouts.end());
1294  std::unordered_map<size_t, size_t> in_to_out_index;
1295  std::unordered_set<size_t> boolean_expr_indicies;
1296  bool discarded = false;
1297  for (size_t i = 0; i < project->size(); ++i) {
1298  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1299  if (oper && oper->getType().get_type() == kBOOLEAN) {
1300  boolean_expr_indicies.insert(i);
1301  } else {
1302  // TODO(miyu): relax?
1303  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1304  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1305  } else {
1306  discarded = true;
1307  }
1308  }
1309  }
1310  if (discarded || boolean_expr_indicies.empty()) {
1311  continue;
1312  }
1313  const size_t index_base =
1314  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1315  for (auto i : boolean_expr_indicies) {
1316  auto join_idx = index_base + i;
1317  if (outs_it->second.count(join_idx)) {
1318  discarded = true;
1319  break;
1320  }
1321  }
1322  if (discarded) {
1323  continue;
1324  }
1325  RexInputCollector collector(project.get());
1326  std::vector<size_t> unloaded_input_indices;
1327  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1328  // Given all are dead right after join, safe to sink
1329  for (auto i : boolean_expr_indicies) {
1330  auto rex_ins = collector.visit(project->getProjectAt(i));
1331  for (auto& in : rex_ins) {
1332  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1333  if (!in_to_out_index.count(in.getIndex())) {
1334  auto curr_out_index = project->size() + unloaded_input_indices.size();
1335  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1336  unloaded_input_indices.push_back(in.getIndex());
1337  }
1338  RexInputSinker sinker(in_to_out_index, project.get());
1339  in_idx_to_new_subcond.insert(
1340  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1341  }
1342  }
1343  if (in_idx_to_new_subcond.empty()) {
1344  continue;
1345  }
1346  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1347  for (size_t i = 0; i < project->size(); ++i) {
1348  if (boolean_expr_indicies.count(i)) {
1349  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1350  } else {
1351  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1352  CHECK(rex_input != nullptr);
1353  new_projections.push_back(rex_input->deepCopy());
1354  }
1355  }
1356  for (auto i : unloaded_input_indices) {
1357  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1358  }
1359  project->setExpressions(new_projections);
1360 
1361  SubConditionReplacer replacer(in_idx_to_new_subcond);
1362  auto new_condition = replacer.visit(join->getCondition());
1363  join->setCondition(new_condition);
1364  }
1365 }
1366 
1367 namespace {
1368 
1370  public:
1371  RexInputRedirector(const RelAlgNode* old_src, const RelAlgNode* new_src)
1372  : old_src_(old_src), new_src_(new_src) {}
1373 
1374  RetType visitInput(const RexInput* input) const override {
1375  CHECK_EQ(old_src_, input->getSourceNode());
1376  CHECK_NE(old_src_, new_src_);
1377  auto actual_new_src = new_src_;
1378  if (auto join = dynamic_cast<const RelJoin*>(new_src_)) {
1379  actual_new_src = join->getInput(0);
1380  CHECK_EQ(join->inputCount(), size_t(2));
1381  auto src2_input_base = actual_new_src->size();
1382  if (input->getIndex() >= src2_input_base) {
1383  actual_new_src = join->getInput(1);
1384  return boost::make_unique<RexInput>(actual_new_src,
1385  input->getIndex() - src2_input_base);
1386  }
1387  }
1388 
1389  return boost::make_unique<RexInput>(actual_new_src, input->getIndex());
1390  }
1391 
1392  private:
1395 };
1396 
1398  std::shared_ptr<const RelAlgNode> old_def_node,
1399  std::shared_ptr<const RelAlgNode> new_def_node,
1400  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>>& deconst_mapping,
1401  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1402  du_web) {
1403  auto usrs_it = du_web.find(old_def_node.get());
1404  RexInputRedirector redirector(new_def_node.get(), old_def_node.get());
1405  CHECK(usrs_it != du_web.end());
1406  for (auto usr : usrs_it->second) {
1407  auto usr_it = deconst_mapping.find(usr);
1408  CHECK(usr_it != deconst_mapping.end());
1409  usr_it->second->replaceInput(old_def_node, new_def_node);
1410  }
1411  auto new_usrs_it = du_web.find(new_def_node.get());
1412  CHECK(new_usrs_it != du_web.end());
1413  new_usrs_it->second.insert(usrs_it->second.begin(), usrs_it->second.end());
1414  usrs_it->second.clear();
1415 }
1416 
1417 } // namespace
1418 
1419 void fold_filters(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1420  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1421  for (auto node : nodes) {
1422  deconst_mapping.insert(std::make_pair(node.get(), node));
1423  }
1424 
1425  auto web = build_du_web(nodes);
1426  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1427  auto& node = *node_it;
1428  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1429  CHECK_EQ(filter->inputCount(), size_t(1));
1430  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1431  if (!src_filter) {
1432  continue;
1433  }
1434  auto siblings_it = web.find(src_filter);
1435  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1436  continue;
1437  }
1438  auto src_it = deconst_mapping.find(src_filter);
1439  CHECK(src_it != deconst_mapping.end());
1440  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1441  CHECK(folded_filter);
1442  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1443  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1444  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1445  << " folded into "
1446  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1447  << std::endl;
1448  std::vector<std::unique_ptr<const RexScalar>> operands;
1449  operands.emplace_back(folded_filter->getAndReleaseCondition());
1450  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1451  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1452  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1453  operands.push_back(redirector.visit(rex_operator));
1454  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1455  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1456  const bool notnull = old_condition->getType().get_notnull() &&
1457  other_condition->getType().get_notnull();
1458  auto new_condition = std::unique_ptr<const RexScalar>(
1459  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1460  folded_filter->setCondition(new_condition);
1461  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1462  deconst_mapping.erase(filter.get());
1463  web.erase(filter.get());
1464  web[filter->getInput(0)].erase(filter.get());
1465  node.reset();
1466  }
1467  }
1468  }
1469 
1470  if (!nodes.empty()) {
1471  auto sink = nodes.back();
1472  for (auto node_it = std::next(nodes.rend()); !sink && node_it != nodes.rbegin();
1473  ++node_it) {
1474  sink = *node_it;
1475  }
1476  CHECK(sink);
1477  cleanup_dead_nodes(nodes);
1478  }
1479 }
1480 
1481 std::vector<const RexScalar*> find_hoistable_conditions(const RexScalar* condition,
1482  const RelAlgNode* source,
1483  const size_t first_col_idx,
1484  const size_t last_col_idx) {
1485  if (auto rex_op = dynamic_cast<const RexOperator*>(condition)) {
1486  switch (rex_op->getOperator()) {
1487  case kAND: {
1488  std::vector<const RexScalar*> subconditions;
1489  size_t complete_subcond_count = 0;
1490  for (size_t i = 0; i < rex_op->size(); ++i) {
1491  auto conds = find_hoistable_conditions(
1492  rex_op->getOperand(i), source, first_col_idx, last_col_idx);
1493  if (conds.size() == size_t(1)) {
1494  ++complete_subcond_count;
1495  }
1496  subconditions.insert(subconditions.end(), conds.begin(), conds.end());
1497  }
1498  if (complete_subcond_count == rex_op->size()) {
1499  return {rex_op};
1500  } else {
1501  return {subconditions};
1502  }
1503  break;
1504  }
1505  case kEQ: {
1506  const auto lhs_conds = find_hoistable_conditions(
1507  rex_op->getOperand(0), source, first_col_idx, last_col_idx);
1508  const auto rhs_conds = find_hoistable_conditions(
1509  rex_op->getOperand(1), source, first_col_idx, last_col_idx);
1510  const auto lhs_in = lhs_conds.size() == 1
1511  ? dynamic_cast<const RexInput*>(*lhs_conds.begin())
1512  : nullptr;
1513  const auto rhs_in = rhs_conds.size() == 1
1514  ? dynamic_cast<const RexInput*>(*rhs_conds.begin())
1515  : nullptr;
1516  if (lhs_in && rhs_in) {
1517  return {rex_op};
1518  }
1519  return {};
1520  break;
1521  }
1522  default:
1523  break;
1524  }
1525  return {};
1526  }
1527  if (auto rex_in = dynamic_cast<const RexInput*>(condition)) {
1528  if (rex_in->getSourceNode() == source) {
1529  const auto col_idx = rex_in->getIndex();
1530  return {col_idx >= first_col_idx && col_idx <= last_col_idx ? condition : nullptr};
1531  }
1532  return {};
1533  }
1534  return {};
1535 }
1536 
1538  public:
1539  JoinTargetRebaser(const RelJoin* join, const unsigned old_base)
1540  : join_(join)
1541  , old_base_(old_base)
1542  , src1_base_(join->getInput(0)->size())
1543  , target_count_(join->size()) {}
1544  RetType visitInput(const RexInput* input) const override {
1545  auto curr_idx = input->getIndex();
1546  CHECK_GE(curr_idx, old_base_);
1547  CHECK_LT(static_cast<size_t>(curr_idx), target_count_);
1548  curr_idx -= old_base_;
1549  if (curr_idx >= src1_base_) {
1550  return boost::make_unique<RexInput>(join_->getInput(1), curr_idx - src1_base_);
1551  } else {
1552  return boost::make_unique<RexInput>(join_->getInput(0), curr_idx);
1553  }
1554  }
1555 
1556  private:
1557  const RelJoin* join_;
1558  const unsigned old_base_;
1559  const size_t src1_base_;
1560  const size_t target_count_;
1561 };
1562 
1564  public:
1565  SubConditionRemover(const std::vector<const RexScalar*> sub_conds)
1566  : sub_conditions_(sub_conds.begin(), sub_conds.end()) {}
1567  RetType visitOperator(const RexOperator* rex_operator) const override {
1568  if (sub_conditions_.count(rex_operator)) {
1569  return boost::make_unique<RexLiteral>(
1570  true, kBOOLEAN, kBOOLEAN, unsigned(-2147483648), 1, unsigned(-2147483648), 1);
1571  }
1572  return RexDeepCopyVisitor::visitOperator(rex_operator);
1573  }
1574 
1575  private:
1576  std::unordered_set<const RexScalar*> sub_conditions_;
1577 };
1578 
1580  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1581  std::unordered_set<const RelAlgNode*> visited;
1582  auto web = build_du_web(nodes);
1583  for (auto node : nodes) {
1584  if (visited.count(node.get())) {
1585  continue;
1586  }
1587  visited.insert(node.get());
1588  auto join = dynamic_cast<RelJoin*>(node.get());
1589  if (join && join->getJoinType() == JoinType::INNER) {
1590  // Only allow cross join for now.
1591  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1592  // Assume Calcite always generates an inner join on constant boolean true for
1593  // cross join.
1594  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1595  size_t first_col_idx = 0;
1596  const RelFilter* filter = nullptr;
1597  std::vector<const RelJoin*> join_seq{join};
1598  for (const RelJoin* curr_join = join; !filter;) {
1599  auto usrs_it = web.find(curr_join);
1600  CHECK(usrs_it != web.end());
1601  if (usrs_it->second.size() != size_t(1)) {
1602  break;
1603  }
1604  auto only_usr = *usrs_it->second.begin();
1605  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1606  if (join == usr_join->getInput(1)) {
1607  const auto src1_offset = usr_join->getInput(0)->size();
1608  first_col_idx += src1_offset;
1609  }
1610  join_seq.push_back(usr_join);
1611  curr_join = usr_join;
1612  continue;
1613  }
1614 
1615  filter = dynamic_cast<const RelFilter*>(only_usr);
1616  break;
1617  }
1618  if (!filter) {
1619  visited.insert(join_seq.begin(), join_seq.end());
1620  continue;
1621  }
1622  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1623  CHECK(src_join);
1624  auto modified_filter = const_cast<RelFilter*>(filter);
1625 
1626  if (src_join == join) {
1627  std::unique_ptr<const RexScalar> filter_condition(
1628  modified_filter->getAndReleaseCondition());
1629  std::unique_ptr<const RexScalar> true_condition =
1630  boost::make_unique<RexLiteral>(true,
1631  kBOOLEAN,
1632  kBOOLEAN,
1633  unsigned(-2147483648),
1634  1,
1635  unsigned(-2147483648),
1636  1);
1637  modified_filter->setCondition(true_condition);
1638  join->setCondition(filter_condition);
1639  continue;
1640  }
1641  const auto src1_base = src_join->getInput(0)->size();
1642  auto source =
1643  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1644  first_col_idx =
1645  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1646  auto join_conditions =
1648  source,
1649  first_col_idx,
1650  first_col_idx + join->size() - 1);
1651  if (join_conditions.empty()) {
1652  continue;
1653  }
1654 
1655  JoinTargetRebaser rebaser(join, first_col_idx);
1656  if (join_conditions.size() == 1) {
1657  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1658  join->setCondition(new_join_condition);
1659  } else {
1660  std::vector<std::unique_ptr<const RexScalar>> operands;
1661  bool notnull = true;
1662  for (size_t i = 0; i < join_conditions.size(); ++i) {
1663  operands.emplace_back(rebaser.visit(join_conditions[i]));
1664  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1665  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1666  notnull = notnull && old_subcond->getType().get_notnull();
1667  }
1668  auto new_join_condition = std::unique_ptr<const RexScalar>(
1669  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1670  join->setCondition(new_join_condition);
1671  }
1672 
1673  SubConditionRemover remover(join_conditions);
1674  auto new_filter_condition = remover.visit(filter->getCondition());
1675  modified_filter->setCondition(new_filter_condition);
1676  }
1677  }
1678  }
1679 }
1680 
1681 // For some reason, Calcite generates Sort, Project, Sort sequences where the
1682 // two Sort nodes are identical and the Project is identity. Simplify this
1683 // pattern by re-binding the input of the second sort to the input of the first.
1684 void simplify_sort(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1685  if (nodes.size() < 3) {
1686  return;
1687  }
1688  for (size_t i = 0; i <= nodes.size() - 3;) {
1689  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1690  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1691  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1692  if (first_sort && second_sort && project && project->isIdentity() &&
1693  *first_sort == *second_sort) {
1694  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1695  first_sort->getAndOwnInput(0));
1696  nodes[i].reset();
1697  nodes[i + 1].reset();
1698  i += 3;
1699  } else {
1700  ++i;
1701  }
1702  }
1703 
1704  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1705  for (auto node : nodes) {
1706  if (!node) {
1707  continue;
1708  }
1709  new_nodes.push_back(node);
1710  }
1711  nodes.swap(new_nodes);
1712 }
SubConditionReplacer(const std::unordered_map< size_t, std::unique_ptr< const RexScalar >> &idx_to_sub_condition)
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
#define CHECK_EQ(x, y)
Definition: Logger.h:205
int64_t * src
const size_t target_count_
RexInputRedirector(const RelAlgNode *old_src, const RelAlgNode *new_src)
size_t pick_always_live_col_idx(const RelAlgNode *node)
void setSourceNode(const RelAlgNode *node) const
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & node_to_input_renum_
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexInputSinker(const std::unordered_map< size_t, size_t > &old_to_new_idx, const RelAlgNode *new_src)
size_t size() const override
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
void setCondition(std::unique_ptr< const RexScalar > &condition)
SubConditionRemover(const std::vector< const RexScalar *> sub_conds)
SortDirection getSortDir() const
#define LOG(tag)
Definition: Logger.h:188
const RexScalar * getProjectAt(const size_t idx) const
bool does_redef_cols(const RelAlgNode *node)
RetType visitOperator(const RexOperator *rex_operator) const override
Definition: RexVisitor.h:153
std::string join(T const &container, std::string const &delim)
void * visitInput(const RexInput *rex_input) const override
AvailabilityChecker(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes)
#define CHECK_GE(x, y)
Definition: Logger.h:210
RetType visitOperator(const RexOperator *rex_operator) const override
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject *> &projects_to_remove)
Definition: sqldefs.h:30
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
#define CHECK_GT(x, y)
Definition: Logger.h:209
const std::unordered_set< const RelProject * > & crt_projects_
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
std::unique_ptr< RexInput > deepCopy() const
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & liveouts_
std::vector< std::unordered_set< size_t > > get_live_ins(const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
RetType visitInput(const RexInput *input) const override
bool isSimple() const
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
RetType visitInput(const RexInput *input) const override
const RelAlgNode * getSourceNode() const
#define CHECK_NE(x, y)
Definition: Logger.h:206
const std::unordered_map< size_t, size_t > & old_to_new_in_idx_
Definition: sqldefs.h:37
RetType visitInput(const RexInput *input) const override
Definition: RexVisitor.h:141
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
RetType aggregateResult(const RetType &aggregate, const RetType &next_result) const override
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
const std::unordered_set< const RelAlgNode * > & intact_nodes_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
#define CHECK_LT(x, y)
Definition: Logger.h:207
NullSortedPosition getNullsPosition() const
const RexScalar * getCondition() const
unsigned getIndex() const
const size_t inputCount() const
RetType visitInput(const RexInput *input) const override
virtual size_t size() const =0
RetType visitInput(const RexInput *input) const override
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
std::vector< std::unique_ptr< const RexAgg > > renumber_rex_aggs(std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode *> &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
SortField renumber_sort_field(const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
bool safe_to_redirect(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:197
std::unordered_set< const RexScalar * > sub_conditions_
const RelJoin * join_
const RelAlgNode * getInput(const size_t idx) const
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject *> &projects, const std::unordered_set< const RelProject *> &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
std::string get_field_name(const RelAlgNode *node, size_t index)
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const std::unordered_map< size_t, std::unique_ptr< const RexScalar > > & idx_to_subcond_
void propagate_rex_input_renumber(const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_set< const RelProject *> &projects_to_remove, std::unordered_set< const RelProject *> &permutating_projects)
const unsigned old_base_
RexInputRenumberVisitor(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_numbering)
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
JoinTargetRebaser(const RelJoin *join, const unsigned old_base)
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:139
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
RexProjectInputRedirector(const std::unordered_set< const RelProject *> &crt_inputs)
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
size_t getField() const
RetType visitInput(const RexInput *input) const override