OmniSciDB  d2f719934e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgOptimizer.h"
18 #include "Logger/Logger.h"
19 #include "RexVisitor.h"
21 
22 #include <numeric>
23 #include <string>
24 #include <unordered_map>
25 
26 namespace {
27 
29  public:
30  RexProjectInputRedirector(const std::unordered_set<const RelProject*>& crt_inputs)
31  : crt_projects_(crt_inputs) {}
32 
33  RetType visitInput(const RexInput* input) const override {
34  auto source = dynamic_cast<const RelProject*>(input->getSourceNode());
35  if (!source || !crt_projects_.count(source)) {
36  return input->deepCopy();
37  }
38  auto new_source = source->getInput(0);
39  auto new_input =
40  dynamic_cast<const RexInput*>(source->getProjectAt(input->getIndex()));
41  if (!new_input) {
42  return input->deepCopy();
43  }
44  if (auto join = dynamic_cast<const RelJoin*>(new_source)) {
45  CHECK(new_input->getSourceNode() == join->getInput(0) ||
46  new_input->getSourceNode() == join->getInput(1));
47  } else {
48  CHECK_EQ(new_input->getSourceNode(), new_source);
49  }
50  return new_input->deepCopy();
51  }
52 
53  private:
54  const std::unordered_set<const RelProject*>& crt_projects_;
55 };
56 
57 class RexRebindInputsVisitor : public RexVisitor<void*> {
58  public:
59  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
60  : old_input_(old_input), new_input_(new_input) {}
61 
62  void* visitInput(const RexInput* rex_input) const override {
63  const auto old_source = rex_input->getSourceNode();
64  if (old_source == old_input_) {
65  rex_input->setSourceNode(new_input_);
66  }
67  return nullptr;
68  };
69 
70  void visitNode(const RelAlgNode* node) const {
71  if (dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelSort*>(node)) {
72  return;
73  }
74  if (auto join = dynamic_cast<const RelJoin*>(node)) {
75  if (auto condition = join->getCondition()) {
76  visit(condition);
77  }
78  return;
79  }
80  if (auto project = dynamic_cast<const RelProject*>(node)) {
81  for (size_t i = 0; i < project->size(); ++i) {
82  visit(project->getProjectAt(i));
83  }
84  return;
85  }
86  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
87  visit(filter->getCondition());
88  return;
89  }
90  CHECK(false);
91  }
92 
93  private:
96 };
97 
99  const RelProject* curr_project,
100  const std::unordered_set<const RelProject*>& projects_to_remove) {
101  auto source = curr_project->getInput(0);
102  while (auto filter = dynamic_cast<const RelFilter*>(source)) {
103  source = filter->getInput(0);
104  }
105  if (auto src_project = dynamic_cast<const RelProject*>(source)) {
106  if (projects_to_remove.count(src_project)) {
107  return get_actual_source_size(src_project, projects_to_remove);
108  }
109  }
110  return curr_project->getInput(0)->size();
111 }
112 
114  const RelProject* project,
115  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
116  du_web) {
117  if (!project->isSimple()) {
118  return false;
119  }
120  auto usrs_it = du_web.find(project);
121  CHECK(usrs_it != du_web.end());
122  for (auto usr : usrs_it->second) {
123  if (!dynamic_cast<const RelProject*>(usr) && !dynamic_cast<const RelFilter*>(usr)) {
124  return false;
125  }
126  }
127  return true;
128 }
129 
131  const RelProject* project,
132  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
133  du_web,
134  const std::unordered_set<const RelProject*>& projects_to_remove,
135  std::unordered_set<const RelProject*>& permutating_projects) {
136  auto source_size = get_actual_source_size(project, projects_to_remove);
137  if (project->size() > source_size) {
138  return false;
139  }
140 
141  if (project->size() < source_size) {
142  auto usrs_it = du_web.find(project);
143  CHECK(usrs_it != du_web.end());
144  bool guard_found = false;
145  while (usrs_it->second.size() == size_t(1)) {
146  auto only_usr = *usrs_it->second.begin();
147  if (dynamic_cast<const RelProject*>(only_usr)) {
148  guard_found = true;
149  break;
150  }
151  if (dynamic_cast<const RelAggregate*>(only_usr) ||
152  dynamic_cast<const RelSort*>(only_usr) ||
153  dynamic_cast<const RelJoin*>(only_usr) ||
154  dynamic_cast<const RelTableFunction*>(only_usr) ||
155  dynamic_cast<const RelLogicalUnion*>(only_usr)) {
156  return false;
157  }
158  CHECK(dynamic_cast<const RelFilter*>(only_usr))
159  << "only_usr: " << only_usr->toString();
160  usrs_it = du_web.find(only_usr);
161  CHECK(usrs_it != du_web.end());
162  }
163 
164  if (!guard_found) {
165  return false;
166  }
167  }
168 
169  bool identical = true;
170  for (size_t i = 0; i < project->size(); ++i) {
171  auto target = dynamic_cast<const RexInput*>(project->getProjectAt(i));
172  CHECK(target);
173  if (i != target->getIndex()) {
174  identical = false;
175  break;
176  }
177  }
178 
179  if (identical) {
180  return true;
181  }
182 
183  if (safe_to_redirect(project, du_web)) {
184  permutating_projects.insert(project);
185  return true;
186  }
187 
188  return false;
189 }
190 
192  const RelFilter* excluded_root,
193  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
194  du_web) {
195  CHECK(excluded_root);
196  auto src_project = dynamic_cast<const RelProject*>(excluded_root->getInput(0));
197  CHECK(src_project && src_project->isSimple());
198  const auto indirect_join_src = dynamic_cast<const RelJoin*>(src_project->getInput(0));
199  std::unordered_map<size_t, size_t> old_to_new_idx;
200  for (size_t i = 0; i < src_project->size(); ++i) {
201  auto rex_in = dynamic_cast<const RexInput*>(src_project->getProjectAt(i));
202  CHECK(rex_in);
203  size_t src_base = 0;
204  if (indirect_join_src != nullptr &&
205  indirect_join_src->getInput(1) == rex_in->getSourceNode()) {
206  src_base = indirect_join_src->getInput(0)->size();
207  }
208  old_to_new_idx.insert(std::make_pair(i, src_base + rex_in->getIndex()));
209  old_to_new_idx.insert(std::make_pair(i, rex_in->getIndex()));
210  }
211  CHECK(old_to_new_idx.size());
212  RexInputRenumber<false> renumber(old_to_new_idx);
213  auto usrs_it = du_web.find(excluded_root);
214  CHECK(usrs_it != du_web.end());
215  std::vector<const RelAlgNode*> work_set(usrs_it->second.begin(), usrs_it->second.end());
216  while (!work_set.empty()) {
217  auto node = work_set.back();
218  work_set.pop_back();
219  auto modified_node = const_cast<RelAlgNode*>(node);
220  if (auto filter = dynamic_cast<RelFilter*>(modified_node)) {
221  auto new_condition = renumber.visit(filter->getCondition());
222  filter->setCondition(new_condition);
223  auto usrs_it = du_web.find(filter);
224  CHECK(usrs_it != du_web.end() && usrs_it->second.size() == 1);
225  work_set.push_back(*usrs_it->second.begin());
226  continue;
227  }
228  if (auto project = dynamic_cast<RelProject*>(modified_node)) {
229  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
230  for (size_t i = 0; i < project->size(); ++i) {
231  new_exprs.push_back(renumber.visit(project->getProjectAt(i)));
232  }
233  project->setExpressions(new_exprs);
234  continue;
235  }
236  CHECK(false);
237  }
238 }
239 
240 // This function appears to redirect/remove redundant Projection input nodes(?)
242  std::shared_ptr<RelAlgNode> node,
243  const std::unordered_set<const RelProject*>& projects,
244  const std::unordered_set<const RelProject*>& permutating_projects,
245  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
246  du_web) {
247  if (dynamic_cast<RelLogicalUnion*>(node.get())) {
248  return; // UNION keeps all Projection inputs.
249  }
250  std::shared_ptr<const RelProject> src_project = nullptr;
251  for (size_t i = 0; i < node->inputCount(); ++i) {
252  if (auto project =
253  std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(i))) {
254  if (projects.count(project.get())) {
255  src_project = project;
256  break;
257  }
258  }
259  }
260  if (!src_project) {
261  return;
262  }
263  if (auto join = std::dynamic_pointer_cast<RelJoin>(node)) {
264  auto other_project =
265  src_project == node->getAndOwnInput(0)
266  ? std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(1))
267  : std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(0));
268  join->replaceInput(src_project, src_project->getAndOwnInput(0));
269  RexRebindInputsVisitor rebinder(src_project.get(), src_project->getInput(0));
270  auto usrs_it = du_web.find(join.get());
271  CHECK(usrs_it != du_web.end());
272  for (auto usr : usrs_it->second) {
273  rebinder.visitNode(usr);
274  }
275 
276  if (other_project && projects.count(other_project.get())) {
277  join->replaceInput(other_project, other_project->getAndOwnInput(0));
278  RexRebindInputsVisitor other_rebinder(other_project.get(),
279  other_project->getInput(0));
280  for (auto usr : usrs_it->second) {
281  other_rebinder.visitNode(usr);
282  }
283  }
284  return;
285  }
286  if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
287  project->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
288  RexProjectInputRedirector redirector(projects);
289  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
290  for (size_t i = 0; i < project->size(); ++i) {
291  new_exprs.push_back(redirector.visit(project->getProjectAt(i)));
292  }
293  project->setExpressions(new_exprs);
294  return;
295  }
296  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
297  const bool is_permutating_proj = permutating_projects.count(src_project.get());
298  if (is_permutating_proj || dynamic_cast<const RelJoin*>(src_project->getInput(0))) {
299  if (is_permutating_proj) {
300  propagate_rex_input_renumber(filter.get(), du_web);
301  }
302  filter->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
303  RexProjectInputRedirector redirector(projects);
304  auto new_condition = redirector.visit(filter->getCondition());
305  filter->setCondition(new_condition);
306  } else {
307  filter->replaceInput(src_project, src_project->getAndOwnInput(0));
308  }
309  return;
310  }
311  if (std::dynamic_pointer_cast<RelSort>(node)) {
312  auto const src_project_input = src_project->getInput(0);
313  if (dynamic_cast<const RelScan*>(src_project_input) ||
314  dynamic_cast<const RelLogicalValues*>(src_project_input) ||
315  dynamic_cast<const RelLogicalUnion*>(src_project_input)) {
316  return;
317  }
318  }
319  if (std::dynamic_pointer_cast<RelModify>(node)) {
320  return; // NOTE: Review this. Not sure about this.
321  }
322  if (std::dynamic_pointer_cast<RelTableFunction>(node)) {
323  return;
324  }
325  CHECK(std::dynamic_pointer_cast<RelAggregate>(node) ||
326  std::dynamic_pointer_cast<RelSort>(node));
327  node->replaceInput(src_project, src_project->getAndOwnInput(0));
328 }
329 
330 void cleanup_dead_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
331  for (auto nodeIt = nodes.rbegin(); nodeIt != nodes.rend(); ++nodeIt) {
332  if (nodeIt->use_count() == 1) {
333  VLOG(1) << "Node (ID: " << (*nodeIt)->getId() << ") deleted.";
335  auto node_str = (*nodeIt)->toString();
336  constexpr size_t max_node_print_len{500};
337  const size_t node_str_len = node_str.size();
338  if (node_str_len > max_node_print_len) {
339  node_str = node_str.substr(0, max_node_print_len) + "...";
340  }
341  VLOG(2) << "Deleted Node (ID: " << (*nodeIt)->getId()
342  << ") contents: " << node_str;
343  }
344  nodeIt->reset();
345  }
346  }
347 
348  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
349  for (auto node : nodes) {
350  if (!node) {
351  continue;
352  }
353  new_nodes.push_back(node);
354  }
355  nodes.swap(new_nodes);
356 }
357 
358 std::unordered_set<const RelProject*> get_visible_projects(const RelAlgNode* root) {
359  if (auto project = dynamic_cast<const RelProject*>(root)) {
360  return {project};
361  }
362 
363  if (dynamic_cast<const RelAggregate*>(root) || dynamic_cast<const RelScan*>(root) ||
364  dynamic_cast<const RelLogicalValues*>(root) ||
365  dynamic_cast<const RelModify*>(root)) {
366  return std::unordered_set<const RelProject*>{};
367  }
368 
369  if (auto join = dynamic_cast<const RelJoin*>(root)) {
370  auto lhs_projs = get_visible_projects(join->getInput(0));
371  auto rhs_projs = get_visible_projects(join->getInput(1));
372  lhs_projs.insert(rhs_projs.begin(), rhs_projs.end());
373  return lhs_projs;
374  }
375 
376  if (auto logical_union = dynamic_cast<const RelLogicalUnion*>(root)) {
377  auto projections = get_visible_projects(logical_union->getInput(0));
378  for (size_t i = 1; i < logical_union->inputCount(); ++i) {
379  auto next = get_visible_projects(logical_union->getInput(i));
380  projections.insert(next.begin(), next.end());
381  }
382  return projections;
383  }
384 
385  CHECK(dynamic_cast<const RelFilter*>(root) || dynamic_cast<const RelSort*>(root))
386  << "root = " << root->toString();
387  return get_visible_projects(root->getInput(0));
388 }
389 
390 // TODO(miyu): checking this at runtime is more accurate
391 bool is_distinct(const size_t input_idx, const RelAlgNode* node) {
392  if (dynamic_cast<const RelFilter*>(node) || dynamic_cast<const RelSort*>(node)) {
393  CHECK_EQ(size_t(1), node->inputCount());
394  return is_distinct(input_idx, node->getInput(0));
395  }
396  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
397  CHECK_EQ(size_t(1), node->inputCount());
398  if (aggregate->getGroupByCount() == 1 && !input_idx) {
399  return true;
400  }
401  if (input_idx < aggregate->getGroupByCount()) {
402  return is_distinct(input_idx, node->getInput(0));
403  }
404  return false;
405  }
406  if (auto project = dynamic_cast<const RelProject*>(node)) {
407  CHECK_LT(input_idx, project->size());
408  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(input_idx))) {
409  CHECK_EQ(size_t(1), node->inputCount());
410  return is_distinct(input->getIndex(), project->getInput(0));
411  }
412  return false;
413  }
414  CHECK(dynamic_cast<const RelJoin*>(node) || dynamic_cast<const RelScan*>(node));
415  return false;
416 }
417 
418 } // namespace
419 
420 std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> build_du_web(
421  const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
422  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
423  std::unordered_set<const RelAlgNode*> visited;
424  std::vector<const RelAlgNode*> work_set;
425  for (auto node : nodes) {
426  if (std::dynamic_pointer_cast<RelScan>(node) ||
427  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
428  continue;
429  }
430  work_set.push_back(node.get());
431  while (!work_set.empty()) {
432  auto walker = work_set.back();
433  work_set.pop_back();
434  if (visited.count(walker)) {
435  continue;
436  }
437  CHECK(!web.count(walker));
438  auto it_ok =
439  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
440  CHECK(it_ok.second);
441  visited.insert(walker);
442  CHECK(dynamic_cast<const RelJoin*>(walker) ||
443  dynamic_cast<const RelProject*>(walker) ||
444  dynamic_cast<const RelAggregate*>(walker) ||
445  dynamic_cast<const RelFilter*>(walker) ||
446  dynamic_cast<const RelSort*>(walker) ||
447  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
448  dynamic_cast<const RelLogicalValues*>(walker) ||
449  dynamic_cast<const RelTableFunction*>(walker) ||
450  dynamic_cast<const RelLogicalUnion*>(walker));
451  for (size_t i = 0; i < walker->inputCount(); ++i) {
452  auto src = walker->getInput(i);
453  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
454  continue;
455  }
456  if (web.empty() || !web.count(src)) {
457  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
458  }
459  web[src].insert(walker);
460  work_set.push_back(src);
461  }
462  }
463  }
464  return web;
465 }
466 
475 bool project_separates_sort(const RelProject* project, const RelAlgNode* next_node) {
476  CHECK(project);
477  if (!next_node) {
478  return false;
479  }
480 
481  auto sort = dynamic_cast<const RelSort*>(next_node);
482  if (!sort) {
483  return false;
484  }
485  if (!(project->inputCount() == 1)) {
486  return false;
487  }
488 
489  if (dynamic_cast<const RelSort*>(project->getInput(0))) {
490  return true;
491  }
492  return false;
493 }
494 
495 // For now, the only target to eliminate is restricted to project-aggregate pair between
496 // scan/sort and join
497 // TODO(miyu): allow more chance if proved safe
498 void eliminate_identical_copy(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
499  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
500  auto sink = nodes.back();
501  for (auto node : nodes) {
502  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
503  if (!aggregate || aggregate == sink ||
504  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
505  continue;
506  }
507  auto project =
508  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
509  if (project && project->size() == aggregate->size() &&
510  project->getFields() == aggregate->getFields()) {
511  CHECK_EQ(size_t(0), copies.count(aggregate));
512  copies.insert(aggregate);
513  }
514  }
515  for (auto node : nodes) {
516  if (!node->inputCount()) {
517  continue;
518  }
519  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
520  if (!copies.count(last_source)) {
521  continue;
522  }
523  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
524  CHECK(aggregate);
525  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
526  continue;
527  }
528  auto project =
529  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
530  CHECK(project);
531  CHECK_EQ(size_t(1), project->size());
532  if (!is_distinct(size_t(0), project.get())) {
533  continue;
534  }
535  auto new_source = project->getAndOwnInput(0);
536  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
537  std::dynamic_pointer_cast<const RelScan>(new_source)) {
538  node->replaceInput(last_source, new_source);
539  }
540  }
541  decltype(copies)().swap(copies);
542 
543  auto web = build_du_web(nodes);
544 
545  std::unordered_set<const RelProject*> projects;
546  std::unordered_set<const RelProject*> permutating_projects;
547  auto const visible_projs = get_visible_projects(nodes.back().get());
548  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
549  auto node = *node_it;
550  auto project = std::dynamic_pointer_cast<RelProject>(node);
551  auto next_node_it = std::next(node_it);
552  if (project && project->isSimple() &&
553  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
554  is_identical_copy(project.get(), web, projects, permutating_projects) &&
556  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
557  projects.insert(project.get());
558  }
559  }
560 
561  for (auto node : nodes) {
562  redirect_inputs_of(node, projects, permutating_projects, web);
563  }
564 
565  cleanup_dead_nodes(nodes);
566 }
567 
568 namespace {
569 
570 class RexInputCollector : public RexVisitor<std::unordered_set<RexInput>> {
571  private:
573 
574  protected:
575  using RetType = std::unordered_set<RexInput>;
576  RetType aggregateResult(const RetType& aggregate,
577  const RetType& next_result) const override {
578  RetType result(aggregate.begin(), aggregate.end());
579  result.insert(next_result.begin(), next_result.end());
580  return result;
581  }
582 
583  public:
584  RexInputCollector(const RelAlgNode* node) : node_(node) {}
585 
586  RetType visitInput(const RexInput* input) const override {
587  RetType result;
588  if (node_->inputCount() == 1) {
589  auto src = node_->getInput(0);
590  if (auto join = dynamic_cast<const RelJoin*>(src)) {
591  CHECK_EQ(join->inputCount(), size_t(2));
592  const auto src2_in_offset = join->getInput(0)->size();
593  if (input->getSourceNode() == join->getInput(1)) {
594  result.emplace(src, input->getIndex() + src2_in_offset);
595  } else {
596  result.emplace(src, input->getIndex());
597  }
598  return result;
599  }
600  }
601  result.insert(*input);
602  return result;
603  }
604 };
605 
607  CHECK(node->size());
608  RexInputCollector collector(node);
609  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
610  auto rex_ins = collector.visit(filter->getCondition());
611  if (!rex_ins.empty()) {
612  return static_cast<size_t>(rex_ins.begin()->getIndex());
613  }
614  return pick_always_live_col_idx(filter->getInput(0));
615  } else if (auto join = dynamic_cast<const RelJoin*>(node)) {
616  auto rex_ins = collector.visit(join->getCondition());
617  if (!rex_ins.empty()) {
618  return static_cast<size_t>(rex_ins.begin()->getIndex());
619  }
620  if (auto lhs_idx = pick_always_live_col_idx(join->getInput(0))) {
621  return lhs_idx;
622  }
623  if (auto rhs_idx = pick_always_live_col_idx(join->getInput(0))) {
624  return rhs_idx + join->getInput(0)->size();
625  }
626  } else if (auto sort = dynamic_cast<const RelSort*>(node)) {
627  if (sort->collationCount()) {
628  return sort->getCollation(0).getField();
629  }
630  return pick_always_live_col_idx(sort->getInput(0));
631  }
632  return size_t(0);
633 }
634 
635 std::vector<std::unordered_set<size_t>> get_live_ins(
636  const RelAlgNode* node,
637  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs) {
638  if (!node || dynamic_cast<const RelScan*>(node)) {
639  return {};
640  }
641  RexInputCollector collector(node);
642  auto it = live_outs.find(node);
643  CHECK(it != live_outs.end());
644  auto live_out = it->second;
645  if (auto project = dynamic_cast<const RelProject*>(node)) {
646  CHECK_EQ(size_t(1), project->inputCount());
647  std::unordered_set<size_t> live_in;
648  for (const auto& idx : live_out) {
649  CHECK_LT(idx, project->size());
650  auto partial_in = collector.visit(project->getProjectAt(idx));
651  for (auto rex_in : partial_in) {
652  live_in.insert(rex_in.getIndex());
653  }
654  }
655  if (project->size() == 1 &&
656  dynamic_cast<const RexLiteral*>(project->getProjectAt(0))) {
657  CHECK(live_in.empty());
658  live_in.insert(pick_always_live_col_idx(project->getInput(0)));
659  }
660  return {live_in};
661  }
662  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
663  CHECK_EQ(size_t(1), aggregate->inputCount());
664  const auto group_key_count = static_cast<size_t>(aggregate->getGroupByCount());
665  const auto agg_expr_count = static_cast<size_t>(aggregate->getAggExprsCount());
666  std::unordered_set<size_t> live_in;
667  for (size_t i = 0; i < group_key_count; ++i) {
668  live_in.insert(i);
669  }
670  bool has_count_star_only{false};
671  for (const auto& idx : live_out) {
672  if (idx < group_key_count) {
673  continue;
674  }
675  const auto agg_idx = idx - group_key_count;
676  CHECK_LT(agg_idx, agg_expr_count);
677  const auto& cur_agg_expr = aggregate->getAggExprs()[agg_idx];
678  const auto n_operands = cur_agg_expr->size();
679  for (size_t i = 0; i < n_operands; ++i) {
680  live_in.insert(static_cast<size_t>(cur_agg_expr->getOperand(i)));
681  }
682  if (n_operands == 0) {
683  has_count_star_only = true;
684  }
685  }
686  if (has_count_star_only && !group_key_count) {
687  live_in.insert(size_t(0));
688  }
689  return {live_in};
690  }
691  if (auto join = dynamic_cast<const RelJoin*>(node)) {
692  std::unordered_set<size_t> lhs_live_ins;
693  std::unordered_set<size_t> rhs_live_ins;
694  CHECK_EQ(size_t(2), join->inputCount());
695  auto lhs = join->getInput(0);
696  auto rhs = join->getInput(1);
697  const auto rhs_idx_base = lhs->size();
698  for (const auto idx : live_out) {
699  if (idx < rhs_idx_base) {
700  lhs_live_ins.insert(idx);
701  } else {
702  rhs_live_ins.insert(idx - rhs_idx_base);
703  }
704  }
705  auto rex_ins = collector.visit(join->getCondition());
706  for (const auto& rex_in : rex_ins) {
707  const auto in_idx = static_cast<size_t>(rex_in.getIndex());
708  if (rex_in.getSourceNode() == lhs) {
709  lhs_live_ins.insert(in_idx);
710  continue;
711  }
712  if (rex_in.getSourceNode() == rhs) {
713  rhs_live_ins.insert(in_idx);
714  continue;
715  }
716  CHECK(false);
717  }
718  return {lhs_live_ins, rhs_live_ins};
719  }
720  if (auto sort = dynamic_cast<const RelSort*>(node)) {
721  CHECK_EQ(size_t(1), sort->inputCount());
722  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
723  for (size_t i = 0; i < sort->collationCount(); ++i) {
724  live_in.insert(sort->getCollation(i).getField());
725  }
726  return {live_in};
727  }
728  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
729  CHECK_EQ(size_t(1), filter->inputCount());
730  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
731  auto rex_ins = collector.visit(filter->getCondition());
732  for (const auto& rex_in : rex_ins) {
733  live_in.insert(static_cast<size_t>(rex_in.getIndex()));
734  }
735  return {live_in};
736  }
737  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
738  const auto input_count = table_func->size();
739  std::unordered_set<size_t> live_in;
740  for (size_t i = 0; i < input_count; i++) {
741  live_in.insert(i);
742  }
743 
744  std::vector<std::unordered_set<size_t>> result;
745  // Is the computed result correct in general?
746  for (size_t i = table_func->inputCount(); i > 0; i--) {
747  result.push_back(live_in);
748  }
749 
750  return result;
751  }
752  if (auto logical_union = dynamic_cast<const RelLogicalUnion*>(node)) {
753  return std::vector<std::unordered_set<size_t>>(logical_union->inputCount(), live_out);
754  }
755  return {};
756 }
757 
758 bool any_dead_col_in(const RelAlgNode* node,
759  const std::unordered_set<size_t>& live_outs) {
760  CHECK(!dynamic_cast<const RelScan*>(node));
761  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
762  for (size_t i = aggregate->getGroupByCount(); i < aggregate->size(); ++i) {
763  if (!live_outs.count(i)) {
764  return true;
765  }
766  }
767  return false;
768  }
769 
770  return node->size() > live_outs.size();
771 }
772 
773 bool does_redef_cols(const RelAlgNode* node) {
774  return dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelProject*>(node);
775 }
776 
778  public:
780  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
781  liveouts,
782  const std::unordered_set<const RelAlgNode*>& intact_nodes)
783  : liveouts_(liveouts), intact_nodes_(intact_nodes) {}
784 
785  bool hasAllSrcReady(const RelAlgNode* node) const {
786  for (size_t i = 0; i < node->inputCount(); ++i) {
787  auto src = node->getInput(i);
788  if (!dynamic_cast<const RelScan*>(src) && liveouts_.find(src) == liveouts_.end() &&
789  !intact_nodes_.count(src)) {
790  return false;
791  }
792  }
793  return true;
794  }
795 
796  private:
797  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
799  const std::unordered_set<const RelAlgNode*>& intact_nodes_;
800 };
801 
803  const RelAlgNode* node,
804  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
805  new_liveouts,
806  const std::unordered_set<size_t>& old_liveouts,
807  const std::unordered_set<const RelAlgNode*>& intact_nodes,
808  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
809  auto live_fields = old_liveouts;
810  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
811  for (size_t i = 0; i < aggregate->getGroupByCount(); ++i) {
812  live_fields.insert(i);
813  }
814  }
815  auto it_ok =
816  new_liveouts.insert(std::make_pair(node, std::unordered_map<size_t, size_t>{}));
817  CHECK(it_ok.second);
818  auto& new_indices = it_ok.first->second;
819  if (intact_nodes.count(node)) {
820  for (size_t i = 0, e = node->size(); i < e; ++i) {
821  new_indices.insert(std::make_pair(i, i));
822  }
823  return;
824  }
825  if (does_redef_cols(node)) {
826  auto node_sz_it = orig_node_sizes.find(node);
827  CHECK(node_sz_it != orig_node_sizes.end());
828  const auto node_size = node_sz_it->second;
829  CHECK_GT(node_size, live_fields.size());
830  LOG(INFO) << node->toString() << " eliminated " << node_size - live_fields.size()
831  << " columns.";
832  std::vector<size_t> ordered_indices(live_fields.begin(), live_fields.end());
833  std::sort(ordered_indices.begin(), ordered_indices.end());
834  for (size_t i = 0; i < ordered_indices.size(); ++i) {
835  new_indices.insert(std::make_pair(ordered_indices[i], i));
836  }
837  return;
838  }
839  std::vector<size_t> ordered_indices;
840  for (size_t i = 0, old_base = 0, new_base = 0; i < node->inputCount(); ++i) {
841  auto src = node->getInput(i);
842  auto src_renum_it = new_liveouts.find(src);
843  if (src_renum_it != new_liveouts.end()) {
844  for (auto m : src_renum_it->second) {
845  new_indices.insert(std::make_pair(old_base + m.first, new_base + m.second));
846  }
847  new_base += src_renum_it->second.size();
848  } else if (dynamic_cast<const RelScan*>(src) || intact_nodes.count(src)) {
849  for (size_t i = 0; i < src->size(); ++i) {
850  new_indices.insert(std::make_pair(old_base + i, new_base + i));
851  }
852  new_base += src->size();
853  } else {
854  CHECK(false);
855  }
856  auto src_sz_it = orig_node_sizes.find(src);
857  CHECK(src_sz_it != orig_node_sizes.end());
858  old_base += src_sz_it->second;
859  }
860 }
861 
863  public:
865  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
866  new_numbering)
867  : node_to_input_renum_(new_numbering) {}
868  RetType visitInput(const RexInput* input) const override {
869  auto source = input->getSourceNode();
870  auto node_it = node_to_input_renum_.find(source);
871  if (node_it != node_to_input_renum_.end()) {
872  auto old_to_new_num = node_it->second;
873  auto renum_it = old_to_new_num.find(input->getIndex());
874  CHECK(renum_it != old_to_new_num.end());
875  return boost::make_unique<RexInput>(source, renum_it->second);
876  }
877  return input->deepCopy();
878  }
879 
880  private:
881  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
883 };
884 
885 std::vector<std::unique_ptr<const RexAgg>> renumber_rex_aggs(
886  std::vector<std::unique_ptr<const RexAgg>>& agg_exprs,
887  const std::unordered_map<size_t, size_t>& new_numbering) {
888  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
889  for (auto& expr : agg_exprs) {
890  if (expr->size() >= 1) {
891  auto old_idx = expr->getOperand(0);
892  auto idx_it = new_numbering.find(old_idx);
893  if (idx_it != new_numbering.end()) {
894  std::vector<size_t> operands;
895  operands.push_back(idx_it->second);
896  if (expr->size() == 2) {
897  operands.push_back(expr->getOperand(1));
898  }
899  new_exprs.push_back(boost::make_unique<RexAgg>(
900  expr->getKind(), expr->isDistinct(), expr->getType(), operands));
901  continue;
902  }
903  }
904  new_exprs.push_back(std::move(expr));
905  }
906  return new_exprs;
907 }
908 
910  const std::unordered_map<size_t, size_t>& new_numbering) {
911  auto field_idx = old_field.getField();
912  auto idx_it = new_numbering.find(field_idx);
913  if (idx_it != new_numbering.end()) {
914  field_idx = idx_it->second;
915  }
916  return SortField(field_idx, old_field.getSortDir(), old_field.getNullsPosition());
917 }
918 
919 std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> mark_live_columns(
920  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
921  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> live_outs;
922  std::vector<const RelAlgNode*> work_set;
923  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
924  auto node = node_it->get();
925  if (dynamic_cast<const RelScan*>(node) || live_outs.count(node) ||
926  dynamic_cast<const RelModify*>(node) ||
927  dynamic_cast<const RelTableFunction*>(node)) {
928  continue;
929  }
930  std::vector<size_t> all_live(node->size());
931  std::iota(all_live.begin(), all_live.end(), size_t(0));
932  live_outs.insert(std::make_pair(
933  node, std::unordered_set<size_t>(all_live.begin(), all_live.end())));
934 
935  work_set.push_back(node);
936  while (!work_set.empty()) {
937  auto walker = work_set.back();
938  work_set.pop_back();
939  CHECK(!dynamic_cast<const RelScan*>(walker));
940  CHECK(live_outs.count(walker));
941  auto live_ins = get_live_ins(walker, live_outs);
942  CHECK_EQ(live_ins.size(), walker->inputCount());
943  for (size_t i = 0; i < walker->inputCount(); ++i) {
944  auto src = walker->getInput(i);
945  if (dynamic_cast<const RelScan*>(src) ||
946  dynamic_cast<const RelTableFunction*>(src) || live_ins[i].empty()) {
947  continue;
948  }
949  if (!live_outs.count(src)) {
950  live_outs.insert(std::make_pair(src, std::unordered_set<size_t>{}));
951  }
952  auto src_it = live_outs.find(src);
953  CHECK(src_it != live_outs.end());
954  auto& live_out = src_it->second;
955  bool changed = false;
956  if (!live_out.empty()) {
957  live_out.insert(live_ins[i].begin(), live_ins[i].end());
958  changed = true;
959  } else {
960  for (int idx : live_ins[i]) {
961  changed |= live_out.insert(idx).second;
962  }
963  }
964  if (changed) {
965  work_set.push_back(src);
966  }
967  }
968  }
969  }
970  return live_outs;
971 }
972 
973 std::string get_field_name(const RelAlgNode* node, size_t index) {
974  CHECK_LT(index, node->size());
975  if (auto scan = dynamic_cast<const RelScan*>(node)) {
976  return scan->getFieldName(index);
977  }
978  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
979  CHECK_EQ(aggregate->size(), aggregate->getFields().size());
980  return aggregate->getFieldName(index);
981  }
982  if (auto join = dynamic_cast<const RelJoin*>(node)) {
983  const auto lhs_size = join->getInput(0)->size();
984  if (index < lhs_size) {
985  return get_field_name(join->getInput(0), index);
986  }
987  return get_field_name(join->getInput(1), index - lhs_size);
988  }
989  if (auto project = dynamic_cast<const RelProject*>(node)) {
990  return project->getFieldName(index);
991  }
992  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
993  return get_field_name(node->getInput(0), index);
994 }
995 
997  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
998  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& liveouts,
999  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1000  du_web) {
1001  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1002  for (auto node : nodes) {
1003  new_nodes.push_back(node);
1004  if (!std::dynamic_pointer_cast<RelFilter>(node)) {
1005  continue;
1006  }
1007  const auto filter = node.get();
1008  auto liveout_it = liveouts.find(filter);
1009  CHECK(liveout_it != liveouts.end());
1010  auto& outs = liveout_it->second;
1011  if (!any_dead_col_in(filter, outs)) {
1012  continue;
1013  }
1014  auto usrs_it = du_web.find(filter);
1015  CHECK(usrs_it != du_web.end());
1016  auto& usrs = usrs_it->second;
1017  if (usrs.size() != 1 || does_redef_cols(*usrs.begin())) {
1018  continue;
1019  }
1020  auto only_usr = const_cast<RelAlgNode*>(*usrs.begin());
1021 
1022  std::vector<std::unique_ptr<const RexScalar>> exprs;
1023  std::vector<std::string> fields;
1024  for (size_t i = 0; i < filter->size(); ++i) {
1025  exprs.push_back(boost::make_unique<RexInput>(filter, i));
1026  fields.push_back(get_field_name(filter, i));
1027  }
1028  auto project_owner = std::make_shared<RelProject>(exprs, fields, node);
1029  auto project = project_owner.get();
1030 
1031  only_usr->replaceInput(node, project_owner);
1032  if (dynamic_cast<const RelJoin*>(only_usr)) {
1033  RexRebindInputsVisitor visitor(filter, project);
1034  for (auto usr : du_web[only_usr]) {
1035  visitor.visitNode(usr);
1036  }
1037  }
1038 
1039  liveouts.insert(std::make_pair(project, outs));
1040 
1041  usrs.clear();
1042  usrs.insert(project);
1043  du_web.insert(
1044  std::make_pair(project, std::unordered_set<const RelAlgNode*>{only_usr}));
1045 
1046  new_nodes.push_back(project_owner);
1047  }
1048  if (new_nodes.size() > nodes.size()) {
1049  nodes.swap(new_nodes);
1050  }
1051 }
1052 
1053 std::pair<std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>,
1054  std::vector<const RelAlgNode*>>
1056  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs,
1057  const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1058  const std::unordered_set<const RelAlgNode*>& intact_nodes,
1059  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1060  du_web,
1061  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1062  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1063  liveouts_renumbering;
1064  std::vector<const RelAlgNode*> ready_nodes;
1065  AvailabilityChecker checker(liveouts_renumbering, intact_nodes);
1066  for (auto node : nodes) {
1067  // Ignore empty live_out due to some invalid node
1068  if (!does_redef_cols(node.get()) || intact_nodes.count(node.get())) {
1069  continue;
1070  }
1071  auto live_pair = live_outs.find(node.get());
1072  CHECK(live_pair != live_outs.end());
1073  auto old_live_outs = live_pair->second;
1075  node.get(), liveouts_renumbering, old_live_outs, intact_nodes, orig_node_sizes);
1076  if (auto aggregate = std::dynamic_pointer_cast<RelAggregate>(node)) {
1077  auto old_exprs = aggregate->getAggExprsAndRelease();
1078  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
1079  auto key_name_it = aggregate->getFields().begin();
1080  std::vector<std::string> new_fields(key_name_it,
1081  key_name_it + aggregate->getGroupByCount());
1082  for (size_t i = aggregate->getGroupByCount(), j = 0;
1083  i < aggregate->getFields().size() && j < old_exprs.size();
1084  ++i, ++j) {
1085  if (old_live_outs.count(i)) {
1086  new_exprs.push_back(std::move(old_exprs[j]));
1087  new_fields.push_back(aggregate->getFieldName(i));
1088  }
1089  }
1090  aggregate->setAggExprs(new_exprs);
1091  aggregate->setFields(new_fields);
1092  } else if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
1093  auto old_exprs = project->getExpressionsAndRelease();
1094  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1095  std::vector<std::string> new_fields;
1096  for (size_t i = 0; i < old_exprs.size(); ++i) {
1097  if (old_live_outs.count(i)) {
1098  new_exprs.push_back(std::move(old_exprs[i]));
1099  new_fields.push_back(project->getFieldName(i));
1100  }
1101  }
1102  project->setExpressions(new_exprs);
1103  project->setFields(new_fields);
1104  } else {
1105  CHECK(false);
1106  }
1107  auto usrs_it = du_web.find(node.get());
1108  CHECK(usrs_it != du_web.end());
1109  for (auto usr : usrs_it->second) {
1110  if (checker.hasAllSrcReady(usr)) {
1111  ready_nodes.push_back(usr);
1112  }
1113  }
1114  }
1115  return {liveouts_renumbering, ready_nodes};
1116 }
1117 
1119  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
1120  liveout_renumbering,
1121  const std::vector<const RelAlgNode*>& ready_nodes,
1122  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& old_liveouts,
1123  const std::unordered_set<const RelAlgNode*>& intact_nodes,
1124  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1125  du_web,
1126  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1127  RexInputRenumberVisitor renumberer(liveout_renumbering);
1128  AvailabilityChecker checker(liveout_renumbering, intact_nodes);
1129  std::deque<const RelAlgNode*> work_set(ready_nodes.begin(), ready_nodes.end());
1130  while (!work_set.empty()) {
1131  auto walker = work_set.front();
1132  work_set.pop_front();
1133  CHECK(!dynamic_cast<const RelScan*>(walker));
1134  auto node = const_cast<RelAlgNode*>(walker);
1135  if (auto project = dynamic_cast<RelProject*>(node)) {
1136  auto old_exprs = project->getExpressionsAndRelease();
1137  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1138  for (auto& expr : old_exprs) {
1139  new_exprs.push_back(renumberer.visit(expr.get()));
1140  }
1141  project->setExpressions(new_exprs);
1142  } else if (auto aggregate = dynamic_cast<RelAggregate*>(node)) {
1143  auto src_it = liveout_renumbering.find(node->getInput(0));
1144  CHECK(src_it != liveout_renumbering.end());
1145  auto old_exprs = aggregate->getAggExprsAndRelease();
1146  auto new_exprs = renumber_rex_aggs(old_exprs, src_it->second);
1147  aggregate->setAggExprs(new_exprs);
1148  } else if (auto join = dynamic_cast<RelJoin*>(node)) {
1149  auto new_condition = renumberer.visit(join->getCondition());
1150  join->setCondition(new_condition);
1151  } else if (auto filter = dynamic_cast<RelFilter*>(node)) {
1152  auto new_condition = renumberer.visit(filter->getCondition());
1153  filter->setCondition(new_condition);
1154  } else if (auto sort = dynamic_cast<RelSort*>(node)) {
1155  auto src_it = liveout_renumbering.find(node->getInput(0));
1156  CHECK(src_it != liveout_renumbering.end());
1157  std::vector<SortField> new_collations;
1158  for (size_t i = 0; i < sort->collationCount(); ++i) {
1159  new_collations.push_back(
1160  renumber_sort_field(sort->getCollation(i), src_it->second));
1161  }
1162  sort->setCollation(std::move(new_collations));
1163  } else if (!dynamic_cast<RelLogicalUnion*>(node)) {
1164  LOG(FATAL) << "Unhandled node type: " << node->toString();
1165  }
1166 
1167  // Ignore empty live_out due to some invalid node
1168  if (does_redef_cols(node) || intact_nodes.count(node)) {
1169  continue;
1170  }
1171  auto live_pair = old_liveouts.find(node);
1172  CHECK(live_pair != old_liveouts.end());
1173  auto live_out = live_pair->second;
1175  node, liveout_renumbering, live_out, intact_nodes, orig_node_sizes);
1176  auto usrs_it = du_web.find(walker);
1177  CHECK(usrs_it != du_web.end());
1178  for (auto usr : usrs_it->second) {
1179  if (checker.hasAllSrcReady(usr)) {
1180  work_set.push_back(usr);
1181  }
1182  }
1183  }
1184 }
1185 
1186 } // namespace
1187 
1188 void eliminate_dead_columns(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1189  if (nodes.empty()) {
1190  return;
1191  }
1192  auto root = nodes.back().get();
1193  if (!root) {
1194  return;
1195  }
1196  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1197  // Mark
1198  auto old_liveouts = mark_live_columns(nodes);
1199  std::unordered_set<const RelAlgNode*> intact_nodes;
1200  bool has_dead_cols = false;
1201  for (auto live_pair : old_liveouts) {
1202  auto node = live_pair.first;
1203  const auto& outs = live_pair.second;
1204  if (outs.empty()) {
1205  LOG(WARNING) << "RA node with no used column: " << node->toString();
1206  // Ignore empty live_out due to some invalid node
1207  intact_nodes.insert(node);
1208  }
1209  if (any_dead_col_in(node, outs)) {
1210  has_dead_cols = true;
1211  } else {
1212  intact_nodes.insert(node);
1213  }
1214  }
1215  if (!has_dead_cols) {
1216  return;
1217  }
1218  auto web = build_du_web(nodes);
1219  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1220 
1221  for (auto node : nodes) {
1222  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1223  continue;
1224  }
1225  bool intact = true;
1226  for (size_t i = 0; i < node->inputCount(); ++i) {
1227  auto source = node->getInput(i);
1228  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1229  intact = false;
1230  break;
1231  }
1232  }
1233  if (intact) {
1234  intact_nodes.insert(node.get());
1235  }
1236  }
1237 
1238  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1239  for (auto node : nodes) {
1240  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1241  }
1242  // Sweep
1243  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1244  liveout_renumbering;
1245  std::vector<const RelAlgNode*> ready_nodes;
1246  std::tie(liveout_renumbering, ready_nodes) =
1247  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1248  // Propagate
1250  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1251 }
1252 
1253 void eliminate_dead_subqueries(std::vector<std::shared_ptr<RexSubQuery>>& subqueries,
1254  RelAlgNode const* root) {
1255  if (!subqueries.empty()) {
1256  auto live_ids = RexSubQueryIdCollector::getLiveRexSubQueryIds(root);
1257  auto sort_live_ids_first = [&live_ids](auto& a, auto& b) {
1258  return live_ids.count(a->getId()) && !live_ids.count(b->getId());
1259  };
1260  std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1261  size_t n_dead_subqueries;
1262  if (live_ids.count(subqueries.front()->getId())) {
1263  auto first_dead_itr = std::upper_bound(subqueries.cbegin(),
1264  subqueries.cend(),
1265  subqueries.front(),
1266  sort_live_ids_first);
1267  n_dead_subqueries = subqueries.cend() - first_dead_itr;
1268  } else {
1269  n_dead_subqueries = subqueries.size();
1270  }
1271  if (n_dead_subqueries) {
1272  VLOG(1) << "Eliminating " << n_dead_subqueries
1273  << (n_dead_subqueries == 1 ? " subquery." : " subqueries.");
1274  subqueries.resize(subqueries.size() - n_dead_subqueries);
1275  subqueries.shrink_to_fit();
1276  }
1277  }
1278 }
1279 
1280 namespace {
1281 
1283  public:
1284  RexInputSinker(const std::unordered_map<size_t, size_t>& old_to_new_idx,
1285  const RelAlgNode* new_src)
1286  : old_to_new_in_idx_(old_to_new_idx), target_(new_src) {}
1287 
1288  RetType visitInput(const RexInput* input) const override {
1289  CHECK_EQ(target_->inputCount(), size_t(1));
1290  CHECK_EQ(target_->getInput(0), input->getSourceNode());
1291  auto idx_it = old_to_new_in_idx_.find(input->getIndex());
1292  CHECK(idx_it != old_to_new_in_idx_.end());
1293  return boost::make_unique<RexInput>(target_, idx_it->second);
1294  }
1295 
1296  private:
1297  const std::unordered_map<size_t, size_t>& old_to_new_in_idx_;
1299 };
1300 
1302  public:
1303  SubConditionReplacer(const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>&
1304  idx_to_sub_condition)
1305  : idx_to_subcond_(idx_to_sub_condition) {}
1306  RetType visitInput(const RexInput* input) const override {
1307  auto subcond_it = idx_to_subcond_.find(input->getIndex());
1308  if (subcond_it != idx_to_subcond_.end()) {
1309  return RexDeepCopyVisitor::visit(subcond_it->second.get());
1310  }
1311  return RexDeepCopyVisitor::visitInput(input);
1312  }
1313 
1314  private:
1315  const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>& idx_to_subcond_;
1316 };
1317 
1318 } // namespace
1319 
1321  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1322  auto web = build_du_web(nodes);
1323  auto liveouts = mark_live_columns(nodes);
1324  for (auto node : nodes) {
1325  auto project = std::dynamic_pointer_cast<RelProject>(node);
1326  // TODO(miyu): relax RelScan limitation
1327  if (!project || project->isSimple() ||
1328  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1329  continue;
1330  }
1331  auto usrs_it = web.find(project.get());
1332  CHECK(usrs_it != web.end());
1333  auto& usrs = usrs_it->second;
1334  if (usrs.size() != 1) {
1335  continue;
1336  }
1337  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1338  if (!join) {
1339  continue;
1340  }
1341  auto outs_it = liveouts.find(join);
1342  CHECK(outs_it != liveouts.end());
1343  std::unordered_map<size_t, size_t> in_to_out_index;
1344  std::unordered_set<size_t> boolean_expr_indicies;
1345  bool discarded = false;
1346  for (size_t i = 0; i < project->size(); ++i) {
1347  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1348  if (oper && oper->getType().get_type() == kBOOLEAN) {
1349  boolean_expr_indicies.insert(i);
1350  } else {
1351  // TODO(miyu): relax?
1352  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1353  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1354  } else {
1355  discarded = true;
1356  }
1357  }
1358  }
1359  if (discarded || boolean_expr_indicies.empty()) {
1360  continue;
1361  }
1362  const size_t index_base =
1363  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1364  for (auto i : boolean_expr_indicies) {
1365  auto join_idx = index_base + i;
1366  if (outs_it->second.count(join_idx)) {
1367  discarded = true;
1368  break;
1369  }
1370  }
1371  if (discarded) {
1372  continue;
1373  }
1374  RexInputCollector collector(project.get());
1375  std::vector<size_t> unloaded_input_indices;
1376  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1377  // Given all are dead right after join, safe to sink
1378  for (auto i : boolean_expr_indicies) {
1379  auto rex_ins = collector.visit(project->getProjectAt(i));
1380  for (auto& in : rex_ins) {
1381  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1382  if (!in_to_out_index.count(in.getIndex())) {
1383  auto curr_out_index = project->size() + unloaded_input_indices.size();
1384  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1385  unloaded_input_indices.push_back(in.getIndex());
1386  }
1387  RexInputSinker sinker(in_to_out_index, project.get());
1388  in_idx_to_new_subcond.insert(
1389  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1390  }
1391  }
1392  if (in_idx_to_new_subcond.empty()) {
1393  continue;
1394  }
1395  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1396  for (size_t i = 0; i < project->size(); ++i) {
1397  if (boolean_expr_indicies.count(i)) {
1398  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1399  } else {
1400  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1401  CHECK(rex_input != nullptr);
1402  new_projections.push_back(rex_input->deepCopy());
1403  }
1404  }
1405  for (auto i : unloaded_input_indices) {
1406  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1407  }
1408  project->setExpressions(new_projections);
1409 
1410  SubConditionReplacer replacer(in_idx_to_new_subcond);
1411  auto new_condition = replacer.visit(join->getCondition());
1412  join->setCondition(new_condition);
1413  }
1414 }
1415 
1416 namespace {
1417 
1419  public:
1420  RexInputRedirector(const RelAlgNode* old_src, const RelAlgNode* new_src)
1421  : old_src_(old_src), new_src_(new_src) {}
1422 
1423  RetType visitInput(const RexInput* input) const override {
1424  CHECK_EQ(old_src_, input->getSourceNode());
1425  CHECK_NE(old_src_, new_src_);
1426  auto actual_new_src = new_src_;
1427  if (auto join = dynamic_cast<const RelJoin*>(new_src_)) {
1428  actual_new_src = join->getInput(0);
1429  CHECK_EQ(join->inputCount(), size_t(2));
1430  auto src2_input_base = actual_new_src->size();
1431  if (input->getIndex() >= src2_input_base) {
1432  actual_new_src = join->getInput(1);
1433  return boost::make_unique<RexInput>(actual_new_src,
1434  input->getIndex() - src2_input_base);
1435  }
1436  }
1437 
1438  return boost::make_unique<RexInput>(actual_new_src, input->getIndex());
1439  }
1440 
1441  private:
1444 };
1445 
1447  std::shared_ptr<const RelAlgNode> old_def_node,
1448  std::shared_ptr<const RelAlgNode> new_def_node,
1449  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>>& deconst_mapping,
1450  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1451  du_web) {
1452  auto usrs_it = du_web.find(old_def_node.get());
1453  RexInputRedirector redirector(new_def_node.get(), old_def_node.get());
1454  CHECK(usrs_it != du_web.end());
1455  for (auto usr : usrs_it->second) {
1456  auto usr_it = deconst_mapping.find(usr);
1457  CHECK(usr_it != deconst_mapping.end());
1458  usr_it->second->replaceInput(old_def_node, new_def_node);
1459  }
1460  auto new_usrs_it = du_web.find(new_def_node.get());
1461  CHECK(new_usrs_it != du_web.end());
1462  new_usrs_it->second.insert(usrs_it->second.begin(), usrs_it->second.end());
1463  usrs_it->second.clear();
1464 }
1465 
1466 } // namespace
1467 
1468 void fold_filters(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1469  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1470  for (auto node : nodes) {
1471  deconst_mapping.insert(std::make_pair(node.get(), node));
1472  }
1473 
1474  auto web = build_du_web(nodes);
1475  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1476  auto& node = *node_it;
1477  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1478  CHECK_EQ(filter->inputCount(), size_t(1));
1479  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1480  if (!src_filter) {
1481  continue;
1482  }
1483  auto siblings_it = web.find(src_filter);
1484  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1485  continue;
1486  }
1487  auto src_it = deconst_mapping.find(src_filter);
1488  CHECK(src_it != deconst_mapping.end());
1489  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1490  CHECK(folded_filter);
1491  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1492  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1493  VLOG(1) << "Node ID=" << filter->getId() << " folded into "
1494  << "ID=" << folded_filter->getId();
1496  auto node_str = folded_filter->toString();
1497  constexpr size_t max_node_print_len{500};
1498  const size_t node_str_len = node_str.size();
1499  if (node_str_len > max_node_print_len) {
1500  node_str = node_str.substr(0, max_node_print_len) + "...";
1501  }
1502  VLOG(2) << "Folded Node (ID: " << folded_filter->getId()
1503  << ") contents: " << node_str;
1504  }
1505  std::vector<std::unique_ptr<const RexScalar>> operands;
1506  operands.emplace_back(folded_filter->getAndReleaseCondition());
1507  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1508  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1509  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1510  operands.push_back(redirector.visit(rex_operator));
1511  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1512  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1513  const bool notnull = old_condition->getType().get_notnull() &&
1514  other_condition->getType().get_notnull();
1515  auto new_condition = std::unique_ptr<const RexScalar>(
1516  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1517  folded_filter->setCondition(new_condition);
1518  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1519  deconst_mapping.erase(filter.get());
1520  web.erase(filter.get());
1521  web[filter->getInput(0)].erase(filter.get());
1522  node.reset();
1523  }
1524  }
1525  }
1526 
1527  if (!nodes.empty()) {
1528  auto sink = nodes.back();
1529  for (auto node_it = std::next(nodes.rbegin()); node_it != nodes.rend(); ++node_it) {
1530  if (sink) {
1531  break;
1532  }
1533  sink = *node_it;
1534  }
1535  CHECK(sink);
1536  cleanup_dead_nodes(nodes);
1537  }
1538 }
1539 
1540 std::vector<const RexScalar*> find_hoistable_conditions(const RexScalar* condition,
1541  const RelAlgNode* source,
1542  const size_t first_col_idx,
1543  const size_t last_col_idx) {
1544  if (auto rex_op = dynamic_cast<const RexOperator*>(condition)) {
1545  switch (rex_op->getOperator()) {
1546  case kAND: {
1547  std::vector<const RexScalar*> subconditions;
1548  size_t complete_subcond_count = 0;
1549  for (size_t i = 0; i < rex_op->size(); ++i) {
1550  auto conds = find_hoistable_conditions(
1551  rex_op->getOperand(i), source, first_col_idx, last_col_idx);
1552  if (conds.size() == size_t(1)) {
1553  ++complete_subcond_count;
1554  }
1555  subconditions.insert(subconditions.end(), conds.begin(), conds.end());
1556  }
1557  if (complete_subcond_count == rex_op->size()) {
1558  return {rex_op};
1559  } else {
1560  return {subconditions};
1561  }
1562  break;
1563  }
1564  case kEQ: {
1565  const auto lhs_conds = find_hoistable_conditions(
1566  rex_op->getOperand(0), source, first_col_idx, last_col_idx);
1567  const auto rhs_conds = find_hoistable_conditions(
1568  rex_op->getOperand(1), source, first_col_idx, last_col_idx);
1569  const auto lhs_in = lhs_conds.size() == 1
1570  ? dynamic_cast<const RexInput*>(*lhs_conds.begin())
1571  : nullptr;
1572  const auto rhs_in = rhs_conds.size() == 1
1573  ? dynamic_cast<const RexInput*>(*rhs_conds.begin())
1574  : nullptr;
1575  if (lhs_in && rhs_in) {
1576  return {rex_op};
1577  }
1578  return {};
1579  break;
1580  }
1581  default:
1582  break;
1583  }
1584  return {};
1585  }
1586  if (auto rex_in = dynamic_cast<const RexInput*>(condition)) {
1587  if (rex_in->getSourceNode() == source) {
1588  const auto col_idx = rex_in->getIndex();
1589  return {col_idx >= first_col_idx && col_idx <= last_col_idx ? condition : nullptr};
1590  }
1591  return {};
1592  }
1593  return {};
1594 }
1595 
1597  public:
1598  JoinTargetRebaser(const RelJoin* join, const unsigned old_base)
1599  : join_(join)
1600  , old_base_(old_base)
1601  , src1_base_(join->getInput(0)->size())
1602  , target_count_(join->size()) {}
1603  RetType visitInput(const RexInput* input) const override {
1604  auto curr_idx = input->getIndex();
1605  CHECK_GE(curr_idx, old_base_);
1606  CHECK_LT(static_cast<size_t>(curr_idx), target_count_);
1607  curr_idx -= old_base_;
1608  if (curr_idx >= src1_base_) {
1609  return boost::make_unique<RexInput>(join_->getInput(1), curr_idx - src1_base_);
1610  } else {
1611  return boost::make_unique<RexInput>(join_->getInput(0), curr_idx);
1612  }
1613  }
1614 
1615  private:
1616  const RelJoin* join_;
1617  const unsigned old_base_;
1618  const size_t src1_base_;
1619  const size_t target_count_;
1620 };
1621 
1623  public:
1624  SubConditionRemover(const std::vector<const RexScalar*> sub_conds)
1625  : sub_conditions_(sub_conds.begin(), sub_conds.end()) {}
1626  RetType visitOperator(const RexOperator* rex_operator) const override {
1627  if (sub_conditions_.count(rex_operator)) {
1628  return boost::make_unique<RexLiteral>(
1629  true, kBOOLEAN, kBOOLEAN, unsigned(-2147483648), 1, unsigned(-2147483648), 1);
1630  }
1631  return RexDeepCopyVisitor::visitOperator(rex_operator);
1632  }
1633 
1634  private:
1635  std::unordered_set<const RexScalar*> sub_conditions_;
1636 };
1637 
1639  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1640  std::unordered_set<const RelAlgNode*> visited;
1641  auto web = build_du_web(nodes);
1642  for (auto node : nodes) {
1643  if (visited.count(node.get())) {
1644  continue;
1645  }
1646  visited.insert(node.get());
1647  auto join = dynamic_cast<RelJoin*>(node.get());
1648  if (join && join->getJoinType() == JoinType::INNER) {
1649  // Only allow cross join for now.
1650  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1651  // Assume Calcite always generates an inner join on constant boolean true for
1652  // cross join.
1653  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1654  size_t first_col_idx = 0;
1655  const RelFilter* filter = nullptr;
1656  std::vector<const RelJoin*> join_seq{join};
1657  for (const RelJoin* curr_join = join; !filter;) {
1658  auto usrs_it = web.find(curr_join);
1659  CHECK(usrs_it != web.end());
1660  if (usrs_it->second.size() != size_t(1)) {
1661  break;
1662  }
1663  auto only_usr = *usrs_it->second.begin();
1664  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1665  if (join == usr_join->getInput(1)) {
1666  const auto src1_offset = usr_join->getInput(0)->size();
1667  first_col_idx += src1_offset;
1668  }
1669  join_seq.push_back(usr_join);
1670  curr_join = usr_join;
1671  continue;
1672  }
1673 
1674  filter = dynamic_cast<const RelFilter*>(only_usr);
1675  break;
1676  }
1677  if (!filter) {
1678  visited.insert(join_seq.begin(), join_seq.end());
1679  continue;
1680  }
1681  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1682  CHECK(src_join);
1683  auto modified_filter = const_cast<RelFilter*>(filter);
1684 
1685  if (src_join == join) {
1686  std::unique_ptr<const RexScalar> filter_condition(
1687  modified_filter->getAndReleaseCondition());
1688  std::unique_ptr<const RexScalar> true_condition =
1689  boost::make_unique<RexLiteral>(true,
1690  kBOOLEAN,
1691  kBOOLEAN,
1692  unsigned(-2147483648),
1693  1,
1694  unsigned(-2147483648),
1695  1);
1696  modified_filter->setCondition(true_condition);
1697  join->setCondition(filter_condition);
1698  continue;
1699  }
1700  const auto src1_base = src_join->getInput(0)->size();
1701  auto source =
1702  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1703  first_col_idx =
1704  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1705  auto join_conditions =
1707  source,
1708  first_col_idx,
1709  first_col_idx + join->size() - 1);
1710  if (join_conditions.empty()) {
1711  continue;
1712  }
1713 
1714  JoinTargetRebaser rebaser(join, first_col_idx);
1715  if (join_conditions.size() == 1) {
1716  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1717  join->setCondition(new_join_condition);
1718  } else {
1719  std::vector<std::unique_ptr<const RexScalar>> operands;
1720  bool notnull = true;
1721  for (size_t i = 0; i < join_conditions.size(); ++i) {
1722  operands.emplace_back(rebaser.visit(join_conditions[i]));
1723  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1724  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1725  notnull = notnull && old_subcond->getType().get_notnull();
1726  }
1727  auto new_join_condition = std::unique_ptr<const RexScalar>(
1728  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1729  join->setCondition(new_join_condition);
1730  }
1731 
1732  SubConditionRemover remover(join_conditions);
1733  auto new_filter_condition = remover.visit(filter->getCondition());
1734  modified_filter->setCondition(new_filter_condition);
1735  }
1736  }
1737  }
1738 }
1739 
1740 // For some reason, Calcite generates Sort, Project, Sort sequences where the
1741 // two Sort nodes are identical and the Project is identity. Simplify this
1742 // pattern by re-binding the input of the second sort to the input of the first.
1743 void simplify_sort(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1744  if (nodes.size() < 3) {
1745  return;
1746  }
1747  for (size_t i = 0; i <= nodes.size() - 3;) {
1748  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1749  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1750  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1751  if (first_sort && second_sort && project && project->isIdentity() &&
1752  *first_sort == *second_sort) {
1753  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1754  first_sort->getAndOwnInput(0));
1755  nodes[i].reset();
1756  nodes[i + 1].reset();
1757  i += 3;
1758  } else {
1759  ++i;
1760  }
1761  }
1762 
1763  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1764  for (auto node : nodes) {
1765  if (!node) {
1766  continue;
1767  }
1768  new_nodes.push_back(node);
1769  }
1770  nodes.swap(new_nodes);
1771 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
SubConditionReplacer(const std::unordered_map< size_t, std::unique_ptr< const RexScalar >> &idx_to_sub_condition)
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
#define CHECK_EQ(x, y)
Definition: Logger.h:219
const size_t target_count_
RexInputRedirector(const RelAlgNode *old_src, const RelAlgNode *new_src)
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject * > &projects_to_remove)
size_t pick_always_live_col_idx(const RelAlgNode *node)
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & node_to_input_renum_
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexInputSinker(const std::unordered_map< size_t, size_t > &old_to_new_idx, const RelAlgNode *new_src)
AvailabilityChecker(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes)
size_t size() const override
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
#define LOG(tag)
Definition: Logger.h:205
SortDirection getSortDir() const
bool does_redef_cols(const RelAlgNode *node)
const RexScalar * getCondition() const
RetType visitOperator(const RexOperator *rex_operator) const override
Definition: RexVisitor.h:153
std::string join(T const &container, std::string const &delim)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
void * visitInput(const RexInput *rex_input) const override
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
#define CHECK_GE(x, y)
Definition: Logger.h:224
size_t getField() const
RetType visitOperator(const RexOperator *rex_operator) const override
Definition: sqldefs.h:30
tuple root
Definition: setup.in.py:14
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
#define CHECK_GT(x, y)
Definition: Logger.h:223
const std::unordered_set< const RelProject * > & crt_projects_
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool safe_to_redirect(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
constexpr double a
Definition: Utm.h:33
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & liveouts_
unsigned getIndex() const
std::vector< std::unordered_set< size_t > > get_live_ins(const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
RetType visitInput(const RexInput *input) const override
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
RetType visitInput(const RexInput *input) const override
bool fast_logging_check(Channel)
Definition: Logger.h:185
#define CHECK_NE(x, y)
Definition: Logger.h:220
NullSortedPosition getNullsPosition() const
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
const std::unordered_map< size_t, size_t > & old_to_new_in_idx_
const RelAlgNode * getInput(const size_t idx) const
Definition: sqldefs.h:37
bool isSimple() const
RetType visitInput(const RexInput *input) const override
Definition: RexVisitor.h:141
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
RetType aggregateResult(const RetType &aggregate, const RetType &next_result) const override
std::unique_ptr< RexInput > deepCopy() const
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
const std::unordered_set< const RelAlgNode * > & intact_nodes_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
const RexScalar * getProjectAt(const size_t idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:221
void setSourceNode(const RelAlgNode *node) const
DEVICE void iota(ARGS &&...args)
Definition: gpu_enabled.h:69
RetType visitInput(const RexInput *input) const override
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
RetType visitInput(const RexInput *input) const override
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::vector< std::unique_ptr< const RexAgg > > renumber_rex_aggs(std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
SortField renumber_sort_field(const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:211
SubConditionRemover(const std::vector< const RexScalar * > sub_conds)
std::unordered_set< const RexScalar * > sub_conditions_
const RelJoin * join_
void propagate_rex_input_renumber(const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
std::string get_field_name(const RelAlgNode *node, size_t index)
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const std::unordered_map< size_t, std::unique_ptr< const RexScalar > > & idx_to_subcond_
const size_t inputCount() const
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
const unsigned old_base_
RexInputRenumberVisitor(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_numbering)
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
JoinTargetRebaser(const RelJoin *join, const unsigned old_base)
RexProjectInputRedirector(const std::unordered_set< const RelProject * > &crt_inputs)
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:139
#define VLOG(n)
Definition: Logger.h:305
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override