OmniSciDB  c07336695a
RelAlgOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgOptimizer.h"
18 #include "RexVisitor.h"
19 #include "Shared/Logger.h"
20 
21 #include <numeric>
22 #include <string>
23 #include <unordered_map>
24 
25 namespace {
26 
28  public:
29  RexProjectInputRedirector(const std::unordered_set<const RelProject*>& crt_inputs)
30  : crt_projects_(crt_inputs) {}
31 
32  RetType visitInput(const RexInput* input) const override {
33  auto source = dynamic_cast<const RelProject*>(input->getSourceNode());
34  if (!source || !crt_projects_.count(source)) {
35  return input->deepCopy();
36  }
37  auto new_source = source->getInput(0);
38  auto new_input =
39  dynamic_cast<const RexInput*>(source->getProjectAt(input->getIndex()));
40  if (!new_input) {
41  return input->deepCopy();
42  }
43  if (auto join = dynamic_cast<const RelJoin*>(new_source)) {
44  CHECK(new_input->getSourceNode() == join->getInput(0) ||
45  new_input->getSourceNode() == join->getInput(1));
46  } else {
47  CHECK_EQ(new_input->getSourceNode(), new_source);
48  }
49  return new_input->deepCopy();
50  }
51 
52  private:
53  const std::unordered_set<const RelProject*>& crt_projects_;
54 };
55 
56 class RexRebindInputsVisitor : public RexVisitor<void*> {
57  public:
58  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
59  : old_input_(old_input), new_input_(new_input) {}
60 
61  void* visitInput(const RexInput* rex_input) const override {
62  const auto old_source = rex_input->getSourceNode();
63  if (old_source == old_input_) {
64  rex_input->setSourceNode(new_input_);
65  }
66  return nullptr;
67  };
68 
69  void visitNode(const RelAlgNode* node) const {
70  if (dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelSort*>(node)) {
71  return;
72  }
73  if (auto join = dynamic_cast<const RelJoin*>(node)) {
74  if (auto condition = join->getCondition()) {
75  visit(condition);
76  }
77  return;
78  }
79  if (auto project = dynamic_cast<const RelProject*>(node)) {
80  for (size_t i = 0; i < project->size(); ++i) {
81  visit(project->getProjectAt(i));
82  }
83  return;
84  }
85  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
86  visit(filter->getCondition());
87  return;
88  }
89  CHECK(false);
90  }
91 
92  private:
95 };
96 
98  const RelProject* curr_project,
99  const std::unordered_set<const RelProject*>& projects_to_remove) {
100  auto source = curr_project->getInput(0);
101  while (auto filter = dynamic_cast<const RelFilter*>(source)) {
102  source = filter->getInput(0);
103  }
104  if (auto src_project = dynamic_cast<const RelProject*>(source)) {
105  if (projects_to_remove.count(src_project)) {
106  return get_actual_source_size(src_project, projects_to_remove);
107  }
108  }
109  return curr_project->getInput(0)->size();
110 }
111 
113  const RelProject* project,
114  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
115  du_web) {
116  if (!project->isSimple()) {
117  return false;
118  }
119  auto usrs_it = du_web.find(project);
120  CHECK(usrs_it != du_web.end());
121  for (auto usr : usrs_it->second) {
122  if (!dynamic_cast<const RelProject*>(usr) && !dynamic_cast<const RelFilter*>(usr)) {
123  return false;
124  }
125  }
126  return true;
127 }
128 
130  const RelProject* project,
131  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
132  du_web,
133  const std::unordered_set<const RelProject*>& projects_to_remove,
134  std::unordered_set<const RelProject*>& permutating_projects) {
135  auto source_size = get_actual_source_size(project, projects_to_remove);
136  if (project->size() > source_size) {
137  return false;
138  }
139 
140  if (project->size() < source_size) {
141  auto usrs_it = du_web.find(project);
142  CHECK(usrs_it != du_web.end());
143  bool guard_found = false;
144  while (usrs_it->second.size() == size_t(1)) {
145  auto only_usr = *usrs_it->second.begin();
146  if (dynamic_cast<const RelProject*>(only_usr)) {
147  guard_found = true;
148  break;
149  }
150  if (dynamic_cast<const RelAggregate*>(only_usr) ||
151  dynamic_cast<const RelSort*>(only_usr) ||
152  dynamic_cast<const RelJoin*>(only_usr)) {
153  return false;
154  }
155  CHECK(dynamic_cast<const RelFilter*>(only_usr));
156  usrs_it = du_web.find(only_usr);
157  CHECK(usrs_it != du_web.end());
158  }
159 
160  if (!guard_found) {
161  return false;
162  }
163  }
164 
165  bool identical = true;
166  for (size_t i = 0; i < project->size(); ++i) {
167  auto target = dynamic_cast<const RexInput*>(project->getProjectAt(i));
168  CHECK(target);
169  if (i != target->getIndex()) {
170  identical = false;
171  break;
172  }
173  }
174 
175  if (identical) {
176  return true;
177  }
178 
179  if (safe_to_redirect(project, du_web)) {
180  permutating_projects.insert(project);
181  return true;
182  }
183 
184  return false;
185 }
186 
188  const RelFilter* excluded_root,
189  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
190  du_web,
191  const std::unordered_map<const RelAlgNode*, RelAlgNode*>& deconst_mapping) {
192  CHECK(excluded_root);
193  auto src_project = dynamic_cast<const RelProject*>(excluded_root->getInput(0));
194  CHECK(src_project && src_project->isSimple());
195  const auto indirect_join_src = dynamic_cast<const RelJoin*>(src_project->getInput(0));
196  std::unordered_map<size_t, size_t> old_to_new_idx;
197  for (size_t i = 0; i < src_project->size(); ++i) {
198  auto rex_in = dynamic_cast<const RexInput*>(src_project->getProjectAt(i));
199  CHECK(rex_in);
200  size_t src_base = 0;
201  if (indirect_join_src != nullptr &&
202  indirect_join_src->getInput(1) == rex_in->getSourceNode()) {
203  src_base = indirect_join_src->getInput(0)->size();
204  }
205  old_to_new_idx.insert(std::make_pair(i, src_base + rex_in->getIndex()));
206  old_to_new_idx.insert(std::make_pair(i, rex_in->getIndex()));
207  }
208  CHECK(old_to_new_idx.size());
209  RexInputRenumber<false> renumber(old_to_new_idx);
210  auto usrs_it = du_web.find(excluded_root);
211  CHECK(usrs_it != du_web.end());
212  std::vector<const RelAlgNode*> work_set(usrs_it->second.begin(), usrs_it->second.end());
213  while (!work_set.empty()) {
214  auto node = work_set.back();
215  work_set.pop_back();
216  auto node_it = deconst_mapping.find(node);
217  CHECK(node_it != deconst_mapping.end());
218  auto modified_node = node_it->second;
219  if (auto filter = dynamic_cast<RelFilter*>(modified_node)) {
220  auto new_condition = renumber.visit(filter->getCondition());
221  filter->setCondition(new_condition);
222  auto usrs_it = du_web.find(filter);
223  CHECK(usrs_it != du_web.end() && usrs_it->second.size() == 1);
224  work_set.push_back(*usrs_it->second.begin());
225  continue;
226  }
227  if (auto project = dynamic_cast<RelProject*>(modified_node)) {
228  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
229  for (size_t i = 0; i < project->size(); ++i) {
230  new_exprs.push_back(renumber.visit(project->getProjectAt(i)));
231  }
232  project->setExpressions(new_exprs);
233  continue;
234  }
235  CHECK(false);
236  }
237 }
238 
240  std::shared_ptr<RelAlgNode> node,
241  const std::unordered_set<const RelProject*>& projects,
242  const std::unordered_set<const RelProject*>& permutating_projects,
243  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
244  du_web,
245  const std::unordered_map<const RelAlgNode*, RelAlgNode*>& deconst_mapping) {
246  std::shared_ptr<const RelProject> src_project = nullptr;
247  for (size_t i = 0; i < node->inputCount(); ++i) {
248  if (auto project =
249  std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(i))) {
250  if (projects.count(project.get())) {
251  src_project = project;
252  break;
253  }
254  }
255  }
256  if (!src_project) {
257  return;
258  }
259  if (auto join = std::dynamic_pointer_cast<RelJoin>(node)) {
260  auto other_project =
261  src_project == node->getAndOwnInput(0)
262  ? std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(1))
263  : std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(0));
264  join->replaceInput(src_project, src_project->getAndOwnInput(0));
265  RexRebindInputsVisitor rebinder(src_project.get(), src_project->getInput(0));
266  auto usrs_it = du_web.find(join.get());
267  CHECK(usrs_it != du_web.end());
268  for (auto usr : usrs_it->second) {
269  rebinder.visitNode(usr);
270  }
271 
272  if (other_project && projects.count(other_project.get())) {
273  join->replaceInput(other_project, other_project->getAndOwnInput(0));
274  RexRebindInputsVisitor other_rebinder(other_project.get(),
275  other_project->getInput(0));
276  for (auto usr : usrs_it->second) {
277  other_rebinder.visitNode(usr);
278  }
279  }
280  return;
281  }
282  if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
283  project->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
284  RexProjectInputRedirector redirector(projects);
285  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
286  for (size_t i = 0; i < project->size(); ++i) {
287  new_exprs.push_back(redirector.visit(project->getProjectAt(i)));
288  }
289  project->setExpressions(new_exprs);
290  return;
291  }
292  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
293  const bool is_permutating_proj = permutating_projects.count(src_project.get());
294  if (is_permutating_proj || dynamic_cast<const RelJoin*>(src_project->getInput(0))) {
295  if (is_permutating_proj) {
296  propagate_rex_input_renumber(filter.get(), du_web, deconst_mapping);
297  }
298  filter->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
299  RexProjectInputRedirector redirector(projects);
300  auto new_condition = redirector.visit(filter->getCondition());
301  filter->setCondition(new_condition);
302  } else {
303  filter->replaceInput(src_project, src_project->getAndOwnInput(0));
304  }
305  return;
306  }
307  if (std::dynamic_pointer_cast<RelSort>(node) &&
308  dynamic_cast<const RelScan*>(src_project->getInput(0))) {
309  return;
310  }
311  if (std::dynamic_pointer_cast<RelModify>(node)) {
312  return; // NOTE: Review this. Not sure about this.
313  }
314  CHECK(std::dynamic_pointer_cast<RelAggregate>(node) ||
315  std::dynamic_pointer_cast<RelSort>(node));
316  node->replaceInput(src_project, src_project->getAndOwnInput(0));
317 }
318 
319 void cleanup_dead_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
320  for (auto nodeIt = nodes.rbegin(); nodeIt != nodes.rend(); ++nodeIt) {
321  if (nodeIt->unique()) {
322  LOG(INFO) << "ID=" << (*nodeIt)->getId() << " " << (*nodeIt)->toString()
323  << " deleted!";
324  nodeIt->reset();
325  }
326  }
327 
328  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
329  for (auto node : nodes) {
330  if (!node) {
331  continue;
332  }
333  new_nodes.push_back(node);
334  }
335  nodes.swap(new_nodes);
336 }
337 
338 std::unordered_set<const RelProject*> get_visible_projects(const RelAlgNode* root) {
339  if (auto project = dynamic_cast<const RelProject*>(root)) {
340  return {project};
341  }
342 
343  if (dynamic_cast<const RelAggregate*>(root) || dynamic_cast<const RelScan*>(root) ||
344  dynamic_cast<const RelLogicalValues*>(root) ||
345  dynamic_cast<const RelModify*>(root)) {
346  return std::unordered_set<const RelProject*>{};
347  }
348 
349  if (auto join = dynamic_cast<const RelJoin*>(root)) {
350  auto lhs_projs = get_visible_projects(join->getInput(0));
351  auto rhs_projs = get_visible_projects(join->getInput(1));
352  lhs_projs.insert(rhs_projs.begin(), rhs_projs.end());
353  return lhs_projs;
354  }
355 
356  CHECK(dynamic_cast<const RelFilter*>(root) || dynamic_cast<const RelSort*>(root));
357  return get_visible_projects(root->getInput(0));
358 }
359 
360 // TODO(miyu): checking this at runtime is more accurate
361 bool is_distinct(const size_t input_idx, const RelAlgNode* node) {
362  if (dynamic_cast<const RelFilter*>(node) || dynamic_cast<const RelSort*>(node)) {
363  CHECK_EQ(size_t(1), node->inputCount());
364  return is_distinct(input_idx, node->getInput(0));
365  }
366  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
367  CHECK_EQ(size_t(1), node->inputCount());
368  if (aggregate->getGroupByCount() == 1 && !input_idx) {
369  return true;
370  }
371  if (input_idx < aggregate->getGroupByCount()) {
372  return is_distinct(input_idx, node->getInput(0));
373  }
374  return false;
375  }
376  if (auto project = dynamic_cast<const RelProject*>(node)) {
377  CHECK_LT(input_idx, project->size());
378  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(input_idx))) {
379  CHECK_EQ(size_t(1), node->inputCount());
380  return is_distinct(input->getIndex(), project->getInput(0));
381  }
382  return false;
383  }
384  CHECK(dynamic_cast<const RelJoin*>(node) || dynamic_cast<const RelScan*>(node));
385  return false;
386 }
387 
388 } // namespace
389 
390 std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> build_du_web(
391  const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
392  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
393  std::unordered_set<const RelAlgNode*> visited;
394  std::vector<const RelAlgNode*> work_set;
395  for (auto node : nodes) {
396  if (std::dynamic_pointer_cast<RelScan>(node) ||
397  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
398  continue;
399  }
400  work_set.push_back(node.get());
401  while (!work_set.empty()) {
402  auto walker = work_set.back();
403  work_set.pop_back();
404  if (visited.count(walker)) {
405  continue;
406  }
407  CHECK(!web.count(walker));
408  auto it_ok =
409  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
410  CHECK(it_ok.second);
411  visited.insert(walker);
412  const auto join = dynamic_cast<const RelJoin*>(walker);
413  const auto project = dynamic_cast<const RelProject*>(walker);
414  const auto aggregate = dynamic_cast<const RelAggregate*>(walker);
415  const auto filter = dynamic_cast<const RelFilter*>(walker);
416  const auto sort = dynamic_cast<const RelSort*>(walker);
417  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(walker);
418  const auto logical_values = dynamic_cast<const RelLogicalValues*>(walker);
419  CHECK(join || project || aggregate || filter || sort || left_deep_join ||
420  logical_values);
421  for (size_t i = 0; i < walker->inputCount(); ++i) {
422  auto src = walker->getInput(i);
423  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
424  continue;
425  }
426  if (web.empty() || !web.count(src)) {
427  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
428  }
429  web[src].insert(walker);
430  work_set.push_back(src);
431  }
432  }
433  }
434  return web;
435 }
436 
437 // For now, the only target to eliminate is restricted to project-aggregate pair between
438 // scan/sort and join
439 // TODO(miyu): allow more chance if proved safe
440 void eliminate_identical_copy(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
441  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
442  auto sink = nodes.back();
443  for (auto node : nodes) {
444  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
445  if (!aggregate || aggregate == sink ||
446  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
447  continue;
448  }
449  auto project =
450  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
451  if (project && project->size() == aggregate->size() &&
452  project->getFields() == aggregate->getFields()) {
453  CHECK_EQ(size_t(0), copies.count(aggregate));
454  copies.insert(aggregate);
455  }
456  }
457  for (auto node : nodes) {
458  if (!node->inputCount()) {
459  continue;
460  }
461  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
462  if (!copies.count(last_source)) {
463  continue;
464  }
465  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
466  CHECK(aggregate);
467  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
468  continue;
469  }
470  auto project =
471  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
472  CHECK(project);
473  CHECK_EQ(size_t(1), project->size());
474  if (!is_distinct(size_t(0), project.get())) {
475  continue;
476  }
477  auto new_source = project->getAndOwnInput(0);
478  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
479  std::dynamic_pointer_cast<const RelScan>(new_source)) {
480  node->replaceInput(last_source, new_source);
481  }
482  }
483  decltype(copies)().swap(copies);
484 
485  auto web = build_du_web(nodes);
486 
487  std::unordered_map<const RelAlgNode*, RelAlgNode*> deconst_mapping;
488  for (auto node : nodes) {
489  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
490  }
491 
492  std::unordered_set<const RelProject*> projects;
493  std::unordered_set<const RelProject*> permutating_projects;
494  auto visible_projs = get_visible_projects(nodes.back().get());
495  for (auto node : nodes) {
496  auto project = std::dynamic_pointer_cast<RelProject>(node);
497  if (project && project->isSimple() &&
498  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
499  is_identical_copy(project.get(), web, projects, permutating_projects)) {
500  projects.insert(project.get());
501  }
502  }
503 
504  for (auto node : nodes) {
505  redirect_inputs_of(node, projects, permutating_projects, web, deconst_mapping);
506  }
507 
508  cleanup_dead_nodes(nodes);
509 }
510 
511 namespace {
512 
513 class RexInputCollector : public RexVisitor<std::unordered_set<RexInput>> {
514  private:
516 
517  protected:
518  using RetType = std::unordered_set<RexInput>;
519  RetType aggregateResult(const RetType& aggregate,
520  const RetType& next_result) const override {
521  RetType result(aggregate.begin(), aggregate.end());
522  result.insert(next_result.begin(), next_result.end());
523  return result;
524  }
525 
526  public:
527  RexInputCollector(const RelAlgNode* node) : node_(node) {}
528  RetType visitInput(const RexInput* input) const override {
529  RetType result;
530  if (node_->inputCount() == 1) {
531  auto src = node_->getInput(0);
532  if (auto join = dynamic_cast<const RelJoin*>(src)) {
533  CHECK_EQ(join->inputCount(), size_t(2));
534  const auto src2_in_offset = join->getInput(0)->size();
535  if (input->getSourceNode() == join->getInput(1)) {
536  result.emplace(src, input->getIndex() + src2_in_offset);
537  } else {
538  result.emplace(src, input->getIndex());
539  }
540  return result;
541  }
542  }
543  result.insert(*input);
544  return result;
545  }
546 };
547 
549  CHECK(node->size());
550  RexInputCollector collector(node);
551  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
552  auto rex_ins = collector.visit(filter->getCondition());
553  if (!rex_ins.empty()) {
554  return static_cast<size_t>(rex_ins.begin()->getIndex());
555  }
556  return pick_always_live_col_idx(filter->getInput(0));
557  } else if (auto join = dynamic_cast<const RelJoin*>(node)) {
558  auto rex_ins = collector.visit(join->getCondition());
559  if (!rex_ins.empty()) {
560  return static_cast<size_t>(rex_ins.begin()->getIndex());
561  }
562  if (auto lhs_idx = pick_always_live_col_idx(join->getInput(0))) {
563  return lhs_idx;
564  }
565  if (auto rhs_idx = pick_always_live_col_idx(join->getInput(0))) {
566  return rhs_idx + join->getInput(0)->size();
567  }
568  } else if (auto sort = dynamic_cast<const RelSort*>(node)) {
569  if (sort->collationCount()) {
570  return sort->getCollation(0).getField();
571  }
572  return pick_always_live_col_idx(sort->getInput(0));
573  }
574  return size_t(0);
575 }
576 
577 std::vector<std::unordered_set<size_t>> get_live_ins(
578  const RelAlgNode* node,
579  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs) {
580  if (!node || dynamic_cast<const RelScan*>(node)) {
581  return {};
582  }
583  RexInputCollector collector(node);
584  auto it = live_outs.find(node);
585  CHECK(it != live_outs.end());
586  auto live_out = it->second;
587  if (auto project = dynamic_cast<const RelProject*>(node)) {
588  CHECK_EQ(size_t(1), project->inputCount());
589  std::unordered_set<size_t> live_in;
590  for (const auto& idx : live_out) {
591  CHECK_LT(idx, project->size());
592  auto partial_in = collector.visit(project->getProjectAt(idx));
593  for (auto rex_in : partial_in) {
594  live_in.insert(rex_in.getIndex());
595  }
596  }
597  if (project->size() == 1 &&
598  dynamic_cast<const RexLiteral*>(project->getProjectAt(0))) {
599  CHECK(live_in.empty());
600  live_in.insert(pick_always_live_col_idx(project->getInput(0)));
601  }
602  return {live_in};
603  }
604  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
605  CHECK_EQ(size_t(1), aggregate->inputCount());
606  const auto group_key_count = static_cast<size_t>(aggregate->getGroupByCount());
607  const auto agg_expr_count = static_cast<size_t>(aggregate->getAggExprsCount());
608  std::unordered_set<size_t> live_in;
609  for (size_t i = 0; i < group_key_count; ++i) {
610  live_in.insert(i);
611  }
612  bool has_count_star_only{false};
613  for (const auto& idx : live_out) {
614  if (idx < group_key_count) {
615  continue;
616  }
617  const auto agg_idx = idx - group_key_count;
618  CHECK_LT(agg_idx, agg_expr_count);
619  const auto& cur_agg_expr = aggregate->getAggExprs()[agg_idx];
620  const auto n_operands = cur_agg_expr->size();
621  for (size_t i = 0; i < n_operands; ++i) {
622  live_in.insert(static_cast<size_t>(cur_agg_expr->getOperand(i)));
623  }
624  if (n_operands == 0) {
625  has_count_star_only = true;
626  }
627  }
628  if (has_count_star_only && !group_key_count) {
629  live_in.insert(size_t(0));
630  }
631  return {live_in};
632  }
633  if (auto join = dynamic_cast<const RelJoin*>(node)) {
634  std::unordered_set<size_t> lhs_live_ins;
635  std::unordered_set<size_t> rhs_live_ins;
636  CHECK_EQ(size_t(2), join->inputCount());
637  auto lhs = join->getInput(0);
638  auto rhs = join->getInput(1);
639  const auto rhs_idx_base = lhs->size();
640  for (const auto idx : live_out) {
641  if (idx < rhs_idx_base) {
642  lhs_live_ins.insert(idx);
643  } else {
644  rhs_live_ins.insert(idx - rhs_idx_base);
645  }
646  }
647  auto rex_ins = collector.visit(join->getCondition());
648  for (const auto& rex_in : rex_ins) {
649  const auto in_idx = static_cast<size_t>(rex_in.getIndex());
650  if (rex_in.getSourceNode() == lhs) {
651  lhs_live_ins.insert(in_idx);
652  continue;
653  }
654  if (rex_in.getSourceNode() == rhs) {
655  rhs_live_ins.insert(in_idx);
656  continue;
657  }
658  CHECK(false);
659  }
660  return {lhs_live_ins, rhs_live_ins};
661  }
662  if (auto sort = dynamic_cast<const RelSort*>(node)) {
663  CHECK_EQ(size_t(1), sort->inputCount());
664  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
665  for (size_t i = 0; i < sort->collationCount(); ++i) {
666  live_in.insert(sort->getCollation(i).getField());
667  }
668  return {live_in};
669  }
670  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
671  CHECK_EQ(size_t(1), filter->inputCount());
672  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
673  auto rex_ins = collector.visit(filter->getCondition());
674  for (const auto& rex_in : rex_ins) {
675  live_in.insert(static_cast<size_t>(rex_in.getIndex()));
676  }
677  return {live_in};
678  }
679  return {};
680 }
681 
682 bool any_dead_col_in(const RelAlgNode* node,
683  const std::unordered_set<size_t>& live_outs) {
684  CHECK(!dynamic_cast<const RelScan*>(node));
685  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
686  for (size_t i = aggregate->getGroupByCount(); i < aggregate->size(); ++i) {
687  if (!live_outs.count(i)) {
688  return true;
689  }
690  }
691  return false;
692  }
693 
694  return node->size() > live_outs.size();
695 }
696 
697 bool does_redef_cols(const RelAlgNode* node) {
698  return dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelProject*>(node);
699 }
700 
702  public:
704  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
705  liveouts,
706  const std::unordered_set<const RelAlgNode*>& intact_nodes)
707  : liveouts_(liveouts), intact_nodes_(intact_nodes) {}
708 
709  bool hasAllSrcReady(const RelAlgNode* node) const {
710  for (size_t i = 0; i < node->inputCount(); ++i) {
711  auto src = node->getInput(i);
712  if (!dynamic_cast<const RelScan*>(src) && liveouts_.find(src) == liveouts_.end() &&
713  !intact_nodes_.count(src)) {
714  return false;
715  }
716  }
717  return true;
718  }
719 
720  private:
721  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
723  const std::unordered_set<const RelAlgNode*>& intact_nodes_;
724 };
725 
727  const RelAlgNode* node,
728  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
729  new_liveouts,
730  const std::unordered_set<size_t>& old_liveouts,
731  const std::unordered_set<const RelAlgNode*>& intact_nodes,
732  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
733  auto live_fields = old_liveouts;
734  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
735  for (size_t i = 0; i < aggregate->getGroupByCount(); ++i) {
736  live_fields.insert(i);
737  }
738  }
739  auto it_ok =
740  new_liveouts.insert(std::make_pair(node, std::unordered_map<size_t, size_t>{}));
741  CHECK(it_ok.second);
742  auto& new_indices = it_ok.first->second;
743  if (intact_nodes.count(node)) {
744  for (size_t i = 0, e = node->size(); i < e; ++i) {
745  new_indices.insert(std::make_pair(i, i));
746  }
747  return;
748  }
749  if (does_redef_cols(node)) {
750  auto node_sz_it = orig_node_sizes.find(node);
751  CHECK(node_sz_it != orig_node_sizes.end());
752  const auto node_size = node_sz_it->second;
753  CHECK_GT(node_size, live_fields.size());
754  LOG(INFO) << node->toString() << " eliminated " << node_size - live_fields.size()
755  << " columns.";
756  std::vector<size_t> ordered_indices(live_fields.begin(), live_fields.end());
757  std::sort(ordered_indices.begin(), ordered_indices.end());
758  for (size_t i = 0; i < ordered_indices.size(); ++i) {
759  new_indices.insert(std::make_pair(ordered_indices[i], i));
760  }
761  return;
762  }
763  std::vector<size_t> ordered_indices;
764  for (size_t i = 0, old_base = 0, new_base = 0; i < node->inputCount(); ++i) {
765  auto src = node->getInput(i);
766  auto src_renum_it = new_liveouts.find(src);
767  if (src_renum_it != new_liveouts.end()) {
768  for (auto m : src_renum_it->second) {
769  new_indices.insert(std::make_pair(old_base + m.first, new_base + m.second));
770  }
771  new_base += src_renum_it->second.size();
772  } else if (dynamic_cast<const RelScan*>(src) || intact_nodes.count(src)) {
773  for (size_t i = 0; i < src->size(); ++i) {
774  new_indices.insert(std::make_pair(old_base + i, new_base + i));
775  }
776  new_base += src->size();
777  } else {
778  CHECK(false);
779  }
780  auto src_sz_it = orig_node_sizes.find(src);
781  CHECK(src_sz_it != orig_node_sizes.end());
782  old_base += src_sz_it->second;
783  }
784 }
785 
787  public:
789  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
790  new_numbering)
791  : node_to_input_renum_(new_numbering) {}
792  RetType visitInput(const RexInput* input) const override {
793  auto source = input->getSourceNode();
794  auto node_it = node_to_input_renum_.find(source);
795  if (node_it != node_to_input_renum_.end()) {
796  auto old_to_new_num = node_it->second;
797  auto renum_it = old_to_new_num.find(input->getIndex());
798  CHECK(renum_it != old_to_new_num.end());
799  return boost::make_unique<RexInput>(source, renum_it->second);
800  }
801  return input->deepCopy();
802  }
803 
804  private:
805  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
807 };
808 
809 std::vector<std::unique_ptr<const RexAgg>> renumber_rex_aggs(
810  std::vector<std::unique_ptr<const RexAgg>>& agg_exprs,
811  const std::unordered_map<size_t, size_t>& new_numbering) {
812  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
813  for (auto& expr : agg_exprs) {
814  if (expr->size() >= 1) {
815  auto old_idx = expr->getOperand(0);
816  auto idx_it = new_numbering.find(old_idx);
817  if (idx_it != new_numbering.end()) {
818  std::vector<size_t> operands;
819  operands.push_back(idx_it->second);
820  if (expr->size() == 2) {
821  operands.push_back(expr->getOperand(1));
822  }
823  new_exprs.push_back(boost::make_unique<RexAgg>(
824  expr->getKind(), expr->isDistinct(), expr->getType(), operands));
825  continue;
826  }
827  }
828  new_exprs.push_back(std::move(expr));
829  }
830  return new_exprs;
831 }
832 
834  const std::unordered_map<size_t, size_t>& new_numbering) {
835  auto field_idx = old_field.getField();
836  auto idx_it = new_numbering.find(field_idx);
837  if (idx_it != new_numbering.end()) {
838  field_idx = idx_it->second;
839  }
840  return SortField(field_idx, old_field.getSortDir(), old_field.getNullsPosition());
841 }
842 
843 std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> mark_live_columns(
844  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
845  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> live_outs;
846  std::vector<const RelAlgNode*> work_set;
847  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
848  auto node = node_it->get();
849  if (dynamic_cast<const RelScan*>(node) || live_outs.count(node) ||
850  dynamic_cast<const RelModify*>(node)) {
851  continue;
852  }
853  std::vector<size_t> all_live(node->size());
854  std::iota(all_live.begin(), all_live.end(), size_t(0));
855  live_outs.insert(std::make_pair(
856  node, std::unordered_set<size_t>(all_live.begin(), all_live.end())));
857  work_set.push_back(node);
858  while (!work_set.empty()) {
859  auto walker = work_set.back();
860  work_set.pop_back();
861  CHECK(!dynamic_cast<const RelScan*>(walker));
862  CHECK(live_outs.count(walker));
863  auto live_ins = get_live_ins(walker, live_outs);
864  CHECK_EQ(live_ins.size(), walker->inputCount());
865  for (size_t i = 0; i < walker->inputCount(); ++i) {
866  auto src = walker->getInput(i);
867  if (dynamic_cast<const RelScan*>(src) || live_ins[i].empty()) {
868  continue;
869  }
870  if (!live_outs.count(src)) {
871  live_outs.insert(std::make_pair(src, std::unordered_set<size_t>{}));
872  }
873  auto src_it = live_outs.find(src);
874  CHECK(src_it != live_outs.end());
875  auto& live_out = src_it->second;
876  bool changed = false;
877  if (!live_out.empty()) {
878  live_out.insert(live_ins[i].begin(), live_ins[i].end());
879  changed = true;
880  } else {
881  for (int idx : live_ins[i]) {
882  changed |= live_out.insert(idx).second;
883  }
884  }
885  if (changed) {
886  work_set.push_back(src);
887  }
888  }
889  }
890  }
891  return live_outs;
892 }
893 
894 std::string get_field_name(const RelAlgNode* node, size_t index) {
895  CHECK_LT(index, node->size());
896  if (auto scan = dynamic_cast<const RelScan*>(node)) {
897  return scan->getFieldName(index);
898  }
899  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
900  CHECK_EQ(aggregate->size(), aggregate->getFields().size());
901  return aggregate->getFieldName(index);
902  }
903  if (auto join = dynamic_cast<const RelJoin*>(node)) {
904  const auto lhs_size = join->getInput(0)->size();
905  if (index < lhs_size) {
906  return get_field_name(join->getInput(0), index);
907  }
908  return get_field_name(join->getInput(1), index - lhs_size);
909  }
910  if (auto project = dynamic_cast<const RelProject*>(node)) {
911  return project->getFieldName(index);
912  }
913  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
914  return get_field_name(node->getInput(0), index);
915 }
916 
918  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
919  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& liveouts,
920  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>& du_web,
921  std::unordered_map<const RelAlgNode*, RelAlgNode*>& deconst_mapping) {
922  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
923  for (auto node : nodes) {
924  new_nodes.push_back(node);
925  if (!std::dynamic_pointer_cast<RelFilter>(node)) {
926  continue;
927  }
928  const auto filter = node.get();
929  auto liveout_it = liveouts.find(filter);
930  CHECK(liveout_it != liveouts.end());
931  auto& outs = liveout_it->second;
932  if (!any_dead_col_in(filter, outs)) {
933  continue;
934  }
935  auto usrs_it = du_web.find(filter);
936  CHECK(usrs_it != du_web.end());
937  auto& usrs = usrs_it->second;
938  if (usrs.size() != 1 || does_redef_cols(*usrs.begin())) {
939  continue;
940  }
941  auto only_usr = deconst_mapping[*usrs.begin()];
942 
943  std::vector<std::unique_ptr<const RexScalar>> exprs;
944  std::vector<std::string> fields;
945  for (size_t i = 0; i < filter->size(); ++i) {
946  exprs.push_back(boost::make_unique<RexInput>(filter, i));
947  fields.push_back(get_field_name(filter, i));
948  }
949  auto project_owner = std::make_shared<RelProject>(exprs, fields, node);
950  auto project = project_owner.get();
951 
952  only_usr->replaceInput(node, project_owner);
953  if (dynamic_cast<const RelJoin*>(only_usr)) {
954  RexRebindInputsVisitor visitor(filter, project);
955  for (auto usr : du_web[only_usr]) {
956  visitor.visitNode(usr);
957  }
958  }
959 
960  liveouts.insert(std::make_pair(project, outs));
961 
962  usrs.clear();
963  usrs.insert(project);
964  du_web.insert(
965  std::make_pair(project, std::unordered_set<const RelAlgNode*>{only_usr}));
966 
967  new_nodes.push_back(project_owner);
968  }
969  if (new_nodes.size() > nodes.size()) {
970  nodes.swap(new_nodes);
971  deconst_mapping.clear();
972  for (auto node : nodes) {
973  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
974  }
975  }
976 }
977 
978 std::pair<std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>,
979  std::vector<const RelAlgNode*>>
981  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs,
982  const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
983  const std::unordered_set<const RelAlgNode*>& intact_nodes,
984  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
985  du_web,
986  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
987  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
988  liveouts_renumbering;
989  std::vector<const RelAlgNode*> ready_nodes;
990  AvailabilityChecker checker(liveouts_renumbering, intact_nodes);
991  for (auto node : nodes) {
992  // Ignore empty live_out due to some invalid node
993  if (!does_redef_cols(node.get()) || intact_nodes.count(node.get())) {
994  continue;
995  }
996  auto live_pair = live_outs.find(node.get());
997  CHECK(live_pair != live_outs.end());
998  auto old_live_outs = live_pair->second;
1000  node.get(), liveouts_renumbering, old_live_outs, intact_nodes, orig_node_sizes);
1001  if (auto aggregate = std::dynamic_pointer_cast<RelAggregate>(node)) {
1002  auto old_exprs = aggregate->getAggExprsAndRelease();
1003  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
1004  auto key_name_it = aggregate->getFields().begin();
1005  std::vector<std::string> new_fields(key_name_it,
1006  key_name_it + aggregate->getGroupByCount());
1007  for (size_t i = aggregate->getGroupByCount(), j = 0;
1008  i < aggregate->getFields().size() && j < old_exprs.size();
1009  ++i, ++j) {
1010  if (old_live_outs.count(i)) {
1011  new_exprs.push_back(std::move(old_exprs[j]));
1012  new_fields.push_back(aggregate->getFieldName(i));
1013  }
1014  }
1015  aggregate->setAggExprs(new_exprs);
1016  aggregate->setFields(new_fields);
1017  } else if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
1018  auto old_exprs = project->getExpressionsAndRelease();
1019  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1020  std::vector<std::string> new_fields;
1021  for (size_t i = 0; i < old_exprs.size(); ++i) {
1022  if (old_live_outs.count(i)) {
1023  new_exprs.push_back(std::move(old_exprs[i]));
1024  new_fields.push_back(project->getFieldName(i));
1025  }
1026  }
1027  project->setExpressions(new_exprs);
1028  project->setFields(new_fields);
1029  } else {
1030  CHECK(false);
1031  }
1032  auto usrs_it = du_web.find(node.get());
1033  CHECK(usrs_it != du_web.end());
1034  for (auto usr : usrs_it->second) {
1035  if (checker.hasAllSrcReady(usr)) {
1036  ready_nodes.push_back(usr);
1037  }
1038  }
1039  }
1040  return {liveouts_renumbering, ready_nodes};
1041 }
1042 
1044  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
1045  liveout_renumbering,
1046  const std::vector<const RelAlgNode*>& ready_nodes,
1047  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& old_liveouts,
1048  const std::unordered_map<const RelAlgNode*, RelAlgNode*>& deconst_mapping,
1049  const std::unordered_set<const RelAlgNode*>& intact_nodes,
1050  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1051  du_web,
1052  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1053  RexInputRenumberVisitor renumberer(liveout_renumbering);
1054  AvailabilityChecker checker(liveout_renumbering, intact_nodes);
1055  std::deque<const RelAlgNode*> work_set(ready_nodes.begin(), ready_nodes.end());
1056  while (!work_set.empty()) {
1057  auto walker = work_set.front();
1058  work_set.pop_front();
1059  CHECK(!dynamic_cast<const RelScan*>(walker));
1060  auto node_it = deconst_mapping.find(walker);
1061  CHECK(node_it != deconst_mapping.end());
1062  auto node = node_it->second;
1063  if (auto project = dynamic_cast<RelProject*>(node)) {
1064  auto old_exprs = project->getExpressionsAndRelease();
1065  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1066  for (auto& expr : old_exprs) {
1067  new_exprs.push_back(renumberer.visit(expr.get()));
1068  }
1069  project->setExpressions(new_exprs);
1070  } else if (auto aggregate = dynamic_cast<RelAggregate*>(node)) {
1071  auto src_it = liveout_renumbering.find(node->getInput(0));
1072  CHECK(src_it != liveout_renumbering.end());
1073  auto old_exprs = aggregate->getAggExprsAndRelease();
1074  auto new_exprs = renumber_rex_aggs(old_exprs, src_it->second);
1075  aggregate->setAggExprs(new_exprs);
1076  } else if (auto join = dynamic_cast<RelJoin*>(node)) {
1077  auto new_condition = renumberer.visit(join->getCondition());
1078  join->setCondition(new_condition);
1079  } else if (auto filter = dynamic_cast<RelFilter*>(node)) {
1080  auto new_condition = renumberer.visit(filter->getCondition());
1081  filter->setCondition(new_condition);
1082  } else if (auto sort = dynamic_cast<RelSort*>(node)) {
1083  auto src_it = liveout_renumbering.find(node->getInput(0));
1084  CHECK(src_it != liveout_renumbering.end());
1085  std::vector<SortField> new_collations;
1086  for (size_t i = 0; i < sort->collationCount(); ++i) {
1087  new_collations.push_back(
1088  renumber_sort_field(sort->getCollation(i), src_it->second));
1089  }
1090  sort->setCollation(std::move(new_collations));
1091  } else {
1092  CHECK(false);
1093  }
1094 
1095  // Ignore empty live_out due to some invalid node
1096  if (does_redef_cols(node) || intact_nodes.count(node)) {
1097  continue;
1098  }
1099  auto live_pair = old_liveouts.find(node);
1100  CHECK(live_pair != old_liveouts.end());
1101  auto live_out = live_pair->second;
1103  node, liveout_renumbering, live_out, intact_nodes, orig_node_sizes);
1104  auto usrs_it = du_web.find(walker);
1105  CHECK(usrs_it != du_web.end());
1106  for (auto usr : usrs_it->second) {
1107  if (checker.hasAllSrcReady(usr)) {
1108  work_set.push_back(usr);
1109  }
1110  }
1111  }
1112 }
1113 
1114 } // namespace
1115 
1116 void eliminate_dead_columns(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1117  if (nodes.empty()) {
1118  return;
1119  }
1120  auto root = nodes.back().get();
1121  if (!root) {
1122  return;
1123  }
1124  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1125  // Mark
1126  auto old_liveouts = mark_live_columns(nodes);
1127  std::unordered_set<const RelAlgNode*> intact_nodes;
1128  bool has_dead_cols = false;
1129  for (auto live_pair : old_liveouts) {
1130  auto node = live_pair.first;
1131  const auto& outs = live_pair.second;
1132  if (outs.empty()) {
1133  LOG(WARNING) << "RA node with no used column: " << node->toString();
1134  // Ignore empty live_out due to some invalid node
1135  intact_nodes.insert(node);
1136  }
1137  if (any_dead_col_in(node, outs)) {
1138  has_dead_cols = true;
1139  } else {
1140  intact_nodes.insert(node);
1141  }
1142  }
1143  if (!has_dead_cols) {
1144  return;
1145  }
1146  // Patch
1147  std::unordered_map<const RelAlgNode*, RelAlgNode*> deconst_mapping;
1148  for (auto node : nodes) {
1149  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
1150  }
1151  auto web = build_du_web(nodes);
1152  try_insert_coalesceable_proj(nodes, old_liveouts, web, deconst_mapping);
1153 
1154  for (auto node : nodes) {
1155  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1156  continue;
1157  }
1158  bool intact = true;
1159  for (size_t i = 0; i < node->inputCount(); ++i) {
1160  auto source = node->getInput(i);
1161  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1162  intact = false;
1163  break;
1164  }
1165  }
1166  if (intact) {
1167  intact_nodes.insert(node.get());
1168  }
1169  }
1170 
1171  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1172  for (auto node : nodes) {
1173  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1174  }
1175  // Sweep
1176  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1177  liveout_renumbering;
1178  std::vector<const RelAlgNode*> ready_nodes;
1179  std::tie(liveout_renumbering, ready_nodes) =
1180  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1181  // Propagate
1182  propagate_input_renumbering(liveout_renumbering,
1183  ready_nodes,
1184  old_liveouts,
1185  deconst_mapping,
1186  intact_nodes,
1187  web,
1188  orig_node_sizes);
1189 }
1190 
1191 namespace {
1192 
1194  public:
1195  RexInputSinker(const std::unordered_map<size_t, size_t>& old_to_new_idx,
1196  const RelAlgNode* new_src)
1197  : old_to_new_in_idx_(old_to_new_idx), target_(new_src) {}
1198 
1199  RetType visitInput(const RexInput* input) const override {
1200  CHECK_EQ(target_->inputCount(), size_t(1));
1201  CHECK_EQ(target_->getInput(0), input->getSourceNode());
1202  auto idx_it = old_to_new_in_idx_.find(input->getIndex());
1203  CHECK(idx_it != old_to_new_in_idx_.end());
1204  return boost::make_unique<RexInput>(target_, idx_it->second);
1205  }
1206 
1207  private:
1208  const std::unordered_map<size_t, size_t>& old_to_new_in_idx_;
1210 };
1211 
1213  public:
1214  SubConditionReplacer(const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>&
1215  idx_to_sub_condition)
1216  : idx_to_subcond_(idx_to_sub_condition) {}
1217  RetType visitInput(const RexInput* input) const override {
1218  auto subcond_it = idx_to_subcond_.find(input->getIndex());
1219  if (subcond_it != idx_to_subcond_.end()) {
1220  return RexDeepCopyVisitor::visit(subcond_it->second.get());
1221  }
1222  return RexDeepCopyVisitor::visitInput(input);
1223  }
1224 
1225  private:
1226  const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>& idx_to_subcond_;
1227 };
1228 
1229 } // namespace
1230 
1232  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1233  auto web = build_du_web(nodes);
1234  std::unordered_map<const RelAlgNode*, RelAlgNode*> deconst_mapping;
1235  for (auto node : nodes) {
1236  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
1237  }
1238  auto liveouts = mark_live_columns(nodes);
1239  for (auto node : nodes) {
1240  auto project = std::dynamic_pointer_cast<RelProject>(node);
1241  // TODO(miyu): relax RelScan limitation
1242  if (!project || project->isSimple() ||
1243  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1244  continue;
1245  }
1246  auto usrs_it = web.find(project.get());
1247  CHECK(usrs_it != web.end());
1248  auto& usrs = usrs_it->second;
1249  if (usrs.size() != 1) {
1250  continue;
1251  }
1252  auto join = dynamic_cast<RelJoin*>(deconst_mapping[*usrs.begin()]);
1253  if (!join) {
1254  continue;
1255  }
1256  auto outs_it = liveouts.find(join);
1257  CHECK(outs_it != liveouts.end());
1258  std::unordered_map<size_t, size_t> in_to_out_index;
1259  std::unordered_set<size_t> boolean_expr_indicies;
1260  bool discarded = false;
1261  for (size_t i = 0; i < project->size(); ++i) {
1262  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1263  if (oper && oper->getType().get_type() == kBOOLEAN) {
1264  boolean_expr_indicies.insert(i);
1265  } else {
1266  // TODO(miyu): relax?
1267  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1268  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1269  } else {
1270  discarded = true;
1271  }
1272  }
1273  }
1274  if (discarded || boolean_expr_indicies.empty()) {
1275  continue;
1276  }
1277  const size_t index_base =
1278  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1279  for (auto i : boolean_expr_indicies) {
1280  auto join_idx = index_base + i;
1281  if (outs_it->second.count(join_idx)) {
1282  discarded = true;
1283  break;
1284  }
1285  }
1286  if (discarded) {
1287  continue;
1288  }
1289  RexInputCollector collector(project.get());
1290  std::vector<size_t> unloaded_input_indices;
1291  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1292  // Given all are dead right after join, safe to sink
1293  for (auto i : boolean_expr_indicies) {
1294  auto rex_ins = collector.visit(project->getProjectAt(i));
1295  for (auto& in : rex_ins) {
1296  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1297  if (!in_to_out_index.count(in.getIndex())) {
1298  auto curr_out_index = project->size() + unloaded_input_indices.size();
1299  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1300  unloaded_input_indices.push_back(in.getIndex());
1301  }
1302  RexInputSinker sinker(in_to_out_index, project.get());
1303  in_idx_to_new_subcond.insert(
1304  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1305  }
1306  }
1307  if (in_idx_to_new_subcond.empty()) {
1308  continue;
1309  }
1310  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1311  for (size_t i = 0; i < project->size(); ++i) {
1312  if (boolean_expr_indicies.count(i)) {
1313  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1314  } else {
1315  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1316  CHECK(rex_input != nullptr);
1317  new_projections.push_back(rex_input->deepCopy());
1318  }
1319  }
1320  for (auto i : unloaded_input_indices) {
1321  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1322  }
1323  project->setExpressions(new_projections);
1324 
1325  SubConditionReplacer replacer(in_idx_to_new_subcond);
1326  auto new_condition = replacer.visit(join->getCondition());
1327  join->setCondition(new_condition);
1328  }
1329 }
1330 
1331 namespace {
1332 
1334  public:
1335  RexInputRedirector(const RelAlgNode* old_src, const RelAlgNode* new_src)
1336  : old_src_(old_src), new_src_(new_src) {}
1337 
1338  RetType visitInput(const RexInput* input) const override {
1339  CHECK_EQ(old_src_, input->getSourceNode());
1340  CHECK_NE(old_src_, new_src_);
1341  auto actual_new_src = new_src_;
1342  if (auto join = dynamic_cast<const RelJoin*>(new_src_)) {
1343  actual_new_src = join->getInput(0);
1344  CHECK_EQ(join->inputCount(), size_t(2));
1345  auto src2_input_base = actual_new_src->size();
1346  if (input->getIndex() >= src2_input_base) {
1347  actual_new_src = join->getInput(1);
1348  return boost::make_unique<RexInput>(actual_new_src,
1349  input->getIndex() - src2_input_base);
1350  }
1351  }
1352 
1353  return boost::make_unique<RexInput>(actual_new_src, input->getIndex());
1354  }
1355 
1356  private:
1359 };
1360 
1362  std::shared_ptr<const RelAlgNode> old_def_node,
1363  std::shared_ptr<const RelAlgNode> new_def_node,
1364  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>>& deconst_mapping,
1365  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1366  du_web) {
1367  auto usrs_it = du_web.find(old_def_node.get());
1368  RexInputRedirector redirector(new_def_node.get(), old_def_node.get());
1369  CHECK(usrs_it != du_web.end());
1370  for (auto usr : usrs_it->second) {
1371  auto usr_it = deconst_mapping.find(usr);
1372  CHECK(usr_it != deconst_mapping.end());
1373  usr_it->second->replaceInput(old_def_node, new_def_node);
1374  }
1375  auto new_usrs_it = du_web.find(new_def_node.get());
1376  CHECK(new_usrs_it != du_web.end());
1377  new_usrs_it->second.insert(usrs_it->second.begin(), usrs_it->second.end());
1378  usrs_it->second.clear();
1379 }
1380 
1381 } // namespace
1382 
1383 void fold_filters(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1384  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1385  for (auto node : nodes) {
1386  deconst_mapping.insert(std::make_pair(node.get(), node));
1387  }
1388 
1389  auto web = build_du_web(nodes);
1390  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1391  auto& node = *node_it;
1392  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1393  CHECK_EQ(filter->inputCount(), size_t(1));
1394  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1395  if (!src_filter) {
1396  continue;
1397  }
1398  auto siblings_it = web.find(src_filter);
1399  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1400  continue;
1401  }
1402  auto src_it = deconst_mapping.find(src_filter);
1403  CHECK(src_it != deconst_mapping.end());
1404  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1405  CHECK(folded_filter);
1406  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1407  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1408  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1409  << " folded into "
1410  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1411  << std::endl;
1412  std::vector<std::unique_ptr<const RexScalar>> operands;
1413  operands.emplace_back(folded_filter->getAndReleaseCondition());
1414  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1415  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1416  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1417  operands.push_back(redirector.visit(rex_operator));
1418  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1419  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1420  const bool notnull = old_condition->getType().get_notnull() &&
1421  other_condition->getType().get_notnull();
1422  auto new_condition = std::unique_ptr<const RexScalar>(
1423  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1424  folded_filter->setCondition(new_condition);
1425  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1426  deconst_mapping.erase(filter.get());
1427  web.erase(filter.get());
1428  web[filter->getInput(0)].erase(filter.get());
1429  node.reset();
1430  }
1431  }
1432  }
1433 
1434  if (!nodes.empty()) {
1435  auto sink = nodes.back();
1436  for (auto node_it = std::next(nodes.rend()); !sink && node_it != nodes.rbegin();
1437  ++node_it) {
1438  sink = *node_it;
1439  }
1440  CHECK(sink);
1441  cleanup_dead_nodes(nodes);
1442  }
1443 }
1444 
1445 std::vector<const RexScalar*> find_hoistable_conditions(const RexScalar* condition,
1446  const RelAlgNode* source,
1447  const size_t first_col_idx,
1448  const size_t last_col_idx) {
1449  if (auto rex_op = dynamic_cast<const RexOperator*>(condition)) {
1450  switch (rex_op->getOperator()) {
1451  case kAND: {
1452  std::vector<const RexScalar*> subconditions;
1453  size_t complete_subcond_count = 0;
1454  for (size_t i = 0; i < rex_op->size(); ++i) {
1455  auto conds = find_hoistable_conditions(
1456  rex_op->getOperand(i), source, first_col_idx, last_col_idx);
1457  if (conds.size() == size_t(1)) {
1458  ++complete_subcond_count;
1459  }
1460  subconditions.insert(subconditions.end(), conds.begin(), conds.end());
1461  }
1462  if (complete_subcond_count == rex_op->size()) {
1463  return {rex_op};
1464  } else {
1465  return {subconditions};
1466  }
1467  break;
1468  }
1469  case kEQ: {
1470  const auto lhs_conds = find_hoistable_conditions(
1471  rex_op->getOperand(0), source, first_col_idx, last_col_idx);
1472  const auto rhs_conds = find_hoistable_conditions(
1473  rex_op->getOperand(1), source, first_col_idx, last_col_idx);
1474  const auto lhs_in = lhs_conds.size() == 1
1475  ? dynamic_cast<const RexInput*>(*lhs_conds.begin())
1476  : nullptr;
1477  const auto rhs_in = rhs_conds.size() == 1
1478  ? dynamic_cast<const RexInput*>(*rhs_conds.begin())
1479  : nullptr;
1480  if (lhs_in && rhs_in) {
1481  return {rex_op};
1482  }
1483  return {};
1484  break;
1485  }
1486  default:
1487  break;
1488  }
1489  return {};
1490  }
1491  if (auto rex_in = dynamic_cast<const RexInput*>(condition)) {
1492  if (rex_in->getSourceNode() == source) {
1493  const auto col_idx = rex_in->getIndex();
1494  return {col_idx >= first_col_idx && col_idx <= last_col_idx ? condition : nullptr};
1495  }
1496  return {};
1497  }
1498  return {};
1499 }
1500 
1502  public:
1503  JoinTargetRebaser(const RelJoin* join, const unsigned old_base)
1504  : join_(join)
1505  , old_base_(old_base)
1506  , src1_base_(join->getInput(0)->size())
1507  , target_count_(join->size()) {}
1508  RetType visitInput(const RexInput* input) const override {
1509  auto curr_idx = input->getIndex();
1510  CHECK_GE(curr_idx, old_base_);
1511  CHECK_LT(static_cast<size_t>(curr_idx), target_count_);
1512  curr_idx -= old_base_;
1513  if (curr_idx >= src1_base_) {
1514  return boost::make_unique<RexInput>(join_->getInput(1), curr_idx - src1_base_);
1515  } else {
1516  return boost::make_unique<RexInput>(join_->getInput(0), curr_idx);
1517  }
1518  }
1519 
1520  private:
1521  const RelJoin* join_;
1522  const unsigned old_base_;
1523  const size_t src1_base_;
1524  const size_t target_count_;
1525 };
1526 
1528  public:
1529  SubConditionRemover(const std::vector<const RexScalar*> sub_conds)
1530  : sub_conditions_(sub_conds.begin(), sub_conds.end()) {}
1531  RetType visitOperator(const RexOperator* rex_operator) const override {
1532  if (sub_conditions_.count(rex_operator)) {
1533  return boost::make_unique<RexLiteral>(
1534  true, kBOOLEAN, kBOOLEAN, unsigned(-2147483648), 1, unsigned(-2147483648), 1);
1535  }
1536  return RexDeepCopyVisitor::visitOperator(rex_operator);
1537  }
1538 
1539  private:
1540  std::unordered_set<const RexScalar*> sub_conditions_;
1541 };
1542 
1544  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1545  std::unordered_map<const RelAlgNode*, RelAlgNode*> deconst_mapping;
1546  for (auto node : nodes) {
1547  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
1548  }
1549  std::unordered_set<const RelAlgNode*> visited;
1550  auto web = build_du_web(nodes);
1551  for (auto node : nodes) {
1552  if (visited.count(node.get())) {
1553  continue;
1554  }
1555  visited.insert(node.get());
1556  auto join = dynamic_cast<RelJoin*>(node.get());
1557  if (join && join->getJoinType() == JoinType::INNER) {
1558  // Only allow cross join for now.
1559  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1560  // Assume Calcite always generates an inner join on constant boolean true for
1561  // cross join.
1562  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1563  size_t first_col_idx = 0;
1564  const RelFilter* filter = nullptr;
1565  std::vector<const RelJoin*> join_seq{join};
1566  for (const RelJoin* curr_join = join; !filter;) {
1567  auto usrs_it = web.find(curr_join);
1568  CHECK(usrs_it != web.end());
1569  if (usrs_it->second.size() != size_t(1)) {
1570  break;
1571  }
1572  auto only_usr = *usrs_it->second.begin();
1573  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1574  if (join == usr_join->getInput(1)) {
1575  const auto src1_offset = usr_join->getInput(0)->size();
1576  first_col_idx += src1_offset;
1577  }
1578  join_seq.push_back(usr_join);
1579  curr_join = usr_join;
1580  continue;
1581  }
1582 
1583  filter = dynamic_cast<const RelFilter*>(only_usr);
1584  break;
1585  }
1586  if (!filter) {
1587  visited.insert(join_seq.begin(), join_seq.end());
1588  continue;
1589  }
1590  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1591  CHECK(src_join);
1592  auto filter_it = deconst_mapping.find(filter);
1593  CHECK(filter_it != deconst_mapping.end());
1594  auto modified_filter = dynamic_cast<RelFilter*>(filter_it->second);
1595  CHECK(modified_filter);
1596 
1597  if (src_join == join) {
1598  std::unique_ptr<const RexScalar> filter_condition(
1599  modified_filter->getAndReleaseCondition());
1600  std::unique_ptr<const RexScalar> true_condition =
1601  boost::make_unique<RexLiteral>(true,
1602  kBOOLEAN,
1603  kBOOLEAN,
1604  unsigned(-2147483648),
1605  1,
1606  unsigned(-2147483648),
1607  1);
1608  modified_filter->setCondition(true_condition);
1609  join->setCondition(filter_condition);
1610  continue;
1611  }
1612  const auto src1_base = src_join->getInput(0)->size();
1613  auto source =
1614  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1615  first_col_idx =
1616  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1617  auto join_conditions =
1619  source,
1620  first_col_idx,
1621  first_col_idx + join->size() - 1);
1622  if (join_conditions.empty()) {
1623  continue;
1624  }
1625 
1626  JoinTargetRebaser rebaser(join, first_col_idx);
1627  if (join_conditions.size() == 1) {
1628  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1629  join->setCondition(new_join_condition);
1630  } else {
1631  std::vector<std::unique_ptr<const RexScalar>> operands;
1632  bool notnull = true;
1633  for (size_t i = 0; i < join_conditions.size(); ++i) {
1634  operands.emplace_back(rebaser.visit(join_conditions[i]));
1635  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1636  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1637  notnull = notnull && old_subcond->getType().get_notnull();
1638  }
1639  auto new_join_condition = std::unique_ptr<const RexScalar>(
1640  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1641  join->setCondition(new_join_condition);
1642  }
1643 
1644  SubConditionRemover remover(join_conditions);
1645  auto new_filter_condition = remover.visit(filter->getCondition());
1646  modified_filter->setCondition(new_filter_condition);
1647  }
1648  }
1649  }
1650 }
1651 
1652 // For some reason, Calcite generates Sort, Project, Sort sequences where the
1653 // two Sort nodes are identical and the Project is identity. Simplify this
1654 // pattern by re-binding the input of the second sort to the input of the first.
1655 void simplify_sort(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1656  if (nodes.size() < 3) {
1657  return;
1658  }
1659  for (size_t i = 0; i <= nodes.size() - 3;) {
1660  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1661  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1662  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1663  if (first_sort && second_sort && project && project->isIdentity() &&
1664  *first_sort == *second_sort) {
1665  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1666  first_sort->getAndOwnInput(0));
1667  nodes[i].reset();
1668  nodes[i + 1].reset();
1669  i += 3;
1670  } else {
1671  ++i;
1672  }
1673  }
1674 
1675  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1676  for (auto node : nodes) {
1677  if (!node) {
1678  continue;
1679  }
1680  new_nodes.push_back(node);
1681  }
1682  nodes.swap(new_nodes);
1683 }
SubConditionReplacer(const std::unordered_map< size_t, std::unique_ptr< const RexScalar >> &idx_to_sub_condition)
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
#define CHECK_EQ(x, y)
Definition: Logger.h:195
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject *> &projects, const std::unordered_set< const RelProject *> &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping)
const size_t target_count_
RexInputRedirector(const RelAlgNode *old_src, const RelAlgNode *new_src)
size_t pick_always_live_col_idx(const RelAlgNode *node)
void setSourceNode(const RelAlgNode *node) const
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & node_to_input_renum_
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexInputSinker(const std::unordered_map< size_t, size_t > &old_to_new_idx, const RelAlgNode *new_src)
size_t size() const override
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
SubConditionRemover(const std::vector< const RexScalar *> sub_conds)
SortDirection getSortDir() const
#define LOG(tag)
Definition: Logger.h:182
const RexScalar * getProjectAt(const size_t idx) const
bool does_redef_cols(const RelAlgNode *node)
RetType visitOperator(const RexOperator *rex_operator) const override
Definition: RexVisitor.h:152
std::string join(T const &container, std::string const &delim)
void * visitInput(const RexInput *rex_input) const override
AvailabilityChecker(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes)
int64_t * src
#define CHECK_GE(x, y)
Definition: Logger.h:200
RetType visitOperator(const RexOperator *rex_operator) const override
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject *> &projects_to_remove)
Definition: sqldefs.h:30
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
#define CHECK_GT(x, y)
Definition: Logger.h:199
const std::unordered_set< const RelProject * > & crt_projects_
std::unique_ptr< RexInput > deepCopy() const
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & liveouts_
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode *> &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
std::vector< std::unordered_set< size_t > > get_live_ins(const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
RetType visitInput(const RexInput *input) const override
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
RetType visitInput(const RexInput *input) const override
const RelAlgNode * getSourceNode() const
#define CHECK_NE(x, y)
Definition: Logger.h:196
const std::unordered_map< size_t, size_t > & old_to_new_in_idx_
Definition: sqldefs.h:37
RetType visitInput(const RexInput *input) const override
Definition: RexVisitor.h:140
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
RetType aggregateResult(const RetType &aggregate, const RetType &next_result) const override
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:819
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
const std::unordered_set< const RelAlgNode * > & intact_nodes_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
#define CHECK_LT(x, y)
Definition: Logger.h:197
NullSortedPosition getNullsPosition() const
const RexScalar * getCondition() const
const size_t inputCount() const
RetType visitInput(const RexInput *input) const override
virtual size_t size() const =0
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping)
RetType visitInput(const RexInput *input) const override
std::vector< std::unique_ptr< const RexAgg > > renumber_rex_aggs(std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
SortField renumber_sort_field(const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
bool safe_to_redirect(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:187
std::unordered_set< const RexScalar * > sub_conditions_
const RelJoin * join_
const RelAlgNode * getInput(const size_t idx) const
std::string get_field_name(const RelAlgNode *node, size_t index)
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const std::unordered_map< size_t, std::unique_ptr< const RexScalar > > & idx_to_subcond_
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_set< const RelProject *> &projects_to_remove, std::unordered_set< const RelProject *> &permutating_projects)
const unsigned old_base_
RexInputRenumberVisitor(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_numbering)
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
JoinTargetRebaser(const RelJoin *join, const unsigned old_base)
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:138
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
RexProjectInputRedirector(const std::unordered_set< const RelProject *> &crt_inputs)
void propagate_rex_input_renumber(const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping)
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
size_t getField() const
RetType visitInput(const RexInput *input) const override