OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgOptimizer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgOptimizer.h"
18 #include "RexVisitor.h"
19 #include "Shared/Logger.h"
20 
21 #include <numeric>
22 #include <string>
23 #include <unordered_map>
24 
25 namespace {
26 
28  public:
29  RexProjectInputRedirector(const std::unordered_set<const RelProject*>& crt_inputs)
30  : crt_projects_(crt_inputs) {}
31 
32  RetType visitInput(const RexInput* input) const override {
33  auto source = dynamic_cast<const RelProject*>(input->getSourceNode());
34  if (!source || !crt_projects_.count(source)) {
35  return input->deepCopy();
36  }
37  auto new_source = source->getInput(0);
38  auto new_input =
39  dynamic_cast<const RexInput*>(source->getProjectAt(input->getIndex()));
40  if (!new_input) {
41  return input->deepCopy();
42  }
43  if (auto join = dynamic_cast<const RelJoin*>(new_source)) {
44  CHECK(new_input->getSourceNode() == join->getInput(0) ||
45  new_input->getSourceNode() == join->getInput(1));
46  } else {
47  CHECK_EQ(new_input->getSourceNode(), new_source);
48  }
49  return new_input->deepCopy();
50  }
51 
52  private:
53  const std::unordered_set<const RelProject*>& crt_projects_;
54 };
55 
56 class RexRebindInputsVisitor : public RexVisitor<void*> {
57  public:
58  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
59  : old_input_(old_input), new_input_(new_input) {}
60 
61  void* visitInput(const RexInput* rex_input) const override {
62  const auto old_source = rex_input->getSourceNode();
63  if (old_source == old_input_) {
64  rex_input->setSourceNode(new_input_);
65  }
66  return nullptr;
67  };
68 
69  void visitNode(const RelAlgNode* node) const {
70  if (dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelSort*>(node)) {
71  return;
72  }
73  if (auto join = dynamic_cast<const RelJoin*>(node)) {
74  if (auto condition = join->getCondition()) {
75  visit(condition);
76  }
77  return;
78  }
79  if (auto project = dynamic_cast<const RelProject*>(node)) {
80  for (size_t i = 0; i < project->size(); ++i) {
81  visit(project->getProjectAt(i));
82  }
83  return;
84  }
85  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
86  visit(filter->getCondition());
87  return;
88  }
89  CHECK(false);
90  }
91 
92  private:
95 };
96 
98  const RelProject* curr_project,
99  const std::unordered_set<const RelProject*>& projects_to_remove) {
100  auto source = curr_project->getInput(0);
101  while (auto filter = dynamic_cast<const RelFilter*>(source)) {
102  source = filter->getInput(0);
103  }
104  if (auto src_project = dynamic_cast<const RelProject*>(source)) {
105  if (projects_to_remove.count(src_project)) {
106  return get_actual_source_size(src_project, projects_to_remove);
107  }
108  }
109  return curr_project->getInput(0)->size();
110 }
111 
113  const RelProject* project,
114  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
115  du_web) {
116  if (!project->isSimple()) {
117  return false;
118  }
119  auto usrs_it = du_web.find(project);
120  CHECK(usrs_it != du_web.end());
121  for (auto usr : usrs_it->second) {
122  if (!dynamic_cast<const RelProject*>(usr) && !dynamic_cast<const RelFilter*>(usr)) {
123  return false;
124  }
125  }
126  return true;
127 }
128 
130  const RelProject* project,
131  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
132  du_web,
133  const std::unordered_set<const RelProject*>& projects_to_remove,
134  std::unordered_set<const RelProject*>& permutating_projects) {
135  auto source_size = get_actual_source_size(project, projects_to_remove);
136  if (project->size() > source_size) {
137  return false;
138  }
139 
140  if (project->size() < source_size) {
141  auto usrs_it = du_web.find(project);
142  CHECK(usrs_it != du_web.end());
143  bool guard_found = false;
144  while (usrs_it->second.size() == size_t(1)) {
145  auto only_usr = *usrs_it->second.begin();
146  if (dynamic_cast<const RelProject*>(only_usr)) {
147  guard_found = true;
148  break;
149  }
150  if (dynamic_cast<const RelAggregate*>(only_usr) ||
151  dynamic_cast<const RelSort*>(only_usr) ||
152  dynamic_cast<const RelJoin*>(only_usr) ||
153  dynamic_cast<const RelTableFunction*>(only_usr)) {
154  return false;
155  }
156  CHECK(dynamic_cast<const RelFilter*>(only_usr));
157  usrs_it = du_web.find(only_usr);
158  CHECK(usrs_it != du_web.end());
159  }
160 
161  if (!guard_found) {
162  return false;
163  }
164  }
165 
166  bool identical = true;
167  for (size_t i = 0; i < project->size(); ++i) {
168  auto target = dynamic_cast<const RexInput*>(project->getProjectAt(i));
169  CHECK(target);
170  if (i != target->getIndex()) {
171  identical = false;
172  break;
173  }
174  }
175 
176  if (identical) {
177  return true;
178  }
179 
180  if (safe_to_redirect(project, du_web)) {
181  permutating_projects.insert(project);
182  return true;
183  }
184 
185  return false;
186 }
187 
189  const RelFilter* excluded_root,
190  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
191  du_web) {
192  CHECK(excluded_root);
193  auto src_project = dynamic_cast<const RelProject*>(excluded_root->getInput(0));
194  CHECK(src_project && src_project->isSimple());
195  const auto indirect_join_src = dynamic_cast<const RelJoin*>(src_project->getInput(0));
196  std::unordered_map<size_t, size_t> old_to_new_idx;
197  for (size_t i = 0; i < src_project->size(); ++i) {
198  auto rex_in = dynamic_cast<const RexInput*>(src_project->getProjectAt(i));
199  CHECK(rex_in);
200  size_t src_base = 0;
201  if (indirect_join_src != nullptr &&
202  indirect_join_src->getInput(1) == rex_in->getSourceNode()) {
203  src_base = indirect_join_src->getInput(0)->size();
204  }
205  old_to_new_idx.insert(std::make_pair(i, src_base + rex_in->getIndex()));
206  old_to_new_idx.insert(std::make_pair(i, rex_in->getIndex()));
207  }
208  CHECK(old_to_new_idx.size());
209  RexInputRenumber<false> renumber(old_to_new_idx);
210  auto usrs_it = du_web.find(excluded_root);
211  CHECK(usrs_it != du_web.end());
212  std::vector<const RelAlgNode*> work_set(usrs_it->second.begin(), usrs_it->second.end());
213  while (!work_set.empty()) {
214  auto node = work_set.back();
215  work_set.pop_back();
216  auto modified_node = const_cast<RelAlgNode*>(node);
217  if (auto filter = dynamic_cast<RelFilter*>(modified_node)) {
218  auto new_condition = renumber.visit(filter->getCondition());
219  filter->setCondition(new_condition);
220  auto usrs_it = du_web.find(filter);
221  CHECK(usrs_it != du_web.end() && usrs_it->second.size() == 1);
222  work_set.push_back(*usrs_it->second.begin());
223  continue;
224  }
225  if (auto project = dynamic_cast<RelProject*>(modified_node)) {
226  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
227  for (size_t i = 0; i < project->size(); ++i) {
228  new_exprs.push_back(renumber.visit(project->getProjectAt(i)));
229  }
230  project->setExpressions(new_exprs);
231  continue;
232  }
233  CHECK(false);
234  }
235 }
236 
238  std::shared_ptr<RelAlgNode> node,
239  const std::unordered_set<const RelProject*>& projects,
240  const std::unordered_set<const RelProject*>& permutating_projects,
241  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
242  du_web) {
243  std::shared_ptr<const RelProject> src_project = nullptr;
244  for (size_t i = 0; i < node->inputCount(); ++i) {
245  if (auto project =
246  std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(i))) {
247  if (projects.count(project.get())) {
248  src_project = project;
249  break;
250  }
251  }
252  }
253  if (!src_project) {
254  return;
255  }
256  if (auto join = std::dynamic_pointer_cast<RelJoin>(node)) {
257  auto other_project =
258  src_project == node->getAndOwnInput(0)
259  ? std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(1))
260  : std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(0));
261  join->replaceInput(src_project, src_project->getAndOwnInput(0));
262  RexRebindInputsVisitor rebinder(src_project.get(), src_project->getInput(0));
263  auto usrs_it = du_web.find(join.get());
264  CHECK(usrs_it != du_web.end());
265  for (auto usr : usrs_it->second) {
266  rebinder.visitNode(usr);
267  }
268 
269  if (other_project && projects.count(other_project.get())) {
270  join->replaceInput(other_project, other_project->getAndOwnInput(0));
271  RexRebindInputsVisitor other_rebinder(other_project.get(),
272  other_project->getInput(0));
273  for (auto usr : usrs_it->second) {
274  other_rebinder.visitNode(usr);
275  }
276  }
277  return;
278  }
279  if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
280  project->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
281  RexProjectInputRedirector redirector(projects);
282  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
283  for (size_t i = 0; i < project->size(); ++i) {
284  new_exprs.push_back(redirector.visit(project->getProjectAt(i)));
285  }
286  project->setExpressions(new_exprs);
287  return;
288  }
289  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
290  const bool is_permutating_proj = permutating_projects.count(src_project.get());
291  if (is_permutating_proj || dynamic_cast<const RelJoin*>(src_project->getInput(0))) {
292  if (is_permutating_proj) {
293  propagate_rex_input_renumber(filter.get(), du_web);
294  }
295  filter->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
296  RexProjectInputRedirector redirector(projects);
297  auto new_condition = redirector.visit(filter->getCondition());
298  filter->setCondition(new_condition);
299  } else {
300  filter->replaceInput(src_project, src_project->getAndOwnInput(0));
301  }
302  return;
303  }
304  if (std::dynamic_pointer_cast<RelSort>(node) &&
305  dynamic_cast<const RelScan*>(src_project->getInput(0))) {
306  return;
307  }
308  if (std::dynamic_pointer_cast<RelModify>(node)) {
309  return; // NOTE: Review this. Not sure about this.
310  }
311  CHECK(std::dynamic_pointer_cast<RelAggregate>(node) ||
312  std::dynamic_pointer_cast<RelSort>(node));
313  node->replaceInput(src_project, src_project->getAndOwnInput(0));
314 }
315 
316 void cleanup_dead_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
317  for (auto nodeIt = nodes.rbegin(); nodeIt != nodes.rend(); ++nodeIt) {
318  if (nodeIt->unique()) {
319  LOG(INFO) << "ID=" << (*nodeIt)->getId() << " " << (*nodeIt)->toString()
320  << " deleted!";
321  nodeIt->reset();
322  }
323  }
324 
325  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
326  for (auto node : nodes) {
327  if (!node) {
328  continue;
329  }
330  new_nodes.push_back(node);
331  }
332  nodes.swap(new_nodes);
333 }
334 
335 std::unordered_set<const RelProject*> get_visible_projects(const RelAlgNode* root) {
336  if (auto project = dynamic_cast<const RelProject*>(root)) {
337  return {project};
338  }
339 
340  if (dynamic_cast<const RelAggregate*>(root) || dynamic_cast<const RelScan*>(root) ||
341  dynamic_cast<const RelLogicalValues*>(root) ||
342  dynamic_cast<const RelModify*>(root)) {
343  return std::unordered_set<const RelProject*>{};
344  }
345 
346  if (auto join = dynamic_cast<const RelJoin*>(root)) {
347  auto lhs_projs = get_visible_projects(join->getInput(0));
348  auto rhs_projs = get_visible_projects(join->getInput(1));
349  lhs_projs.insert(rhs_projs.begin(), rhs_projs.end());
350  return lhs_projs;
351  }
352 
353  CHECK(dynamic_cast<const RelFilter*>(root) || dynamic_cast<const RelSort*>(root));
354  return get_visible_projects(root->getInput(0));
355 }
356 
357 // TODO(miyu): checking this at runtime is more accurate
358 bool is_distinct(const size_t input_idx, const RelAlgNode* node) {
359  if (dynamic_cast<const RelFilter*>(node) || dynamic_cast<const RelSort*>(node)) {
360  CHECK_EQ(size_t(1), node->inputCount());
361  return is_distinct(input_idx, node->getInput(0));
362  }
363  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
364  CHECK_EQ(size_t(1), node->inputCount());
365  if (aggregate->getGroupByCount() == 1 && !input_idx) {
366  return true;
367  }
368  if (input_idx < aggregate->getGroupByCount()) {
369  return is_distinct(input_idx, node->getInput(0));
370  }
371  return false;
372  }
373  if (auto project = dynamic_cast<const RelProject*>(node)) {
374  CHECK_LT(input_idx, project->size());
375  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(input_idx))) {
376  CHECK_EQ(size_t(1), node->inputCount());
377  return is_distinct(input->getIndex(), project->getInput(0));
378  }
379  return false;
380  }
381  CHECK(dynamic_cast<const RelJoin*>(node) || dynamic_cast<const RelScan*>(node));
382  return false;
383 }
384 
385 } // namespace
386 
387 std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> build_du_web(
388  const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
389  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
390  std::unordered_set<const RelAlgNode*> visited;
391  std::vector<const RelAlgNode*> work_set;
392  for (auto node : nodes) {
393  if (std::dynamic_pointer_cast<RelScan>(node) ||
394  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
395  continue;
396  }
397  work_set.push_back(node.get());
398  while (!work_set.empty()) {
399  auto walker = work_set.back();
400  work_set.pop_back();
401  if (visited.count(walker)) {
402  continue;
403  }
404  CHECK(!web.count(walker));
405  auto it_ok =
406  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
407  CHECK(it_ok.second);
408  visited.insert(walker);
409  const auto join = dynamic_cast<const RelJoin*>(walker);
410  const auto project = dynamic_cast<const RelProject*>(walker);
411  const auto aggregate = dynamic_cast<const RelAggregate*>(walker);
412  const auto filter = dynamic_cast<const RelFilter*>(walker);
413  const auto sort = dynamic_cast<const RelSort*>(walker);
414  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(walker);
415  const auto logical_values = dynamic_cast<const RelLogicalValues*>(walker);
416  const auto table_func = dynamic_cast<const RelTableFunction*>(walker);
417  CHECK(join || project || aggregate || filter || sort || left_deep_join ||
418  logical_values || table_func);
419  for (size_t i = 0; i < walker->inputCount(); ++i) {
420  auto src = walker->getInput(i);
421  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
422  continue;
423  }
424  if (web.empty() || !web.count(src)) {
425  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
426  }
427  web[src].insert(walker);
428  work_set.push_back(src);
429  }
430  }
431  }
432  return web;
433 }
434 
435 // For now, the only target to eliminate is restricted to project-aggregate pair between
436 // scan/sort and join
437 // TODO(miyu): allow more chance if proved safe
438 void eliminate_identical_copy(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
439  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
440  auto sink = nodes.back();
441  for (auto node : nodes) {
442  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
443  if (!aggregate || aggregate == sink ||
444  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
445  continue;
446  }
447  auto project =
448  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
449  if (project && project->size() == aggregate->size() &&
450  project->getFields() == aggregate->getFields()) {
451  CHECK_EQ(size_t(0), copies.count(aggregate));
452  copies.insert(aggregate);
453  }
454  }
455  for (auto node : nodes) {
456  if (!node->inputCount()) {
457  continue;
458  }
459  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
460  if (!copies.count(last_source)) {
461  continue;
462  }
463  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
464  CHECK(aggregate);
465  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
466  continue;
467  }
468  auto project =
469  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
470  CHECK(project);
471  CHECK_EQ(size_t(1), project->size());
472  if (!is_distinct(size_t(0), project.get())) {
473  continue;
474  }
475  auto new_source = project->getAndOwnInput(0);
476  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
477  std::dynamic_pointer_cast<const RelScan>(new_source)) {
478  node->replaceInput(last_source, new_source);
479  }
480  }
481  decltype(copies)().swap(copies);
482 
483  auto web = build_du_web(nodes);
484 
485  std::unordered_set<const RelProject*> projects;
486  std::unordered_set<const RelProject*> permutating_projects;
487  auto visible_projs = get_visible_projects(nodes.back().get());
488  for (auto node : nodes) {
489  auto project = std::dynamic_pointer_cast<RelProject>(node);
490  if (project && project->isSimple() &&
491  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
492  is_identical_copy(project.get(), web, projects, permutating_projects)) {
493  projects.insert(project.get());
494  }
495  }
496 
497  for (auto node : nodes) {
498  redirect_inputs_of(node, projects, permutating_projects, web);
499  }
500 
501  cleanup_dead_nodes(nodes);
502 }
503 
504 namespace {
505 
506 class RexInputCollector : public RexVisitor<std::unordered_set<RexInput>> {
507  private:
509 
510  protected:
511  using RetType = std::unordered_set<RexInput>;
512  RetType aggregateResult(const RetType& aggregate,
513  const RetType& next_result) const override {
514  RetType result(aggregate.begin(), aggregate.end());
515  result.insert(next_result.begin(), next_result.end());
516  return result;
517  }
518 
519  public:
520  RexInputCollector(const RelAlgNode* node) : node_(node) {}
521  RetType visitInput(const RexInput* input) const override {
522  RetType result;
523  if (node_->inputCount() == 1) {
524  auto src = node_->getInput(0);
525  if (auto join = dynamic_cast<const RelJoin*>(src)) {
526  CHECK_EQ(join->inputCount(), size_t(2));
527  const auto src2_in_offset = join->getInput(0)->size();
528  if (input->getSourceNode() == join->getInput(1)) {
529  result.emplace(src, input->getIndex() + src2_in_offset);
530  } else {
531  result.emplace(src, input->getIndex());
532  }
533  return result;
534  }
535  }
536  result.insert(*input);
537  return result;
538  }
539 };
540 
542  CHECK(node->size());
543  RexInputCollector collector(node);
544  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
545  auto rex_ins = collector.visit(filter->getCondition());
546  if (!rex_ins.empty()) {
547  return static_cast<size_t>(rex_ins.begin()->getIndex());
548  }
549  return pick_always_live_col_idx(filter->getInput(0));
550  } else if (auto join = dynamic_cast<const RelJoin*>(node)) {
551  auto rex_ins = collector.visit(join->getCondition());
552  if (!rex_ins.empty()) {
553  return static_cast<size_t>(rex_ins.begin()->getIndex());
554  }
555  if (auto lhs_idx = pick_always_live_col_idx(join->getInput(0))) {
556  return lhs_idx;
557  }
558  if (auto rhs_idx = pick_always_live_col_idx(join->getInput(0))) {
559  return rhs_idx + join->getInput(0)->size();
560  }
561  } else if (auto sort = dynamic_cast<const RelSort*>(node)) {
562  if (sort->collationCount()) {
563  return sort->getCollation(0).getField();
564  }
565  return pick_always_live_col_idx(sort->getInput(0));
566  }
567  return size_t(0);
568 }
569 
570 std::vector<std::unordered_set<size_t>> get_live_ins(
571  const RelAlgNode* node,
572  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs) {
573  if (!node || dynamic_cast<const RelScan*>(node)) {
574  return {};
575  }
576  RexInputCollector collector(node);
577  auto it = live_outs.find(node);
578  CHECK(it != live_outs.end());
579  auto live_out = it->second;
580  if (auto project = dynamic_cast<const RelProject*>(node)) {
581  CHECK_EQ(size_t(1), project->inputCount());
582  std::unordered_set<size_t> live_in;
583  for (const auto& idx : live_out) {
584  CHECK_LT(idx, project->size());
585  auto partial_in = collector.visit(project->getProjectAt(idx));
586  for (auto rex_in : partial_in) {
587  live_in.insert(rex_in.getIndex());
588  }
589  }
590  if (project->size() == 1 &&
591  dynamic_cast<const RexLiteral*>(project->getProjectAt(0))) {
592  CHECK(live_in.empty());
593  live_in.insert(pick_always_live_col_idx(project->getInput(0)));
594  }
595  return {live_in};
596  }
597  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
598  CHECK_EQ(size_t(1), aggregate->inputCount());
599  const auto group_key_count = static_cast<size_t>(aggregate->getGroupByCount());
600  const auto agg_expr_count = static_cast<size_t>(aggregate->getAggExprsCount());
601  std::unordered_set<size_t> live_in;
602  for (size_t i = 0; i < group_key_count; ++i) {
603  live_in.insert(i);
604  }
605  bool has_count_star_only{false};
606  for (const auto& idx : live_out) {
607  if (idx < group_key_count) {
608  continue;
609  }
610  const auto agg_idx = idx - group_key_count;
611  CHECK_LT(agg_idx, agg_expr_count);
612  const auto& cur_agg_expr = aggregate->getAggExprs()[agg_idx];
613  const auto n_operands = cur_agg_expr->size();
614  for (size_t i = 0; i < n_operands; ++i) {
615  live_in.insert(static_cast<size_t>(cur_agg_expr->getOperand(i)));
616  }
617  if (n_operands == 0) {
618  has_count_star_only = true;
619  }
620  }
621  if (has_count_star_only && !group_key_count) {
622  live_in.insert(size_t(0));
623  }
624  return {live_in};
625  }
626  if (auto join = dynamic_cast<const RelJoin*>(node)) {
627  std::unordered_set<size_t> lhs_live_ins;
628  std::unordered_set<size_t> rhs_live_ins;
629  CHECK_EQ(size_t(2), join->inputCount());
630  auto lhs = join->getInput(0);
631  auto rhs = join->getInput(1);
632  const auto rhs_idx_base = lhs->size();
633  for (const auto idx : live_out) {
634  if (idx < rhs_idx_base) {
635  lhs_live_ins.insert(idx);
636  } else {
637  rhs_live_ins.insert(idx - rhs_idx_base);
638  }
639  }
640  auto rex_ins = collector.visit(join->getCondition());
641  for (const auto& rex_in : rex_ins) {
642  const auto in_idx = static_cast<size_t>(rex_in.getIndex());
643  if (rex_in.getSourceNode() == lhs) {
644  lhs_live_ins.insert(in_idx);
645  continue;
646  }
647  if (rex_in.getSourceNode() == rhs) {
648  rhs_live_ins.insert(in_idx);
649  continue;
650  }
651  CHECK(false);
652  }
653  return {lhs_live_ins, rhs_live_ins};
654  }
655  if (auto sort = dynamic_cast<const RelSort*>(node)) {
656  CHECK_EQ(size_t(1), sort->inputCount());
657  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
658  for (size_t i = 0; i < sort->collationCount(); ++i) {
659  live_in.insert(sort->getCollation(i).getField());
660  }
661  return {live_in};
662  }
663  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
664  CHECK_EQ(size_t(1), filter->inputCount());
665  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
666  auto rex_ins = collector.visit(filter->getCondition());
667  for (const auto& rex_in : rex_ins) {
668  live_in.insert(static_cast<size_t>(rex_in.getIndex()));
669  }
670  return {live_in};
671  }
672  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
673  CHECK_EQ(size_t(1), table_func->inputCount());
674 
675  const auto input_count = table_func->size();
676  std::unordered_set<size_t> live_in;
677  for (size_t i = 0; i < input_count; i++) {
678  live_in.insert(i);
679  }
680  return {live_in};
681  }
682  return {};
683 }
684 
685 bool any_dead_col_in(const RelAlgNode* node,
686  const std::unordered_set<size_t>& live_outs) {
687  CHECK(!dynamic_cast<const RelScan*>(node));
688  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
689  for (size_t i = aggregate->getGroupByCount(); i < aggregate->size(); ++i) {
690  if (!live_outs.count(i)) {
691  return true;
692  }
693  }
694  return false;
695  }
696 
697  return node->size() > live_outs.size();
698 }
699 
700 bool does_redef_cols(const RelAlgNode* node) {
701  return dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelProject*>(node);
702 }
703 
705  public:
707  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
708  liveouts,
709  const std::unordered_set<const RelAlgNode*>& intact_nodes)
710  : liveouts_(liveouts), intact_nodes_(intact_nodes) {}
711 
712  bool hasAllSrcReady(const RelAlgNode* node) const {
713  for (size_t i = 0; i < node->inputCount(); ++i) {
714  auto src = node->getInput(i);
715  if (!dynamic_cast<const RelScan*>(src) && liveouts_.find(src) == liveouts_.end() &&
716  !intact_nodes_.count(src)) {
717  return false;
718  }
719  }
720  return true;
721  }
722 
723  private:
724  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
726  const std::unordered_set<const RelAlgNode*>& intact_nodes_;
727 };
728 
730  const RelAlgNode* node,
731  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
732  new_liveouts,
733  const std::unordered_set<size_t>& old_liveouts,
734  const std::unordered_set<const RelAlgNode*>& intact_nodes,
735  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
736  auto live_fields = old_liveouts;
737  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
738  for (size_t i = 0; i < aggregate->getGroupByCount(); ++i) {
739  live_fields.insert(i);
740  }
741  }
742  auto it_ok =
743  new_liveouts.insert(std::make_pair(node, std::unordered_map<size_t, size_t>{}));
744  CHECK(it_ok.second);
745  auto& new_indices = it_ok.first->second;
746  if (intact_nodes.count(node)) {
747  for (size_t i = 0, e = node->size(); i < e; ++i) {
748  new_indices.insert(std::make_pair(i, i));
749  }
750  return;
751  }
752  if (does_redef_cols(node)) {
753  auto node_sz_it = orig_node_sizes.find(node);
754  CHECK(node_sz_it != orig_node_sizes.end());
755  const auto node_size = node_sz_it->second;
756  CHECK_GT(node_size, live_fields.size());
757  LOG(INFO) << node->toString() << " eliminated " << node_size - live_fields.size()
758  << " columns.";
759  std::vector<size_t> ordered_indices(live_fields.begin(), live_fields.end());
760  std::sort(ordered_indices.begin(), ordered_indices.end());
761  for (size_t i = 0; i < ordered_indices.size(); ++i) {
762  new_indices.insert(std::make_pair(ordered_indices[i], i));
763  }
764  return;
765  }
766  std::vector<size_t> ordered_indices;
767  for (size_t i = 0, old_base = 0, new_base = 0; i < node->inputCount(); ++i) {
768  auto src = node->getInput(i);
769  auto src_renum_it = new_liveouts.find(src);
770  if (src_renum_it != new_liveouts.end()) {
771  for (auto m : src_renum_it->second) {
772  new_indices.insert(std::make_pair(old_base + m.first, new_base + m.second));
773  }
774  new_base += src_renum_it->second.size();
775  } else if (dynamic_cast<const RelScan*>(src) || intact_nodes.count(src)) {
776  for (size_t i = 0; i < src->size(); ++i) {
777  new_indices.insert(std::make_pair(old_base + i, new_base + i));
778  }
779  new_base += src->size();
780  } else {
781  CHECK(false);
782  }
783  auto src_sz_it = orig_node_sizes.find(src);
784  CHECK(src_sz_it != orig_node_sizes.end());
785  old_base += src_sz_it->second;
786  }
787 }
788 
790  public:
792  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
793  new_numbering)
794  : node_to_input_renum_(new_numbering) {}
795  RetType visitInput(const RexInput* input) const override {
796  auto source = input->getSourceNode();
797  auto node_it = node_to_input_renum_.find(source);
798  if (node_it != node_to_input_renum_.end()) {
799  auto old_to_new_num = node_it->second;
800  auto renum_it = old_to_new_num.find(input->getIndex());
801  CHECK(renum_it != old_to_new_num.end());
802  return boost::make_unique<RexInput>(source, renum_it->second);
803  }
804  return input->deepCopy();
805  }
806 
807  private:
808  const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
810 };
811 
812 std::vector<std::unique_ptr<const RexAgg>> renumber_rex_aggs(
813  std::vector<std::unique_ptr<const RexAgg>>& agg_exprs,
814  const std::unordered_map<size_t, size_t>& new_numbering) {
815  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
816  for (auto& expr : agg_exprs) {
817  if (expr->size() >= 1) {
818  auto old_idx = expr->getOperand(0);
819  auto idx_it = new_numbering.find(old_idx);
820  if (idx_it != new_numbering.end()) {
821  std::vector<size_t> operands;
822  operands.push_back(idx_it->second);
823  if (expr->size() == 2) {
824  operands.push_back(expr->getOperand(1));
825  }
826  new_exprs.push_back(boost::make_unique<RexAgg>(
827  expr->getKind(), expr->isDistinct(), expr->getType(), operands));
828  continue;
829  }
830  }
831  new_exprs.push_back(std::move(expr));
832  }
833  return new_exprs;
834 }
835 
837  const std::unordered_map<size_t, size_t>& new_numbering) {
838  auto field_idx = old_field.getField();
839  auto idx_it = new_numbering.find(field_idx);
840  if (idx_it != new_numbering.end()) {
841  field_idx = idx_it->second;
842  }
843  return SortField(field_idx, old_field.getSortDir(), old_field.getNullsPosition());
844 }
845 
846 std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> mark_live_columns(
847  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
848  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> live_outs;
849  std::vector<const RelAlgNode*> work_set;
850  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
851  auto node = node_it->get();
852  if (dynamic_cast<const RelScan*>(node) || live_outs.count(node) ||
853  dynamic_cast<const RelModify*>(node)) {
854  continue;
855  }
856  std::vector<size_t> all_live(node->size());
857  std::iota(all_live.begin(), all_live.end(), size_t(0));
858  live_outs.insert(std::make_pair(
859  node, std::unordered_set<size_t>(all_live.begin(), all_live.end())));
860  work_set.push_back(node);
861  while (!work_set.empty()) {
862  auto walker = work_set.back();
863  work_set.pop_back();
864  CHECK(!dynamic_cast<const RelScan*>(walker));
865  CHECK(live_outs.count(walker));
866  auto live_ins = get_live_ins(walker, live_outs);
867  CHECK_EQ(live_ins.size(), walker->inputCount());
868  for (size_t i = 0; i < walker->inputCount(); ++i) {
869  auto src = walker->getInput(i);
870  if (dynamic_cast<const RelScan*>(src) || live_ins[i].empty()) {
871  continue;
872  }
873  if (!live_outs.count(src)) {
874  live_outs.insert(std::make_pair(src, std::unordered_set<size_t>{}));
875  }
876  auto src_it = live_outs.find(src);
877  CHECK(src_it != live_outs.end());
878  auto& live_out = src_it->second;
879  bool changed = false;
880  if (!live_out.empty()) {
881  live_out.insert(live_ins[i].begin(), live_ins[i].end());
882  changed = true;
883  } else {
884  for (int idx : live_ins[i]) {
885  changed |= live_out.insert(idx).second;
886  }
887  }
888  if (changed) {
889  work_set.push_back(src);
890  }
891  }
892  }
893  }
894  return live_outs;
895 }
896 
897 std::string get_field_name(const RelAlgNode* node, size_t index) {
898  CHECK_LT(index, node->size());
899  if (auto scan = dynamic_cast<const RelScan*>(node)) {
900  return scan->getFieldName(index);
901  }
902  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
903  CHECK_EQ(aggregate->size(), aggregate->getFields().size());
904  return aggregate->getFieldName(index);
905  }
906  if (auto join = dynamic_cast<const RelJoin*>(node)) {
907  const auto lhs_size = join->getInput(0)->size();
908  if (index < lhs_size) {
909  return get_field_name(join->getInput(0), index);
910  }
911  return get_field_name(join->getInput(1), index - lhs_size);
912  }
913  if (auto project = dynamic_cast<const RelProject*>(node)) {
914  return project->getFieldName(index);
915  }
916  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
917  return get_field_name(node->getInput(0), index);
918 }
919 
921  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
922  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& liveouts,
923  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
924  du_web) {
925  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
926  for (auto node : nodes) {
927  new_nodes.push_back(node);
928  if (!std::dynamic_pointer_cast<RelFilter>(node)) {
929  continue;
930  }
931  const auto filter = node.get();
932  auto liveout_it = liveouts.find(filter);
933  CHECK(liveout_it != liveouts.end());
934  auto& outs = liveout_it->second;
935  if (!any_dead_col_in(filter, outs)) {
936  continue;
937  }
938  auto usrs_it = du_web.find(filter);
939  CHECK(usrs_it != du_web.end());
940  auto& usrs = usrs_it->second;
941  if (usrs.size() != 1 || does_redef_cols(*usrs.begin())) {
942  continue;
943  }
944  auto only_usr = const_cast<RelAlgNode*>(*usrs.begin());
945 
946  std::vector<std::unique_ptr<const RexScalar>> exprs;
947  std::vector<std::string> fields;
948  for (size_t i = 0; i < filter->size(); ++i) {
949  exprs.push_back(boost::make_unique<RexInput>(filter, i));
950  fields.push_back(get_field_name(filter, i));
951  }
952  auto project_owner = std::make_shared<RelProject>(exprs, fields, node);
953  auto project = project_owner.get();
954 
955  only_usr->replaceInput(node, project_owner);
956  if (dynamic_cast<const RelJoin*>(only_usr)) {
957  RexRebindInputsVisitor visitor(filter, project);
958  for (auto usr : du_web[only_usr]) {
959  visitor.visitNode(usr);
960  }
961  }
962 
963  liveouts.insert(std::make_pair(project, outs));
964 
965  usrs.clear();
966  usrs.insert(project);
967  du_web.insert(
968  std::make_pair(project, std::unordered_set<const RelAlgNode*>{only_usr}));
969 
970  new_nodes.push_back(project_owner);
971  }
972  if (new_nodes.size() > nodes.size()) {
973  nodes.swap(new_nodes);
974  }
975 }
976 
977 std::pair<std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>,
978  std::vector<const RelAlgNode*>>
980  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& live_outs,
981  const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
982  const std::unordered_set<const RelAlgNode*>& intact_nodes,
983  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
984  du_web,
985  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
986  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
987  liveouts_renumbering;
988  std::vector<const RelAlgNode*> ready_nodes;
989  AvailabilityChecker checker(liveouts_renumbering, intact_nodes);
990  for (auto node : nodes) {
991  // Ignore empty live_out due to some invalid node
992  if (!does_redef_cols(node.get()) || intact_nodes.count(node.get())) {
993  continue;
994  }
995  auto live_pair = live_outs.find(node.get());
996  CHECK(live_pair != live_outs.end());
997  auto old_live_outs = live_pair->second;
999  node.get(), liveouts_renumbering, old_live_outs, intact_nodes, orig_node_sizes);
1000  if (auto aggregate = std::dynamic_pointer_cast<RelAggregate>(node)) {
1001  auto old_exprs = aggregate->getAggExprsAndRelease();
1002  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
1003  auto key_name_it = aggregate->getFields().begin();
1004  std::vector<std::string> new_fields(key_name_it,
1005  key_name_it + aggregate->getGroupByCount());
1006  for (size_t i = aggregate->getGroupByCount(), j = 0;
1007  i < aggregate->getFields().size() && j < old_exprs.size();
1008  ++i, ++j) {
1009  if (old_live_outs.count(i)) {
1010  new_exprs.push_back(std::move(old_exprs[j]));
1011  new_fields.push_back(aggregate->getFieldName(i));
1012  }
1013  }
1014  aggregate->setAggExprs(new_exprs);
1015  aggregate->setFields(new_fields);
1016  } else if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
1017  auto old_exprs = project->getExpressionsAndRelease();
1018  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1019  std::vector<std::string> new_fields;
1020  for (size_t i = 0; i < old_exprs.size(); ++i) {
1021  if (old_live_outs.count(i)) {
1022  new_exprs.push_back(std::move(old_exprs[i]));
1023  new_fields.push_back(project->getFieldName(i));
1024  }
1025  }
1026  project->setExpressions(new_exprs);
1027  project->setFields(new_fields);
1028  } else {
1029  CHECK(false);
1030  }
1031  auto usrs_it = du_web.find(node.get());
1032  CHECK(usrs_it != du_web.end());
1033  for (auto usr : usrs_it->second) {
1034  if (checker.hasAllSrcReady(usr)) {
1035  ready_nodes.push_back(usr);
1036  }
1037  }
1038  }
1039  return {liveouts_renumbering, ready_nodes};
1040 }
1041 
1043  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
1044  liveout_renumbering,
1045  const std::vector<const RelAlgNode*>& ready_nodes,
1046  const std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>>& old_liveouts,
1047  const std::unordered_set<const RelAlgNode*>& intact_nodes,
1048  const std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1049  du_web,
1050  const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1051  RexInputRenumberVisitor renumberer(liveout_renumbering);
1052  AvailabilityChecker checker(liveout_renumbering, intact_nodes);
1053  std::deque<const RelAlgNode*> work_set(ready_nodes.begin(), ready_nodes.end());
1054  while (!work_set.empty()) {
1055  auto walker = work_set.front();
1056  work_set.pop_front();
1057  CHECK(!dynamic_cast<const RelScan*>(walker));
1058  auto node = const_cast<RelAlgNode*>(walker);
1059  if (auto project = dynamic_cast<RelProject*>(node)) {
1060  auto old_exprs = project->getExpressionsAndRelease();
1061  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1062  for (auto& expr : old_exprs) {
1063  new_exprs.push_back(renumberer.visit(expr.get()));
1064  }
1065  project->setExpressions(new_exprs);
1066  } else if (auto aggregate = dynamic_cast<RelAggregate*>(node)) {
1067  auto src_it = liveout_renumbering.find(node->getInput(0));
1068  CHECK(src_it != liveout_renumbering.end());
1069  auto old_exprs = aggregate->getAggExprsAndRelease();
1070  auto new_exprs = renumber_rex_aggs(old_exprs, src_it->second);
1071  aggregate->setAggExprs(new_exprs);
1072  } else if (auto join = dynamic_cast<RelJoin*>(node)) {
1073  auto new_condition = renumberer.visit(join->getCondition());
1074  join->setCondition(new_condition);
1075  } else if (auto filter = dynamic_cast<RelFilter*>(node)) {
1076  auto new_condition = renumberer.visit(filter->getCondition());
1077  filter->setCondition(new_condition);
1078  } else if (auto sort = dynamic_cast<RelSort*>(node)) {
1079  auto src_it = liveout_renumbering.find(node->getInput(0));
1080  CHECK(src_it != liveout_renumbering.end());
1081  std::vector<SortField> new_collations;
1082  for (size_t i = 0; i < sort->collationCount(); ++i) {
1083  new_collations.push_back(
1084  renumber_sort_field(sort->getCollation(i), src_it->second));
1085  }
1086  sort->setCollation(std::move(new_collations));
1087  } else {
1088  CHECK(false);
1089  }
1090 
1091  // Ignore empty live_out due to some invalid node
1092  if (does_redef_cols(node) || intact_nodes.count(node)) {
1093  continue;
1094  }
1095  auto live_pair = old_liveouts.find(node);
1096  CHECK(live_pair != old_liveouts.end());
1097  auto live_out = live_pair->second;
1099  node, liveout_renumbering, live_out, intact_nodes, orig_node_sizes);
1100  auto usrs_it = du_web.find(walker);
1101  CHECK(usrs_it != du_web.end());
1102  for (auto usr : usrs_it->second) {
1103  if (checker.hasAllSrcReady(usr)) {
1104  work_set.push_back(usr);
1105  }
1106  }
1107  }
1108 }
1109 
1110 } // namespace
1111 
1112 void eliminate_dead_columns(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1113  if (nodes.empty()) {
1114  return;
1115  }
1116  auto root = nodes.back().get();
1117  if (!root) {
1118  return;
1119  }
1120  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1121  // Mark
1122  auto old_liveouts = mark_live_columns(nodes);
1123  std::unordered_set<const RelAlgNode*> intact_nodes;
1124  bool has_dead_cols = false;
1125  for (auto live_pair : old_liveouts) {
1126  auto node = live_pair.first;
1127  const auto& outs = live_pair.second;
1128  if (outs.empty()) {
1129  LOG(WARNING) << "RA node with no used column: " << node->toString();
1130  // Ignore empty live_out due to some invalid node
1131  intact_nodes.insert(node);
1132  }
1133  if (any_dead_col_in(node, outs)) {
1134  has_dead_cols = true;
1135  } else {
1136  intact_nodes.insert(node);
1137  }
1138  }
1139  if (!has_dead_cols) {
1140  return;
1141  }
1142  auto web = build_du_web(nodes);
1143  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1144 
1145  for (auto node : nodes) {
1146  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1147  continue;
1148  }
1149  bool intact = true;
1150  for (size_t i = 0; i < node->inputCount(); ++i) {
1151  auto source = node->getInput(i);
1152  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1153  intact = false;
1154  break;
1155  }
1156  }
1157  if (intact) {
1158  intact_nodes.insert(node.get());
1159  }
1160  }
1161 
1162  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1163  for (auto node : nodes) {
1164  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1165  }
1166  // Sweep
1167  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1168  liveout_renumbering;
1169  std::vector<const RelAlgNode*> ready_nodes;
1170  std::tie(liveout_renumbering, ready_nodes) =
1171  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1172  // Propagate
1174  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1175 }
1176 
1177 namespace {
1178 
1180  public:
1181  RexInputSinker(const std::unordered_map<size_t, size_t>& old_to_new_idx,
1182  const RelAlgNode* new_src)
1183  : old_to_new_in_idx_(old_to_new_idx), target_(new_src) {}
1184 
1185  RetType visitInput(const RexInput* input) const override {
1186  CHECK_EQ(target_->inputCount(), size_t(1));
1187  CHECK_EQ(target_->getInput(0), input->getSourceNode());
1188  auto idx_it = old_to_new_in_idx_.find(input->getIndex());
1189  CHECK(idx_it != old_to_new_in_idx_.end());
1190  return boost::make_unique<RexInput>(target_, idx_it->second);
1191  }
1192 
1193  private:
1194  const std::unordered_map<size_t, size_t>& old_to_new_in_idx_;
1196 };
1197 
1199  public:
1200  SubConditionReplacer(const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>&
1201  idx_to_sub_condition)
1202  : idx_to_subcond_(idx_to_sub_condition) {}
1203  RetType visitInput(const RexInput* input) const override {
1204  auto subcond_it = idx_to_subcond_.find(input->getIndex());
1205  if (subcond_it != idx_to_subcond_.end()) {
1206  return RexDeepCopyVisitor::visit(subcond_it->second.get());
1207  }
1208  return RexDeepCopyVisitor::visitInput(input);
1209  }
1210 
1211  private:
1212  const std::unordered_map<size_t, std::unique_ptr<const RexScalar>>& idx_to_subcond_;
1213 };
1214 
1215 } // namespace
1216 
1218  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1219  auto web = build_du_web(nodes);
1220  auto liveouts = mark_live_columns(nodes);
1221  for (auto node : nodes) {
1222  auto project = std::dynamic_pointer_cast<RelProject>(node);
1223  // TODO(miyu): relax RelScan limitation
1224  if (!project || project->isSimple() ||
1225  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1226  continue;
1227  }
1228  auto usrs_it = web.find(project.get());
1229  CHECK(usrs_it != web.end());
1230  auto& usrs = usrs_it->second;
1231  if (usrs.size() != 1) {
1232  continue;
1233  }
1234  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1235  if (!join) {
1236  continue;
1237  }
1238  auto outs_it = liveouts.find(join);
1239  CHECK(outs_it != liveouts.end());
1240  std::unordered_map<size_t, size_t> in_to_out_index;
1241  std::unordered_set<size_t> boolean_expr_indicies;
1242  bool discarded = false;
1243  for (size_t i = 0; i < project->size(); ++i) {
1244  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1245  if (oper && oper->getType().get_type() == kBOOLEAN) {
1246  boolean_expr_indicies.insert(i);
1247  } else {
1248  // TODO(miyu): relax?
1249  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1250  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1251  } else {
1252  discarded = true;
1253  }
1254  }
1255  }
1256  if (discarded || boolean_expr_indicies.empty()) {
1257  continue;
1258  }
1259  const size_t index_base =
1260  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1261  for (auto i : boolean_expr_indicies) {
1262  auto join_idx = index_base + i;
1263  if (outs_it->second.count(join_idx)) {
1264  discarded = true;
1265  break;
1266  }
1267  }
1268  if (discarded) {
1269  continue;
1270  }
1271  RexInputCollector collector(project.get());
1272  std::vector<size_t> unloaded_input_indices;
1273  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1274  // Given all are dead right after join, safe to sink
1275  for (auto i : boolean_expr_indicies) {
1276  auto rex_ins = collector.visit(project->getProjectAt(i));
1277  for (auto& in : rex_ins) {
1278  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1279  if (!in_to_out_index.count(in.getIndex())) {
1280  auto curr_out_index = project->size() + unloaded_input_indices.size();
1281  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1282  unloaded_input_indices.push_back(in.getIndex());
1283  }
1284  RexInputSinker sinker(in_to_out_index, project.get());
1285  in_idx_to_new_subcond.insert(
1286  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1287  }
1288  }
1289  if (in_idx_to_new_subcond.empty()) {
1290  continue;
1291  }
1292  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1293  for (size_t i = 0; i < project->size(); ++i) {
1294  if (boolean_expr_indicies.count(i)) {
1295  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1296  } else {
1297  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1298  CHECK(rex_input != nullptr);
1299  new_projections.push_back(rex_input->deepCopy());
1300  }
1301  }
1302  for (auto i : unloaded_input_indices) {
1303  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1304  }
1305  project->setExpressions(new_projections);
1306 
1307  SubConditionReplacer replacer(in_idx_to_new_subcond);
1308  auto new_condition = replacer.visit(join->getCondition());
1309  join->setCondition(new_condition);
1310  }
1311 }
1312 
1313 namespace {
1314 
1316  public:
1317  RexInputRedirector(const RelAlgNode* old_src, const RelAlgNode* new_src)
1318  : old_src_(old_src), new_src_(new_src) {}
1319 
1320  RetType visitInput(const RexInput* input) const override {
1321  CHECK_EQ(old_src_, input->getSourceNode());
1322  CHECK_NE(old_src_, new_src_);
1323  auto actual_new_src = new_src_;
1324  if (auto join = dynamic_cast<const RelJoin*>(new_src_)) {
1325  actual_new_src = join->getInput(0);
1326  CHECK_EQ(join->inputCount(), size_t(2));
1327  auto src2_input_base = actual_new_src->size();
1328  if (input->getIndex() >= src2_input_base) {
1329  actual_new_src = join->getInput(1);
1330  return boost::make_unique<RexInput>(actual_new_src,
1331  input->getIndex() - src2_input_base);
1332  }
1333  }
1334 
1335  return boost::make_unique<RexInput>(actual_new_src, input->getIndex());
1336  }
1337 
1338  private:
1341 };
1342 
1344  std::shared_ptr<const RelAlgNode> old_def_node,
1345  std::shared_ptr<const RelAlgNode> new_def_node,
1346  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>>& deconst_mapping,
1347  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1348  du_web) {
1349  auto usrs_it = du_web.find(old_def_node.get());
1350  RexInputRedirector redirector(new_def_node.get(), old_def_node.get());
1351  CHECK(usrs_it != du_web.end());
1352  for (auto usr : usrs_it->second) {
1353  auto usr_it = deconst_mapping.find(usr);
1354  CHECK(usr_it != deconst_mapping.end());
1355  usr_it->second->replaceInput(old_def_node, new_def_node);
1356  }
1357  auto new_usrs_it = du_web.find(new_def_node.get());
1358  CHECK(new_usrs_it != du_web.end());
1359  new_usrs_it->second.insert(usrs_it->second.begin(), usrs_it->second.end());
1360  usrs_it->second.clear();
1361 }
1362 
1363 } // namespace
1364 
1365 void fold_filters(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1366  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1367  for (auto node : nodes) {
1368  deconst_mapping.insert(std::make_pair(node.get(), node));
1369  }
1370 
1371  auto web = build_du_web(nodes);
1372  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1373  auto& node = *node_it;
1374  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1375  CHECK_EQ(filter->inputCount(), size_t(1));
1376  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1377  if (!src_filter) {
1378  continue;
1379  }
1380  auto siblings_it = web.find(src_filter);
1381  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1382  continue;
1383  }
1384  auto src_it = deconst_mapping.find(src_filter);
1385  CHECK(src_it != deconst_mapping.end());
1386  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1387  CHECK(folded_filter);
1388  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1389  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1390  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1391  << " folded into "
1392  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1393  << std::endl;
1394  std::vector<std::unique_ptr<const RexScalar>> operands;
1395  operands.emplace_back(folded_filter->getAndReleaseCondition());
1396  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1397  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1398  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1399  operands.push_back(redirector.visit(rex_operator));
1400  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1401  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1402  const bool notnull = old_condition->getType().get_notnull() &&
1403  other_condition->getType().get_notnull();
1404  auto new_condition = std::unique_ptr<const RexScalar>(
1405  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1406  folded_filter->setCondition(new_condition);
1407  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1408  deconst_mapping.erase(filter.get());
1409  web.erase(filter.get());
1410  web[filter->getInput(0)].erase(filter.get());
1411  node.reset();
1412  }
1413  }
1414  }
1415 
1416  if (!nodes.empty()) {
1417  auto sink = nodes.back();
1418  for (auto node_it = std::next(nodes.rend()); !sink && node_it != nodes.rbegin();
1419  ++node_it) {
1420  sink = *node_it;
1421  }
1422  CHECK(sink);
1423  cleanup_dead_nodes(nodes);
1424  }
1425 }
1426 
1427 std::vector<const RexScalar*> find_hoistable_conditions(const RexScalar* condition,
1428  const RelAlgNode* source,
1429  const size_t first_col_idx,
1430  const size_t last_col_idx) {
1431  if (auto rex_op = dynamic_cast<const RexOperator*>(condition)) {
1432  switch (rex_op->getOperator()) {
1433  case kAND: {
1434  std::vector<const RexScalar*> subconditions;
1435  size_t complete_subcond_count = 0;
1436  for (size_t i = 0; i < rex_op->size(); ++i) {
1437  auto conds = find_hoistable_conditions(
1438  rex_op->getOperand(i), source, first_col_idx, last_col_idx);
1439  if (conds.size() == size_t(1)) {
1440  ++complete_subcond_count;
1441  }
1442  subconditions.insert(subconditions.end(), conds.begin(), conds.end());
1443  }
1444  if (complete_subcond_count == rex_op->size()) {
1445  return {rex_op};
1446  } else {
1447  return {subconditions};
1448  }
1449  break;
1450  }
1451  case kEQ: {
1452  const auto lhs_conds = find_hoistable_conditions(
1453  rex_op->getOperand(0), source, first_col_idx, last_col_idx);
1454  const auto rhs_conds = find_hoistable_conditions(
1455  rex_op->getOperand(1), source, first_col_idx, last_col_idx);
1456  const auto lhs_in = lhs_conds.size() == 1
1457  ? dynamic_cast<const RexInput*>(*lhs_conds.begin())
1458  : nullptr;
1459  const auto rhs_in = rhs_conds.size() == 1
1460  ? dynamic_cast<const RexInput*>(*rhs_conds.begin())
1461  : nullptr;
1462  if (lhs_in && rhs_in) {
1463  return {rex_op};
1464  }
1465  return {};
1466  break;
1467  }
1468  default:
1469  break;
1470  }
1471  return {};
1472  }
1473  if (auto rex_in = dynamic_cast<const RexInput*>(condition)) {
1474  if (rex_in->getSourceNode() == source) {
1475  const auto col_idx = rex_in->getIndex();
1476  return {col_idx >= first_col_idx && col_idx <= last_col_idx ? condition : nullptr};
1477  }
1478  return {};
1479  }
1480  return {};
1481 }
1482 
1484  public:
1485  JoinTargetRebaser(const RelJoin* join, const unsigned old_base)
1486  : join_(join)
1487  , old_base_(old_base)
1488  , src1_base_(join->getInput(0)->size())
1489  , target_count_(join->size()) {}
1490  RetType visitInput(const RexInput* input) const override {
1491  auto curr_idx = input->getIndex();
1492  CHECK_GE(curr_idx, old_base_);
1493  CHECK_LT(static_cast<size_t>(curr_idx), target_count_);
1494  curr_idx -= old_base_;
1495  if (curr_idx >= src1_base_) {
1496  return boost::make_unique<RexInput>(join_->getInput(1), curr_idx - src1_base_);
1497  } else {
1498  return boost::make_unique<RexInput>(join_->getInput(0), curr_idx);
1499  }
1500  }
1501 
1502  private:
1503  const RelJoin* join_;
1504  const unsigned old_base_;
1505  const size_t src1_base_;
1506  const size_t target_count_;
1507 };
1508 
1510  public:
1511  SubConditionRemover(const std::vector<const RexScalar*> sub_conds)
1512  : sub_conditions_(sub_conds.begin(), sub_conds.end()) {}
1513  RetType visitOperator(const RexOperator* rex_operator) const override {
1514  if (sub_conditions_.count(rex_operator)) {
1515  return boost::make_unique<RexLiteral>(
1516  true, kBOOLEAN, kBOOLEAN, unsigned(-2147483648), 1, unsigned(-2147483648), 1);
1517  }
1518  return RexDeepCopyVisitor::visitOperator(rex_operator);
1519  }
1520 
1521  private:
1522  std::unordered_set<const RexScalar*> sub_conditions_;
1523 };
1524 
1526  std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1527  std::unordered_set<const RelAlgNode*> visited;
1528  auto web = build_du_web(nodes);
1529  for (auto node : nodes) {
1530  if (visited.count(node.get())) {
1531  continue;
1532  }
1533  visited.insert(node.get());
1534  auto join = dynamic_cast<RelJoin*>(node.get());
1535  if (join && join->getJoinType() == JoinType::INNER) {
1536  // Only allow cross join for now.
1537  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1538  // Assume Calcite always generates an inner join on constant boolean true for
1539  // cross join.
1540  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1541  size_t first_col_idx = 0;
1542  const RelFilter* filter = nullptr;
1543  std::vector<const RelJoin*> join_seq{join};
1544  for (const RelJoin* curr_join = join; !filter;) {
1545  auto usrs_it = web.find(curr_join);
1546  CHECK(usrs_it != web.end());
1547  if (usrs_it->second.size() != size_t(1)) {
1548  break;
1549  }
1550  auto only_usr = *usrs_it->second.begin();
1551  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1552  if (join == usr_join->getInput(1)) {
1553  const auto src1_offset = usr_join->getInput(0)->size();
1554  first_col_idx += src1_offset;
1555  }
1556  join_seq.push_back(usr_join);
1557  curr_join = usr_join;
1558  continue;
1559  }
1560 
1561  filter = dynamic_cast<const RelFilter*>(only_usr);
1562  break;
1563  }
1564  if (!filter) {
1565  visited.insert(join_seq.begin(), join_seq.end());
1566  continue;
1567  }
1568  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1569  CHECK(src_join);
1570  auto modified_filter = const_cast<RelFilter*>(filter);
1571 
1572  if (src_join == join) {
1573  std::unique_ptr<const RexScalar> filter_condition(
1574  modified_filter->getAndReleaseCondition());
1575  std::unique_ptr<const RexScalar> true_condition =
1576  boost::make_unique<RexLiteral>(true,
1577  kBOOLEAN,
1578  kBOOLEAN,
1579  unsigned(-2147483648),
1580  1,
1581  unsigned(-2147483648),
1582  1);
1583  modified_filter->setCondition(true_condition);
1584  join->setCondition(filter_condition);
1585  continue;
1586  }
1587  const auto src1_base = src_join->getInput(0)->size();
1588  auto source =
1589  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1590  first_col_idx =
1591  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1592  auto join_conditions =
1594  source,
1595  first_col_idx,
1596  first_col_idx + join->size() - 1);
1597  if (join_conditions.empty()) {
1598  continue;
1599  }
1600 
1601  JoinTargetRebaser rebaser(join, first_col_idx);
1602  if (join_conditions.size() == 1) {
1603  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1604  join->setCondition(new_join_condition);
1605  } else {
1606  std::vector<std::unique_ptr<const RexScalar>> operands;
1607  bool notnull = true;
1608  for (size_t i = 0; i < join_conditions.size(); ++i) {
1609  operands.emplace_back(rebaser.visit(join_conditions[i]));
1610  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1611  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1612  notnull = notnull && old_subcond->getType().get_notnull();
1613  }
1614  auto new_join_condition = std::unique_ptr<const RexScalar>(
1615  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1616  join->setCondition(new_join_condition);
1617  }
1618 
1619  SubConditionRemover remover(join_conditions);
1620  auto new_filter_condition = remover.visit(filter->getCondition());
1621  modified_filter->setCondition(new_filter_condition);
1622  }
1623  }
1624  }
1625 }
1626 
1627 // For some reason, Calcite generates Sort, Project, Sort sequences where the
1628 // two Sort nodes are identical and the Project is identity. Simplify this
1629 // pattern by re-binding the input of the second sort to the input of the first.
1630 void simplify_sort(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1631  if (nodes.size() < 3) {
1632  return;
1633  }
1634  for (size_t i = 0; i <= nodes.size() - 3;) {
1635  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1636  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1637  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1638  if (first_sort && second_sort && project && project->isIdentity() &&
1639  *first_sort == *second_sort) {
1640  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1641  first_sort->getAndOwnInput(0));
1642  nodes[i].reset();
1643  nodes[i + 1].reset();
1644  i += 3;
1645  } else {
1646  ++i;
1647  }
1648  }
1649 
1650  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1651  for (auto node : nodes) {
1652  if (!node) {
1653  continue;
1654  }
1655  new_nodes.push_back(node);
1656  }
1657  nodes.swap(new_nodes);
1658 }
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
SubConditionReplacer(const std::unordered_map< size_t, std::unique_ptr< const RexScalar >> &idx_to_sub_condition)
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
#define CHECK_EQ(x, y)
Definition: Logger.h:198
const size_t target_count_
RexInputRedirector(const RelAlgNode *old_src, const RelAlgNode *new_src)
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject * > &projects_to_remove)
size_t pick_always_live_col_idx(const RelAlgNode *node)
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & node_to_input_renum_
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexInputSinker(const std::unordered_map< size_t, size_t > &old_to_new_idx, const RelAlgNode *new_src)
AvailabilityChecker(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes)
size_t size() const override
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
#define LOG(tag)
Definition: Logger.h:185
SortDirection getSortDir() const
bool does_redef_cols(const RelAlgNode *node)
const RexScalar * getCondition() const
RetType visitOperator(const RexOperator *rex_operator) const override
Definition: RexVisitor.h:153
std::string join(T const &container, std::string const &delim)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
void * visitInput(const RexInput *rex_input) const override
int64_t * src
#define CHECK_GE(x, y)
Definition: Logger.h:203
size_t getField() const
RetType visitOperator(const RexOperator *rex_operator) const override
Definition: sqldefs.h:30
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
#define CHECK_GT(x, y)
Definition: Logger.h:202
const std::unordered_set< const RelProject * > & crt_projects_
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool safe_to_redirect(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & liveouts_
unsigned getIndex() const
std::vector< std::unordered_set< size_t > > get_live_ins(const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
CHECK(cgen_state)
RetType visitInput(const RexInput *input) const override
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
RetType visitInput(const RexInput *input) const override
#define CHECK_NE(x, y)
Definition: Logger.h:199
NullSortedPosition getNullsPosition() const
const std::unordered_map< size_t, size_t > & old_to_new_in_idx_
const RelAlgNode * getInput(const size_t idx) const
Definition: sqldefs.h:37
bool isSimple() const
RetType visitInput(const RexInput *input) const override
Definition: RexVisitor.h:141
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
RetType aggregateResult(const RetType &aggregate, const RetType &next_result) const override
std::unique_ptr< RexInput > deepCopy() const
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:852
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
const std::unordered_set< const RelAlgNode * > & intact_nodes_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
const RexScalar * getProjectAt(const size_t idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:200
void setSourceNode(const RelAlgNode *node) const
RetType visitInput(const RexInput *input) const override
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
RetType visitInput(const RexInput *input) const override
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::vector< std::unique_ptr< const RexAgg > > renumber_rex_aggs(std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
SortField renumber_sort_field(const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
virtual std::string toString() const =0
SubConditionRemover(const std::vector< const RexScalar * > sub_conds)
std::unordered_set< const RexScalar * > sub_conditions_
const RelJoin * join_
void propagate_rex_input_renumber(const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
std::string get_field_name(const RelAlgNode *node, size_t index)
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const std::unordered_map< size_t, std::unique_ptr< const RexScalar > > & idx_to_subcond_
const size_t inputCount() const
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
const unsigned old_base_
RexInputRenumberVisitor(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_numbering)
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
JoinTargetRebaser(const RelJoin *join, const unsigned old_base)
RexProjectInputRedirector(const std::unordered_set< const RelProject * > &crt_inputs)
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:139
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override