OmniSciDB  5ade3759e0
anonymous_namespace{RelAlgOptimizer.cpp} Namespace Reference

Classes

class  AvailabilityChecker
 
class  RexInputCollector
 
class  RexInputRedirector
 
class  RexInputRenumberVisitor
 
class  RexInputSinker
 
class  RexProjectInputRedirector
 
class  RexRebindInputsVisitor
 
class  SubConditionReplacer
 

Functions

size_t get_actual_source_size (const RelProject *curr_project, const std::unordered_set< const RelProject *> &projects_to_remove)
 
bool safe_to_redirect (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
 
bool is_identical_copy (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_set< const RelProject *> &projects_to_remove, std::unordered_set< const RelProject *> &permutating_projects)
 
void propagate_rex_input_renumber (const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping)
 
void redirect_inputs_of (std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject *> &projects, const std::unordered_set< const RelProject *> &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping)
 
void cleanup_dead_nodes (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::unordered_set< const RelProject * > get_visible_projects (const RelAlgNode *root)
 
bool is_distinct (const size_t input_idx, const RelAlgNode *node)
 
size_t pick_always_live_col_idx (const RelAlgNode *node)
 
std::vector< std::unordered_set< size_t > > get_live_ins (const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
 
bool any_dead_col_in (const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
 
bool does_redef_cols (const RelAlgNode *node)
 
void add_new_indices_for (const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
std::vector< std::unique_ptr< const RexAgg > > renumber_rex_aggs (std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
 
SortField renumber_sort_field (const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
 
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::string get_field_name (const RelAlgNode *node, size_t index)
 
void try_insert_coalesceable_proj (std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping)
 
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns (const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void propagate_input_renumbering (std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode *> &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void replace_all_usages (std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
 

Function Documentation

◆ add_new_indices_for()

void anonymous_namespace{RelAlgOptimizer.cpp}::add_new_indices_for ( const RelAlgNode node,
std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &  new_liveouts,
const std::unordered_set< size_t > &  old_liveouts,
const std::unordered_set< const RelAlgNode *> &  intact_nodes,
const std::unordered_map< const RelAlgNode *, size_t > &  orig_node_sizes 
)

Definition at line 726 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_GT, does_redef_cols(), RelAlgNode::getInput(), logger::INFO, RelAlgNode::inputCount(), LOG, RelAlgNode::size(), src, and RelAlgNode::toString().

Referenced by propagate_input_renumbering(), and sweep_dead_columns().

732  {
733  auto live_fields = old_liveouts;
734  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
735  for (size_t i = 0; i < aggregate->getGroupByCount(); ++i) {
736  live_fields.insert(i);
737  }
738  }
739  auto it_ok =
740  new_liveouts.insert(std::make_pair(node, std::unordered_map<size_t, size_t>{}));
741  CHECK(it_ok.second);
742  auto& new_indices = it_ok.first->second;
743  if (intact_nodes.count(node)) {
744  for (size_t i = 0, e = node->size(); i < e; ++i) {
745  new_indices.insert(std::make_pair(i, i));
746  }
747  return;
748  }
749  if (does_redef_cols(node)) {
750  auto node_sz_it = orig_node_sizes.find(node);
751  CHECK(node_sz_it != orig_node_sizes.end());
752  const auto node_size = node_sz_it->second;
753  CHECK_GT(node_size, live_fields.size());
754  LOG(INFO) << node->toString() << " eliminated " << node_size - live_fields.size()
755  << " columns.";
756  std::vector<size_t> ordered_indices(live_fields.begin(), live_fields.end());
757  std::sort(ordered_indices.begin(), ordered_indices.end());
758  for (size_t i = 0; i < ordered_indices.size(); ++i) {
759  new_indices.insert(std::make_pair(ordered_indices[i], i));
760  }
761  return;
762  }
763  std::vector<size_t> ordered_indices;
764  for (size_t i = 0, old_base = 0, new_base = 0; i < node->inputCount(); ++i) {
765  auto src = node->getInput(i);
766  auto src_renum_it = new_liveouts.find(src);
767  if (src_renum_it != new_liveouts.end()) {
768  for (auto m : src_renum_it->second) {
769  new_indices.insert(std::make_pair(old_base + m.first, new_base + m.second));
770  }
771  new_base += src_renum_it->second.size();
772  } else if (dynamic_cast<const RelScan*>(src) || intact_nodes.count(src)) {
773  for (size_t i = 0; i < src->size(); ++i) {
774  new_indices.insert(std::make_pair(old_base + i, new_base + i));
775  }
776  new_base += src->size();
777  } else {
778  CHECK(false);
779  }
780  auto src_sz_it = orig_node_sizes.find(src);
781  CHECK(src_sz_it != orig_node_sizes.end());
782  old_base += src_sz_it->second;
783  }
784 }
#define LOG(tag)
Definition: Logger.h:182
bool does_redef_cols(const RelAlgNode *node)
int64_t * src
#define CHECK_GT(x, y)
Definition: Logger.h:199
const size_t inputCount() const
virtual size_t size() const =0
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:187
const RelAlgNode * getInput(const size_t idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ any_dead_col_in()

bool anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in ( const RelAlgNode node,
const std::unordered_set< size_t > &  live_outs 
)

Definition at line 682 of file RelAlgOptimizer.cpp.

References CHECK, and RelAlgNode::size().

Referenced by eliminate_dead_columns(), and try_insert_coalesceable_proj().

683  {
684  CHECK(!dynamic_cast<const RelScan*>(node));
685  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
686  for (size_t i = aggregate->getGroupByCount(); i < aggregate->size(); ++i) {
687  if (!live_outs.count(i)) {
688  return true;
689  }
690  }
691  return false;
692  }
693 
694  return node->size() > live_outs.size();
695 }
virtual size_t size() const =0
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ cleanup_dead_nodes()

void anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)

Definition at line 319 of file RelAlgOptimizer.cpp.

References logger::INFO, and LOG.

Referenced by eliminate_identical_copy(), and fold_filters().

319  {
320  for (auto nodeIt = nodes.rbegin(); nodeIt != nodes.rend(); ++nodeIt) {
321  if (nodeIt->unique()) {
322  LOG(INFO) << "ID=" << (*nodeIt)->getId() << " " << (*nodeIt)->toString()
323  << " deleted!";
324  nodeIt->reset();
325  }
326  }
327 
328  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
329  for (auto node : nodes) {
330  if (!node) {
331  continue;
332  }
333  new_nodes.push_back(node);
334  }
335  nodes.swap(new_nodes);
336 }
#define LOG(tag)
Definition: Logger.h:182
+ Here is the caller graph for this function:

◆ does_redef_cols()

bool anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols ( const RelAlgNode node)

Definition at line 697 of file RelAlgOptimizer.cpp.

Referenced by add_new_indices_for(), eliminate_dead_columns(), propagate_input_renumbering(), sweep_dead_columns(), and try_insert_coalesceable_proj().

697  {
698  return dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelProject*>(node);
699 }
+ Here is the caller graph for this function:

◆ get_actual_source_size()

size_t anonymous_namespace{RelAlgOptimizer.cpp}::get_actual_source_size ( const RelProject curr_project,
const std::unordered_set< const RelProject *> &  projects_to_remove 
)

Definition at line 97 of file RelAlgOptimizer.cpp.

References RelAlgNode::getInput(), and RelAlgNode::size().

Referenced by is_identical_copy().

99  {
100  auto source = curr_project->getInput(0);
101  while (auto filter = dynamic_cast<const RelFilter*>(source)) {
102  source = filter->getInput(0);
103  }
104  if (auto src_project = dynamic_cast<const RelProject*>(source)) {
105  if (projects_to_remove.count(src_project)) {
106  return get_actual_source_size(src_project, projects_to_remove);
107  }
108  }
109  return curr_project->getInput(0)->size();
110 }
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject *> &projects_to_remove)
virtual size_t size() const =0
const RelAlgNode * getInput(const size_t idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_field_name()

std::string anonymous_namespace{RelAlgOptimizer.cpp}::get_field_name ( const RelAlgNode node,
size_t  index 
)

Definition at line 894 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_EQ, CHECK_LT, RelAlgNode::getInput(), join(), and RelAlgNode::size().

Referenced by try_insert_coalesceable_proj().

894  {
895  CHECK_LT(index, node->size());
896  if (auto scan = dynamic_cast<const RelScan*>(node)) {
897  return scan->getFieldName(index);
898  }
899  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
900  CHECK_EQ(aggregate->size(), aggregate->getFields().size());
901  return aggregate->getFieldName(index);
902  }
903  if (auto join = dynamic_cast<const RelJoin*>(node)) {
904  const auto lhs_size = join->getInput(0)->size();
905  if (index < lhs_size) {
906  return get_field_name(join->getInput(0), index);
907  }
908  return get_field_name(join->getInput(1), index - lhs_size);
909  }
910  if (auto project = dynamic_cast<const RelProject*>(node)) {
911  return project->getFieldName(index);
912  }
913  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
914  return get_field_name(node->getInput(0), index);
915 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
std::string join(T const &container, std::string const &delim)
#define CHECK_LT(x, y)
Definition: Logger.h:197
virtual size_t size() const =0
#define CHECK(condition)
Definition: Logger.h:187
const RelAlgNode * getInput(const size_t idx) const
std::string get_field_name(const RelAlgNode *node, size_t index)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_live_ins()

std::vector<std::unordered_set<size_t> > anonymous_namespace{RelAlgOptimizer.cpp}::get_live_ins ( const RelAlgNode node,
const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &  live_outs 
)

Definition at line 577 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_EQ, CHECK_LT, join(), pick_always_live_col_idx(), and RexVisitorBase< T >::visit().

Referenced by mark_live_columns().

579  {
580  if (!node || dynamic_cast<const RelScan*>(node)) {
581  return {};
582  }
583  RexInputCollector collector(node);
584  auto it = live_outs.find(node);
585  CHECK(it != live_outs.end());
586  auto live_out = it->second;
587  if (auto project = dynamic_cast<const RelProject*>(node)) {
588  CHECK_EQ(size_t(1), project->inputCount());
589  std::unordered_set<size_t> live_in;
590  for (const auto& idx : live_out) {
591  CHECK_LT(idx, project->size());
592  auto partial_in = collector.visit(project->getProjectAt(idx));
593  for (auto rex_in : partial_in) {
594  live_in.insert(rex_in.getIndex());
595  }
596  }
597  if (project->size() == 1 &&
598  dynamic_cast<const RexLiteral*>(project->getProjectAt(0))) {
599  CHECK(live_in.empty());
600  live_in.insert(pick_always_live_col_idx(project->getInput(0)));
601  }
602  return {live_in};
603  }
604  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
605  CHECK_EQ(size_t(1), aggregate->inputCount());
606  const auto group_key_count = static_cast<size_t>(aggregate->getGroupByCount());
607  const auto agg_expr_count = static_cast<size_t>(aggregate->getAggExprsCount());
608  std::unordered_set<size_t> live_in;
609  for (size_t i = 0; i < group_key_count; ++i) {
610  live_in.insert(i);
611  }
612  bool has_count_star_only{false};
613  for (const auto& idx : live_out) {
614  if (idx < group_key_count) {
615  continue;
616  }
617  const auto agg_idx = idx - group_key_count;
618  CHECK_LT(agg_idx, agg_expr_count);
619  const auto& cur_agg_expr = aggregate->getAggExprs()[agg_idx];
620  const auto n_operands = cur_agg_expr->size();
621  for (size_t i = 0; i < n_operands; ++i) {
622  live_in.insert(static_cast<size_t>(cur_agg_expr->getOperand(i)));
623  }
624  if (n_operands == 0) {
625  has_count_star_only = true;
626  }
627  }
628  if (has_count_star_only && !group_key_count) {
629  live_in.insert(size_t(0));
630  }
631  return {live_in};
632  }
633  if (auto join = dynamic_cast<const RelJoin*>(node)) {
634  std::unordered_set<size_t> lhs_live_ins;
635  std::unordered_set<size_t> rhs_live_ins;
636  CHECK_EQ(size_t(2), join->inputCount());
637  auto lhs = join->getInput(0);
638  auto rhs = join->getInput(1);
639  const auto rhs_idx_base = lhs->size();
640  for (const auto idx : live_out) {
641  if (idx < rhs_idx_base) {
642  lhs_live_ins.insert(idx);
643  } else {
644  rhs_live_ins.insert(idx - rhs_idx_base);
645  }
646  }
647  auto rex_ins = collector.visit(join->getCondition());
648  for (const auto& rex_in : rex_ins) {
649  const auto in_idx = static_cast<size_t>(rex_in.getIndex());
650  if (rex_in.getSourceNode() == lhs) {
651  lhs_live_ins.insert(in_idx);
652  continue;
653  }
654  if (rex_in.getSourceNode() == rhs) {
655  rhs_live_ins.insert(in_idx);
656  continue;
657  }
658  CHECK(false);
659  }
660  return {lhs_live_ins, rhs_live_ins};
661  }
662  if (auto sort = dynamic_cast<const RelSort*>(node)) {
663  CHECK_EQ(size_t(1), sort->inputCount());
664  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
665  for (size_t i = 0; i < sort->collationCount(); ++i) {
666  live_in.insert(sort->getCollation(i).getField());
667  }
668  return {live_in};
669  }
670  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
671  CHECK_EQ(size_t(1), filter->inputCount());
672  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
673  auto rex_ins = collector.visit(filter->getCondition());
674  for (const auto& rex_in : rex_ins) {
675  live_in.insert(static_cast<size_t>(rex_in.getIndex()));
676  }
677  return {live_in};
678  }
679  return {};
680 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
size_t pick_always_live_col_idx(const RelAlgNode *node)
std::string join(T const &container, std::string const &delim)
#define CHECK_LT(x, y)
Definition: Logger.h:197
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_visible_projects()

std::unordered_set<const RelProject*> anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects ( const RelAlgNode root)

Definition at line 338 of file RelAlgOptimizer.cpp.

References CHECK, RelAlgNode::getInput(), and join().

Referenced by eliminate_identical_copy().

338  {
339  if (auto project = dynamic_cast<const RelProject*>(root)) {
340  return {project};
341  }
342 
343  if (dynamic_cast<const RelAggregate*>(root) || dynamic_cast<const RelScan*>(root) ||
344  dynamic_cast<const RelLogicalValues*>(root) ||
345  dynamic_cast<const RelModify*>(root)) {
346  return std::unordered_set<const RelProject*>{};
347  }
348 
349  if (auto join = dynamic_cast<const RelJoin*>(root)) {
350  auto lhs_projs = get_visible_projects(join->getInput(0));
351  auto rhs_projs = get_visible_projects(join->getInput(1));
352  lhs_projs.insert(rhs_projs.begin(), rhs_projs.end());
353  return lhs_projs;
354  }
355 
356  CHECK(dynamic_cast<const RelFilter*>(root) || dynamic_cast<const RelSort*>(root));
357  return get_visible_projects(root->getInput(0));
358 }
std::string join(T const &container, std::string const &delim)
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
#define CHECK(condition)
Definition: Logger.h:187
const RelAlgNode * getInput(const size_t idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ is_distinct()

bool anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct ( const size_t  input_idx,
const RelAlgNode node 
)

Definition at line 361 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_EQ, CHECK_LT, RelAlgNode::getInput(), and RelAlgNode::inputCount().

Referenced by Parser::FunctionRef::analyze(), Parser::QuerySpec::analyze(), Analyzer::AggExpr::deep_copy(), eliminate_identical_copy(), Analyzer::AggExpr::get_is_distinct(), Analyzer::Query::get_is_distinct(), Parser::QuerySpec::get_is_distinct(), Analyzer::AggExpr::operator==(), Analyzer::AggExpr::rewrite_with_child_targetlist(), Analyzer::Query::set_is_distinct(), Parser::QuerySpec::to_string(), Analyzer::AggExpr::toString(), anonymous_namespace{CalciteAdapter.cpp}::CalciteAdapter::translateAggregate(), and RelAlgTranslator::translateAggregateRex().

361  {
362  if (dynamic_cast<const RelFilter*>(node) || dynamic_cast<const RelSort*>(node)) {
363  CHECK_EQ(size_t(1), node->inputCount());
364  return is_distinct(input_idx, node->getInput(0));
365  }
366  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
367  CHECK_EQ(size_t(1), node->inputCount());
368  if (aggregate->getGroupByCount() == 1 && !input_idx) {
369  return true;
370  }
371  if (input_idx < aggregate->getGroupByCount()) {
372  return is_distinct(input_idx, node->getInput(0));
373  }
374  return false;
375  }
376  if (auto project = dynamic_cast<const RelProject*>(node)) {
377  CHECK_LT(input_idx, project->size());
378  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(input_idx))) {
379  CHECK_EQ(size_t(1), node->inputCount());
380  return is_distinct(input->getIndex(), project->getInput(0));
381  }
382  return false;
383  }
384  CHECK(dynamic_cast<const RelJoin*>(node) || dynamic_cast<const RelScan*>(node));
385  return false;
386 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
#define CHECK_LT(x, y)
Definition: Logger.h:197
const size_t inputCount() const
#define CHECK(condition)
Definition: Logger.h:187
const RelAlgNode * getInput(const size_t idx) const
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ is_identical_copy()

bool anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy ( const RelProject project,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &  du_web,
const std::unordered_set< const RelProject *> &  projects_to_remove,
std::unordered_set< const RelProject *> &  permutating_projects 
)

Definition at line 129 of file RelAlgOptimizer.cpp.

References CHECK, get_actual_source_size(), RelProject::getProjectAt(), safe_to_redirect(), and RelProject::size().

Referenced by eliminate_identical_copy().

134  {
135  auto source_size = get_actual_source_size(project, projects_to_remove);
136  if (project->size() > source_size) {
137  return false;
138  }
139 
140  if (project->size() < source_size) {
141  auto usrs_it = du_web.find(project);
142  CHECK(usrs_it != du_web.end());
143  bool guard_found = false;
144  while (usrs_it->second.size() == size_t(1)) {
145  auto only_usr = *usrs_it->second.begin();
146  if (dynamic_cast<const RelProject*>(only_usr)) {
147  guard_found = true;
148  break;
149  }
150  if (dynamic_cast<const RelAggregate*>(only_usr) ||
151  dynamic_cast<const RelSort*>(only_usr) ||
152  dynamic_cast<const RelJoin*>(only_usr)) {
153  return false;
154  }
155  CHECK(dynamic_cast<const RelFilter*>(only_usr));
156  usrs_it = du_web.find(only_usr);
157  CHECK(usrs_it != du_web.end());
158  }
159 
160  if (!guard_found) {
161  return false;
162  }
163  }
164 
165  bool identical = true;
166  for (size_t i = 0; i < project->size(); ++i) {
167  auto target = dynamic_cast<const RexInput*>(project->getProjectAt(i));
168  CHECK(target);
169  if (i != target->getIndex()) {
170  identical = false;
171  break;
172  }
173  }
174 
175  if (identical) {
176  return true;
177  }
178 
179  if (safe_to_redirect(project, du_web)) {
180  permutating_projects.insert(project);
181  return true;
182  }
183 
184  return false;
185 }
size_t size() const override
const RexScalar * getProjectAt(const size_t idx) const
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject *> &projects_to_remove)
bool safe_to_redirect(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mark_live_columns()

std::unordered_map<const RelAlgNode*, std::unordered_set<size_t> > anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)

Definition at line 843 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_EQ, get_live_ins(), and src.

Referenced by eliminate_dead_columns(), and sink_projected_boolean_expr_to_join().

844  {
845  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> live_outs;
846  std::vector<const RelAlgNode*> work_set;
847  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
848  auto node = node_it->get();
849  if (dynamic_cast<const RelScan*>(node) || live_outs.count(node) ||
850  dynamic_cast<const RelModify*>(node)) {
851  continue;
852  }
853  std::vector<size_t> all_live(node->size());
854  std::iota(all_live.begin(), all_live.end(), size_t(0));
855  live_outs.insert(std::make_pair(
856  node, std::unordered_set<size_t>(all_live.begin(), all_live.end())));
857  work_set.push_back(node);
858  while (!work_set.empty()) {
859  auto walker = work_set.back();
860  work_set.pop_back();
861  CHECK(!dynamic_cast<const RelScan*>(walker));
862  CHECK(live_outs.count(walker));
863  auto live_ins = get_live_ins(walker, live_outs);
864  CHECK_EQ(live_ins.size(), walker->inputCount());
865  for (size_t i = 0; i < walker->inputCount(); ++i) {
866  auto src = walker->getInput(i);
867  if (dynamic_cast<const RelScan*>(src) || live_ins[i].empty()) {
868  continue;
869  }
870  if (!live_outs.count(src)) {
871  live_outs.insert(std::make_pair(src, std::unordered_set<size_t>{}));
872  }
873  auto src_it = live_outs.find(src);
874  CHECK(src_it != live_outs.end());
875  auto& live_out = src_it->second;
876  bool changed = false;
877  if (!live_out.empty()) {
878  live_out.insert(live_ins[i].begin(), live_ins[i].end());
879  changed = true;
880  } else {
881  for (int idx : live_ins[i]) {
882  changed |= live_out.insert(idx).second;
883  }
884  }
885  if (changed) {
886  work_set.push_back(src);
887  }
888  }
889  }
890  }
891  return live_outs;
892 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
int64_t * src
std::vector< std::unordered_set< size_t > > get_live_ins(const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ pick_always_live_col_idx()

size_t anonymous_namespace{RelAlgOptimizer.cpp}::pick_always_live_col_idx ( const RelAlgNode node)

Definition at line 548 of file RelAlgOptimizer.cpp.

References CHECK, join(), and RelAlgNode::size().

Referenced by get_live_ins().

548  {
549  CHECK(node->size());
550  RexInputCollector collector(node);
551  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
552  auto rex_ins = collector.visit(filter->getCondition());
553  if (!rex_ins.empty()) {
554  return static_cast<size_t>(rex_ins.begin()->getIndex());
555  }
556  return pick_always_live_col_idx(filter->getInput(0));
557  } else if (auto join = dynamic_cast<const RelJoin*>(node)) {
558  auto rex_ins = collector.visit(join->getCondition());
559  if (!rex_ins.empty()) {
560  return static_cast<size_t>(rex_ins.begin()->getIndex());
561  }
562  if (auto lhs_idx = pick_always_live_col_idx(join->getInput(0))) {
563  return lhs_idx;
564  }
565  if (auto rhs_idx = pick_always_live_col_idx(join->getInput(0))) {
566  return rhs_idx + join->getInput(0)->size();
567  }
568  } else if (auto sort = dynamic_cast<const RelSort*>(node)) {
569  if (sort->collationCount()) {
570  return sort->getCollation(0).getField();
571  }
572  return pick_always_live_col_idx(sort->getInput(0));
573  }
574  return size_t(0);
575 }
size_t pick_always_live_col_idx(const RelAlgNode *node)
std::string join(T const &container, std::string const &delim)
virtual size_t size() const =0
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ propagate_input_renumbering()

void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering ( std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &  liveout_renumbering,
const std::vector< const RelAlgNode *> &  ready_nodes,
const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &  old_liveouts,
const std::unordered_map< const RelAlgNode *, RelAlgNode *> &  deconst_mapping,
const std::unordered_set< const RelAlgNode *> &  intact_nodes,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &  du_web,
const std::unordered_map< const RelAlgNode *, size_t > &  orig_node_sizes 
)

Definition at line 1043 of file RelAlgOptimizer.cpp.

References add_new_indices_for(), CHECK, does_redef_cols(), anonymous_namespace{RelAlgOptimizer.cpp}::AvailabilityChecker::hasAllSrcReady(), join(), renumber_rex_aggs(), renumber_sort_field(), and RexVisitorBase< T >::visit().

Referenced by eliminate_dead_columns().

1052  {
1053  RexInputRenumberVisitor renumberer(liveout_renumbering);
1054  AvailabilityChecker checker(liveout_renumbering, intact_nodes);
1055  std::deque<const RelAlgNode*> work_set(ready_nodes.begin(), ready_nodes.end());
1056  while (!work_set.empty()) {
1057  auto walker = work_set.front();
1058  work_set.pop_front();
1059  CHECK(!dynamic_cast<const RelScan*>(walker));
1060  auto node_it = deconst_mapping.find(walker);
1061  CHECK(node_it != deconst_mapping.end());
1062  auto node = node_it->second;
1063  if (auto project = dynamic_cast<RelProject*>(node)) {
1064  auto old_exprs = project->getExpressionsAndRelease();
1065  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1066  for (auto& expr : old_exprs) {
1067  new_exprs.push_back(renumberer.visit(expr.get()));
1068  }
1069  project->setExpressions(new_exprs);
1070  } else if (auto aggregate = dynamic_cast<RelAggregate*>(node)) {
1071  auto src_it = liveout_renumbering.find(node->getInput(0));
1072  CHECK(src_it != liveout_renumbering.end());
1073  auto old_exprs = aggregate->getAggExprsAndRelease();
1074  auto new_exprs = renumber_rex_aggs(old_exprs, src_it->second);
1075  aggregate->setAggExprs(new_exprs);
1076  } else if (auto join = dynamic_cast<RelJoin*>(node)) {
1077  auto new_condition = renumberer.visit(join->getCondition());
1078  join->setCondition(new_condition);
1079  } else if (auto filter = dynamic_cast<RelFilter*>(node)) {
1080  auto new_condition = renumberer.visit(filter->getCondition());
1081  filter->setCondition(new_condition);
1082  } else if (auto sort = dynamic_cast<RelSort*>(node)) {
1083  auto src_it = liveout_renumbering.find(node->getInput(0));
1084  CHECK(src_it != liveout_renumbering.end());
1085  std::vector<SortField> new_collations;
1086  for (size_t i = 0; i < sort->collationCount(); ++i) {
1087  new_collations.push_back(
1088  renumber_sort_field(sort->getCollation(i), src_it->second));
1089  }
1090  sort->setCollation(std::move(new_collations));
1091  } else {
1092  CHECK(false);
1093  }
1094 
1095  // Ignore empty live_out due to some invalid node
1096  if (does_redef_cols(node) || intact_nodes.count(node)) {
1097  continue;
1098  }
1099  auto live_pair = old_liveouts.find(node);
1100  CHECK(live_pair != old_liveouts.end());
1101  auto live_out = live_pair->second;
1103  node, liveout_renumbering, live_out, intact_nodes, orig_node_sizes);
1104  auto usrs_it = du_web.find(walker);
1105  CHECK(usrs_it != du_web.end());
1106  for (auto usr : usrs_it->second) {
1107  if (checker.hasAllSrcReady(usr)) {
1108  work_set.push_back(usr);
1109  }
1110  }
1111  }
1112 }
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
bool does_redef_cols(const RelAlgNode *node)
std::string join(T const &container, std::string const &delim)
std::vector< std::unique_ptr< const RexAgg > > renumber_rex_aggs(std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
SortField renumber_sort_field(const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ propagate_rex_input_renumber()

void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_rex_input_renumber ( const RelFilter excluded_root,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &  du_web,
const std::unordered_map< const RelAlgNode *, RelAlgNode *> &  deconst_mapping 
)

Definition at line 187 of file RelAlgOptimizer.cpp.

References CHECK, and RelAlgNode::getInput().

Referenced by redirect_inputs_of().

191  {
192  CHECK(excluded_root);
193  auto src_project = dynamic_cast<const RelProject*>(excluded_root->getInput(0));
194  CHECK(src_project && src_project->isSimple());
195  const auto indirect_join_src = dynamic_cast<const RelJoin*>(src_project->getInput(0));
196  std::unordered_map<size_t, size_t> old_to_new_idx;
197  for (size_t i = 0; i < src_project->size(); ++i) {
198  auto rex_in = dynamic_cast<const RexInput*>(src_project->getProjectAt(i));
199  CHECK(rex_in);
200  size_t src_base = 0;
201  if (indirect_join_src != nullptr &&
202  indirect_join_src->getInput(1) == rex_in->getSourceNode()) {
203  src_base = indirect_join_src->getInput(0)->size();
204  }
205  old_to_new_idx.insert(std::make_pair(i, src_base + rex_in->getIndex()));
206  old_to_new_idx.insert(std::make_pair(i, rex_in->getIndex()));
207  }
208  CHECK(old_to_new_idx.size());
209  RexInputRenumber<false> renumber(old_to_new_idx);
210  auto usrs_it = du_web.find(excluded_root);
211  CHECK(usrs_it != du_web.end());
212  std::vector<const RelAlgNode*> work_set(usrs_it->second.begin(), usrs_it->second.end());
213  while (!work_set.empty()) {
214  auto node = work_set.back();
215  work_set.pop_back();
216  auto node_it = deconst_mapping.find(node);
217  CHECK(node_it != deconst_mapping.end());
218  auto modified_node = node_it->second;
219  if (auto filter = dynamic_cast<RelFilter*>(modified_node)) {
220  auto new_condition = renumber.visit(filter->getCondition());
221  filter->setCondition(new_condition);
222  auto usrs_it = du_web.find(filter);
223  CHECK(usrs_it != du_web.end() && usrs_it->second.size() == 1);
224  work_set.push_back(*usrs_it->second.begin());
225  continue;
226  }
227  if (auto project = dynamic_cast<RelProject*>(modified_node)) {
228  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
229  for (size_t i = 0; i < project->size(); ++i) {
230  new_exprs.push_back(renumber.visit(project->getProjectAt(i)));
231  }
232  project->setExpressions(new_exprs);
233  continue;
234  }
235  CHECK(false);
236  }
237 }
#define CHECK(condition)
Definition: Logger.h:187
const RelAlgNode * getInput(const size_t idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ redirect_inputs_of()

void anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of ( std::shared_ptr< RelAlgNode node,
const std::unordered_set< const RelProject *> &  projects,
const std::unordered_set< const RelProject *> &  permutating_projects,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &  du_web,
const std::unordered_map< const RelAlgNode *, RelAlgNode *> &  deconst_mapping 
)

Definition at line 239 of file RelAlgOptimizer.cpp.

References CHECK, join(), propagate_rex_input_renumber(), RexVisitorBase< T >::visit(), and anonymous_namespace{RelAlgOptimizer.cpp}::RexRebindInputsVisitor::visitNode().

Referenced by eliminate_identical_copy().

245  {
246  std::shared_ptr<const RelProject> src_project = nullptr;
247  for (size_t i = 0; i < node->inputCount(); ++i) {
248  if (auto project =
249  std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(i))) {
250  if (projects.count(project.get())) {
251  src_project = project;
252  break;
253  }
254  }
255  }
256  if (!src_project) {
257  return;
258  }
259  if (auto join = std::dynamic_pointer_cast<RelJoin>(node)) {
260  auto other_project =
261  src_project == node->getAndOwnInput(0)
262  ? std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(1))
263  : std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(0));
264  join->replaceInput(src_project, src_project->getAndOwnInput(0));
265  RexRebindInputsVisitor rebinder(src_project.get(), src_project->getInput(0));
266  auto usrs_it = du_web.find(join.get());
267  CHECK(usrs_it != du_web.end());
268  for (auto usr : usrs_it->second) {
269  rebinder.visitNode(usr);
270  }
271 
272  if (other_project && projects.count(other_project.get())) {
273  join->replaceInput(other_project, other_project->getAndOwnInput(0));
274  RexRebindInputsVisitor other_rebinder(other_project.get(),
275  other_project->getInput(0));
276  for (auto usr : usrs_it->second) {
277  other_rebinder.visitNode(usr);
278  }
279  }
280  return;
281  }
282  if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
283  project->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
284  RexProjectInputRedirector redirector(projects);
285  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
286  for (size_t i = 0; i < project->size(); ++i) {
287  new_exprs.push_back(redirector.visit(project->getProjectAt(i)));
288  }
289  project->setExpressions(new_exprs);
290  return;
291  }
292  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
293  const bool is_permutating_proj = permutating_projects.count(src_project.get());
294  if (is_permutating_proj || dynamic_cast<const RelJoin*>(src_project->getInput(0))) {
295  if (is_permutating_proj) {
296  propagate_rex_input_renumber(filter.get(), du_web, deconst_mapping);
297  }
298  filter->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
299  RexProjectInputRedirector redirector(projects);
300  auto new_condition = redirector.visit(filter->getCondition());
301  filter->setCondition(new_condition);
302  } else {
303  filter->replaceInput(src_project, src_project->getAndOwnInput(0));
304  }
305  return;
306  }
307  if (std::dynamic_pointer_cast<RelSort>(node) &&
308  dynamic_cast<const RelScan*>(src_project->getInput(0))) {
309  return;
310  }
311  if (std::dynamic_pointer_cast<RelModify>(node)) {
312  return; // NOTE: Review this. Not sure about this.
313  }
314  CHECK(std::dynamic_pointer_cast<RelAggregate>(node) ||
315  std::dynamic_pointer_cast<RelSort>(node));
316  node->replaceInput(src_project, src_project->getAndOwnInput(0));
317 }
std::string join(T const &container, std::string const &delim)
#define CHECK(condition)
Definition: Logger.h:187
void propagate_rex_input_renumber(const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ renumber_rex_aggs()

std::vector<std::unique_ptr<const RexAgg> > anonymous_namespace{RelAlgOptimizer.cpp}::renumber_rex_aggs ( std::vector< std::unique_ptr< const RexAgg >> &  agg_exprs,
const std::unordered_map< size_t, size_t > &  new_numbering 
)

Definition at line 809 of file RelAlgOptimizer.cpp.

Referenced by propagate_input_renumbering().

811  {
812  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
813  for (auto& expr : agg_exprs) {
814  if (expr->size() >= 1) {
815  auto old_idx = expr->getOperand(0);
816  auto idx_it = new_numbering.find(old_idx);
817  if (idx_it != new_numbering.end()) {
818  std::vector<size_t> operands;
819  operands.push_back(idx_it->second);
820  if (expr->size() == 2) {
821  operands.push_back(expr->getOperand(1));
822  }
823  new_exprs.push_back(boost::make_unique<RexAgg>(
824  expr->getKind(), expr->isDistinct(), expr->getType(), operands));
825  continue;
826  }
827  }
828  new_exprs.push_back(std::move(expr));
829  }
830  return new_exprs;
831 }
+ Here is the caller graph for this function:

◆ renumber_sort_field()

SortField anonymous_namespace{RelAlgOptimizer.cpp}::renumber_sort_field ( const SortField old_field,
const std::unordered_map< size_t, size_t > &  new_numbering 
)

Definition at line 833 of file RelAlgOptimizer.cpp.

References SortField::getField(), SortField::getNullsPosition(), and SortField::getSortDir().

Referenced by propagate_input_renumbering().

834  {
835  auto field_idx = old_field.getField();
836  auto idx_it = new_numbering.find(field_idx);
837  if (idx_it != new_numbering.end()) {
838  field_idx = idx_it->second;
839  }
840  return SortField(field_idx, old_field.getSortDir(), old_field.getNullsPosition());
841 }
SortDirection getSortDir() const
NullSortedPosition getNullsPosition() const
size_t getField() const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ replace_all_usages()

void anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages ( std::shared_ptr< const RelAlgNode old_def_node,
std::shared_ptr< const RelAlgNode new_def_node,
std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &  deconst_mapping,
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &  du_web 
)

Definition at line 1361 of file RelAlgOptimizer.cpp.

References CHECK.

Referenced by fold_filters().

1366  {
1367  auto usrs_it = du_web.find(old_def_node.get());
1368  RexInputRedirector redirector(new_def_node.get(), old_def_node.get());
1369  CHECK(usrs_it != du_web.end());
1370  for (auto usr : usrs_it->second) {
1371  auto usr_it = deconst_mapping.find(usr);
1372  CHECK(usr_it != deconst_mapping.end());
1373  usr_it->second->replaceInput(old_def_node, new_def_node);
1374  }
1375  auto new_usrs_it = du_web.find(new_def_node.get());
1376  CHECK(new_usrs_it != du_web.end());
1377  new_usrs_it->second.insert(usrs_it->second.begin(), usrs_it->second.end());
1378  usrs_it->second.clear();
1379 }
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the caller graph for this function:

◆ safe_to_redirect()

bool anonymous_namespace{RelAlgOptimizer.cpp}::safe_to_redirect ( const RelProject project,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &  du_web 
)

Definition at line 112 of file RelAlgOptimizer.cpp.

References CHECK, and RelProject::isSimple().

Referenced by is_identical_copy().

115  {
116  if (!project->isSimple()) {
117  return false;
118  }
119  auto usrs_it = du_web.find(project);
120  CHECK(usrs_it != du_web.end());
121  for (auto usr : usrs_it->second) {
122  if (!dynamic_cast<const RelProject*>(usr) && !dynamic_cast<const RelFilter*>(usr)) {
123  return false;
124  }
125  }
126  return true;
127 }
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ sweep_dead_columns()

std::pair<std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t> >, std::vector<const RelAlgNode*> > anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns ( const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &  live_outs,
const std::vector< std::shared_ptr< RelAlgNode >> &  nodes,
const std::unordered_set< const RelAlgNode *> &  intact_nodes,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &  du_web,
const std::unordered_map< const RelAlgNode *, size_t > &  orig_node_sizes 
)

Definition at line 980 of file RelAlgOptimizer.cpp.

References add_new_indices_for(), CHECK, does_redef_cols(), and anonymous_namespace{RelAlgOptimizer.cpp}::AvailabilityChecker::hasAllSrcReady().

Referenced by eliminate_dead_columns().

986  {
987  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
988  liveouts_renumbering;
989  std::vector<const RelAlgNode*> ready_nodes;
990  AvailabilityChecker checker(liveouts_renumbering, intact_nodes);
991  for (auto node : nodes) {
992  // Ignore empty live_out due to some invalid node
993  if (!does_redef_cols(node.get()) || intact_nodes.count(node.get())) {
994  continue;
995  }
996  auto live_pair = live_outs.find(node.get());
997  CHECK(live_pair != live_outs.end());
998  auto old_live_outs = live_pair->second;
1000  node.get(), liveouts_renumbering, old_live_outs, intact_nodes, orig_node_sizes);
1001  if (auto aggregate = std::dynamic_pointer_cast<RelAggregate>(node)) {
1002  auto old_exprs = aggregate->getAggExprsAndRelease();
1003  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
1004  auto key_name_it = aggregate->getFields().begin();
1005  std::vector<std::string> new_fields(key_name_it,
1006  key_name_it + aggregate->getGroupByCount());
1007  for (size_t i = aggregate->getGroupByCount(), j = 0;
1008  i < aggregate->getFields().size() && j < old_exprs.size();
1009  ++i, ++j) {
1010  if (old_live_outs.count(i)) {
1011  new_exprs.push_back(std::move(old_exprs[j]));
1012  new_fields.push_back(aggregate->getFieldName(i));
1013  }
1014  }
1015  aggregate->setAggExprs(new_exprs);
1016  aggregate->setFields(new_fields);
1017  } else if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
1018  auto old_exprs = project->getExpressionsAndRelease();
1019  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1020  std::vector<std::string> new_fields;
1021  for (size_t i = 0; i < old_exprs.size(); ++i) {
1022  if (old_live_outs.count(i)) {
1023  new_exprs.push_back(std::move(old_exprs[i]));
1024  new_fields.push_back(project->getFieldName(i));
1025  }
1026  }
1027  project->setExpressions(new_exprs);
1028  project->setFields(new_fields);
1029  } else {
1030  CHECK(false);
1031  }
1032  auto usrs_it = du_web.find(node.get());
1033  CHECK(usrs_it != du_web.end());
1034  for (auto usr : usrs_it->second) {
1035  if (checker.hasAllSrcReady(usr)) {
1036  ready_nodes.push_back(usr);
1037  }
1038  }
1039  }
1040  return {liveouts_renumbering, ready_nodes};
1041 }
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
bool does_redef_cols(const RelAlgNode *node)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_insert_coalesceable_proj()

void anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes,
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &  liveouts,
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &  du_web,
std::unordered_map< const RelAlgNode *, RelAlgNode *> &  deconst_mapping 
)

Definition at line 917 of file RelAlgOptimizer.cpp.

References any_dead_col_in(), CHECK, does_redef_cols(), get_field_name(), and anonymous_namespace{RelAlgOptimizer.cpp}::RexRebindInputsVisitor::visitNode().

Referenced by eliminate_dead_columns().

921  {
922  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
923  for (auto node : nodes) {
924  new_nodes.push_back(node);
925  if (!std::dynamic_pointer_cast<RelFilter>(node)) {
926  continue;
927  }
928  const auto filter = node.get();
929  auto liveout_it = liveouts.find(filter);
930  CHECK(liveout_it != liveouts.end());
931  auto& outs = liveout_it->second;
932  if (!any_dead_col_in(filter, outs)) {
933  continue;
934  }
935  auto usrs_it = du_web.find(filter);
936  CHECK(usrs_it != du_web.end());
937  auto& usrs = usrs_it->second;
938  if (usrs.size() != 1 || does_redef_cols(*usrs.begin())) {
939  continue;
940  }
941  auto only_usr = deconst_mapping[*usrs.begin()];
942 
943  std::vector<std::unique_ptr<const RexScalar>> exprs;
944  std::vector<std::string> fields;
945  for (size_t i = 0; i < filter->size(); ++i) {
946  exprs.push_back(boost::make_unique<RexInput>(filter, i));
947  fields.push_back(get_field_name(filter, i));
948  }
949  auto project_owner = std::make_shared<RelProject>(exprs, fields, node);
950  auto project = project_owner.get();
951 
952  only_usr->replaceInput(node, project_owner);
953  if (dynamic_cast<const RelJoin*>(only_usr)) {
954  RexRebindInputsVisitor visitor(filter, project);
955  for (auto usr : du_web[only_usr]) {
956  visitor.visitNode(usr);
957  }
958  }
959 
960  liveouts.insert(std::make_pair(project, outs));
961 
962  usrs.clear();
963  usrs.insert(project);
964  du_web.insert(
965  std::make_pair(project, std::unordered_set<const RelAlgNode*>{only_usr}));
966 
967  new_nodes.push_back(project_owner);
968  }
969  if (new_nodes.size() > nodes.size()) {
970  nodes.swap(new_nodes);
971  deconst_mapping.clear();
972  for (auto node : nodes) {
973  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
974  }
975  }
976 }
bool does_redef_cols(const RelAlgNode *node)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
#define CHECK(condition)
Definition: Logger.h:187
std::string get_field_name(const RelAlgNode *node, size_t index)
+ Here is the call graph for this function:
+ Here is the caller graph for this function: