OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
anonymous_namespace{RelAlgOptimizer.cpp} Namespace Reference

Classes

class  RexProjectInputRedirector
 
class  RexRebindInputsVisitor
 
class  RexInputCollector
 
class  AvailabilityChecker
 
class  RexInputRenumberVisitor
 
class  RexInputSinker
 
class  SubConditionReplacer
 
class  RexInputRedirector
 

Functions

size_t get_actual_source_size (const RelProject *curr_project, const std::unordered_set< const RelProject * > &projects_to_remove)
 
bool safe_to_redirect (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
bool is_identical_copy (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
 
void propagate_rex_input_renumber (const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void redirect_inputs_of (std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void cleanup_dead_nodes (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::unordered_set< const
RelProject * > 
get_visible_projects (const RelAlgNode *root)
 
bool is_distinct (const size_t input_idx, const RelAlgNode *node)
 
size_t pick_always_live_col_idx (const RelAlgNode *node)
 
std::vector
< std::unordered_set< size_t > > 
get_live_ins (const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
 
bool any_dead_col_in (const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
 
bool does_redef_cols (const RelAlgNode *node)
 
void add_new_indices_for (const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
std::vector< std::unique_ptr
< const RexAgg > > 
renumber_rex_aggs (std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
 
SortField renumber_sort_field (const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
 
std::unordered_map< const
RelAlgNode
*, std::unordered_set< size_t > > 
mark_live_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::string get_field_name (const RelAlgNode *node, size_t index)
 
void try_insert_coalesceable_proj (std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
std::pair< std::unordered_map
< const RelAlgNode
*, std::unordered_map< size_t,
size_t > >, std::vector< const
RelAlgNode * > > 
sweep_dead_columns (const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void propagate_input_renumbering (std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void replace_all_usages (std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 

Function Documentation

void anonymous_namespace{RelAlgOptimizer.cpp}::add_new_indices_for ( const RelAlgNode node,
std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &  new_liveouts,
const std::unordered_set< size_t > &  old_liveouts,
const std::unordered_set< const RelAlgNode * > &  intact_nodes,
const std::unordered_map< const RelAlgNode *, size_t > &  orig_node_sizes 
)

Definition at line 789 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_GT, does_redef_cols(), RelAlgNode::getInput(), logger::INFO, RelAlgNode::inputCount(), LOG, RelAlgNode::size(), src, and RelAlgNode::toString().

Referenced by propagate_input_renumbering(), and sweep_dead_columns().

795  {
796  auto live_fields = old_liveouts;
797  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
798  for (size_t i = 0; i < aggregate->getGroupByCount(); ++i) {
799  live_fields.insert(i);
800  }
801  }
802  auto it_ok =
803  new_liveouts.insert(std::make_pair(node, std::unordered_map<size_t, size_t>{}));
804  CHECK(it_ok.second);
805  auto& new_indices = it_ok.first->second;
806  if (intact_nodes.count(node)) {
807  for (size_t i = 0, e = node->size(); i < e; ++i) {
808  new_indices.insert(std::make_pair(i, i));
809  }
810  return;
811  }
812  if (does_redef_cols(node)) {
813  auto node_sz_it = orig_node_sizes.find(node);
814  CHECK(node_sz_it != orig_node_sizes.end());
815  const auto node_size = node_sz_it->second;
816  CHECK_GT(node_size, live_fields.size());
817  LOG(INFO) << node->toString() << " eliminated " << node_size - live_fields.size()
818  << " columns.";
819  std::vector<size_t> ordered_indices(live_fields.begin(), live_fields.end());
820  std::sort(ordered_indices.begin(), ordered_indices.end());
821  for (size_t i = 0; i < ordered_indices.size(); ++i) {
822  new_indices.insert(std::make_pair(ordered_indices[i], i));
823  }
824  return;
825  }
826  std::vector<size_t> ordered_indices;
827  for (size_t i = 0, old_base = 0, new_base = 0; i < node->inputCount(); ++i) {
828  auto src = node->getInput(i);
829  auto src_renum_it = new_liveouts.find(src);
830  if (src_renum_it != new_liveouts.end()) {
831  for (auto m : src_renum_it->second) {
832  new_indices.insert(std::make_pair(old_base + m.first, new_base + m.second));
833  }
834  new_base += src_renum_it->second.size();
835  } else if (dynamic_cast<const RelScan*>(src) || intact_nodes.count(src)) {
836  for (size_t i = 0; i < src->size(); ++i) {
837  new_indices.insert(std::make_pair(old_base + i, new_base + i));
838  }
839  new_base += src->size();
840  } else {
841  CHECK(false);
842  }
843  auto src_sz_it = orig_node_sizes.find(src);
844  CHECK(src_sz_it != orig_node_sizes.end());
845  old_base += src_sz_it->second;
846  }
847 }
int64_t * src
#define LOG(tag)
Definition: Logger.h:188
bool does_redef_cols(const RelAlgNode *node)
#define CHECK_GT(x, y)
Definition: Logger.h:209
const RelAlgNode * getInput(const size_t idx) const
virtual size_t size() const =0
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:197
const size_t inputCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in ( const RelAlgNode node,
const std::unordered_set< size_t > &  live_outs 
)

Definition at line 745 of file RelAlgOptimizer.cpp.

References CHECK, and RelAlgNode::size().

Referenced by eliminate_dead_columns(), and try_insert_coalesceable_proj().

746  {
747  CHECK(!dynamic_cast<const RelScan*>(node));
748  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
749  for (size_t i = aggregate->getGroupByCount(); i < aggregate->size(); ++i) {
750  if (!live_outs.count(i)) {
751  return true;
752  }
753  }
754  return false;
755  }
756 
757  return node->size() > live_outs.size();
758 }
virtual size_t size() const =0
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)

Definition at line 327 of file RelAlgOptimizer.cpp.

References logger::INFO, and LOG.

Referenced by eliminate_identical_copy(), and fold_filters().

327  {
328  for (auto nodeIt = nodes.rbegin(); nodeIt != nodes.rend(); ++nodeIt) {
329  if (nodeIt->use_count() == 1) {
330  LOG(INFO) << "ID=" << (*nodeIt)->getId() << " " << (*nodeIt)->toString()
331  << " deleted!";
332  nodeIt->reset();
333  }
334  }
335 
336  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
337  for (auto node : nodes) {
338  if (!node) {
339  continue;
340  }
341  new_nodes.push_back(node);
342  }
343  nodes.swap(new_nodes);
344 }
#define LOG(tag)
Definition: Logger.h:188

+ Here is the caller graph for this function:

bool anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols ( const RelAlgNode node)

Definition at line 760 of file RelAlgOptimizer.cpp.

Referenced by add_new_indices_for(), eliminate_dead_columns(), propagate_input_renumbering(), sweep_dead_columns(), and try_insert_coalesceable_proj().

760  {
761  return dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelProject*>(node);
762 }

+ Here is the caller graph for this function:

size_t anonymous_namespace{RelAlgOptimizer.cpp}::get_actual_source_size ( const RelProject curr_project,
const std::unordered_set< const RelProject * > &  projects_to_remove 
)

Definition at line 98 of file RelAlgOptimizer.cpp.

References RelAlgNode::getInput(), and RelAlgNode::size().

Referenced by is_identical_copy().

100  {
101  auto source = curr_project->getInput(0);
102  while (auto filter = dynamic_cast<const RelFilter*>(source)) {
103  source = filter->getInput(0);
104  }
105  if (auto src_project = dynamic_cast<const RelProject*>(source)) {
106  if (projects_to_remove.count(src_project)) {
107  return get_actual_source_size(src_project, projects_to_remove);
108  }
109  }
110  return curr_project->getInput(0)->size();
111 }
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject * > &projects_to_remove)
const RelAlgNode * getInput(const size_t idx) const
virtual size_t size() const =0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{RelAlgOptimizer.cpp}::get_field_name ( const RelAlgNode node,
size_t  index 
)

Definition at line 960 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_EQ, CHECK_LT, RelAlgNode::getInput(), join(), and RelAlgNode::size().

Referenced by try_insert_coalesceable_proj().

960  {
961  CHECK_LT(index, node->size());
962  if (auto scan = dynamic_cast<const RelScan*>(node)) {
963  return scan->getFieldName(index);
964  }
965  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
966  CHECK_EQ(aggregate->size(), aggregate->getFields().size());
967  return aggregate->getFieldName(index);
968  }
969  if (auto join = dynamic_cast<const RelJoin*>(node)) {
970  const auto lhs_size = join->getInput(0)->size();
971  if (index < lhs_size) {
972  return get_field_name(join->getInput(0), index);
973  }
974  return get_field_name(join->getInput(1), index - lhs_size);
975  }
976  if (auto project = dynamic_cast<const RelProject*>(node)) {
977  return project->getFieldName(index);
978  }
979  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
980  return get_field_name(node->getInput(0), index);
981 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string join(T const &container, std::string const &delim)
const RelAlgNode * getInput(const size_t idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:207
virtual size_t size() const =0
#define CHECK(condition)
Definition: Logger.h:197
std::string get_field_name(const RelAlgNode *node, size_t index)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<std::unordered_set<size_t> > anonymous_namespace{RelAlgOptimizer.cpp}::get_live_ins ( const RelAlgNode node,
const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &  live_outs 
)

Definition at line 622 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_EQ, CHECK_LT, join(), pick_always_live_col_idx(), run_benchmark_import::result, and RexVisitorBase< T >::visit().

Referenced by mark_live_columns().

624  {
625  if (!node || dynamic_cast<const RelScan*>(node)) {
626  return {};
627  }
628  RexInputCollector collector(node);
629  auto it = live_outs.find(node);
630  CHECK(it != live_outs.end());
631  auto live_out = it->second;
632  if (auto project = dynamic_cast<const RelProject*>(node)) {
633  CHECK_EQ(size_t(1), project->inputCount());
634  std::unordered_set<size_t> live_in;
635  for (const auto& idx : live_out) {
636  CHECK_LT(idx, project->size());
637  auto partial_in = collector.visit(project->getProjectAt(idx));
638  for (auto rex_in : partial_in) {
639  live_in.insert(rex_in.getIndex());
640  }
641  }
642  if (project->size() == 1 &&
643  dynamic_cast<const RexLiteral*>(project->getProjectAt(0))) {
644  CHECK(live_in.empty());
645  live_in.insert(pick_always_live_col_idx(project->getInput(0)));
646  }
647  return {live_in};
648  }
649  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
650  CHECK_EQ(size_t(1), aggregate->inputCount());
651  const auto group_key_count = static_cast<size_t>(aggregate->getGroupByCount());
652  const auto agg_expr_count = static_cast<size_t>(aggregate->getAggExprsCount());
653  std::unordered_set<size_t> live_in;
654  for (size_t i = 0; i < group_key_count; ++i) {
655  live_in.insert(i);
656  }
657  bool has_count_star_only{false};
658  for (const auto& idx : live_out) {
659  if (idx < group_key_count) {
660  continue;
661  }
662  const auto agg_idx = idx - group_key_count;
663  CHECK_LT(agg_idx, agg_expr_count);
664  const auto& cur_agg_expr = aggregate->getAggExprs()[agg_idx];
665  const auto n_operands = cur_agg_expr->size();
666  for (size_t i = 0; i < n_operands; ++i) {
667  live_in.insert(static_cast<size_t>(cur_agg_expr->getOperand(i)));
668  }
669  if (n_operands == 0) {
670  has_count_star_only = true;
671  }
672  }
673  if (has_count_star_only && !group_key_count) {
674  live_in.insert(size_t(0));
675  }
676  return {live_in};
677  }
678  if (auto join = dynamic_cast<const RelJoin*>(node)) {
679  std::unordered_set<size_t> lhs_live_ins;
680  std::unordered_set<size_t> rhs_live_ins;
681  CHECK_EQ(size_t(2), join->inputCount());
682  auto lhs = join->getInput(0);
683  auto rhs = join->getInput(1);
684  const auto rhs_idx_base = lhs->size();
685  for (const auto idx : live_out) {
686  if (idx < rhs_idx_base) {
687  lhs_live_ins.insert(idx);
688  } else {
689  rhs_live_ins.insert(idx - rhs_idx_base);
690  }
691  }
692  auto rex_ins = collector.visit(join->getCondition());
693  for (const auto& rex_in : rex_ins) {
694  const auto in_idx = static_cast<size_t>(rex_in.getIndex());
695  if (rex_in.getSourceNode() == lhs) {
696  lhs_live_ins.insert(in_idx);
697  continue;
698  }
699  if (rex_in.getSourceNode() == rhs) {
700  rhs_live_ins.insert(in_idx);
701  continue;
702  }
703  CHECK(false);
704  }
705  return {lhs_live_ins, rhs_live_ins};
706  }
707  if (auto sort = dynamic_cast<const RelSort*>(node)) {
708  CHECK_EQ(size_t(1), sort->inputCount());
709  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
710  for (size_t i = 0; i < sort->collationCount(); ++i) {
711  live_in.insert(sort->getCollation(i).getField());
712  }
713  return {live_in};
714  }
715  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
716  CHECK_EQ(size_t(1), filter->inputCount());
717  std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
718  auto rex_ins = collector.visit(filter->getCondition());
719  for (const auto& rex_in : rex_ins) {
720  live_in.insert(static_cast<size_t>(rex_in.getIndex()));
721  }
722  return {live_in};
723  }
724  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
725  const auto input_count = table_func->size();
726  std::unordered_set<size_t> live_in;
727  for (size_t i = 0; i < input_count; i++) {
728  live_in.insert(i);
729  }
730 
731  std::vector<std::unordered_set<size_t>> result;
732  // Is the computed result correct in general?
733  for (size_t i = table_func->inputCount(); i > 0; i--) {
734  result.push_back(live_in);
735  }
736 
737  return result;
738  }
739  if (auto logical_union = dynamic_cast<const RelLogicalUnion*>(node)) {
740  return std::vector<std::unordered_set<size_t>>(logical_union->inputCount(), live_out);
741  }
742  return {};
743 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t pick_always_live_col_idx(const RelAlgNode *node)
std::string join(T const &container, std::string const &delim)
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unordered_set<const RelProject*> anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects ( const RelAlgNode root)

Definition at line 346 of file RelAlgOptimizer.cpp.

References CHECK, RelAlgNode::getInput(), join(), and RelAlgNode::toString().

Referenced by eliminate_identical_copy().

346  {
347  if (auto project = dynamic_cast<const RelProject*>(root)) {
348  return {project};
349  }
350 
351  if (dynamic_cast<const RelAggregate*>(root) || dynamic_cast<const RelScan*>(root) ||
352  dynamic_cast<const RelLogicalValues*>(root) ||
353  dynamic_cast<const RelModify*>(root)) {
354  return std::unordered_set<const RelProject*>{};
355  }
356 
357  if (auto join = dynamic_cast<const RelJoin*>(root)) {
358  auto lhs_projs = get_visible_projects(join->getInput(0));
359  auto rhs_projs = get_visible_projects(join->getInput(1));
360  lhs_projs.insert(rhs_projs.begin(), rhs_projs.end());
361  return lhs_projs;
362  }
363 
364  if (auto logical_union = dynamic_cast<const RelLogicalUnion*>(root)) {
365  auto projections = get_visible_projects(logical_union->getInput(0));
366  for (size_t i = 1; i < logical_union->inputCount(); ++i) {
367  auto next = get_visible_projects(logical_union->getInput(i));
368  projections.insert(next.begin(), next.end());
369  }
370  return projections;
371  }
372 
373  CHECK(dynamic_cast<const RelFilter*>(root) || dynamic_cast<const RelSort*>(root))
374  << "root = " << root->toString();
375  return get_visible_projects(root->getInput(0));
376 }
std::string join(T const &container, std::string const &delim)
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
const RelAlgNode * getInput(const size_t idx) const
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct ( const size_t  input_idx,
const RelAlgNode node 
)

Definition at line 379 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_EQ, CHECK_LT, RelAlgNode::getInput(), and RelAlgNode::inputCount().

Referenced by Parser::FunctionRef::analyze(), Parser::QuerySpec::analyze(), eliminate_identical_copy(), get_target_info(), Parser::QuerySpec::to_string(), and RelAlgTranslator::translateAggregateRex().

379  {
380  if (dynamic_cast<const RelFilter*>(node) || dynamic_cast<const RelSort*>(node)) {
381  CHECK_EQ(size_t(1), node->inputCount());
382  return is_distinct(input_idx, node->getInput(0));
383  }
384  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
385  CHECK_EQ(size_t(1), node->inputCount());
386  if (aggregate->getGroupByCount() == 1 && !input_idx) {
387  return true;
388  }
389  if (input_idx < aggregate->getGroupByCount()) {
390  return is_distinct(input_idx, node->getInput(0));
391  }
392  return false;
393  }
394  if (auto project = dynamic_cast<const RelProject*>(node)) {
395  CHECK_LT(input_idx, project->size());
396  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(input_idx))) {
397  CHECK_EQ(size_t(1), node->inputCount());
398  return is_distinct(input->getIndex(), project->getInput(0));
399  }
400  return false;
401  }
402  CHECK(dynamic_cast<const RelJoin*>(node) || dynamic_cast<const RelScan*>(node));
403  return false;
404 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const RelAlgNode * getInput(const size_t idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197
const size_t inputCount() const
bool is_distinct(const size_t input_idx, const RelAlgNode *node)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy ( const RelProject project,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &  du_web,
const std::unordered_set< const RelProject * > &  projects_to_remove,
std::unordered_set< const RelProject * > &  permutating_projects 
)

Definition at line 130 of file RelAlgOptimizer.cpp.

References CHECK, get_actual_source_size(), RelProject::getProjectAt(), safe_to_redirect(), and RelProject::size().

Referenced by eliminate_identical_copy().

135  {
136  auto source_size = get_actual_source_size(project, projects_to_remove);
137  if (project->size() > source_size) {
138  return false;
139  }
140 
141  if (project->size() < source_size) {
142  auto usrs_it = du_web.find(project);
143  CHECK(usrs_it != du_web.end());
144  bool guard_found = false;
145  while (usrs_it->second.size() == size_t(1)) {
146  auto only_usr = *usrs_it->second.begin();
147  if (dynamic_cast<const RelProject*>(only_usr)) {
148  guard_found = true;
149  break;
150  }
151  if (dynamic_cast<const RelAggregate*>(only_usr) ||
152  dynamic_cast<const RelSort*>(only_usr) ||
153  dynamic_cast<const RelJoin*>(only_usr) ||
154  dynamic_cast<const RelTableFunction*>(only_usr) ||
155  dynamic_cast<const RelLogicalUnion*>(only_usr)) {
156  return false;
157  }
158  CHECK(dynamic_cast<const RelFilter*>(only_usr))
159  << "only_usr: " << only_usr->toString();
160  usrs_it = du_web.find(only_usr);
161  CHECK(usrs_it != du_web.end());
162  }
163 
164  if (!guard_found) {
165  return false;
166  }
167  }
168 
169  bool identical = true;
170  for (size_t i = 0; i < project->size(); ++i) {
171  auto target = dynamic_cast<const RexInput*>(project->getProjectAt(i));
172  CHECK(target);
173  if (i != target->getIndex()) {
174  identical = false;
175  break;
176  }
177  }
178 
179  if (identical) {
180  return true;
181  }
182 
183  if (safe_to_redirect(project, du_web)) {
184  permutating_projects.insert(project);
185  return true;
186  }
187 
188  return false;
189 }
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject * > &projects_to_remove)
size_t size() const override
bool safe_to_redirect(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
const RexScalar * getProjectAt(const size_t idx) const
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unordered_map<const RelAlgNode*, std::unordered_set<size_t> > anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)

Definition at line 906 of file RelAlgOptimizer.cpp.

References CHECK, CHECK_EQ, get_live_ins(), and src.

Referenced by eliminate_dead_columns(), and sink_projected_boolean_expr_to_join().

907  {
908  std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> live_outs;
909  std::vector<const RelAlgNode*> work_set;
910  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
911  auto node = node_it->get();
912  if (dynamic_cast<const RelScan*>(node) || live_outs.count(node) ||
913  dynamic_cast<const RelModify*>(node) ||
914  dynamic_cast<const RelTableFunction*>(node)) {
915  continue;
916  }
917  std::vector<size_t> all_live(node->size());
918  std::iota(all_live.begin(), all_live.end(), size_t(0));
919  live_outs.insert(std::make_pair(
920  node, std::unordered_set<size_t>(all_live.begin(), all_live.end())));
921 
922  work_set.push_back(node);
923  while (!work_set.empty()) {
924  auto walker = work_set.back();
925  work_set.pop_back();
926  CHECK(!dynamic_cast<const RelScan*>(walker));
927  CHECK(live_outs.count(walker));
928  auto live_ins = get_live_ins(walker, live_outs);
929  CHECK_EQ(live_ins.size(), walker->inputCount());
930  for (size_t i = 0; i < walker->inputCount(); ++i) {
931  auto src = walker->getInput(i);
932  if (dynamic_cast<const RelScan*>(src) ||
933  dynamic_cast<const RelTableFunction*>(src) || live_ins[i].empty()) {
934  continue;
935  }
936  if (!live_outs.count(src)) {
937  live_outs.insert(std::make_pair(src, std::unordered_set<size_t>{}));
938  }
939  auto src_it = live_outs.find(src);
940  CHECK(src_it != live_outs.end());
941  auto& live_out = src_it->second;
942  bool changed = false;
943  if (!live_out.empty()) {
944  live_out.insert(live_ins[i].begin(), live_ins[i].end());
945  changed = true;
946  } else {
947  for (int idx : live_ins[i]) {
948  changed |= live_out.insert(idx).second;
949  }
950  }
951  if (changed) {
952  work_set.push_back(src);
953  }
954  }
955  }
956  }
957  return live_outs;
958 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
int64_t * src
std::vector< std::unordered_set< size_t > > get_live_ins(const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t anonymous_namespace{RelAlgOptimizer.cpp}::pick_always_live_col_idx ( const RelAlgNode node)

Definition at line 593 of file RelAlgOptimizer.cpp.

References CHECK, join(), and RelAlgNode::size().

Referenced by get_live_ins().

593  {
594  CHECK(node->size());
595  RexInputCollector collector(node);
596  if (auto filter = dynamic_cast<const RelFilter*>(node)) {
597  auto rex_ins = collector.visit(filter->getCondition());
598  if (!rex_ins.empty()) {
599  return static_cast<size_t>(rex_ins.begin()->getIndex());
600  }
601  return pick_always_live_col_idx(filter->getInput(0));
602  } else if (auto join = dynamic_cast<const RelJoin*>(node)) {
603  auto rex_ins = collector.visit(join->getCondition());
604  if (!rex_ins.empty()) {
605  return static_cast<size_t>(rex_ins.begin()->getIndex());
606  }
607  if (auto lhs_idx = pick_always_live_col_idx(join->getInput(0))) {
608  return lhs_idx;
609  }
610  if (auto rhs_idx = pick_always_live_col_idx(join->getInput(0))) {
611  return rhs_idx + join->getInput(0)->size();
612  }
613  } else if (auto sort = dynamic_cast<const RelSort*>(node)) {
614  if (sort->collationCount()) {
615  return sort->getCollation(0).getField();
616  }
617  return pick_always_live_col_idx(sort->getInput(0));
618  }
619  return size_t(0);
620 }
size_t pick_always_live_col_idx(const RelAlgNode *node)
std::string join(T const &container, std::string const &delim)
virtual size_t size() const =0
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering ( std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &  liveout_renumbering,
const std::vector< const RelAlgNode * > &  ready_nodes,
const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &  old_liveouts,
const std::unordered_set< const RelAlgNode * > &  intact_nodes,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &  du_web,
const std::unordered_map< const RelAlgNode *, size_t > &  orig_node_sizes 
)

Definition at line 1105 of file RelAlgOptimizer.cpp.

References add_new_indices_for(), CHECK, does_redef_cols(), logger::FATAL, anonymous_namespace{RelAlgOptimizer.cpp}::AvailabilityChecker::hasAllSrcReady(), join(), LOG, renumber_rex_aggs(), renumber_sort_field(), and RexVisitorBase< T >::visit().

Referenced by eliminate_dead_columns().

1113  {
1114  RexInputRenumberVisitor renumberer(liveout_renumbering);
1115  AvailabilityChecker checker(liveout_renumbering, intact_nodes);
1116  std::deque<const RelAlgNode*> work_set(ready_nodes.begin(), ready_nodes.end());
1117  while (!work_set.empty()) {
1118  auto walker = work_set.front();
1119  work_set.pop_front();
1120  CHECK(!dynamic_cast<const RelScan*>(walker));
1121  auto node = const_cast<RelAlgNode*>(walker);
1122  if (auto project = dynamic_cast<RelProject*>(node)) {
1123  auto old_exprs = project->getExpressionsAndRelease();
1124  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1125  for (auto& expr : old_exprs) {
1126  new_exprs.push_back(renumberer.visit(expr.get()));
1127  }
1128  project->setExpressions(new_exprs);
1129  } else if (auto aggregate = dynamic_cast<RelAggregate*>(node)) {
1130  auto src_it = liveout_renumbering.find(node->getInput(0));
1131  CHECK(src_it != liveout_renumbering.end());
1132  auto old_exprs = aggregate->getAggExprsAndRelease();
1133  auto new_exprs = renumber_rex_aggs(old_exprs, src_it->second);
1134  aggregate->setAggExprs(new_exprs);
1135  } else if (auto join = dynamic_cast<RelJoin*>(node)) {
1136  auto new_condition = renumberer.visit(join->getCondition());
1137  join->setCondition(new_condition);
1138  } else if (auto filter = dynamic_cast<RelFilter*>(node)) {
1139  auto new_condition = renumberer.visit(filter->getCondition());
1140  filter->setCondition(new_condition);
1141  } else if (auto sort = dynamic_cast<RelSort*>(node)) {
1142  auto src_it = liveout_renumbering.find(node->getInput(0));
1143  CHECK(src_it != liveout_renumbering.end());
1144  std::vector<SortField> new_collations;
1145  for (size_t i = 0; i < sort->collationCount(); ++i) {
1146  new_collations.push_back(
1147  renumber_sort_field(sort->getCollation(i), src_it->second));
1148  }
1149  sort->setCollation(std::move(new_collations));
1150  } else if (!dynamic_cast<RelLogicalUnion*>(node)) {
1151  LOG(FATAL) << "Unhandled node type: " << node->toString();
1152  }
1153 
1154  // Ignore empty live_out due to some invalid node
1155  if (does_redef_cols(node) || intact_nodes.count(node)) {
1156  continue;
1157  }
1158  auto live_pair = old_liveouts.find(node);
1159  CHECK(live_pair != old_liveouts.end());
1160  auto live_out = live_pair->second;
1162  node, liveout_renumbering, live_out, intact_nodes, orig_node_sizes);
1163  auto usrs_it = du_web.find(walker);
1164  CHECK(usrs_it != du_web.end());
1165  for (auto usr : usrs_it->second) {
1166  if (checker.hasAllSrcReady(usr)) {
1167  work_set.push_back(usr);
1168  }
1169  }
1170  }
1171 }
#define LOG(tag)
Definition: Logger.h:188
bool does_redef_cols(const RelAlgNode *node)
std::string join(T const &container, std::string const &delim)
std::vector< std::unique_ptr< const RexAgg > > renumber_rex_aggs(std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
SortField renumber_sort_field(const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
#define CHECK(condition)
Definition: Logger.h:197
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_rex_input_renumber ( const RelFilter excluded_root,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &  du_web 
)

Definition at line 191 of file RelAlgOptimizer.cpp.

References CHECK, and RelAlgNode::getInput().

Referenced by redirect_inputs_of().

194  {
195  CHECK(excluded_root);
196  auto src_project = dynamic_cast<const RelProject*>(excluded_root->getInput(0));
197  CHECK(src_project && src_project->isSimple());
198  const auto indirect_join_src = dynamic_cast<const RelJoin*>(src_project->getInput(0));
199  std::unordered_map<size_t, size_t> old_to_new_idx;
200  for (size_t i = 0; i < src_project->size(); ++i) {
201  auto rex_in = dynamic_cast<const RexInput*>(src_project->getProjectAt(i));
202  CHECK(rex_in);
203  size_t src_base = 0;
204  if (indirect_join_src != nullptr &&
205  indirect_join_src->getInput(1) == rex_in->getSourceNode()) {
206  src_base = indirect_join_src->getInput(0)->size();
207  }
208  old_to_new_idx.insert(std::make_pair(i, src_base + rex_in->getIndex()));
209  old_to_new_idx.insert(std::make_pair(i, rex_in->getIndex()));
210  }
211  CHECK(old_to_new_idx.size());
212  RexInputRenumber<false> renumber(old_to_new_idx);
213  auto usrs_it = du_web.find(excluded_root);
214  CHECK(usrs_it != du_web.end());
215  std::vector<const RelAlgNode*> work_set(usrs_it->second.begin(), usrs_it->second.end());
216  while (!work_set.empty()) {
217  auto node = work_set.back();
218  work_set.pop_back();
219  auto modified_node = const_cast<RelAlgNode*>(node);
220  if (auto filter = dynamic_cast<RelFilter*>(modified_node)) {
221  auto new_condition = renumber.visit(filter->getCondition());
222  filter->setCondition(new_condition);
223  auto usrs_it = du_web.find(filter);
224  CHECK(usrs_it != du_web.end() && usrs_it->second.size() == 1);
225  work_set.push_back(*usrs_it->second.begin());
226  continue;
227  }
228  if (auto project = dynamic_cast<RelProject*>(modified_node)) {
229  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
230  for (size_t i = 0; i < project->size(); ++i) {
231  new_exprs.push_back(renumber.visit(project->getProjectAt(i)));
232  }
233  project->setExpressions(new_exprs);
234  continue;
235  }
236  CHECK(false);
237  }
238 }
const RelAlgNode * getInput(const size_t idx) const
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of ( std::shared_ptr< RelAlgNode node,
const std::unordered_set< const RelProject * > &  projects,
const std::unordered_set< const RelProject * > &  permutating_projects,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &  du_web 
)

Definition at line 241 of file RelAlgOptimizer.cpp.

References CHECK, join(), propagate_rex_input_renumber(), RexVisitorBase< T >::visit(), and anonymous_namespace{RelAlgOptimizer.cpp}::RexRebindInputsVisitor::visitNode().

Referenced by eliminate_identical_copy().

246  {
247  if (dynamic_cast<RelLogicalUnion*>(node.get())) {
248  return; // UNION keeps all Projection inputs.
249  }
250  std::shared_ptr<const RelProject> src_project = nullptr;
251  for (size_t i = 0; i < node->inputCount(); ++i) {
252  if (auto project =
253  std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(i))) {
254  if (projects.count(project.get())) {
255  src_project = project;
256  break;
257  }
258  }
259  }
260  if (!src_project) {
261  return;
262  }
263  if (auto join = std::dynamic_pointer_cast<RelJoin>(node)) {
264  auto other_project =
265  src_project == node->getAndOwnInput(0)
266  ? std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(1))
267  : std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(0));
268  join->replaceInput(src_project, src_project->getAndOwnInput(0));
269  RexRebindInputsVisitor rebinder(src_project.get(), src_project->getInput(0));
270  auto usrs_it = du_web.find(join.get());
271  CHECK(usrs_it != du_web.end());
272  for (auto usr : usrs_it->second) {
273  rebinder.visitNode(usr);
274  }
275 
276  if (other_project && projects.count(other_project.get())) {
277  join->replaceInput(other_project, other_project->getAndOwnInput(0));
278  RexRebindInputsVisitor other_rebinder(other_project.get(),
279  other_project->getInput(0));
280  for (auto usr : usrs_it->second) {
281  other_rebinder.visitNode(usr);
282  }
283  }
284  return;
285  }
286  if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
287  project->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
288  RexProjectInputRedirector redirector(projects);
289  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
290  for (size_t i = 0; i < project->size(); ++i) {
291  new_exprs.push_back(redirector.visit(project->getProjectAt(i)));
292  }
293  project->setExpressions(new_exprs);
294  return;
295  }
296  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
297  const bool is_permutating_proj = permutating_projects.count(src_project.get());
298  if (is_permutating_proj || dynamic_cast<const RelJoin*>(src_project->getInput(0))) {
299  if (is_permutating_proj) {
300  propagate_rex_input_renumber(filter.get(), du_web);
301  }
302  filter->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
303  RexProjectInputRedirector redirector(projects);
304  auto new_condition = redirector.visit(filter->getCondition());
305  filter->setCondition(new_condition);
306  } else {
307  filter->replaceInput(src_project, src_project->getAndOwnInput(0));
308  }
309  return;
310  }
311  if (std::dynamic_pointer_cast<RelSort>(node)) {
312  auto const src_project_input = src_project->getInput(0);
313  if (dynamic_cast<const RelScan*>(src_project_input) ||
314  dynamic_cast<const RelLogicalValues*>(src_project_input) ||
315  dynamic_cast<const RelLogicalUnion*>(src_project_input)) {
316  return;
317  }
318  }
319  if (std::dynamic_pointer_cast<RelModify>(node)) {
320  return; // NOTE: Review this. Not sure about this.
321  }
322  CHECK(std::dynamic_pointer_cast<RelAggregate>(node) ||
323  std::dynamic_pointer_cast<RelSort>(node));
324  node->replaceInput(src_project, src_project->getAndOwnInput(0));
325 }
std::string join(T const &container, std::string const &delim)
#define CHECK(condition)
Definition: Logger.h:197
void propagate_rex_input_renumber(const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<std::unique_ptr<const RexAgg> > anonymous_namespace{RelAlgOptimizer.cpp}::renumber_rex_aggs ( std::vector< std::unique_ptr< const RexAgg >> &  agg_exprs,
const std::unordered_map< size_t, size_t > &  new_numbering 
)

Definition at line 872 of file RelAlgOptimizer.cpp.

Referenced by propagate_input_renumbering().

874  {
875  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
876  for (auto& expr : agg_exprs) {
877  if (expr->size() >= 1) {
878  auto old_idx = expr->getOperand(0);
879  auto idx_it = new_numbering.find(old_idx);
880  if (idx_it != new_numbering.end()) {
881  std::vector<size_t> operands;
882  operands.push_back(idx_it->second);
883  if (expr->size() == 2) {
884  operands.push_back(expr->getOperand(1));
885  }
886  new_exprs.push_back(boost::make_unique<RexAgg>(
887  expr->getKind(), expr->isDistinct(), expr->getType(), operands));
888  continue;
889  }
890  }
891  new_exprs.push_back(std::move(expr));
892  }
893  return new_exprs;
894 }

+ Here is the caller graph for this function:

SortField anonymous_namespace{RelAlgOptimizer.cpp}::renumber_sort_field ( const SortField old_field,
const std::unordered_map< size_t, size_t > &  new_numbering 
)

Definition at line 896 of file RelAlgOptimizer.cpp.

References SortField::getField(), SortField::getNullsPosition(), and SortField::getSortDir().

Referenced by propagate_input_renumbering().

897  {
898  auto field_idx = old_field.getField();
899  auto idx_it = new_numbering.find(field_idx);
900  if (idx_it != new_numbering.end()) {
901  field_idx = idx_it->second;
902  }
903  return SortField(field_idx, old_field.getSortDir(), old_field.getNullsPosition());
904 }
SortDirection getSortDir() const
size_t getField() const
NullSortedPosition getNullsPosition() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages ( std::shared_ptr< const RelAlgNode old_def_node,
std::shared_ptr< const RelAlgNode new_def_node,
std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &  deconst_mapping,
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &  du_web 
)

Definition at line 1433 of file RelAlgOptimizer.cpp.

References CHECK.

Referenced by fold_filters().

1438  {
1439  auto usrs_it = du_web.find(old_def_node.get());
1440  RexInputRedirector redirector(new_def_node.get(), old_def_node.get());
1441  CHECK(usrs_it != du_web.end());
1442  for (auto usr : usrs_it->second) {
1443  auto usr_it = deconst_mapping.find(usr);
1444  CHECK(usr_it != deconst_mapping.end());
1445  usr_it->second->replaceInput(old_def_node, new_def_node);
1446  }
1447  auto new_usrs_it = du_web.find(new_def_node.get());
1448  CHECK(new_usrs_it != du_web.end());
1449  new_usrs_it->second.insert(usrs_it->second.begin(), usrs_it->second.end());
1450  usrs_it->second.clear();
1451 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

bool anonymous_namespace{RelAlgOptimizer.cpp}::safe_to_redirect ( const RelProject project,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &  du_web 
)

Definition at line 113 of file RelAlgOptimizer.cpp.

References CHECK, and RelProject::isSimple().

Referenced by is_identical_copy().

116  {
117  if (!project->isSimple()) {
118  return false;
119  }
120  auto usrs_it = du_web.find(project);
121  CHECK(usrs_it != du_web.end());
122  for (auto usr : usrs_it->second) {
123  if (!dynamic_cast<const RelProject*>(usr) && !dynamic_cast<const RelFilter*>(usr)) {
124  return false;
125  }
126  }
127  return true;
128 }
bool isSimple() const
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair<std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t> >, std::vector<const RelAlgNode*> > anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns ( const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &  live_outs,
const std::vector< std::shared_ptr< RelAlgNode >> &  nodes,
const std::unordered_set< const RelAlgNode * > &  intact_nodes,
const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &  du_web,
const std::unordered_map< const RelAlgNode *, size_t > &  orig_node_sizes 
)

Definition at line 1042 of file RelAlgOptimizer.cpp.

References add_new_indices_for(), CHECK, does_redef_cols(), and anonymous_namespace{RelAlgOptimizer.cpp}::AvailabilityChecker::hasAllSrcReady().

Referenced by eliminate_dead_columns().

1048  {
1049  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1050  liveouts_renumbering;
1051  std::vector<const RelAlgNode*> ready_nodes;
1052  AvailabilityChecker checker(liveouts_renumbering, intact_nodes);
1053  for (auto node : nodes) {
1054  // Ignore empty live_out due to some invalid node
1055  if (!does_redef_cols(node.get()) || intact_nodes.count(node.get())) {
1056  continue;
1057  }
1058  auto live_pair = live_outs.find(node.get());
1059  CHECK(live_pair != live_outs.end());
1060  auto old_live_outs = live_pair->second;
1062  node.get(), liveouts_renumbering, old_live_outs, intact_nodes, orig_node_sizes);
1063  if (auto aggregate = std::dynamic_pointer_cast<RelAggregate>(node)) {
1064  auto old_exprs = aggregate->getAggExprsAndRelease();
1065  std::vector<std::unique_ptr<const RexAgg>> new_exprs;
1066  auto key_name_it = aggregate->getFields().begin();
1067  std::vector<std::string> new_fields(key_name_it,
1068  key_name_it + aggregate->getGroupByCount());
1069  for (size_t i = aggregate->getGroupByCount(), j = 0;
1070  i < aggregate->getFields().size() && j < old_exprs.size();
1071  ++i, ++j) {
1072  if (old_live_outs.count(i)) {
1073  new_exprs.push_back(std::move(old_exprs[j]));
1074  new_fields.push_back(aggregate->getFieldName(i));
1075  }
1076  }
1077  aggregate->setAggExprs(new_exprs);
1078  aggregate->setFields(new_fields);
1079  } else if (auto project = std::dynamic_pointer_cast<RelProject>(node)) {
1080  auto old_exprs = project->getExpressionsAndRelease();
1081  std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1082  std::vector<std::string> new_fields;
1083  for (size_t i = 0; i < old_exprs.size(); ++i) {
1084  if (old_live_outs.count(i)) {
1085  new_exprs.push_back(std::move(old_exprs[i]));
1086  new_fields.push_back(project->getFieldName(i));
1087  }
1088  }
1089  project->setExpressions(new_exprs);
1090  project->setFields(new_fields);
1091  } else {
1092  CHECK(false);
1093  }
1094  auto usrs_it = du_web.find(node.get());
1095  CHECK(usrs_it != du_web.end());
1096  for (auto usr : usrs_it->second) {
1097  if (checker.hasAllSrcReady(usr)) {
1098  ready_nodes.push_back(usr);
1099  }
1100  }
1101  }
1102  return {liveouts_renumbering, ready_nodes};
1103 }
bool does_redef_cols(const RelAlgNode *node)
#define CHECK(condition)
Definition: Logger.h:197
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes,
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &  liveouts,
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &  du_web 
)

Definition at line 983 of file RelAlgOptimizer.cpp.

References any_dead_col_in(), CHECK, does_redef_cols(), get_field_name(), RelAlgNode::replaceInput(), and anonymous_namespace{RelAlgOptimizer.cpp}::RexRebindInputsVisitor::visitNode().

Referenced by eliminate_dead_columns().

987  {
988  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
989  for (auto node : nodes) {
990  new_nodes.push_back(node);
991  if (!std::dynamic_pointer_cast<RelFilter>(node)) {
992  continue;
993  }
994  const auto filter = node.get();
995  auto liveout_it = liveouts.find(filter);
996  CHECK(liveout_it != liveouts.end());
997  auto& outs = liveout_it->second;
998  if (!any_dead_col_in(filter, outs)) {
999  continue;
1000  }
1001  auto usrs_it = du_web.find(filter);
1002  CHECK(usrs_it != du_web.end());
1003  auto& usrs = usrs_it->second;
1004  if (usrs.size() != 1 || does_redef_cols(*usrs.begin())) {
1005  continue;
1006  }
1007  auto only_usr = const_cast<RelAlgNode*>(*usrs.begin());
1008 
1009  std::vector<std::unique_ptr<const RexScalar>> exprs;
1010  std::vector<std::string> fields;
1011  for (size_t i = 0; i < filter->size(); ++i) {
1012  exprs.push_back(boost::make_unique<RexInput>(filter, i));
1013  fields.push_back(get_field_name(filter, i));
1014  }
1015  auto project_owner = std::make_shared<RelProject>(exprs, fields, node);
1016  auto project = project_owner.get();
1017 
1018  only_usr->replaceInput(node, project_owner);
1019  if (dynamic_cast<const RelJoin*>(only_usr)) {
1020  RexRebindInputsVisitor visitor(filter, project);
1021  for (auto usr : du_web[only_usr]) {
1022  visitor.visitNode(usr);
1023  }
1024  }
1025 
1026  liveouts.insert(std::make_pair(project, outs));
1027 
1028  usrs.clear();
1029  usrs.insert(project);
1030  du_web.insert(
1031  std::make_pair(project, std::unordered_set<const RelAlgNode*>{only_usr}));
1032 
1033  new_nodes.push_back(project_owner);
1034  }
1035  if (new_nodes.size() > nodes.size()) {
1036  nodes.swap(new_nodes);
1037  }
1038 }
bool does_redef_cols(const RelAlgNode *node)
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
#define CHECK(condition)
Definition: Logger.h:197
std::string get_field_name(const RelAlgNode *node, size_t index)

+ Here is the call graph for this function:

+ Here is the caller graph for this function: