OmniSciDB  04ee39c94c
RelAlgOptimizer.h File Reference
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <vector>
+ Include dependency graph for RelAlgOptimizer.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_identical_copy (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void fold_filters (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void hoist_filter_cond_to_cross_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void simplify_sort (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void sink_projected_boolean_expr_to_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 

Function Documentation

◆ build_du_web()

std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*> > build_du_web ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 390 of file RelAlgOptimizer.cpp.

References CHECK, join(), and src.

Referenced by eliminate_dead_columns(), eliminate_identical_copy(), fold_filters(), hoist_filter_cond_to_cross_join(), and sink_projected_boolean_expr_to_join().

391  {
392  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
393  std::unordered_set<const RelAlgNode*> visited;
394  std::vector<const RelAlgNode*> work_set;
395  for (auto node : nodes) {
396  if (std::dynamic_pointer_cast<RelScan>(node) ||
397  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
398  continue;
399  }
400  work_set.push_back(node.get());
401  while (!work_set.empty()) {
402  auto walker = work_set.back();
403  work_set.pop_back();
404  if (visited.count(walker)) {
405  continue;
406  }
407  CHECK(!web.count(walker));
408  auto it_ok =
409  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
410  CHECK(it_ok.second);
411  visited.insert(walker);
412  const auto join = dynamic_cast<const RelJoin*>(walker);
413  const auto project = dynamic_cast<const RelProject*>(walker);
414  const auto aggregate = dynamic_cast<const RelAggregate*>(walker);
415  const auto filter = dynamic_cast<const RelFilter*>(walker);
416  const auto sort = dynamic_cast<const RelSort*>(walker);
417  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(walker);
418  const auto logical_values = dynamic_cast<const RelLogicalValues*>(walker);
419  CHECK(join || project || aggregate || filter || sort || left_deep_join ||
420  logical_values);
421  for (size_t i = 0; i < walker->inputCount(); ++i) {
422  auto src = walker->getInput(i);
423  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
424  continue;
425  }
426  if (web.empty() || !web.count(src)) {
427  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
428  }
429  web[src].insert(walker);
430  work_set.push_back(src);
431  }
432  }
433  }
434  return web;
435 }
std::string join(T const &container, std::string const &delim)
int64_t * src
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ eliminate_dead_columns()

void eliminate_dead_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1116 of file RelAlgOptimizer.cpp.

References anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in(), build_du_web(), CHECK, anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols(), LOG, anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering(), anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj(), and logger::WARNING.

Referenced by anonymous_namespace{RelAlgAbstractInterpreter.cpp}::RelAlgAbstractInterpreter::run().

1116  {
1117  if (nodes.empty()) {
1118  return;
1119  }
1120  auto root = nodes.back().get();
1121  if (!root) {
1122  return;
1123  }
1124  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1125  // Mark
1126  auto old_liveouts = mark_live_columns(nodes);
1127  std::unordered_set<const RelAlgNode*> intact_nodes;
1128  bool has_dead_cols = false;
1129  for (auto live_pair : old_liveouts) {
1130  auto node = live_pair.first;
1131  const auto& outs = live_pair.second;
1132  if (outs.empty()) {
1133  LOG(WARNING) << "RA node with no used column: " << node->toString();
1134  // Ignore empty live_out due to some invalid node
1135  intact_nodes.insert(node);
1136  }
1137  if (any_dead_col_in(node, outs)) {
1138  has_dead_cols = true;
1139  } else {
1140  intact_nodes.insert(node);
1141  }
1142  }
1143  if (!has_dead_cols) {
1144  return;
1145  }
1146  // Patch
1147  std::unordered_map<const RelAlgNode*, RelAlgNode*> deconst_mapping;
1148  for (auto node : nodes) {
1149  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
1150  }
1151  auto web = build_du_web(nodes);
1152  try_insert_coalesceable_proj(nodes, old_liveouts, web, deconst_mapping);
1153 
1154  for (auto node : nodes) {
1155  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1156  continue;
1157  }
1158  bool intact = true;
1159  for (size_t i = 0; i < node->inputCount(); ++i) {
1160  auto source = node->getInput(i);
1161  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1162  intact = false;
1163  break;
1164  }
1165  }
1166  if (intact) {
1167  intact_nodes.insert(node.get());
1168  }
1169  }
1170 
1171  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1172  for (auto node : nodes) {
1173  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1174  }
1175  // Sweep
1176  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1177  liveout_renumbering;
1178  std::vector<const RelAlgNode*> ready_nodes;
1179  std::tie(liveout_renumbering, ready_nodes) =
1180  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1181  // Propagate
1182  propagate_input_renumbering(liveout_renumbering,
1183  ready_nodes,
1184  old_liveouts,
1185  deconst_mapping,
1186  intact_nodes,
1187  web,
1188  orig_node_sizes);
1189 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define LOG(tag)
Definition: Logger.h:182
bool does_redef_cols(const RelAlgNode *node)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode *> &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ eliminate_identical_copy()

void eliminate_identical_copy ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 440 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects(), anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct(), anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy(), and anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of().

Referenced by anonymous_namespace{RelAlgAbstractInterpreter.cpp}::RelAlgAbstractInterpreter::run().

440  {
441  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
442  auto sink = nodes.back();
443  for (auto node : nodes) {
444  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
445  if (!aggregate || aggregate == sink ||
446  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
447  continue;
448  }
449  auto project =
450  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
451  if (project && project->size() == aggregate->size() &&
452  project->getFields() == aggregate->getFields()) {
453  CHECK_EQ(size_t(0), copies.count(aggregate));
454  copies.insert(aggregate);
455  }
456  }
457  for (auto node : nodes) {
458  if (!node->inputCount()) {
459  continue;
460  }
461  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
462  if (!copies.count(last_source)) {
463  continue;
464  }
465  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
466  CHECK(aggregate);
467  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
468  continue;
469  }
470  auto project =
471  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
472  CHECK(project);
473  CHECK_EQ(size_t(1), project->size());
474  if (!is_distinct(size_t(0), project.get())) {
475  continue;
476  }
477  auto new_source = project->getAndOwnInput(0);
478  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
479  std::dynamic_pointer_cast<const RelScan>(new_source)) {
480  node->replaceInput(last_source, new_source);
481  }
482  }
483  decltype(copies)().swap(copies);
484 
485  auto web = build_du_web(nodes);
486 
487  std::unordered_map<const RelAlgNode*, RelAlgNode*> deconst_mapping;
488  for (auto node : nodes) {
489  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
490  }
491 
492  std::unordered_set<const RelProject*> projects;
493  std::unordered_set<const RelProject*> permutating_projects;
494  auto visible_projs = get_visible_projects(nodes.back().get());
495  for (auto node : nodes) {
496  auto project = std::dynamic_pointer_cast<RelProject>(node);
497  if (project && project->isSimple() &&
498  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
499  is_identical_copy(project.get(), web, projects, permutating_projects)) {
500  projects.insert(project.get());
501  }
502  }
503 
504  for (auto node : nodes) {
505  redirect_inputs_of(node, projects, permutating_projects, web, deconst_mapping);
506  }
507 
508  cleanup_dead_nodes(nodes);
509 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:195
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject *> &projects, const std::unordered_set< const RelProject *> &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, RelAlgNode *> &deconst_mapping)
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:187
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_set< const RelProject *> &projects_to_remove, std::unordered_set< const RelProject *> &permutating_projects)
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ fold_filters()

void fold_filters ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1383 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), logger::INFO, kAND, kBOOLEAN, LOG, and anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages().

Referenced by anonymous_namespace{RelAlgAbstractInterpreter.cpp}::RelAlgAbstractInterpreter::run().

1383  {
1384  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1385  for (auto node : nodes) {
1386  deconst_mapping.insert(std::make_pair(node.get(), node));
1387  }
1388 
1389  auto web = build_du_web(nodes);
1390  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1391  auto& node = *node_it;
1392  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1393  CHECK_EQ(filter->inputCount(), size_t(1));
1394  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1395  if (!src_filter) {
1396  continue;
1397  }
1398  auto siblings_it = web.find(src_filter);
1399  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1400  continue;
1401  }
1402  auto src_it = deconst_mapping.find(src_filter);
1403  CHECK(src_it != deconst_mapping.end());
1404  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1405  CHECK(folded_filter);
1406  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1407  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1408  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1409  << " folded into "
1410  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1411  << std::endl;
1412  std::vector<std::unique_ptr<const RexScalar>> operands;
1413  operands.emplace_back(folded_filter->getAndReleaseCondition());
1414  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1415  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1416  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1417  operands.push_back(redirector.visit(rex_operator));
1418  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1419  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1420  const bool notnull = old_condition->getType().get_notnull() &&
1421  other_condition->getType().get_notnull();
1422  auto new_condition = std::unique_ptr<const RexScalar>(
1423  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1424  folded_filter->setCondition(new_condition);
1425  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1426  deconst_mapping.erase(filter.get());
1427  web.erase(filter.get());
1428  web[filter->getInput(0)].erase(filter.get());
1429  node.reset();
1430  }
1431  }
1432  }
1433 
1434  if (!nodes.empty()) {
1435  auto sink = nodes.back();
1436  for (auto node_it = std::next(nodes.rend()); !sink && node_it != nodes.rbegin();
1437  ++node_it) {
1438  sink = *node_it;
1439  }
1440  CHECK(sink);
1441  cleanup_dead_nodes(nodes);
1442  }
1443 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:195
#define LOG(tag)
Definition: Logger.h:182
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: sqldefs.h:37
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:823
#define CHECK(condition)
Definition: Logger.h:187
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ hoist_filter_cond_to_cross_join()

void hoist_filter_cond_to_cross_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1543 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, find_hoistable_conditions(), RelFilter::getCondition(), RelAlgNode::getInput(), INNER, join(), kAND, kBOOLEAN, and RexVisitorBase< T >::visit().

Referenced by anonymous_namespace{RelAlgAbstractInterpreter.cpp}::RelAlgAbstractInterpreter::run().

1544  {
1545  std::unordered_map<const RelAlgNode*, RelAlgNode*> deconst_mapping;
1546  for (auto node : nodes) {
1547  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
1548  }
1549  std::unordered_set<const RelAlgNode*> visited;
1550  auto web = build_du_web(nodes);
1551  for (auto node : nodes) {
1552  if (visited.count(node.get())) {
1553  continue;
1554  }
1555  visited.insert(node.get());
1556  auto join = dynamic_cast<RelJoin*>(node.get());
1557  if (join && join->getJoinType() == JoinType::INNER) {
1558  // Only allow cross join for now.
1559  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1560  // Assume Calcite always generates an inner join on constant boolean true for
1561  // cross join.
1562  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1563  size_t first_col_idx = 0;
1564  const RelFilter* filter = nullptr;
1565  std::vector<const RelJoin*> join_seq{join};
1566  for (const RelJoin* curr_join = join; !filter;) {
1567  auto usrs_it = web.find(curr_join);
1568  CHECK(usrs_it != web.end());
1569  if (usrs_it->second.size() != size_t(1)) {
1570  break;
1571  }
1572  auto only_usr = *usrs_it->second.begin();
1573  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1574  if (join == usr_join->getInput(1)) {
1575  const auto src1_offset = usr_join->getInput(0)->size();
1576  first_col_idx += src1_offset;
1577  }
1578  join_seq.push_back(usr_join);
1579  curr_join = usr_join;
1580  continue;
1581  }
1582 
1583  filter = dynamic_cast<const RelFilter*>(only_usr);
1584  break;
1585  }
1586  if (!filter) {
1587  visited.insert(join_seq.begin(), join_seq.end());
1588  continue;
1589  }
1590  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1591  CHECK(src_join);
1592  auto filter_it = deconst_mapping.find(filter);
1593  CHECK(filter_it != deconst_mapping.end());
1594  auto modified_filter = dynamic_cast<RelFilter*>(filter_it->second);
1595  CHECK(modified_filter);
1596 
1597  if (src_join == join) {
1598  std::unique_ptr<const RexScalar> filter_condition(
1599  modified_filter->getAndReleaseCondition());
1600  std::unique_ptr<const RexScalar> true_condition =
1601  boost::make_unique<RexLiteral>(true,
1602  kBOOLEAN,
1603  kBOOLEAN,
1604  unsigned(-2147483648),
1605  1,
1606  unsigned(-2147483648),
1607  1);
1608  modified_filter->setCondition(true_condition);
1609  join->setCondition(filter_condition);
1610  continue;
1611  }
1612  const auto src1_base = src_join->getInput(0)->size();
1613  auto source =
1614  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1615  first_col_idx =
1616  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1617  auto join_conditions =
1619  source,
1620  first_col_idx,
1621  first_col_idx + join->size() - 1);
1622  if (join_conditions.empty()) {
1623  continue;
1624  }
1625 
1626  JoinTargetRebaser rebaser(join, first_col_idx);
1627  if (join_conditions.size() == 1) {
1628  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1629  join->setCondition(new_join_condition);
1630  } else {
1631  std::vector<std::unique_ptr<const RexScalar>> operands;
1632  bool notnull = true;
1633  for (size_t i = 0; i < join_conditions.size(); ++i) {
1634  operands.emplace_back(rebaser.visit(join_conditions[i]));
1635  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1636  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1637  notnull = notnull && old_subcond->getType().get_notnull();
1638  }
1639  auto new_join_condition = std::unique_ptr<const RexScalar>(
1640  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1641  join->setCondition(new_join_condition);
1642  }
1643 
1644  SubConditionRemover remover(join_conditions);
1645  auto new_filter_condition = remover.visit(filter->getCondition());
1646  modified_filter->setCondition(new_filter_condition);
1647  }
1648  }
1649  }
1650 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::string join(T const &container, std::string const &delim)
Definition: sqldefs.h:37
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:823
const RexScalar * getCondition() const
#define CHECK(condition)
Definition: Logger.h:187
const RelAlgNode * getInput(const size_t idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ simplify_sort()

void simplify_sort ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1655 of file RelAlgOptimizer.cpp.

References RelAlgNode::replaceInput().

Referenced by anonymous_namespace{RelAlgAbstractInterpreter.cpp}::RelAlgAbstractInterpreter::run().

1655  {
1656  if (nodes.size() < 3) {
1657  return;
1658  }
1659  for (size_t i = 0; i <= nodes.size() - 3;) {
1660  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1661  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1662  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1663  if (first_sort && second_sort && project && project->isIdentity() &&
1664  *first_sort == *second_sort) {
1665  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1666  first_sort->getAndOwnInput(0));
1667  nodes[i].reset();
1668  nodes[i + 1].reset();
1669  i += 3;
1670  } else {
1671  ++i;
1672  }
1673  }
1674 
1675  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1676  for (auto node : nodes) {
1677  if (!node) {
1678  continue;
1679  }
1680  new_nodes.push_back(node);
1681  }
1682  nodes.swap(new_nodes);
1683 }
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ sink_projected_boolean_expr_to_join()

void sink_projected_boolean_expr_to_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1231 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, join(), kBOOLEAN, and anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns().

Referenced by anonymous_namespace{RelAlgAbstractInterpreter.cpp}::RelAlgAbstractInterpreter::run().

1232  {
1233  auto web = build_du_web(nodes);
1234  std::unordered_map<const RelAlgNode*, RelAlgNode*> deconst_mapping;
1235  for (auto node : nodes) {
1236  deconst_mapping.insert(std::make_pair(node.get(), node.get()));
1237  }
1238  auto liveouts = mark_live_columns(nodes);
1239  for (auto node : nodes) {
1240  auto project = std::dynamic_pointer_cast<RelProject>(node);
1241  // TODO(miyu): relax RelScan limitation
1242  if (!project || project->isSimple() ||
1243  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1244  continue;
1245  }
1246  auto usrs_it = web.find(project.get());
1247  CHECK(usrs_it != web.end());
1248  auto& usrs = usrs_it->second;
1249  if (usrs.size() != 1) {
1250  continue;
1251  }
1252  auto join = dynamic_cast<RelJoin*>(deconst_mapping[*usrs.begin()]);
1253  if (!join) {
1254  continue;
1255  }
1256  auto outs_it = liveouts.find(join);
1257  CHECK(outs_it != liveouts.end());
1258  std::unordered_map<size_t, size_t> in_to_out_index;
1259  std::unordered_set<size_t> boolean_expr_indicies;
1260  bool discarded = false;
1261  for (size_t i = 0; i < project->size(); ++i) {
1262  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1263  if (oper && oper->getType().get_type() == kBOOLEAN) {
1264  boolean_expr_indicies.insert(i);
1265  } else {
1266  // TODO(miyu): relax?
1267  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1268  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1269  } else {
1270  discarded = true;
1271  }
1272  }
1273  }
1274  if (discarded || boolean_expr_indicies.empty()) {
1275  continue;
1276  }
1277  const size_t index_base =
1278  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1279  for (auto i : boolean_expr_indicies) {
1280  auto join_idx = index_base + i;
1281  if (outs_it->second.count(join_idx)) {
1282  discarded = true;
1283  break;
1284  }
1285  }
1286  if (discarded) {
1287  continue;
1288  }
1289  RexInputCollector collector(project.get());
1290  std::vector<size_t> unloaded_input_indices;
1291  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1292  // Given all are dead right after join, safe to sink
1293  for (auto i : boolean_expr_indicies) {
1294  auto rex_ins = collector.visit(project->getProjectAt(i));
1295  for (auto& in : rex_ins) {
1296  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1297  if (!in_to_out_index.count(in.getIndex())) {
1298  auto curr_out_index = project->size() + unloaded_input_indices.size();
1299  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1300  unloaded_input_indices.push_back(in.getIndex());
1301  }
1302  RexInputSinker sinker(in_to_out_index, project.get());
1303  in_idx_to_new_subcond.insert(
1304  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1305  }
1306  }
1307  if (in_idx_to_new_subcond.empty()) {
1308  continue;
1309  }
1310  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1311  for (size_t i = 0; i < project->size(); ++i) {
1312  if (boolean_expr_indicies.count(i)) {
1313  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1314  } else {
1315  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1316  CHECK(rex_input != nullptr);
1317  new_projections.push_back(rex_input->deepCopy());
1318  }
1319  }
1320  for (auto i : unloaded_input_indices) {
1321  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1322  }
1323  project->setExpressions(new_projections);
1324 
1325  SubConditionReplacer replacer(in_idx_to_new_subcond);
1326  auto new_condition = replacer.visit(join->getCondition());
1327  join->setCondition(new_condition);
1328  }
1329 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:195
std::string join(T const &container, std::string const &delim)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function: