OmniSciDB  baf940c279
RelAlgOptimizer.h File Reference
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <vector>
+ Include dependency graph for RelAlgOptimizer.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_identical_copy (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_subqueries (std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
 
void fold_filters (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void hoist_filter_cond_to_cross_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void simplify_sort (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void sink_projected_boolean_expr_to_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 

Function Documentation

◆ build_du_web()

std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*> > build_du_web ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 408 of file RelAlgOptimizer.cpp.

References CHECK, and src.

Referenced by eliminate_dead_columns(), eliminate_identical_copy(), fold_filters(), hoist_filter_cond_to_cross_join(), and sink_projected_boolean_expr_to_join().

409  {
410  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
411  std::unordered_set<const RelAlgNode*> visited;
412  std::vector<const RelAlgNode*> work_set;
413  for (auto node : nodes) {
414  if (std::dynamic_pointer_cast<RelScan>(node) ||
415  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
416  continue;
417  }
418  work_set.push_back(node.get());
419  while (!work_set.empty()) {
420  auto walker = work_set.back();
421  work_set.pop_back();
422  if (visited.count(walker)) {
423  continue;
424  }
425  CHECK(!web.count(walker));
426  auto it_ok =
427  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
428  CHECK(it_ok.second);
429  visited.insert(walker);
430  CHECK(dynamic_cast<const RelJoin*>(walker) ||
431  dynamic_cast<const RelProject*>(walker) ||
432  dynamic_cast<const RelAggregate*>(walker) ||
433  dynamic_cast<const RelFilter*>(walker) ||
434  dynamic_cast<const RelSort*>(walker) ||
435  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
436  dynamic_cast<const RelLogicalValues*>(walker) ||
437  dynamic_cast<const RelTableFunction*>(walker) ||
438  dynamic_cast<const RelLogicalUnion*>(walker));
439  for (size_t i = 0; i < walker->inputCount(); ++i) {
440  auto src = walker->getInput(i);
441  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
442  continue;
443  }
444  if (web.empty() || !web.count(src)) {
445  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
446  }
447  web[src].insert(walker);
448  work_set.push_back(src);
449  }
450  }
451  }
452  return web;
453 }
int64_t * src
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ eliminate_dead_columns()

void eliminate_dead_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1175 of file RelAlgOptimizer.cpp.

References anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in(), build_du_web(), CHECK, anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols(), LOG, anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering(), anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj(), and logger::WARNING.

Referenced by RelAlgDagBuilder::build().

1175  {
1176  if (nodes.empty()) {
1177  return;
1178  }
1179  auto root = nodes.back().get();
1180  if (!root) {
1181  return;
1182  }
1183  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1184  // Mark
1185  auto old_liveouts = mark_live_columns(nodes);
1186  std::unordered_set<const RelAlgNode*> intact_nodes;
1187  bool has_dead_cols = false;
1188  for (auto live_pair : old_liveouts) {
1189  auto node = live_pair.first;
1190  const auto& outs = live_pair.second;
1191  if (outs.empty()) {
1192  LOG(WARNING) << "RA node with no used column: " << node->toString();
1193  // Ignore empty live_out due to some invalid node
1194  intact_nodes.insert(node);
1195  }
1196  if (any_dead_col_in(node, outs)) {
1197  has_dead_cols = true;
1198  } else {
1199  intact_nodes.insert(node);
1200  }
1201  }
1202  if (!has_dead_cols) {
1203  return;
1204  }
1205  auto web = build_du_web(nodes);
1206  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1207 
1208  for (auto node : nodes) {
1209  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1210  continue;
1211  }
1212  bool intact = true;
1213  for (size_t i = 0; i < node->inputCount(); ++i) {
1214  auto source = node->getInput(i);
1215  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1216  intact = false;
1217  break;
1218  }
1219  }
1220  if (intact) {
1221  intact_nodes.insert(node.get());
1222  }
1223  }
1224 
1225  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1226  for (auto node : nodes) {
1227  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1228  }
1229  // Sweep
1230  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1231  liveout_renumbering;
1232  std::vector<const RelAlgNode*> ready_nodes;
1233  std::tie(liveout_renumbering, ready_nodes) =
1234  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1235  // Propagate
1237  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1238 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define LOG(tag)
Definition: Logger.h:188
bool does_redef_cols(const RelAlgNode *node)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode *> &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode *> &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ eliminate_dead_subqueries()

void eliminate_dead_subqueries ( std::vector< std::shared_ptr< RexSubQuery >> &  subqueries,
RelAlgNode const *  root 
)

Definition at line 1240 of file RelAlgOptimizer.cpp.

References RexSubQueryIdCollector::getLiveRexSubQueryIds(), and VLOG.

Referenced by RelAlgDagBuilder::build().

1241  {
1242  if (!subqueries.empty()) {
1243  auto live_ids = RexSubQueryIdCollector::getLiveRexSubQueryIds(root);
1244  auto sort_live_ids_first = [&live_ids](auto& a, auto& b) {
1245  return live_ids.count(a->getId()) && !live_ids.count(b->getId());
1246  };
1247  std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1248  size_t n_dead_subqueries;
1249  if (live_ids.count(subqueries.front()->getId())) {
1250  auto first_dead_itr = std::upper_bound(subqueries.cbegin(),
1251  subqueries.cend(),
1252  subqueries.front(),
1253  sort_live_ids_first);
1254  n_dead_subqueries = subqueries.cend() - first_dead_itr;
1255  } else {
1256  n_dead_subqueries = subqueries.size();
1257  }
1258  if (n_dead_subqueries) {
1259  VLOG(1) << "Eliminating " << n_dead_subqueries
1260  << (n_dead_subqueries == 1 ? " subquery." : " subqueries.");
1261  subqueries.resize(subqueries.size() - n_dead_subqueries);
1262  subqueries.shrink_to_fit();
1263  }
1264  }
1265 }
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
#define VLOG(n)
Definition: Logger.h:291
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ eliminate_identical_copy()

void eliminate_identical_copy ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 486 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects(), anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct(), anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy(), project_separates_sort(), and anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of().

Referenced by RelAlgDagBuilder::build().

486  {
487  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
488  auto sink = nodes.back();
489  for (auto node : nodes) {
490  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
491  if (!aggregate || aggregate == sink ||
492  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
493  continue;
494  }
495  auto project =
496  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
497  if (project && project->size() == aggregate->size() &&
498  project->getFields() == aggregate->getFields()) {
499  CHECK_EQ(size_t(0), copies.count(aggregate));
500  copies.insert(aggregate);
501  }
502  }
503  for (auto node : nodes) {
504  if (!node->inputCount()) {
505  continue;
506  }
507  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
508  if (!copies.count(last_source)) {
509  continue;
510  }
511  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
512  CHECK(aggregate);
513  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
514  continue;
515  }
516  auto project =
517  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
518  CHECK(project);
519  CHECK_EQ(size_t(1), project->size());
520  if (!is_distinct(size_t(0), project.get())) {
521  continue;
522  }
523  auto new_source = project->getAndOwnInput(0);
524  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
525  std::dynamic_pointer_cast<const RelScan>(new_source)) {
526  node->replaceInput(last_source, new_source);
527  }
528  }
529  decltype(copies)().swap(copies);
530 
531  auto web = build_du_web(nodes);
532 
533  std::unordered_set<const RelProject*> projects;
534  std::unordered_set<const RelProject*> permutating_projects;
535  auto const visible_projs = get_visible_projects(nodes.back().get());
536  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
537  auto node = *node_it;
538  auto project = std::dynamic_pointer_cast<RelProject>(node);
539  auto next_node_it = std::next(node_it);
540  if (project && project->isSimple() &&
541  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
542  is_identical_copy(project.get(), web, projects, permutating_projects) &&
544  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
545  projects.insert(project.get());
546  }
547  }
548 
549  for (auto node : nodes) {
550  redirect_inputs_of(node, projects, permutating_projects, web);
551  }
552 
553  cleanup_dead_nodes(nodes);
554 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:197
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject *> &projects, const std::unordered_set< const RelProject *> &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web, const std::unordered_set< const RelProject *> &projects_to_remove, std::unordered_set< const RelProject *> &permutating_projects)
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ fold_filters()

void fold_filters ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1455 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), logger::INFO, kAND, kBOOLEAN, LOG, and anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages().

Referenced by RelAlgDagBuilder::build().

1455  {
1456  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1457  for (auto node : nodes) {
1458  deconst_mapping.insert(std::make_pair(node.get(), node));
1459  }
1460 
1461  auto web = build_du_web(nodes);
1462  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1463  auto& node = *node_it;
1464  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1465  CHECK_EQ(filter->inputCount(), size_t(1));
1466  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1467  if (!src_filter) {
1468  continue;
1469  }
1470  auto siblings_it = web.find(src_filter);
1471  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1472  continue;
1473  }
1474  auto src_it = deconst_mapping.find(src_filter);
1475  CHECK(src_it != deconst_mapping.end());
1476  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1477  CHECK(folded_filter);
1478  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1479  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1480  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1481  << " folded into "
1482  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1483  << std::endl;
1484  std::vector<std::unique_ptr<const RexScalar>> operands;
1485  operands.emplace_back(folded_filter->getAndReleaseCondition());
1486  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1487  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1488  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1489  operands.push_back(redirector.visit(rex_operator));
1490  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1491  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1492  const bool notnull = old_condition->getType().get_notnull() &&
1493  other_condition->getType().get_notnull();
1494  auto new_condition = std::unique_ptr<const RexScalar>(
1495  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1496  folded_filter->setCondition(new_condition);
1497  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1498  deconst_mapping.erase(filter.get());
1499  web.erase(filter.get());
1500  web[filter->getInput(0)].erase(filter.get());
1501  node.reset();
1502  }
1503  }
1504  }
1505 
1506  if (!nodes.empty()) {
1507  auto sink = nodes.back();
1508  for (auto node_it = std::next(nodes.rend()); !sink && node_it != nodes.rbegin();
1509  ++node_it) {
1510  sink = *node_it;
1511  }
1512  CHECK(sink);
1513  cleanup_dead_nodes(nodes);
1514  }
1515 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define LOG(tag)
Definition: Logger.h:188
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: sqldefs.h:37
#define CHECK(condition)
Definition: Logger.h:197
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode *>> &du_web)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ hoist_filter_cond_to_cross_join()

void hoist_filter_cond_to_cross_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1615 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, find_hoistable_conditions(), RelFilter::getCondition(), RelAlgNode::getInput(), INNER, join(), kAND, kBOOLEAN, RelFilter::setCondition(), and RexVisitorBase< T >::visit().

Referenced by RelAlgDagBuilder::build().

1616  {
1617  std::unordered_set<const RelAlgNode*> visited;
1618  auto web = build_du_web(nodes);
1619  for (auto node : nodes) {
1620  if (visited.count(node.get())) {
1621  continue;
1622  }
1623  visited.insert(node.get());
1624  auto join = dynamic_cast<RelJoin*>(node.get());
1625  if (join && join->getJoinType() == JoinType::INNER) {
1626  // Only allow cross join for now.
1627  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1628  // Assume Calcite always generates an inner join on constant boolean true for
1629  // cross join.
1630  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1631  size_t first_col_idx = 0;
1632  const RelFilter* filter = nullptr;
1633  std::vector<const RelJoin*> join_seq{join};
1634  for (const RelJoin* curr_join = join; !filter;) {
1635  auto usrs_it = web.find(curr_join);
1636  CHECK(usrs_it != web.end());
1637  if (usrs_it->second.size() != size_t(1)) {
1638  break;
1639  }
1640  auto only_usr = *usrs_it->second.begin();
1641  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1642  if (join == usr_join->getInput(1)) {
1643  const auto src1_offset = usr_join->getInput(0)->size();
1644  first_col_idx += src1_offset;
1645  }
1646  join_seq.push_back(usr_join);
1647  curr_join = usr_join;
1648  continue;
1649  }
1650 
1651  filter = dynamic_cast<const RelFilter*>(only_usr);
1652  break;
1653  }
1654  if (!filter) {
1655  visited.insert(join_seq.begin(), join_seq.end());
1656  continue;
1657  }
1658  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1659  CHECK(src_join);
1660  auto modified_filter = const_cast<RelFilter*>(filter);
1661 
1662  if (src_join == join) {
1663  std::unique_ptr<const RexScalar> filter_condition(
1664  modified_filter->getAndReleaseCondition());
1665  std::unique_ptr<const RexScalar> true_condition =
1666  boost::make_unique<RexLiteral>(true,
1667  kBOOLEAN,
1668  kBOOLEAN,
1669  unsigned(-2147483648),
1670  1,
1671  unsigned(-2147483648),
1672  1);
1673  modified_filter->setCondition(true_condition);
1674  join->setCondition(filter_condition);
1675  continue;
1676  }
1677  const auto src1_base = src_join->getInput(0)->size();
1678  auto source =
1679  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1680  first_col_idx =
1681  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1682  auto join_conditions =
1684  source,
1685  first_col_idx,
1686  first_col_idx + join->size() - 1);
1687  if (join_conditions.empty()) {
1688  continue;
1689  }
1690 
1691  JoinTargetRebaser rebaser(join, first_col_idx);
1692  if (join_conditions.size() == 1) {
1693  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1694  join->setCondition(new_join_condition);
1695  } else {
1696  std::vector<std::unique_ptr<const RexScalar>> operands;
1697  bool notnull = true;
1698  for (size_t i = 0; i < join_conditions.size(); ++i) {
1699  operands.emplace_back(rebaser.visit(join_conditions[i]));
1700  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1701  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1702  notnull = notnull && old_subcond->getType().get_notnull();
1703  }
1704  auto new_join_condition = std::unique_ptr<const RexScalar>(
1705  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1706  join->setCondition(new_join_condition);
1707  }
1708 
1709  SubConditionRemover remover(join_conditions);
1710  auto new_filter_condition = remover.visit(filter->getCondition());
1711  modified_filter->setCondition(new_filter_condition);
1712  }
1713  }
1714  }
1715 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
std::string join(T const &container, std::string const &delim)
Definition: sqldefs.h:37
const RexScalar * getCondition() const
#define CHECK(condition)
Definition: Logger.h:197
const RelAlgNode * getInput(const size_t idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ simplify_sort()

void simplify_sort ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1720 of file RelAlgOptimizer.cpp.

References RelAlgNode::replaceInput().

Referenced by RelAlgDagBuilder::build().

1720  {
1721  if (nodes.size() < 3) {
1722  return;
1723  }
1724  for (size_t i = 0; i <= nodes.size() - 3;) {
1725  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1726  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1727  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1728  if (first_sort && second_sort && project && project->isIdentity() &&
1729  *first_sort == *second_sort) {
1730  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1731  first_sort->getAndOwnInput(0));
1732  nodes[i].reset();
1733  nodes[i + 1].reset();
1734  i += 3;
1735  } else {
1736  ++i;
1737  }
1738  }
1739 
1740  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1741  for (auto node : nodes) {
1742  if (!node) {
1743  continue;
1744  }
1745  new_nodes.push_back(node);
1746  }
1747  nodes.swap(new_nodes);
1748 }
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ sink_projected_boolean_expr_to_join()

void sink_projected_boolean_expr_to_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1307 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, join(), kBOOLEAN, and anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns().

Referenced by RelAlgDagBuilder::build().

1308  {
1309  auto web = build_du_web(nodes);
1310  auto liveouts = mark_live_columns(nodes);
1311  for (auto node : nodes) {
1312  auto project = std::dynamic_pointer_cast<RelProject>(node);
1313  // TODO(miyu): relax RelScan limitation
1314  if (!project || project->isSimple() ||
1315  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1316  continue;
1317  }
1318  auto usrs_it = web.find(project.get());
1319  CHECK(usrs_it != web.end());
1320  auto& usrs = usrs_it->second;
1321  if (usrs.size() != 1) {
1322  continue;
1323  }
1324  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1325  if (!join) {
1326  continue;
1327  }
1328  auto outs_it = liveouts.find(join);
1329  CHECK(outs_it != liveouts.end());
1330  std::unordered_map<size_t, size_t> in_to_out_index;
1331  std::unordered_set<size_t> boolean_expr_indicies;
1332  bool discarded = false;
1333  for (size_t i = 0; i < project->size(); ++i) {
1334  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1335  if (oper && oper->getType().get_type() == kBOOLEAN) {
1336  boolean_expr_indicies.insert(i);
1337  } else {
1338  // TODO(miyu): relax?
1339  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1340  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1341  } else {
1342  discarded = true;
1343  }
1344  }
1345  }
1346  if (discarded || boolean_expr_indicies.empty()) {
1347  continue;
1348  }
1349  const size_t index_base =
1350  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1351  for (auto i : boolean_expr_indicies) {
1352  auto join_idx = index_base + i;
1353  if (outs_it->second.count(join_idx)) {
1354  discarded = true;
1355  break;
1356  }
1357  }
1358  if (discarded) {
1359  continue;
1360  }
1361  RexInputCollector collector(project.get());
1362  std::vector<size_t> unloaded_input_indices;
1363  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1364  // Given all are dead right after join, safe to sink
1365  for (auto i : boolean_expr_indicies) {
1366  auto rex_ins = collector.visit(project->getProjectAt(i));
1367  for (auto& in : rex_ins) {
1368  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1369  if (!in_to_out_index.count(in.getIndex())) {
1370  auto curr_out_index = project->size() + unloaded_input_indices.size();
1371  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1372  unloaded_input_indices.push_back(in.getIndex());
1373  }
1374  RexInputSinker sinker(in_to_out_index, project.get());
1375  in_idx_to_new_subcond.insert(
1376  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1377  }
1378  }
1379  if (in_idx_to_new_subcond.empty()) {
1380  continue;
1381  }
1382  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1383  for (size_t i = 0; i < project->size(); ++i) {
1384  if (boolean_expr_indicies.count(i)) {
1385  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1386  } else {
1387  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1388  CHECK(rex_input != nullptr);
1389  new_projections.push_back(rex_input->deepCopy());
1390  }
1391  }
1392  for (auto i : unloaded_input_indices) {
1393  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1394  }
1395  project->setExpressions(new_projections);
1396 
1397  SubConditionReplacer replacer(in_idx_to_new_subcond);
1398  auto new_condition = replacer.visit(join->getCondition());
1399  join->setCondition(new_condition);
1400  }
1401 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string join(T const &container, std::string const &delim)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function: