OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgOptimizer.h File Reference
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <vector>
+ Include dependency graph for RelAlgOptimizer.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

std::unordered_map< const
RelAlgNode
*, std::unordered_set< const
RelAlgNode * > > 
build_du_web (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_identical_copy (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_subqueries (std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
 
void fold_filters (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void hoist_filter_cond_to_cross_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void simplify_sort (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void sink_projected_boolean_expr_to_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 

Function Documentation

std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*> > build_du_web ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 411 of file RelAlgOptimizer.cpp.

References CHECK, and i.

Referenced by eliminate_dead_columns(), eliminate_identical_copy(), fold_filters(), hoist_filter_cond_to_cross_join(), and sink_projected_boolean_expr_to_join().

412  {
413  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
414  std::unordered_set<const RelAlgNode*> visited;
415  std::vector<const RelAlgNode*> work_set;
416  for (auto node : nodes) {
417  if (std::dynamic_pointer_cast<RelScan>(node) ||
418  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
419  continue;
420  }
421  work_set.push_back(node.get());
422  while (!work_set.empty()) {
423  auto walker = work_set.back();
424  work_set.pop_back();
425  if (visited.count(walker)) {
426  continue;
427  }
428  CHECK(!web.count(walker));
429  auto it_ok =
430  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
431  CHECK(it_ok.second);
432  visited.insert(walker);
433  CHECK(dynamic_cast<const RelJoin*>(walker) ||
434  dynamic_cast<const RelProject*>(walker) ||
435  dynamic_cast<const RelAggregate*>(walker) ||
436  dynamic_cast<const RelFilter*>(walker) ||
437  dynamic_cast<const RelSort*>(walker) ||
438  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
439  dynamic_cast<const RelLogicalValues*>(walker) ||
440  dynamic_cast<const RelTableFunction*>(walker) ||
441  dynamic_cast<const RelLogicalUnion*>(walker));
442  for (size_t i = 0; i < walker->inputCount(); ++i) {
443  auto src = walker->getInput(i);
444  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
445  continue;
446  }
447  if (web.empty() || !web.count(src)) {
448  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
449  }
450  web[src].insert(walker);
451  work_set.push_back(src);
452  }
453  }
454  }
455  return web;
456 }
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the caller graph for this function:

void eliminate_dead_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1179 of file RelAlgOptimizer.cpp.

References anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in(), build_du_web(), CHECK, anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols(), i, LOG, anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering(), setup::root, anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj(), and logger::WARNING.

Referenced by RelAlgDagBuilder::build().

1179  {
1180  if (nodes.empty()) {
1181  return;
1182  }
1183  auto root = nodes.back().get();
1184  if (!root) {
1185  return;
1186  }
1187  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1188  // Mark
1189  auto old_liveouts = mark_live_columns(nodes);
1190  std::unordered_set<const RelAlgNode*> intact_nodes;
1191  bool has_dead_cols = false;
1192  for (auto live_pair : old_liveouts) {
1193  auto node = live_pair.first;
1194  const auto& outs = live_pair.second;
1195  if (outs.empty()) {
1196  LOG(WARNING) << "RA node with no used column: " << node->toString();
1197  // Ignore empty live_out due to some invalid node
1198  intact_nodes.insert(node);
1199  }
1200  if (any_dead_col_in(node, outs)) {
1201  has_dead_cols = true;
1202  } else {
1203  intact_nodes.insert(node);
1204  }
1205  }
1206  if (!has_dead_cols) {
1207  return;
1208  }
1209  auto web = build_du_web(nodes);
1210  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1211 
1212  for (auto node : nodes) {
1213  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1214  continue;
1215  }
1216  bool intact = true;
1217  for (size_t i = 0; i < node->inputCount(); ++i) {
1218  auto source = node->getInput(i);
1219  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1220  intact = false;
1221  break;
1222  }
1223  }
1224  if (intact) {
1225  intact_nodes.insert(node.get());
1226  }
1227  }
1228 
1229  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1230  for (auto node : nodes) {
1231  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1232  }
1233  // Sweep
1234  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1235  liveout_renumbering;
1236  std::vector<const RelAlgNode*> ready_nodes;
1237  std::tie(liveout_renumbering, ready_nodes) =
1238  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1239  // Propagate
1241  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1242 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define LOG(tag)
Definition: Logger.h:194
bool does_redef_cols(const RelAlgNode *node)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
tuple root
Definition: setup.in.py:14
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
#define CHECK(condition)
Definition: Logger.h:203
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_dead_subqueries ( std::vector< std::shared_ptr< RexSubQuery >> &  subqueries,
RelAlgNode const root 
)

Definition at line 1244 of file RelAlgOptimizer.cpp.

References RexSubQueryIdCollector::getLiveRexSubQueryIds(), gpu_enabled::upper_bound(), and VLOG.

Referenced by RelAlgDagBuilder::build().

1245  {
1246  if (!subqueries.empty()) {
1248  auto sort_live_ids_first = [&live_ids](auto& a, auto& b) {
1249  return live_ids.count(a->getId()) && !live_ids.count(b->getId());
1250  };
1251  std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1252  size_t n_dead_subqueries;
1253  if (live_ids.count(subqueries.front()->getId())) {
1254  auto first_dead_itr = std::upper_bound(subqueries.cbegin(),
1255  subqueries.cend(),
1256  subqueries.front(),
1257  sort_live_ids_first);
1258  n_dead_subqueries = subqueries.cend() - first_dead_itr;
1259  } else {
1260  n_dead_subqueries = subqueries.size();
1261  }
1262  if (n_dead_subqueries) {
1263  VLOG(1) << "Eliminating " << n_dead_subqueries
1264  << (n_dead_subqueries == 1 ? " subquery." : " subqueries.");
1265  subqueries.resize(subqueries.size() - n_dead_subqueries);
1266  subqueries.shrink_to_fit();
1267  }
1268  }
1269 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
tuple root
Definition: setup.in.py:14
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
#define VLOG(n)
Definition: Logger.h:297

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_identical_copy ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 489 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects(), anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct(), anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy(), project_separates_sort(), anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of(), and gpu_enabled::swap().

Referenced by RelAlgDagBuilder::build().

489  {
490  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
491  auto sink = nodes.back();
492  for (auto node : nodes) {
493  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
494  if (!aggregate || aggregate == sink ||
495  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
496  continue;
497  }
498  auto project =
499  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
500  if (project && project->size() == aggregate->size() &&
501  project->getFields() == aggregate->getFields()) {
502  CHECK_EQ(size_t(0), copies.count(aggregate));
503  copies.insert(aggregate);
504  }
505  }
506  for (auto node : nodes) {
507  if (!node->inputCount()) {
508  continue;
509  }
510  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
511  if (!copies.count(last_source)) {
512  continue;
513  }
514  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
515  CHECK(aggregate);
516  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
517  continue;
518  }
519  auto project =
520  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
521  CHECK(project);
522  CHECK_EQ(size_t(1), project->size());
523  if (!is_distinct(size_t(0), project.get())) {
524  continue;
525  }
526  auto new_source = project->getAndOwnInput(0);
527  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
528  std::dynamic_pointer_cast<const RelScan>(new_source)) {
529  node->replaceInput(last_source, new_source);
530  }
531  }
532  decltype(copies)().swap(copies);
533 
534  auto web = build_du_web(nodes);
535 
536  std::unordered_set<const RelProject*> projects;
537  std::unordered_set<const RelProject*> permutating_projects;
538  auto const visible_projs = get_visible_projects(nodes.back().get());
539  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
540  auto node = *node_it;
541  auto project = std::dynamic_pointer_cast<RelProject>(node);
542  auto next_node_it = std::next(node_it);
543  if (project && project->isSimple() &&
544  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
545  is_identical_copy(project.get(), web, projects, permutating_projects) &&
547  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
548  projects.insert(project.get());
549  }
550  }
551 
552  for (auto node : nodes) {
553  redirect_inputs_of(node, projects, permutating_projects, web);
554  }
555 
556  cleanup_dead_nodes(nodes);
557 }
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:211
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:203
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fold_filters ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1459 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), logger::INFO, kAND, kBOOLEAN, LOG, and anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages().

Referenced by RelAlgDagBuilder::build().

1459  {
1460  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1461  for (auto node : nodes) {
1462  deconst_mapping.insert(std::make_pair(node.get(), node));
1463  }
1464 
1465  auto web = build_du_web(nodes);
1466  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1467  auto& node = *node_it;
1468  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1469  CHECK_EQ(filter->inputCount(), size_t(1));
1470  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1471  if (!src_filter) {
1472  continue;
1473  }
1474  auto siblings_it = web.find(src_filter);
1475  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1476  continue;
1477  }
1478  auto src_it = deconst_mapping.find(src_filter);
1479  CHECK(src_it != deconst_mapping.end());
1480  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1481  CHECK(folded_filter);
1482  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1483  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1484  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1485  << " folded into "
1486  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1487  << std::endl;
1488  std::vector<std::unique_ptr<const RexScalar>> operands;
1489  operands.emplace_back(folded_filter->getAndReleaseCondition());
1490  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1491  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1492  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1493  operands.push_back(redirector.visit(rex_operator));
1494  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1495  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1496  const bool notnull = old_condition->getType().get_notnull() &&
1497  other_condition->getType().get_notnull();
1498  auto new_condition = std::unique_ptr<const RexScalar>(
1499  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1500  folded_filter->setCondition(new_condition);
1501  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1502  deconst_mapping.erase(filter.get());
1503  web.erase(filter.get());
1504  web[filter->getInput(0)].erase(filter.get());
1505  node.reset();
1506  }
1507  }
1508  }
1509 
1510  if (!nodes.empty()) {
1511  auto sink = nodes.back();
1512  for (auto node_it = std::next(nodes.rbegin()); node_it != nodes.rend(); ++node_it) {
1513  if (sink) {
1514  break;
1515  }
1516  sink = *node_it;
1517  }
1518  CHECK(sink);
1519  cleanup_dead_nodes(nodes);
1520  }
1521 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:211
#define LOG(tag)
Definition: Logger.h:194
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: sqldefs.h:37
#define CHECK(condition)
Definition: Logger.h:203
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void hoist_filter_cond_to_cross_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1621 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, find_hoistable_conditions(), RelFilter::getCondition(), RelAlgNode::getInput(), i, INNER, join(), kAND, kBOOLEAN, RelFilter::setCondition(), and RexVisitorBase< T >::visit().

Referenced by RelAlgDagBuilder::build().

1622  {
1623  std::unordered_set<const RelAlgNode*> visited;
1624  auto web = build_du_web(nodes);
1625  for (auto node : nodes) {
1626  if (visited.count(node.get())) {
1627  continue;
1628  }
1629  visited.insert(node.get());
1630  auto join = dynamic_cast<RelJoin*>(node.get());
1631  if (join && join->getJoinType() == JoinType::INNER) {
1632  // Only allow cross join for now.
1633  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1634  // Assume Calcite always generates an inner join on constant boolean true for
1635  // cross join.
1636  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1637  size_t first_col_idx = 0;
1638  const RelFilter* filter = nullptr;
1639  std::vector<const RelJoin*> join_seq{join};
1640  for (const RelJoin* curr_join = join; !filter;) {
1641  auto usrs_it = web.find(curr_join);
1642  CHECK(usrs_it != web.end());
1643  if (usrs_it->second.size() != size_t(1)) {
1644  break;
1645  }
1646  auto only_usr = *usrs_it->second.begin();
1647  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1648  if (join == usr_join->getInput(1)) {
1649  const auto src1_offset = usr_join->getInput(0)->size();
1650  first_col_idx += src1_offset;
1651  }
1652  join_seq.push_back(usr_join);
1653  curr_join = usr_join;
1654  continue;
1655  }
1656 
1657  filter = dynamic_cast<const RelFilter*>(only_usr);
1658  break;
1659  }
1660  if (!filter) {
1661  visited.insert(join_seq.begin(), join_seq.end());
1662  continue;
1663  }
1664  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1665  CHECK(src_join);
1666  auto modified_filter = const_cast<RelFilter*>(filter);
1667 
1668  if (src_join == join) {
1669  std::unique_ptr<const RexScalar> filter_condition(
1670  modified_filter->getAndReleaseCondition());
1671  std::unique_ptr<const RexScalar> true_condition =
1672  boost::make_unique<RexLiteral>(true,
1673  kBOOLEAN,
1674  kBOOLEAN,
1675  unsigned(-2147483648),
1676  1,
1677  unsigned(-2147483648),
1678  1);
1679  modified_filter->setCondition(true_condition);
1680  join->setCondition(filter_condition);
1681  continue;
1682  }
1683  const auto src1_base = src_join->getInput(0)->size();
1684  auto source =
1685  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1686  first_col_idx =
1687  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1688  auto join_conditions =
1690  source,
1691  first_col_idx,
1692  first_col_idx + join->size() - 1);
1693  if (join_conditions.empty()) {
1694  continue;
1695  }
1696 
1697  JoinTargetRebaser rebaser(join, first_col_idx);
1698  if (join_conditions.size() == 1) {
1699  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1700  join->setCondition(new_join_condition);
1701  } else {
1702  std::vector<std::unique_ptr<const RexScalar>> operands;
1703  bool notnull = true;
1704  for (size_t i = 0; i < join_conditions.size(); ++i) {
1705  operands.emplace_back(rebaser.visit(join_conditions[i]));
1706  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1707  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1708  notnull = notnull && old_subcond->getType().get_notnull();
1709  }
1710  auto new_join_condition = std::unique_ptr<const RexScalar>(
1711  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1712  join->setCondition(new_join_condition);
1713  }
1714 
1715  SubConditionRemover remover(join_conditions);
1716  auto new_filter_condition = remover.visit(filter->getCondition());
1717  modified_filter->setCondition(new_filter_condition);
1718  }
1719  }
1720  }
1721 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
const RexScalar * getCondition() const
std::string join(T const &container, std::string const &delim)
const RelAlgNode * getInput(const size_t idx) const
Definition: sqldefs.h:37
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void simplify_sort ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1726 of file RelAlgOptimizer.cpp.

References i, and RelAlgNode::replaceInput().

Referenced by RelAlgDagBuilder::build().

1726  {
1727  if (nodes.size() < 3) {
1728  return;
1729  }
1730  for (size_t i = 0; i <= nodes.size() - 3;) {
1731  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1732  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1733  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1734  if (first_sort && second_sort && project && project->isIdentity() &&
1735  *first_sort == *second_sort) {
1736  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1737  first_sort->getAndOwnInput(0));
1738  nodes[i].reset();
1739  nodes[i + 1].reset();
1740  i += 3;
1741  } else {
1742  ++i;
1743  }
1744  }
1745 
1746  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1747  for (auto node : nodes) {
1748  if (!node) {
1749  continue;
1750  }
1751  new_nodes.push_back(node);
1752  }
1753  nodes.swap(new_nodes);
1754 }
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void sink_projected_boolean_expr_to_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1311 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, i, join(), kBOOLEAN, and anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns().

Referenced by RelAlgDagBuilder::build().

1312  {
1313  auto web = build_du_web(nodes);
1314  auto liveouts = mark_live_columns(nodes);
1315  for (auto node : nodes) {
1316  auto project = std::dynamic_pointer_cast<RelProject>(node);
1317  // TODO(miyu): relax RelScan limitation
1318  if (!project || project->isSimple() ||
1319  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1320  continue;
1321  }
1322  auto usrs_it = web.find(project.get());
1323  CHECK(usrs_it != web.end());
1324  auto& usrs = usrs_it->second;
1325  if (usrs.size() != 1) {
1326  continue;
1327  }
1328  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1329  if (!join) {
1330  continue;
1331  }
1332  auto outs_it = liveouts.find(join);
1333  CHECK(outs_it != liveouts.end());
1334  std::unordered_map<size_t, size_t> in_to_out_index;
1335  std::unordered_set<size_t> boolean_expr_indicies;
1336  bool discarded = false;
1337  for (size_t i = 0; i < project->size(); ++i) {
1338  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1339  if (oper && oper->getType().get_type() == kBOOLEAN) {
1340  boolean_expr_indicies.insert(i);
1341  } else {
1342  // TODO(miyu): relax?
1343  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1344  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1345  } else {
1346  discarded = true;
1347  }
1348  }
1349  }
1350  if (discarded || boolean_expr_indicies.empty()) {
1351  continue;
1352  }
1353  const size_t index_base =
1354  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1355  for (auto i : boolean_expr_indicies) {
1356  auto join_idx = index_base + i;
1357  if (outs_it->second.count(join_idx)) {
1358  discarded = true;
1359  break;
1360  }
1361  }
1362  if (discarded) {
1363  continue;
1364  }
1365  RexInputCollector collector(project.get());
1366  std::vector<size_t> unloaded_input_indices;
1367  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1368  // Given all are dead right after join, safe to sink
1369  for (auto i : boolean_expr_indicies) {
1370  auto rex_ins = collector.visit(project->getProjectAt(i));
1371  for (auto& in : rex_ins) {
1372  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1373  if (!in_to_out_index.count(in.getIndex())) {
1374  auto curr_out_index = project->size() + unloaded_input_indices.size();
1375  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1376  unloaded_input_indices.push_back(in.getIndex());
1377  }
1378  RexInputSinker sinker(in_to_out_index, project.get());
1379  in_idx_to_new_subcond.insert(
1380  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1381  }
1382  }
1383  if (in_idx_to_new_subcond.empty()) {
1384  continue;
1385  }
1386  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1387  for (size_t i = 0; i < project->size(); ++i) {
1388  if (boolean_expr_indicies.count(i)) {
1389  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1390  } else {
1391  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1392  CHECK(rex_input != nullptr);
1393  new_projections.push_back(rex_input->deepCopy());
1394  }
1395  }
1396  for (auto i : unloaded_input_indices) {
1397  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1398  }
1399  project->setExpressions(new_projections);
1400 
1401  SubConditionReplacer replacer(in_idx_to_new_subcond);
1402  auto new_condition = replacer.visit(join->getCondition());
1403  join->setCondition(new_condition);
1404  }
1405 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:211
std::string join(T const &container, std::string const &delim)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function: