OmniSciDB  d2f719934e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgOptimizer.h File Reference
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <vector>
+ Include dependency graph for RelAlgOptimizer.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

std::unordered_map< const
RelAlgNode
*, std::unordered_set< const
RelAlgNode * > > 
build_du_web (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_identical_copy (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_subqueries (std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
 
void fold_filters (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void hoist_filter_cond_to_cross_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void simplify_sort (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void sink_projected_boolean_expr_to_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 

Function Documentation

std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*> > build_du_web ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 420 of file RelAlgOptimizer.cpp.

References CHECK, and i.

Referenced by eliminate_dead_columns(), eliminate_identical_copy(), fold_filters(), hoist_filter_cond_to_cross_join(), and sink_projected_boolean_expr_to_join().

421  {
422  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
423  std::unordered_set<const RelAlgNode*> visited;
424  std::vector<const RelAlgNode*> work_set;
425  for (auto node : nodes) {
426  if (std::dynamic_pointer_cast<RelScan>(node) ||
427  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
428  continue;
429  }
430  work_set.push_back(node.get());
431  while (!work_set.empty()) {
432  auto walker = work_set.back();
433  work_set.pop_back();
434  if (visited.count(walker)) {
435  continue;
436  }
437  CHECK(!web.count(walker));
438  auto it_ok =
439  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
440  CHECK(it_ok.second);
441  visited.insert(walker);
442  CHECK(dynamic_cast<const RelJoin*>(walker) ||
443  dynamic_cast<const RelProject*>(walker) ||
444  dynamic_cast<const RelAggregate*>(walker) ||
445  dynamic_cast<const RelFilter*>(walker) ||
446  dynamic_cast<const RelSort*>(walker) ||
447  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
448  dynamic_cast<const RelLogicalValues*>(walker) ||
449  dynamic_cast<const RelTableFunction*>(walker) ||
450  dynamic_cast<const RelLogicalUnion*>(walker));
451  for (size_t i = 0; i < walker->inputCount(); ++i) {
452  auto src = walker->getInput(i);
453  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
454  continue;
455  }
456  if (web.empty() || !web.count(src)) {
457  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
458  }
459  web[src].insert(walker);
460  work_set.push_back(src);
461  }
462  }
463  }
464  return web;
465 }
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the caller graph for this function:

void eliminate_dead_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1188 of file RelAlgOptimizer.cpp.

References anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in(), build_du_web(), CHECK, anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols(), i, LOG, anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering(), setup::root, anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj(), and logger::WARNING.

Referenced by RelAlgDagBuilder::build().

1188  {
1189  if (nodes.empty()) {
1190  return;
1191  }
1192  auto root = nodes.back().get();
1193  if (!root) {
1194  return;
1195  }
1196  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1197  // Mark
1198  auto old_liveouts = mark_live_columns(nodes);
1199  std::unordered_set<const RelAlgNode*> intact_nodes;
1200  bool has_dead_cols = false;
1201  for (auto live_pair : old_liveouts) {
1202  auto node = live_pair.first;
1203  const auto& outs = live_pair.second;
1204  if (outs.empty()) {
1205  LOG(WARNING) << "RA node with no used column: " << node->toString();
1206  // Ignore empty live_out due to some invalid node
1207  intact_nodes.insert(node);
1208  }
1209  if (any_dead_col_in(node, outs)) {
1210  has_dead_cols = true;
1211  } else {
1212  intact_nodes.insert(node);
1213  }
1214  }
1215  if (!has_dead_cols) {
1216  return;
1217  }
1218  auto web = build_du_web(nodes);
1219  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1220 
1221  for (auto node : nodes) {
1222  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1223  continue;
1224  }
1225  bool intact = true;
1226  for (size_t i = 0; i < node->inputCount(); ++i) {
1227  auto source = node->getInput(i);
1228  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1229  intact = false;
1230  break;
1231  }
1232  }
1233  if (intact) {
1234  intact_nodes.insert(node.get());
1235  }
1236  }
1237 
1238  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1239  for (auto node : nodes) {
1240  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1241  }
1242  // Sweep
1243  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1244  liveout_renumbering;
1245  std::vector<const RelAlgNode*> ready_nodes;
1246  std::tie(liveout_renumbering, ready_nodes) =
1247  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1248  // Propagate
1250  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1251 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define LOG(tag)
Definition: Logger.h:205
bool does_redef_cols(const RelAlgNode *node)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
tuple root
Definition: setup.in.py:14
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
#define CHECK(condition)
Definition: Logger.h:211
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_dead_subqueries ( std::vector< std::shared_ptr< RexSubQuery >> &  subqueries,
RelAlgNode const root 
)

Definition at line 1253 of file RelAlgOptimizer.cpp.

References anonymous_namespace{Utm.h}::a, RexSubQueryIdCollector::getLiveRexSubQueryIds(), gpu_enabled::upper_bound(), and VLOG.

Referenced by RelAlgDagBuilder::build().

1254  {
1255  if (!subqueries.empty()) {
1257  auto sort_live_ids_first = [&live_ids](auto& a, auto& b) {
1258  return live_ids.count(a->getId()) && !live_ids.count(b->getId());
1259  };
1260  std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1261  size_t n_dead_subqueries;
1262  if (live_ids.count(subqueries.front()->getId())) {
1263  auto first_dead_itr = std::upper_bound(subqueries.cbegin(),
1264  subqueries.cend(),
1265  subqueries.front(),
1266  sort_live_ids_first);
1267  n_dead_subqueries = subqueries.cend() - first_dead_itr;
1268  } else {
1269  n_dead_subqueries = subqueries.size();
1270  }
1271  if (n_dead_subqueries) {
1272  VLOG(1) << "Eliminating " << n_dead_subqueries
1273  << (n_dead_subqueries == 1 ? " subquery." : " subqueries.");
1274  subqueries.resize(subqueries.size() - n_dead_subqueries);
1275  subqueries.shrink_to_fit();
1276  }
1277  }
1278 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
tuple root
Definition: setup.in.py:14
constexpr double a
Definition: Utm.h:33
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
#define VLOG(n)
Definition: Logger.h:305

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_identical_copy ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 498 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects(), anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct(), anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy(), project_separates_sort(), anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of(), and gpu_enabled::swap().

Referenced by RelAlgDagBuilder::build().

498  {
499  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
500  auto sink = nodes.back();
501  for (auto node : nodes) {
502  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
503  if (!aggregate || aggregate == sink ||
504  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
505  continue;
506  }
507  auto project =
508  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
509  if (project && project->size() == aggregate->size() &&
510  project->getFields() == aggregate->getFields()) {
511  CHECK_EQ(size_t(0), copies.count(aggregate));
512  copies.insert(aggregate);
513  }
514  }
515  for (auto node : nodes) {
516  if (!node->inputCount()) {
517  continue;
518  }
519  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
520  if (!copies.count(last_source)) {
521  continue;
522  }
523  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
524  CHECK(aggregate);
525  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
526  continue;
527  }
528  auto project =
529  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
530  CHECK(project);
531  CHECK_EQ(size_t(1), project->size());
532  if (!is_distinct(size_t(0), project.get())) {
533  continue;
534  }
535  auto new_source = project->getAndOwnInput(0);
536  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
537  std::dynamic_pointer_cast<const RelScan>(new_source)) {
538  node->replaceInput(last_source, new_source);
539  }
540  }
541  decltype(copies)().swap(copies);
542 
543  auto web = build_du_web(nodes);
544 
545  std::unordered_set<const RelProject*> projects;
546  std::unordered_set<const RelProject*> permutating_projects;
547  auto const visible_projs = get_visible_projects(nodes.back().get());
548  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
549  auto node = *node_it;
550  auto project = std::dynamic_pointer_cast<RelProject>(node);
551  auto next_node_it = std::next(node_it);
552  if (project && project->isSimple() &&
553  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
554  is_identical_copy(project.get(), web, projects, permutating_projects) &&
556  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
557  projects.insert(project.get());
558  }
559  }
560 
561  for (auto node : nodes) {
562  redirect_inputs_of(node, projects, permutating_projects, web);
563  }
564 
565  cleanup_dead_nodes(nodes);
566 }
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:219
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:211
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fold_filters ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1468 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), logger::DEBUG2, logger::fast_logging_check(), kAND, kBOOLEAN, anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages(), and VLOG.

Referenced by RelAlgDagBuilder::build().

1468  {
1469  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1470  for (auto node : nodes) {
1471  deconst_mapping.insert(std::make_pair(node.get(), node));
1472  }
1473 
1474  auto web = build_du_web(nodes);
1475  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1476  auto& node = *node_it;
1477  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1478  CHECK_EQ(filter->inputCount(), size_t(1));
1479  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1480  if (!src_filter) {
1481  continue;
1482  }
1483  auto siblings_it = web.find(src_filter);
1484  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1485  continue;
1486  }
1487  auto src_it = deconst_mapping.find(src_filter);
1488  CHECK(src_it != deconst_mapping.end());
1489  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1490  CHECK(folded_filter);
1491  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1492  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1493  VLOG(1) << "Node ID=" << filter->getId() << " folded into "
1494  << "ID=" << folded_filter->getId();
1496  auto node_str = folded_filter->toString();
1497  constexpr size_t max_node_print_len{500};
1498  const size_t node_str_len = node_str.size();
1499  if (node_str_len > max_node_print_len) {
1500  node_str = node_str.substr(0, max_node_print_len) + "...";
1501  }
1502  VLOG(2) << "Folded Node (ID: " << folded_filter->getId()
1503  << ") contents: " << node_str;
1504  }
1505  std::vector<std::unique_ptr<const RexScalar>> operands;
1506  operands.emplace_back(folded_filter->getAndReleaseCondition());
1507  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1508  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1509  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1510  operands.push_back(redirector.visit(rex_operator));
1511  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1512  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1513  const bool notnull = old_condition->getType().get_notnull() &&
1514  other_condition->getType().get_notnull();
1515  auto new_condition = std::unique_ptr<const RexScalar>(
1516  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1517  folded_filter->setCondition(new_condition);
1518  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1519  deconst_mapping.erase(filter.get());
1520  web.erase(filter.get());
1521  web[filter->getInput(0)].erase(filter.get());
1522  node.reset();
1523  }
1524  }
1525  }
1526 
1527  if (!nodes.empty()) {
1528  auto sink = nodes.back();
1529  for (auto node_it = std::next(nodes.rbegin()); node_it != nodes.rend(); ++node_it) {
1530  if (sink) {
1531  break;
1532  }
1533  sink = *node_it;
1534  }
1535  CHECK(sink);
1536  cleanup_dead_nodes(nodes);
1537  }
1538 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:219
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool fast_logging_check(Channel)
Definition: Logger.h:185
Definition: sqldefs.h:37
#define CHECK(condition)
Definition: Logger.h:211
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
#define VLOG(n)
Definition: Logger.h:305

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void hoist_filter_cond_to_cross_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1638 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, find_hoistable_conditions(), RelFilter::getCondition(), RelAlgNode::getInput(), i, INNER, join(), kAND, kBOOLEAN, RelFilter::setCondition(), and RexVisitorBase< T >::visit().

Referenced by RelAlgDagBuilder::build().

1639  {
1640  std::unordered_set<const RelAlgNode*> visited;
1641  auto web = build_du_web(nodes);
1642  for (auto node : nodes) {
1643  if (visited.count(node.get())) {
1644  continue;
1645  }
1646  visited.insert(node.get());
1647  auto join = dynamic_cast<RelJoin*>(node.get());
1648  if (join && join->getJoinType() == JoinType::INNER) {
1649  // Only allow cross join for now.
1650  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1651  // Assume Calcite always generates an inner join on constant boolean true for
1652  // cross join.
1653  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1654  size_t first_col_idx = 0;
1655  const RelFilter* filter = nullptr;
1656  std::vector<const RelJoin*> join_seq{join};
1657  for (const RelJoin* curr_join = join; !filter;) {
1658  auto usrs_it = web.find(curr_join);
1659  CHECK(usrs_it != web.end());
1660  if (usrs_it->second.size() != size_t(1)) {
1661  break;
1662  }
1663  auto only_usr = *usrs_it->second.begin();
1664  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1665  if (join == usr_join->getInput(1)) {
1666  const auto src1_offset = usr_join->getInput(0)->size();
1667  first_col_idx += src1_offset;
1668  }
1669  join_seq.push_back(usr_join);
1670  curr_join = usr_join;
1671  continue;
1672  }
1673 
1674  filter = dynamic_cast<const RelFilter*>(only_usr);
1675  break;
1676  }
1677  if (!filter) {
1678  visited.insert(join_seq.begin(), join_seq.end());
1679  continue;
1680  }
1681  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1682  CHECK(src_join);
1683  auto modified_filter = const_cast<RelFilter*>(filter);
1684 
1685  if (src_join == join) {
1686  std::unique_ptr<const RexScalar> filter_condition(
1687  modified_filter->getAndReleaseCondition());
1688  std::unique_ptr<const RexScalar> true_condition =
1689  boost::make_unique<RexLiteral>(true,
1690  kBOOLEAN,
1691  kBOOLEAN,
1692  unsigned(-2147483648),
1693  1,
1694  unsigned(-2147483648),
1695  1);
1696  modified_filter->setCondition(true_condition);
1697  join->setCondition(filter_condition);
1698  continue;
1699  }
1700  const auto src1_base = src_join->getInput(0)->size();
1701  auto source =
1702  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1703  first_col_idx =
1704  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1705  auto join_conditions =
1707  source,
1708  first_col_idx,
1709  first_col_idx + join->size() - 1);
1710  if (join_conditions.empty()) {
1711  continue;
1712  }
1713 
1714  JoinTargetRebaser rebaser(join, first_col_idx);
1715  if (join_conditions.size() == 1) {
1716  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1717  join->setCondition(new_join_condition);
1718  } else {
1719  std::vector<std::unique_ptr<const RexScalar>> operands;
1720  bool notnull = true;
1721  for (size_t i = 0; i < join_conditions.size(); ++i) {
1722  operands.emplace_back(rebaser.visit(join_conditions[i]));
1723  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1724  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1725  notnull = notnull && old_subcond->getType().get_notnull();
1726  }
1727  auto new_join_condition = std::unique_ptr<const RexScalar>(
1728  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1729  join->setCondition(new_join_condition);
1730  }
1731 
1732  SubConditionRemover remover(join_conditions);
1733  auto new_filter_condition = remover.visit(filter->getCondition());
1734  modified_filter->setCondition(new_filter_condition);
1735  }
1736  }
1737  }
1738 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
const RexScalar * getCondition() const
std::string join(T const &container, std::string const &delim)
const RelAlgNode * getInput(const size_t idx) const
Definition: sqldefs.h:37
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void simplify_sort ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1743 of file RelAlgOptimizer.cpp.

References i, and RelAlgNode::replaceInput().

Referenced by RelAlgDagBuilder::build().

1743  {
1744  if (nodes.size() < 3) {
1745  return;
1746  }
1747  for (size_t i = 0; i <= nodes.size() - 3;) {
1748  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1749  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1750  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1751  if (first_sort && second_sort && project && project->isIdentity() &&
1752  *first_sort == *second_sort) {
1753  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1754  first_sort->getAndOwnInput(0));
1755  nodes[i].reset();
1756  nodes[i + 1].reset();
1757  i += 3;
1758  } else {
1759  ++i;
1760  }
1761  }
1762 
1763  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1764  for (auto node : nodes) {
1765  if (!node) {
1766  continue;
1767  }
1768  new_nodes.push_back(node);
1769  }
1770  nodes.swap(new_nodes);
1771 }
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void sink_projected_boolean_expr_to_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1320 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, i, join(), kBOOLEAN, and anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns().

Referenced by RelAlgDagBuilder::build().

1321  {
1322  auto web = build_du_web(nodes);
1323  auto liveouts = mark_live_columns(nodes);
1324  for (auto node : nodes) {
1325  auto project = std::dynamic_pointer_cast<RelProject>(node);
1326  // TODO(miyu): relax RelScan limitation
1327  if (!project || project->isSimple() ||
1328  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1329  continue;
1330  }
1331  auto usrs_it = web.find(project.get());
1332  CHECK(usrs_it != web.end());
1333  auto& usrs = usrs_it->second;
1334  if (usrs.size() != 1) {
1335  continue;
1336  }
1337  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1338  if (!join) {
1339  continue;
1340  }
1341  auto outs_it = liveouts.find(join);
1342  CHECK(outs_it != liveouts.end());
1343  std::unordered_map<size_t, size_t> in_to_out_index;
1344  std::unordered_set<size_t> boolean_expr_indicies;
1345  bool discarded = false;
1346  for (size_t i = 0; i < project->size(); ++i) {
1347  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1348  if (oper && oper->getType().get_type() == kBOOLEAN) {
1349  boolean_expr_indicies.insert(i);
1350  } else {
1351  // TODO(miyu): relax?
1352  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1353  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1354  } else {
1355  discarded = true;
1356  }
1357  }
1358  }
1359  if (discarded || boolean_expr_indicies.empty()) {
1360  continue;
1361  }
1362  const size_t index_base =
1363  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1364  for (auto i : boolean_expr_indicies) {
1365  auto join_idx = index_base + i;
1366  if (outs_it->second.count(join_idx)) {
1367  discarded = true;
1368  break;
1369  }
1370  }
1371  if (discarded) {
1372  continue;
1373  }
1374  RexInputCollector collector(project.get());
1375  std::vector<size_t> unloaded_input_indices;
1376  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1377  // Given all are dead right after join, safe to sink
1378  for (auto i : boolean_expr_indicies) {
1379  auto rex_ins = collector.visit(project->getProjectAt(i));
1380  for (auto& in : rex_ins) {
1381  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1382  if (!in_to_out_index.count(in.getIndex())) {
1383  auto curr_out_index = project->size() + unloaded_input_indices.size();
1384  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1385  unloaded_input_indices.push_back(in.getIndex());
1386  }
1387  RexInputSinker sinker(in_to_out_index, project.get());
1388  in_idx_to_new_subcond.insert(
1389  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1390  }
1391  }
1392  if (in_idx_to_new_subcond.empty()) {
1393  continue;
1394  }
1395  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1396  for (size_t i = 0; i < project->size(); ++i) {
1397  if (boolean_expr_indicies.count(i)) {
1398  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1399  } else {
1400  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1401  CHECK(rex_input != nullptr);
1402  new_projections.push_back(rex_input->deepCopy());
1403  }
1404  }
1405  for (auto i : unloaded_input_indices) {
1406  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1407  }
1408  project->setExpressions(new_projections);
1409 
1410  SubConditionReplacer replacer(in_idx_to_new_subcond);
1411  auto new_condition = replacer.visit(join->getCondition());
1412  join->setCondition(new_condition);
1413  }
1414 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:219
std::string join(T const &container, std::string const &delim)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the call graph for this function:

+ Here is the caller graph for this function: