OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgOptimizer.cpp File Reference
#include "RelAlgOptimizer.h"
#include "Logger/Logger.h"
#include "RexVisitor.h"
#include "Visitors/RexSubQueryIdCollector.h"
#include <numeric>
#include <string>
#include <unordered_map>
+ Include dependency graph for RelAlgOptimizer.cpp:

Go to the source code of this file.

Classes

class  anonymous_namespace{RelAlgOptimizer.cpp}::RexProjectInputRedirector
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexRebindInputsVisitor
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputCollector
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::AvailabilityChecker
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputRenumberVisitor
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputSinker
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::SubConditionReplacer
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputRedirector
 
class  JoinTargetRebaser
 
class  SubConditionRemover
 

Namespaces

 anonymous_namespace{RelAlgOptimizer.cpp}
 

Functions

size_t anonymous_namespace{RelAlgOptimizer.cpp}::get_actual_source_size (const RelProject *curr_project, const std::unordered_set< const RelProject * > &projects_to_remove)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::safe_to_redirect (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_rex_input_renumber (const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of (std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::unordered_set< const
RelProject * > 
anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects (const RelAlgNode *root)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct (const size_t input_idx, const RelAlgNode *node)
 
std::unordered_map< const
RelAlgNode
*, std::unordered_set< const
RelAlgNode * > > 
build_du_web (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
bool project_separates_sort (const RelProject *project, const RelAlgNode *next_node)
 
void eliminate_identical_copy (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
size_t anonymous_namespace{RelAlgOptimizer.cpp}::pick_always_live_col_idx (const RelAlgNode *node)
 
std::vector
< std::unordered_set< size_t > > 
anonymous_namespace{RelAlgOptimizer.cpp}::get_live_ins (const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in (const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols (const RelAlgNode *node)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::add_new_indices_for (const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
std::vector< std::unique_ptr
< const RexAgg > > 
anonymous_namespace{RelAlgOptimizer.cpp}::renumber_rex_aggs (std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
 
SortField anonymous_namespace{RelAlgOptimizer.cpp}::renumber_sort_field (const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
 
std::unordered_map< const
RelAlgNode
*, std::unordered_set< size_t > > 
anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::string anonymous_namespace{RelAlgOptimizer.cpp}::get_field_name (const RelAlgNode *node, size_t index)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj (std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
std::pair< std::unordered_map
< const RelAlgNode
*, std::unordered_map< size_t,
size_t > >, std::vector< const
RelAlgNode * > > 
anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns (const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering (std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void eliminate_dead_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_subqueries (std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
 
void sink_projected_boolean_expr_to_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages (std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void fold_filters (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
std::vector< const RexScalar * > find_hoistable_conditions (const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
 
void hoist_filter_cond_to_cross_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void simplify_sort (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 

Function Documentation

std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*> > build_du_web ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 408 of file RelAlgOptimizer.cpp.

References CHECK, and src.

Referenced by eliminate_dead_columns(), eliminate_identical_copy(), fold_filters(), hoist_filter_cond_to_cross_join(), and sink_projected_boolean_expr_to_join().

409  {
410  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
411  std::unordered_set<const RelAlgNode*> visited;
412  std::vector<const RelAlgNode*> work_set;
413  for (auto node : nodes) {
414  if (std::dynamic_pointer_cast<RelScan>(node) ||
415  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
416  continue;
417  }
418  work_set.push_back(node.get());
419  while (!work_set.empty()) {
420  auto walker = work_set.back();
421  work_set.pop_back();
422  if (visited.count(walker)) {
423  continue;
424  }
425  CHECK(!web.count(walker));
426  auto it_ok =
427  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
428  CHECK(it_ok.second);
429  visited.insert(walker);
430  CHECK(dynamic_cast<const RelJoin*>(walker) ||
431  dynamic_cast<const RelProject*>(walker) ||
432  dynamic_cast<const RelAggregate*>(walker) ||
433  dynamic_cast<const RelFilter*>(walker) ||
434  dynamic_cast<const RelSort*>(walker) ||
435  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
436  dynamic_cast<const RelLogicalValues*>(walker) ||
437  dynamic_cast<const RelTableFunction*>(walker) ||
438  dynamic_cast<const RelLogicalUnion*>(walker));
439  for (size_t i = 0; i < walker->inputCount(); ++i) {
440  auto src = walker->getInput(i);
441  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
442  continue;
443  }
444  if (web.empty() || !web.count(src)) {
445  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
446  }
447  web[src].insert(walker);
448  work_set.push_back(src);
449  }
450  }
451  }
452  return web;
453 }
int64_t * src
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

void eliminate_dead_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1175 of file RelAlgOptimizer.cpp.

References anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in(), build_du_web(), CHECK, anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols(), LOG, anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering(), anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj(), and logger::WARNING.

Referenced by RelAlgDagBuilder::build().

1175  {
1176  if (nodes.empty()) {
1177  return;
1178  }
1179  auto root = nodes.back().get();
1180  if (!root) {
1181  return;
1182  }
1183  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1184  // Mark
1185  auto old_liveouts = mark_live_columns(nodes);
1186  std::unordered_set<const RelAlgNode*> intact_nodes;
1187  bool has_dead_cols = false;
1188  for (auto live_pair : old_liveouts) {
1189  auto node = live_pair.first;
1190  const auto& outs = live_pair.second;
1191  if (outs.empty()) {
1192  LOG(WARNING) << "RA node with no used column: " << node->toString();
1193  // Ignore empty live_out due to some invalid node
1194  intact_nodes.insert(node);
1195  }
1196  if (any_dead_col_in(node, outs)) {
1197  has_dead_cols = true;
1198  } else {
1199  intact_nodes.insert(node);
1200  }
1201  }
1202  if (!has_dead_cols) {
1203  return;
1204  }
1205  auto web = build_du_web(nodes);
1206  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1207 
1208  for (auto node : nodes) {
1209  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1210  continue;
1211  }
1212  bool intact = true;
1213  for (size_t i = 0; i < node->inputCount(); ++i) {
1214  auto source = node->getInput(i);
1215  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1216  intact = false;
1217  break;
1218  }
1219  }
1220  if (intact) {
1221  intact_nodes.insert(node.get());
1222  }
1223  }
1224 
1225  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1226  for (auto node : nodes) {
1227  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1228  }
1229  // Sweep
1230  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1231  liveout_renumbering;
1232  std::vector<const RelAlgNode*> ready_nodes;
1233  std::tie(liveout_renumbering, ready_nodes) =
1234  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1235  // Propagate
1237  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1238 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define LOG(tag)
Definition: Logger.h:188
bool does_redef_cols(const RelAlgNode *node)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
#define CHECK(condition)
Definition: Logger.h:197
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_dead_subqueries ( std::vector< std::shared_ptr< RexSubQuery >> &  subqueries,
RelAlgNode const *  root 
)

Definition at line 1240 of file RelAlgOptimizer.cpp.

References RexSubQueryIdCollector::getLiveRexSubQueryIds(), and VLOG.

Referenced by RelAlgDagBuilder::build().

1241  {
1242  if (!subqueries.empty()) {
1243  auto live_ids = RexSubQueryIdCollector::getLiveRexSubQueryIds(root);
1244  auto sort_live_ids_first = [&live_ids](auto& a, auto& b) {
1245  return live_ids.count(a->getId()) && !live_ids.count(b->getId());
1246  };
1247  std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1248  size_t n_dead_subqueries;
1249  if (live_ids.count(subqueries.front()->getId())) {
1250  auto first_dead_itr = std::upper_bound(subqueries.cbegin(),
1251  subqueries.cend(),
1252  subqueries.front(),
1253  sort_live_ids_first);
1254  n_dead_subqueries = subqueries.cend() - first_dead_itr;
1255  } else {
1256  n_dead_subqueries = subqueries.size();
1257  }
1258  if (n_dead_subqueries) {
1259  VLOG(1) << "Eliminating " << n_dead_subqueries
1260  << (n_dead_subqueries == 1 ? " subquery." : " subqueries.");
1261  subqueries.resize(subqueries.size() - n_dead_subqueries);
1262  subqueries.shrink_to_fit();
1263  }
1264  }
1265 }
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_identical_copy ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 486 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects(), anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct(), anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy(), project_separates_sort(), and anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of().

Referenced by RelAlgDagBuilder::build().

486  {
487  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
488  auto sink = nodes.back();
489  for (auto node : nodes) {
490  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
491  if (!aggregate || aggregate == sink ||
492  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
493  continue;
494  }
495  auto project =
496  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
497  if (project && project->size() == aggregate->size() &&
498  project->getFields() == aggregate->getFields()) {
499  CHECK_EQ(size_t(0), copies.count(aggregate));
500  copies.insert(aggregate);
501  }
502  }
503  for (auto node : nodes) {
504  if (!node->inputCount()) {
505  continue;
506  }
507  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
508  if (!copies.count(last_source)) {
509  continue;
510  }
511  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
512  CHECK(aggregate);
513  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
514  continue;
515  }
516  auto project =
517  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
518  CHECK(project);
519  CHECK_EQ(size_t(1), project->size());
520  if (!is_distinct(size_t(0), project.get())) {
521  continue;
522  }
523  auto new_source = project->getAndOwnInput(0);
524  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
525  std::dynamic_pointer_cast<const RelScan>(new_source)) {
526  node->replaceInput(last_source, new_source);
527  }
528  }
529  decltype(copies)().swap(copies);
530 
531  auto web = build_du_web(nodes);
532 
533  std::unordered_set<const RelProject*> projects;
534  std::unordered_set<const RelProject*> permutating_projects;
535  auto const visible_projs = get_visible_projects(nodes.back().get());
536  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
537  auto node = *node_it;
538  auto project = std::dynamic_pointer_cast<RelProject>(node);
539  auto next_node_it = std::next(node_it);
540  if (project && project->isSimple() &&
541  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
542  is_identical_copy(project.get(), web, projects, permutating_projects) &&
544  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
545  projects.insert(project.get());
546  }
547  }
548 
549  for (auto node : nodes) {
550  redirect_inputs_of(node, projects, permutating_projects, web);
551  }
552 
553  cleanup_dead_nodes(nodes);
554 }
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:197
bool is_distinct(const size_t input_idx, const RelAlgNode *node)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<const RexScalar*> find_hoistable_conditions ( const RexScalar condition,
const RelAlgNode source,
const size_t  first_col_idx,
const size_t  last_col_idx 
)

Definition at line 1517 of file RelAlgOptimizer.cpp.

References RexAbstractInput::getIndex(), kAND, and kEQ.

Referenced by hoist_filter_cond_to_cross_join().

1520  {
1521  if (auto rex_op = dynamic_cast<const RexOperator*>(condition)) {
1522  switch (rex_op->getOperator()) {
1523  case kAND: {
1524  std::vector<const RexScalar*> subconditions;
1525  size_t complete_subcond_count = 0;
1526  for (size_t i = 0; i < rex_op->size(); ++i) {
1527  auto conds = find_hoistable_conditions(
1528  rex_op->getOperand(i), source, first_col_idx, last_col_idx);
1529  if (conds.size() == size_t(1)) {
1530  ++complete_subcond_count;
1531  }
1532  subconditions.insert(subconditions.end(), conds.begin(), conds.end());
1533  }
1534  if (complete_subcond_count == rex_op->size()) {
1535  return {rex_op};
1536  } else {
1537  return {subconditions};
1538  }
1539  break;
1540  }
1541  case kEQ: {
1542  const auto lhs_conds = find_hoistable_conditions(
1543  rex_op->getOperand(0), source, first_col_idx, last_col_idx);
1544  const auto rhs_conds = find_hoistable_conditions(
1545  rex_op->getOperand(1), source, first_col_idx, last_col_idx);
1546  const auto lhs_in = lhs_conds.size() == 1
1547  ? dynamic_cast<const RexInput*>(*lhs_conds.begin())
1548  : nullptr;
1549  const auto rhs_in = rhs_conds.size() == 1
1550  ? dynamic_cast<const RexInput*>(*rhs_conds.begin())
1551  : nullptr;
1552  if (lhs_in && rhs_in) {
1553  return {rex_op};
1554  }
1555  return {};
1556  break;
1557  }
1558  default:
1559  break;
1560  }
1561  return {};
1562  }
1563  if (auto rex_in = dynamic_cast<const RexInput*>(condition)) {
1564  if (rex_in->getSourceNode() == source) {
1565  const auto col_idx = rex_in->getIndex();
1566  return {col_idx >= first_col_idx && col_idx <= last_col_idx ? condition : nullptr};
1567  }
1568  return {};
1569  }
1570  return {};
1571 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
Definition: sqldefs.h:30
unsigned getIndex() const
Definition: sqldefs.h:37

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fold_filters ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1455 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), logger::INFO, kAND, kBOOLEAN, LOG, and anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages().

Referenced by RelAlgDagBuilder::build().

1455  {
1456  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1457  for (auto node : nodes) {
1458  deconst_mapping.insert(std::make_pair(node.get(), node));
1459  }
1460 
1461  auto web = build_du_web(nodes);
1462  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1463  auto& node = *node_it;
1464  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1465  CHECK_EQ(filter->inputCount(), size_t(1));
1466  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1467  if (!src_filter) {
1468  continue;
1469  }
1470  auto siblings_it = web.find(src_filter);
1471  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1472  continue;
1473  }
1474  auto src_it = deconst_mapping.find(src_filter);
1475  CHECK(src_it != deconst_mapping.end());
1476  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1477  CHECK(folded_filter);
1478  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1479  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1480  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1481  << " folded into "
1482  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1483  << std::endl;
1484  std::vector<std::unique_ptr<const RexScalar>> operands;
1485  operands.emplace_back(folded_filter->getAndReleaseCondition());
1486  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1487  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1488  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1489  operands.push_back(redirector.visit(rex_operator));
1490  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1491  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1492  const bool notnull = old_condition->getType().get_notnull() &&
1493  other_condition->getType().get_notnull();
1494  auto new_condition = std::unique_ptr<const RexScalar>(
1495  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1496  folded_filter->setCondition(new_condition);
1497  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1498  deconst_mapping.erase(filter.get());
1499  web.erase(filter.get());
1500  web[filter->getInput(0)].erase(filter.get());
1501  node.reset();
1502  }
1503  }
1504  }
1505 
1506  if (!nodes.empty()) {
1507  auto sink = nodes.back();
1508  for (auto node_it = std::next(nodes.rend()); !sink && node_it != nodes.rbegin();
1509  ++node_it) {
1510  sink = *node_it;
1511  }
1512  CHECK(sink);
1513  cleanup_dead_nodes(nodes);
1514  }
1515 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define LOG(tag)
Definition: Logger.h:188
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: sqldefs.h:37
#define CHECK(condition)
Definition: Logger.h:197
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void hoist_filter_cond_to_cross_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1615 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, find_hoistable_conditions(), RelFilter::getCondition(), RelAlgNode::getInput(), INNER, join(), kAND, kBOOLEAN, RelFilter::setCondition(), and RexVisitorBase< T >::visit().

Referenced by RelAlgDagBuilder::build().

1616  {
1617  std::unordered_set<const RelAlgNode*> visited;
1618  auto web = build_du_web(nodes);
1619  for (auto node : nodes) {
1620  if (visited.count(node.get())) {
1621  continue;
1622  }
1623  visited.insert(node.get());
1624  auto join = dynamic_cast<RelJoin*>(node.get());
1625  if (join && join->getJoinType() == JoinType::INNER) {
1626  // Only allow cross join for now.
1627  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1628  // Assume Calcite always generates an inner join on constant boolean true for
1629  // cross join.
1630  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1631  size_t first_col_idx = 0;
1632  const RelFilter* filter = nullptr;
1633  std::vector<const RelJoin*> join_seq{join};
1634  for (const RelJoin* curr_join = join; !filter;) {
1635  auto usrs_it = web.find(curr_join);
1636  CHECK(usrs_it != web.end());
1637  if (usrs_it->second.size() != size_t(1)) {
1638  break;
1639  }
1640  auto only_usr = *usrs_it->second.begin();
1641  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1642  if (join == usr_join->getInput(1)) {
1643  const auto src1_offset = usr_join->getInput(0)->size();
1644  first_col_idx += src1_offset;
1645  }
1646  join_seq.push_back(usr_join);
1647  curr_join = usr_join;
1648  continue;
1649  }
1650 
1651  filter = dynamic_cast<const RelFilter*>(only_usr);
1652  break;
1653  }
1654  if (!filter) {
1655  visited.insert(join_seq.begin(), join_seq.end());
1656  continue;
1657  }
1658  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1659  CHECK(src_join);
1660  auto modified_filter = const_cast<RelFilter*>(filter);
1661 
1662  if (src_join == join) {
1663  std::unique_ptr<const RexScalar> filter_condition(
1664  modified_filter->getAndReleaseCondition());
1665  std::unique_ptr<const RexScalar> true_condition =
1666  boost::make_unique<RexLiteral>(true,
1667  kBOOLEAN,
1668  kBOOLEAN,
1669  unsigned(-2147483648),
1670  1,
1671  unsigned(-2147483648),
1672  1);
1673  modified_filter->setCondition(true_condition);
1674  join->setCondition(filter_condition);
1675  continue;
1676  }
1677  const auto src1_base = src_join->getInput(0)->size();
1678  auto source =
1679  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1680  first_col_idx =
1681  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1682  auto join_conditions =
1684  source,
1685  first_col_idx,
1686  first_col_idx + join->size() - 1);
1687  if (join_conditions.empty()) {
1688  continue;
1689  }
1690 
1691  JoinTargetRebaser rebaser(join, first_col_idx);
1692  if (join_conditions.size() == 1) {
1693  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1694  join->setCondition(new_join_condition);
1695  } else {
1696  std::vector<std::unique_ptr<const RexScalar>> operands;
1697  bool notnull = true;
1698  for (size_t i = 0; i < join_conditions.size(); ++i) {
1699  operands.emplace_back(rebaser.visit(join_conditions[i]));
1700  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1701  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1702  notnull = notnull && old_subcond->getType().get_notnull();
1703  }
1704  auto new_join_condition = std::unique_ptr<const RexScalar>(
1705  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1706  join->setCondition(new_join_condition);
1707  }
1708 
1709  SubConditionRemover remover(join_conditions);
1710  auto new_filter_condition = remover.visit(filter->getCondition());
1711  modified_filter->setCondition(new_filter_condition);
1712  }
1713  }
1714  }
1715 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
const RexScalar * getCondition() const
std::string join(T const &container, std::string const &delim)
const RelAlgNode * getInput(const size_t idx) const
Definition: sqldefs.h:37
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool project_separates_sort ( const RelProject project,
const RelAlgNode next_node 
)

Return true if the input project separates two sort nodes, i.e. Sort -> Project -> Sort. This pattern often occurs in machine generated SQL, e.g. SELECT * FROM (SELECT * FROM t LIMIT 10) t0 LIMIT 1; Use this function to prevent optimizing out the intermediate project, as the project is required to ensure the first sort runs to completion prior to the second sort. Back to back sort nodes are not executable and will throw an error.

Definition at line 463 of file RelAlgOptimizer.cpp.

References CHECK, RelAlgNode::getInput(), and RelAlgNode::inputCount().

Referenced by eliminate_identical_copy().

463  {
464  CHECK(project);
465  if (!next_node) {
466  return false;
467  }
468 
469  auto sort = dynamic_cast<const RelSort*>(next_node);
470  if (!sort) {
471  return false;
472  }
473  if (!(project->inputCount() == 1)) {
474  return false;
475  }
476 
477  if (dynamic_cast<const RelSort*>(project->getInput(0))) {
478  return true;
479  }
480  return false;
481 }
const RelAlgNode * getInput(const size_t idx) const
#define CHECK(condition)
Definition: Logger.h:197
const size_t inputCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void simplify_sort ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1720 of file RelAlgOptimizer.cpp.

References RelAlgNode::replaceInput().

Referenced by RelAlgDagBuilder::build().

1720  {
1721  if (nodes.size() < 3) {
1722  return;
1723  }
1724  for (size_t i = 0; i <= nodes.size() - 3;) {
1725  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1726  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1727  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1728  if (first_sort && second_sort && project && project->isIdentity() &&
1729  *first_sort == *second_sort) {
1730  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1731  first_sort->getAndOwnInput(0));
1732  nodes[i].reset();
1733  nodes[i + 1].reset();
1734  i += 3;
1735  } else {
1736  ++i;
1737  }
1738  }
1739 
1740  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1741  for (auto node : nodes) {
1742  if (!node) {
1743  continue;
1744  }
1745  new_nodes.push_back(node);
1746  }
1747  nodes.swap(new_nodes);
1748 }
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void sink_projected_boolean_expr_to_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1307 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, join(), kBOOLEAN, and anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns().

Referenced by RelAlgDagBuilder::build().

1308  {
1309  auto web = build_du_web(nodes);
1310  auto liveouts = mark_live_columns(nodes);
1311  for (auto node : nodes) {
1312  auto project = std::dynamic_pointer_cast<RelProject>(node);
1313  // TODO(miyu): relax RelScan limitation
1314  if (!project || project->isSimple() ||
1315  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1316  continue;
1317  }
1318  auto usrs_it = web.find(project.get());
1319  CHECK(usrs_it != web.end());
1320  auto& usrs = usrs_it->second;
1321  if (usrs.size() != 1) {
1322  continue;
1323  }
1324  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1325  if (!join) {
1326  continue;
1327  }
1328  auto outs_it = liveouts.find(join);
1329  CHECK(outs_it != liveouts.end());
1330  std::unordered_map<size_t, size_t> in_to_out_index;
1331  std::unordered_set<size_t> boolean_expr_indicies;
1332  bool discarded = false;
1333  for (size_t i = 0; i < project->size(); ++i) {
1334  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1335  if (oper && oper->getType().get_type() == kBOOLEAN) {
1336  boolean_expr_indicies.insert(i);
1337  } else {
1338  // TODO(miyu): relax?
1339  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1340  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1341  } else {
1342  discarded = true;
1343  }
1344  }
1345  }
1346  if (discarded || boolean_expr_indicies.empty()) {
1347  continue;
1348  }
1349  const size_t index_base =
1350  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1351  for (auto i : boolean_expr_indicies) {
1352  auto join_idx = index_base + i;
1353  if (outs_it->second.count(join_idx)) {
1354  discarded = true;
1355  break;
1356  }
1357  }
1358  if (discarded) {
1359  continue;
1360  }
1361  RexInputCollector collector(project.get());
1362  std::vector<size_t> unloaded_input_indices;
1363  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1364  // Given all are dead right after join, safe to sink
1365  for (auto i : boolean_expr_indicies) {
1366  auto rex_ins = collector.visit(project->getProjectAt(i));
1367  for (auto& in : rex_ins) {
1368  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1369  if (!in_to_out_index.count(in.getIndex())) {
1370  auto curr_out_index = project->size() + unloaded_input_indices.size();
1371  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1372  unloaded_input_indices.push_back(in.getIndex());
1373  }
1374  RexInputSinker sinker(in_to_out_index, project.get());
1375  in_idx_to_new_subcond.insert(
1376  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1377  }
1378  }
1379  if (in_idx_to_new_subcond.empty()) {
1380  continue;
1381  }
1382  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1383  for (size_t i = 0; i < project->size(); ++i) {
1384  if (boolean_expr_indicies.count(i)) {
1385  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1386  } else {
1387  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1388  CHECK(rex_input != nullptr);
1389  new_projections.push_back(rex_input->deepCopy());
1390  }
1391  }
1392  for (auto i : unloaded_input_indices) {
1393  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1394  }
1395  project->setExpressions(new_projections);
1396 
1397  SubConditionReplacer replacer(in_idx_to_new_subcond);
1398  auto new_condition = replacer.visit(join->getCondition());
1399  join->setCondition(new_condition);
1400  }
1401 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string join(T const &container, std::string const &delim)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function: