OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgOptimizer.cpp File Reference
#include "RelAlgOptimizer.h"
#include "RexVisitor.h"
#include "Shared/Logger.h"
#include "Visitors/RexSubQueryIdCollector.h"
#include <numeric>
#include <string>
#include <unordered_map>
+ Include dependency graph for RelAlgOptimizer.cpp:

Go to the source code of this file.

Classes

class  anonymous_namespace{RelAlgOptimizer.cpp}::RexProjectInputRedirector
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexRebindInputsVisitor
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputCollector
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::AvailabilityChecker
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputRenumberVisitor
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputSinker
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::SubConditionReplacer
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputRedirector
 
class  JoinTargetRebaser
 
class  SubConditionRemover
 

Namespaces

 anonymous_namespace{RelAlgOptimizer.cpp}
 

Functions

size_t anonymous_namespace{RelAlgOptimizer.cpp}::get_actual_source_size (const RelProject *curr_project, const std::unordered_set< const RelProject * > &projects_to_remove)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::safe_to_redirect (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_rex_input_renumber (const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of (std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::unordered_set< const
RelProject * > 
anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects (const RelAlgNode *root)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct (const size_t input_idx, const RelAlgNode *node)
 
std::unordered_map< const
RelAlgNode
*, std::unordered_set< const
RelAlgNode * > > 
build_du_web (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
bool project_separates_sort (const RelProject *project, const RelAlgNode *next_node)
 
void eliminate_identical_copy (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
size_t anonymous_namespace{RelAlgOptimizer.cpp}::pick_always_live_col_idx (const RelAlgNode *node)
 
std::vector
< std::unordered_set< size_t > > 
anonymous_namespace{RelAlgOptimizer.cpp}::get_live_ins (const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in (const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols (const RelAlgNode *node)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::add_new_indices_for (const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
std::vector< std::unique_ptr
< const RexAgg > > 
anonymous_namespace{RelAlgOptimizer.cpp}::renumber_rex_aggs (std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
 
SortField anonymous_namespace{RelAlgOptimizer.cpp}::renumber_sort_field (const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
 
std::unordered_map< const
RelAlgNode
*, std::unordered_set< size_t > > 
anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::string anonymous_namespace{RelAlgOptimizer.cpp}::get_field_name (const RelAlgNode *node, size_t index)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj (std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
std::pair< std::unordered_map
< const RelAlgNode
*, std::unordered_map< size_t,
size_t > >, std::vector< const
RelAlgNode * > > 
anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns (const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering (std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void eliminate_dead_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_subqueries (std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
 
void sink_projected_boolean_expr_to_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages (std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void fold_filters (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
std::vector< const RexScalar * > find_hoistable_conditions (const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
 
void hoist_filter_cond_to_cross_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void simplify_sort (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 

Function Documentation

std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*> > build_du_web ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 408 of file RelAlgOptimizer.cpp.

References CHECK(), and src.

Referenced by eliminate_dead_columns(), eliminate_identical_copy(), fold_filters(), hoist_filter_cond_to_cross_join(), and sink_projected_boolean_expr_to_join().

409  {
410  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
411  std::unordered_set<const RelAlgNode*> visited;
412  std::vector<const RelAlgNode*> work_set;
413  for (auto node : nodes) {
414  if (std::dynamic_pointer_cast<RelScan>(node) ||
415  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
416  continue;
417  }
418  work_set.push_back(node.get());
419  while (!work_set.empty()) {
420  auto walker = work_set.back();
421  work_set.pop_back();
422  if (visited.count(walker)) {
423  continue;
424  }
425  CHECK(!web.count(walker));
426  auto it_ok =
427  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
428  CHECK(it_ok.second);
429  visited.insert(walker);
430  CHECK(dynamic_cast<const RelJoin*>(walker) ||
431  dynamic_cast<const RelProject*>(walker) ||
432  dynamic_cast<const RelAggregate*>(walker) ||
433  dynamic_cast<const RelFilter*>(walker) ||
434  dynamic_cast<const RelSort*>(walker) ||
435  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
436  dynamic_cast<const RelLogicalValues*>(walker) ||
437  dynamic_cast<const RelTableFunction*>(walker) ||
438  dynamic_cast<const RelLogicalUnion*>(walker));
439  for (size_t i = 0; i < walker->inputCount(); ++i) {
440  auto src = walker->getInput(i);
441  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
442  continue;
443  }
444  if (web.empty() || !web.count(src)) {
445  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
446  }
447  web[src].insert(walker);
448  work_set.push_back(src);
449  }
450  }
451  }
452  return web;
453 }
int64_t * src
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_dead_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1167 of file RelAlgOptimizer.cpp.

References anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in(), build_du_web(), CHECK(), anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols(), LOG, anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering(), anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj(), and logger::WARNING.

Referenced by RelAlgDagBuilder::build().

1167  {
1168  if (nodes.empty()) {
1169  return;
1170  }
1171  auto root = nodes.back().get();
1172  if (!root) {
1173  return;
1174  }
1175  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1176  // Mark
1177  auto old_liveouts = mark_live_columns(nodes);
1178  std::unordered_set<const RelAlgNode*> intact_nodes;
1179  bool has_dead_cols = false;
1180  for (auto live_pair : old_liveouts) {
1181  auto node = live_pair.first;
1182  const auto& outs = live_pair.second;
1183  if (outs.empty()) {
1184  LOG(WARNING) << "RA node with no used column: " << node->toString();
1185  // Ignore empty live_out due to some invalid node
1186  intact_nodes.insert(node);
1187  }
1188  if (any_dead_col_in(node, outs)) {
1189  has_dead_cols = true;
1190  } else {
1191  intact_nodes.insert(node);
1192  }
1193  }
1194  if (!has_dead_cols) {
1195  return;
1196  }
1197  auto web = build_du_web(nodes);
1198  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1199 
1200  for (auto node : nodes) {
1201  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1202  continue;
1203  }
1204  bool intact = true;
1205  for (size_t i = 0; i < node->inputCount(); ++i) {
1206  auto source = node->getInput(i);
1207  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1208  intact = false;
1209  break;
1210  }
1211  }
1212  if (intact) {
1213  intact_nodes.insert(node.get());
1214  }
1215  }
1216 
1217  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1218  for (auto node : nodes) {
1219  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1220  }
1221  // Sweep
1222  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1223  liveout_renumbering;
1224  std::vector<const RelAlgNode*> ready_nodes;
1225  std::tie(liveout_renumbering, ready_nodes) =
1226  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1227  // Propagate
1229  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1230 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define LOG(tag)
Definition: Logger.h:188
bool does_redef_cols(const RelAlgNode *node)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
CHECK(cgen_state)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_dead_subqueries ( std::vector< std::shared_ptr< RexSubQuery >> &  subqueries,
RelAlgNode const *  root 
)

Definition at line 1232 of file RelAlgOptimizer.cpp.

References RexSubQueryIdCollector::getLiveRexSubQueryIds(), and VLOG.

Referenced by RelAlgDagBuilder::build().

1233  {
1234  if (!subqueries.empty()) {
1235  auto live_ids = RexSubQueryIdCollector::getLiveRexSubQueryIds(root);
1236  auto sort_live_ids_first = [&live_ids](auto& a, auto& b) {
1237  return live_ids.count(a->getId()) && !live_ids.count(b->getId());
1238  };
1239  std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1240  size_t n_dead_subqueries;
1241  if (live_ids.count(subqueries.front()->getId())) {
1242  auto first_dead_itr = std::upper_bound(subqueries.cbegin(),
1243  subqueries.cend(),
1244  subqueries.front(),
1245  sort_live_ids_first);
1246  n_dead_subqueries = subqueries.cend() - first_dead_itr;
1247  } else {
1248  n_dead_subqueries = subqueries.size();
1249  }
1250  if (n_dead_subqueries) {
1251  VLOG(1) << "Eliminating " << n_dead_subqueries
1252  << (n_dead_subqueries == 1 ? " subquery." : " subqueries.");
1253  subqueries.resize(subqueries.size() - n_dead_subqueries);
1254  subqueries.shrink_to_fit();
1255  }
1256  }
1257 }
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_identical_copy ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 486 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK(), CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects(), anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct(), anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy(), project_separates_sort(), and anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of().

Referenced by RelAlgDagBuilder::build().

486  {
487  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
488  auto sink = nodes.back();
489  for (auto node : nodes) {
490  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
491  if (!aggregate || aggregate == sink ||
492  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
493  continue;
494  }
495  auto project =
496  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
497  if (project && project->size() == aggregate->size() &&
498  project->getFields() == aggregate->getFields()) {
499  CHECK_EQ(size_t(0), copies.count(aggregate));
500  copies.insert(aggregate);
501  }
502  }
503  for (auto node : nodes) {
504  if (!node->inputCount()) {
505  continue;
506  }
507  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
508  if (!copies.count(last_source)) {
509  continue;
510  }
511  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
512  CHECK(aggregate);
513  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
514  continue;
515  }
516  auto project =
517  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
518  CHECK(project);
519  CHECK_EQ(size_t(1), project->size());
520  if (!is_distinct(size_t(0), project.get())) {
521  continue;
522  }
523  auto new_source = project->getAndOwnInput(0);
524  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
525  std::dynamic_pointer_cast<const RelScan>(new_source)) {
526  node->replaceInput(last_source, new_source);
527  }
528  }
529  decltype(copies)().swap(copies);
530 
531  auto web = build_du_web(nodes);
532 
533  std::unordered_set<const RelProject*> projects;
534  std::unordered_set<const RelProject*> permutating_projects;
535  auto const visible_projs = get_visible_projects(nodes.back().get());
536  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
537  auto node = *node_it;
538  auto project = std::dynamic_pointer_cast<RelProject>(node);
539  auto next_node_it = std::next(node_it);
540  if (project && project->isSimple() &&
541  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
542  is_identical_copy(project.get(), web, projects, permutating_projects) &&
544  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
545  projects.insert(project.get());
546  }
547  }
548 
549  for (auto node : nodes) {
550  redirect_inputs_of(node, projects, permutating_projects, web);
551  }
552 
553  cleanup_dead_nodes(nodes);
554 }
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
CHECK(cgen_state)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool is_distinct(const size_t input_idx, const RelAlgNode *node)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<const RexScalar*> find_hoistable_conditions ( const RexScalar condition,
const RelAlgNode source,
const size_t  first_col_idx,
const size_t  last_col_idx 
)

Definition at line 1509 of file RelAlgOptimizer.cpp.

References RexAbstractInput::getIndex(), kAND, and kEQ.

Referenced by hoist_filter_cond_to_cross_join().

1512  {
1513  if (auto rex_op = dynamic_cast<const RexOperator*>(condition)) {
1514  switch (rex_op->getOperator()) {
1515  case kAND: {
1516  std::vector<const RexScalar*> subconditions;
1517  size_t complete_subcond_count = 0;
1518  for (size_t i = 0; i < rex_op->size(); ++i) {
1519  auto conds = find_hoistable_conditions(
1520  rex_op->getOperand(i), source, first_col_idx, last_col_idx);
1521  if (conds.size() == size_t(1)) {
1522  ++complete_subcond_count;
1523  }
1524  subconditions.insert(subconditions.end(), conds.begin(), conds.end());
1525  }
1526  if (complete_subcond_count == rex_op->size()) {
1527  return {rex_op};
1528  } else {
1529  return {subconditions};
1530  }
1531  break;
1532  }
1533  case kEQ: {
1534  const auto lhs_conds = find_hoistable_conditions(
1535  rex_op->getOperand(0), source, first_col_idx, last_col_idx);
1536  const auto rhs_conds = find_hoistable_conditions(
1537  rex_op->getOperand(1), source, first_col_idx, last_col_idx);
1538  const auto lhs_in = lhs_conds.size() == 1
1539  ? dynamic_cast<const RexInput*>(*lhs_conds.begin())
1540  : nullptr;
1541  const auto rhs_in = rhs_conds.size() == 1
1542  ? dynamic_cast<const RexInput*>(*rhs_conds.begin())
1543  : nullptr;
1544  if (lhs_in && rhs_in) {
1545  return {rex_op};
1546  }
1547  return {};
1548  break;
1549  }
1550  default:
1551  break;
1552  }
1553  return {};
1554  }
1555  if (auto rex_in = dynamic_cast<const RexInput*>(condition)) {
1556  if (rex_in->getSourceNode() == source) {
1557  const auto col_idx = rex_in->getIndex();
1558  return {col_idx >= first_col_idx && col_idx <= last_col_idx ? condition : nullptr};
1559  }
1560  return {};
1561  }
1562  return {};
1563 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
Definition: sqldefs.h:30
unsigned getIndex() const
Definition: sqldefs.h:37

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fold_filters ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1447 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK(), CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), logger::INFO, kAND, kBOOLEAN, LOG, and anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages().

Referenced by RelAlgDagBuilder::build().

1447  {
1448  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1449  for (auto node : nodes) {
1450  deconst_mapping.insert(std::make_pair(node.get(), node));
1451  }
1452 
1453  auto web = build_du_web(nodes);
1454  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1455  auto& node = *node_it;
1456  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1457  CHECK_EQ(filter->inputCount(), size_t(1));
1458  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1459  if (!src_filter) {
1460  continue;
1461  }
1462  auto siblings_it = web.find(src_filter);
1463  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1464  continue;
1465  }
1466  auto src_it = deconst_mapping.find(src_filter);
1467  CHECK(src_it != deconst_mapping.end());
1468  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1469  CHECK(folded_filter);
1470  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1471  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1472  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1473  << " folded into "
1474  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1475  << std::endl;
1476  std::vector<std::unique_ptr<const RexScalar>> operands;
1477  operands.emplace_back(folded_filter->getAndReleaseCondition());
1478  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1479  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1480  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1481  operands.push_back(redirector.visit(rex_operator));
1482  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1483  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1484  const bool notnull = old_condition->getType().get_notnull() &&
1485  other_condition->getType().get_notnull();
1486  auto new_condition = std::unique_ptr<const RexScalar>(
1487  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1488  folded_filter->setCondition(new_condition);
1489  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1490  deconst_mapping.erase(filter.get());
1491  web.erase(filter.get());
1492  web[filter->getInput(0)].erase(filter.get());
1493  node.reset();
1494  }
1495  }
1496  }
1497 
1498  if (!nodes.empty()) {
1499  auto sink = nodes.back();
1500  for (auto node_it = std::next(nodes.rend()); !sink && node_it != nodes.rbegin();
1501  ++node_it) {
1502  sink = *node_it;
1503  }
1504  CHECK(sink);
1505  cleanup_dead_nodes(nodes);
1506  }
1507 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define LOG(tag)
Definition: Logger.h:188
CHECK(cgen_state)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: sqldefs.h:37
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void hoist_filter_cond_to_cross_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1607 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK(), find_hoistable_conditions(), RelFilter::getCondition(), RelAlgNode::getInput(), INNER, join(), kAND, kBOOLEAN, RelFilter::setCondition(), and RexVisitorBase< T >::visit().

Referenced by RelAlgDagBuilder::build().

1608  {
1609  std::unordered_set<const RelAlgNode*> visited;
1610  auto web = build_du_web(nodes);
1611  for (auto node : nodes) {
1612  if (visited.count(node.get())) {
1613  continue;
1614  }
1615  visited.insert(node.get());
1616  auto join = dynamic_cast<RelJoin*>(node.get());
1617  if (join && join->getJoinType() == JoinType::INNER) {
1618  // Only allow cross join for now.
1619  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1620  // Assume Calcite always generates an inner join on constant boolean true for
1621  // cross join.
1622  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1623  size_t first_col_idx = 0;
1624  const RelFilter* filter = nullptr;
1625  std::vector<const RelJoin*> join_seq{join};
1626  for (const RelJoin* curr_join = join; !filter;) {
1627  auto usrs_it = web.find(curr_join);
1628  CHECK(usrs_it != web.end());
1629  if (usrs_it->second.size() != size_t(1)) {
1630  break;
1631  }
1632  auto only_usr = *usrs_it->second.begin();
1633  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1634  if (join == usr_join->getInput(1)) {
1635  const auto src1_offset = usr_join->getInput(0)->size();
1636  first_col_idx += src1_offset;
1637  }
1638  join_seq.push_back(usr_join);
1639  curr_join = usr_join;
1640  continue;
1641  }
1642 
1643  filter = dynamic_cast<const RelFilter*>(only_usr);
1644  break;
1645  }
1646  if (!filter) {
1647  visited.insert(join_seq.begin(), join_seq.end());
1648  continue;
1649  }
1650  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1651  CHECK(src_join);
1652  auto modified_filter = const_cast<RelFilter*>(filter);
1653 
1654  if (src_join == join) {
1655  std::unique_ptr<const RexScalar> filter_condition(
1656  modified_filter->getAndReleaseCondition());
1657  std::unique_ptr<const RexScalar> true_condition =
1658  boost::make_unique<RexLiteral>(true,
1659  kBOOLEAN,
1660  kBOOLEAN,
1661  unsigned(-2147483648),
1662  1,
1663  unsigned(-2147483648),
1664  1);
1665  modified_filter->setCondition(true_condition);
1666  join->setCondition(filter_condition);
1667  continue;
1668  }
1669  const auto src1_base = src_join->getInput(0)->size();
1670  auto source =
1671  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1672  first_col_idx =
1673  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1674  auto join_conditions =
1676  source,
1677  first_col_idx,
1678  first_col_idx + join->size() - 1);
1679  if (join_conditions.empty()) {
1680  continue;
1681  }
1682 
1683  JoinTargetRebaser rebaser(join, first_col_idx);
1684  if (join_conditions.size() == 1) {
1685  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1686  join->setCondition(new_join_condition);
1687  } else {
1688  std::vector<std::unique_ptr<const RexScalar>> operands;
1689  bool notnull = true;
1690  for (size_t i = 0; i < join_conditions.size(); ++i) {
1691  operands.emplace_back(rebaser.visit(join_conditions[i]));
1692  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1693  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1694  notnull = notnull && old_subcond->getType().get_notnull();
1695  }
1696  auto new_join_condition = std::unique_ptr<const RexScalar>(
1697  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1698  join->setCondition(new_join_condition);
1699  }
1700 
1701  SubConditionRemover remover(join_conditions);
1702  auto new_filter_condition = remover.visit(filter->getCondition());
1703  modified_filter->setCondition(new_filter_condition);
1704  }
1705  }
1706  }
1707 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
const RexScalar * getCondition() const
std::string join(T const &container, std::string const &delim)
CHECK(cgen_state)
const RelAlgNode * getInput(const size_t idx) const
Definition: sqldefs.h:37

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool project_separates_sort ( const RelProject project,
const RelAlgNode next_node 
)

Return true if the input project separates two sort nodes, i.e. Sort -> Project -> Sort. This pattern often occurs in machine generated SQL, e.g. SELECT * FROM (SELECT * FROM t LIMIT 10) t0 LIMIT 1; Use this function to prevent optimizing out the intermediate project, as the project is required to ensure the first sort runs to completion prior to the second sort. Back to back sort nodes are not executable and will throw an error.

Definition at line 463 of file RelAlgOptimizer.cpp.

References CHECK(), RelAlgNode::getInput(), and RelAlgNode::inputCount().

Referenced by eliminate_identical_copy().

463  {
464  CHECK(project);
465  if (!next_node) {
466  return false;
467  }
468 
469  auto sort = dynamic_cast<const RelSort*>(next_node);
470  if (!sort) {
471  return false;
472  }
473  if (!(project->inputCount() == 1)) {
474  return false;
475  }
476 
477  if (dynamic_cast<const RelSort*>(project->getInput(0))) {
478  return true;
479  }
480  return false;
481 }
CHECK(cgen_state)
const RelAlgNode * getInput(const size_t idx) const
const size_t inputCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void simplify_sort ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1712 of file RelAlgOptimizer.cpp.

References RelAlgNode::replaceInput().

Referenced by RelAlgDagBuilder::build().

1712  {
1713  if (nodes.size() < 3) {
1714  return;
1715  }
1716  for (size_t i = 0; i <= nodes.size() - 3;) {
1717  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1718  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1719  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1720  if (first_sort && second_sort && project && project->isIdentity() &&
1721  *first_sort == *second_sort) {
1722  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1723  first_sort->getAndOwnInput(0));
1724  nodes[i].reset();
1725  nodes[i + 1].reset();
1726  i += 3;
1727  } else {
1728  ++i;
1729  }
1730  }
1731 
1732  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1733  for (auto node : nodes) {
1734  if (!node) {
1735  continue;
1736  }
1737  new_nodes.push_back(node);
1738  }
1739  nodes.swap(new_nodes);
1740 }
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void sink_projected_boolean_expr_to_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1299 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK(), CHECK_EQ, join(), kBOOLEAN, and anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns().

Referenced by RelAlgDagBuilder::build().

1300  {
1301  auto web = build_du_web(nodes);
1302  auto liveouts = mark_live_columns(nodes);
1303  for (auto node : nodes) {
1304  auto project = std::dynamic_pointer_cast<RelProject>(node);
1305  // TODO(miyu): relax RelScan limitation
1306  if (!project || project->isSimple() ||
1307  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1308  continue;
1309  }
1310  auto usrs_it = web.find(project.get());
1311  CHECK(usrs_it != web.end());
1312  auto& usrs = usrs_it->second;
1313  if (usrs.size() != 1) {
1314  continue;
1315  }
1316  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1317  if (!join) {
1318  continue;
1319  }
1320  auto outs_it = liveouts.find(join);
1321  CHECK(outs_it != liveouts.end());
1322  std::unordered_map<size_t, size_t> in_to_out_index;
1323  std::unordered_set<size_t> boolean_expr_indicies;
1324  bool discarded = false;
1325  for (size_t i = 0; i < project->size(); ++i) {
1326  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1327  if (oper && oper->getType().get_type() == kBOOLEAN) {
1328  boolean_expr_indicies.insert(i);
1329  } else {
1330  // TODO(miyu): relax?
1331  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1332  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1333  } else {
1334  discarded = true;
1335  }
1336  }
1337  }
1338  if (discarded || boolean_expr_indicies.empty()) {
1339  continue;
1340  }
1341  const size_t index_base =
1342  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1343  for (auto i : boolean_expr_indicies) {
1344  auto join_idx = index_base + i;
1345  if (outs_it->second.count(join_idx)) {
1346  discarded = true;
1347  break;
1348  }
1349  }
1350  if (discarded) {
1351  continue;
1352  }
1353  RexInputCollector collector(project.get());
1354  std::vector<size_t> unloaded_input_indices;
1355  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1356  // Given all are dead right after join, safe to sink
1357  for (auto i : boolean_expr_indicies) {
1358  auto rex_ins = collector.visit(project->getProjectAt(i));
1359  for (auto& in : rex_ins) {
1360  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1361  if (!in_to_out_index.count(in.getIndex())) {
1362  auto curr_out_index = project->size() + unloaded_input_indices.size();
1363  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1364  unloaded_input_indices.push_back(in.getIndex());
1365  }
1366  RexInputSinker sinker(in_to_out_index, project.get());
1367  in_idx_to_new_subcond.insert(
1368  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1369  }
1370  }
1371  if (in_idx_to_new_subcond.empty()) {
1372  continue;
1373  }
1374  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1375  for (size_t i = 0; i < project->size(); ++i) {
1376  if (boolean_expr_indicies.count(i)) {
1377  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1378  } else {
1379  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1380  CHECK(rex_input != nullptr);
1381  new_projections.push_back(rex_input->deepCopy());
1382  }
1383  }
1384  for (auto i : unloaded_input_indices) {
1385  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1386  }
1387  project->setExpressions(new_projections);
1388 
1389  SubConditionReplacer replacer(in_idx_to_new_subcond);
1390  auto new_condition = replacer.visit(join->getCondition());
1391  join->setCondition(new_condition);
1392  }
1393 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string join(T const &container, std::string const &delim)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function: