OmniSciDB  bf83d84833
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgOptimizer.cpp File Reference
#include "RelAlgOptimizer.h"
#include "Logger/Logger.h"
#include "RexVisitor.h"
#include "Visitors/RexSubQueryIdCollector.h"
#include <numeric>
#include <string>
#include <unordered_map>
+ Include dependency graph for RelAlgOptimizer.cpp:

Go to the source code of this file.

Classes

class  anonymous_namespace{RelAlgOptimizer.cpp}::RexProjectInputRedirector
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexRebindInputsVisitor
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputCollector
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::AvailabilityChecker
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputRenumberVisitor
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputSinker
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::SubConditionReplacer
 
class  anonymous_namespace{RelAlgOptimizer.cpp}::RexInputRedirector
 
class  JoinTargetRebaser
 
class  SubConditionRemover
 

Namespaces

 anonymous_namespace{RelAlgOptimizer.cpp}
 

Functions

size_t anonymous_namespace{RelAlgOptimizer.cpp}::get_actual_source_size (const RelProject *curr_project, const std::unordered_set< const RelProject * > &projects_to_remove)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::safe_to_redirect (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy (const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_rex_input_renumber (const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of (std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::unordered_set< const
RelProject * > 
anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects (const RelAlgNode *root)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct (const size_t input_idx, const RelAlgNode *node)
 
std::unordered_map< const
RelAlgNode
*, std::unordered_set< const
RelAlgNode * > > 
build_du_web (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
bool project_separates_sort (const RelProject *project, const RelAlgNode *next_node)
 
void eliminate_identical_copy (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
size_t anonymous_namespace{RelAlgOptimizer.cpp}::pick_always_live_col_idx (const RelAlgNode *node)
 
std::vector
< std::unordered_set< size_t > > 
anonymous_namespace{RelAlgOptimizer.cpp}::get_live_ins (const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in (const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
 
bool anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols (const RelAlgNode *node)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::add_new_indices_for (const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
std::vector< std::unique_ptr
< const RexAgg > > 
anonymous_namespace{RelAlgOptimizer.cpp}::renumber_rex_aggs (std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
 
SortField anonymous_namespace{RelAlgOptimizer.cpp}::renumber_sort_field (const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
 
std::unordered_map< const
RelAlgNode
*, std::unordered_set< size_t > > 
anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
std::string anonymous_namespace{RelAlgOptimizer.cpp}::get_field_name (const RelAlgNode *node, size_t index)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj (std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
std::pair< std::unordered_map
< const RelAlgNode
*, std::unordered_map< size_t,
size_t > >, std::vector< const
RelAlgNode * > > 
anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns (const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering (std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
 
void eliminate_dead_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_subqueries (std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
 
void sink_projected_boolean_expr_to_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages (std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
 
void fold_filters (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
std::vector< const RexScalar * > find_hoistable_conditions (const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
 
void hoist_filter_cond_to_cross_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void simplify_sort (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 

Function Documentation

std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*> > build_du_web ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 408 of file RelAlgOptimizer.cpp.

References CHECK.

Referenced by eliminate_dead_columns(), eliminate_identical_copy(), fold_filters(), hoist_filter_cond_to_cross_join(), and sink_projected_boolean_expr_to_join().

409  {
410  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
411  std::unordered_set<const RelAlgNode*> visited;
412  std::vector<const RelAlgNode*> work_set;
413  for (auto node : nodes) {
414  if (std::dynamic_pointer_cast<RelScan>(node) ||
415  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
416  continue;
417  }
418  work_set.push_back(node.get());
419  while (!work_set.empty()) {
420  auto walker = work_set.back();
421  work_set.pop_back();
422  if (visited.count(walker)) {
423  continue;
424  }
425  CHECK(!web.count(walker));
426  auto it_ok =
427  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
428  CHECK(it_ok.second);
429  visited.insert(walker);
430  CHECK(dynamic_cast<const RelJoin*>(walker) ||
431  dynamic_cast<const RelProject*>(walker) ||
432  dynamic_cast<const RelAggregate*>(walker) ||
433  dynamic_cast<const RelFilter*>(walker) ||
434  dynamic_cast<const RelSort*>(walker) ||
435  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
436  dynamic_cast<const RelLogicalValues*>(walker) ||
437  dynamic_cast<const RelTableFunction*>(walker) ||
438  dynamic_cast<const RelLogicalUnion*>(walker));
439  for (size_t i = 0; i < walker->inputCount(); ++i) {
440  auto src = walker->getInput(i);
441  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
442  continue;
443  }
444  if (web.empty() || !web.count(src)) {
445  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
446  }
447  web[src].insert(walker);
448  work_set.push_back(src);
449  }
450  }
451  }
452  return web;
453 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

void eliminate_dead_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1175 of file RelAlgOptimizer.cpp.

References anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in(), build_du_web(), CHECK, anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols(), LOG, anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering(), anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj(), and logger::WARNING.

Referenced by RelAlgDagBuilder::build().

1175  {
1176  if (nodes.empty()) {
1177  return;
1178  }
1179  auto root = nodes.back().get();
1180  if (!root) {
1181  return;
1182  }
1183  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1184  // Mark
1185  auto old_liveouts = mark_live_columns(nodes);
1186  std::unordered_set<const RelAlgNode*> intact_nodes;
1187  bool has_dead_cols = false;
1188  for (auto live_pair : old_liveouts) {
1189  auto node = live_pair.first;
1190  const auto& outs = live_pair.second;
1191  if (outs.empty()) {
1192  LOG(WARNING) << "RA node with no used column: " << node->toString();
1193  // Ignore empty live_out due to some invalid node
1194  intact_nodes.insert(node);
1195  }
1196  if (any_dead_col_in(node, outs)) {
1197  has_dead_cols = true;
1198  } else {
1199  intact_nodes.insert(node);
1200  }
1201  }
1202  if (!has_dead_cols) {
1203  return;
1204  }
1205  auto web = build_du_web(nodes);
1206  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1207 
1208  for (auto node : nodes) {
1209  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1210  continue;
1211  }
1212  bool intact = true;
1213  for (size_t i = 0; i < node->inputCount(); ++i) {
1214  auto source = node->getInput(i);
1215  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1216  intact = false;
1217  break;
1218  }
1219  }
1220  if (intact) {
1221  intact_nodes.insert(node.get());
1222  }
1223  }
1224 
1225  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1226  for (auto node : nodes) {
1227  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1228  }
1229  // Sweep
1230  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1231  liveout_renumbering;
1232  std::vector<const RelAlgNode*> ready_nodes;
1233  std::tie(liveout_renumbering, ready_nodes) =
1234  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1235  // Propagate
1237  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1238 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define LOG(tag)
Definition: Logger.h:188
bool does_redef_cols(const RelAlgNode *node)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
#define CHECK(condition)
Definition: Logger.h:197
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_dead_subqueries ( std::vector< std::shared_ptr< RexSubQuery >> &  subqueries,
RelAlgNode const *  root 
)

Definition at line 1240 of file RelAlgOptimizer.cpp.

References RexSubQueryIdCollector::getLiveRexSubQueryIds(), gpu_enabled::upper_bound(), and VLOG.

Referenced by RelAlgDagBuilder::build().

1241  {
1242  if (!subqueries.empty()) {
1243  auto live_ids = RexSubQueryIdCollector::getLiveRexSubQueryIds(root);
1244  auto sort_live_ids_first = [&live_ids](auto& a, auto& b) {
1245  return live_ids.count(a->getId()) && !live_ids.count(b->getId());
1246  };
1247  std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1248  size_t n_dead_subqueries;
1249  if (live_ids.count(subqueries.front()->getId())) {
1250  auto first_dead_itr = std::upper_bound(subqueries.cbegin(),
1251  subqueries.cend(),
1252  subqueries.front(),
1253  sort_live_ids_first);
1254  n_dead_subqueries = subqueries.cend() - first_dead_itr;
1255  } else {
1256  n_dead_subqueries = subqueries.size();
1257  }
1258  if (n_dead_subqueries) {
1259  VLOG(1) << "Eliminating " << n_dead_subqueries
1260  << (n_dead_subqueries == 1 ? " subquery." : " subqueries.");
1261  subqueries.resize(subqueries.size() - n_dead_subqueries);
1262  subqueries.shrink_to_fit();
1263  }
1264  }
1265 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_identical_copy ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 486 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects(), anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct(), anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy(), project_separates_sort(), anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of(), and gpu_enabled::swap().

Referenced by RelAlgDagBuilder::build().

486  {
487  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
488  auto sink = nodes.back();
489  for (auto node : nodes) {
490  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
491  if (!aggregate || aggregate == sink ||
492  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
493  continue;
494  }
495  auto project =
496  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
497  if (project && project->size() == aggregate->size() &&
498  project->getFields() == aggregate->getFields()) {
499  CHECK_EQ(size_t(0), copies.count(aggregate));
500  copies.insert(aggregate);
501  }
502  }
503  for (auto node : nodes) {
504  if (!node->inputCount()) {
505  continue;
506  }
507  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
508  if (!copies.count(last_source)) {
509  continue;
510  }
511  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
512  CHECK(aggregate);
513  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
514  continue;
515  }
516  auto project =
517  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
518  CHECK(project);
519  CHECK_EQ(size_t(1), project->size());
520  if (!is_distinct(size_t(0), project.get())) {
521  continue;
522  }
523  auto new_source = project->getAndOwnInput(0);
524  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
525  std::dynamic_pointer_cast<const RelScan>(new_source)) {
526  node->replaceInput(last_source, new_source);
527  }
528  }
529  decltype(copies)().swap(copies);
530 
531  auto web = build_du_web(nodes);
532 
533  std::unordered_set<const RelProject*> projects;
534  std::unordered_set<const RelProject*> permutating_projects;
535  auto const visible_projs = get_visible_projects(nodes.back().get());
536  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
537  auto node = *node_it;
538  auto project = std::dynamic_pointer_cast<RelProject>(node);
539  auto next_node_it = std::next(node_it);
540  if (project && project->isSimple() &&
541  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
542  is_identical_copy(project.get(), web, projects, permutating_projects) &&
544  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
545  projects.insert(project.get());
546  }
547  }
548 
549  for (auto node : nodes) {
550  redirect_inputs_of(node, projects, permutating_projects, web);
551  }
552 
553  cleanup_dead_nodes(nodes);
554 }
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:197
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<const RexScalar*> find_hoistable_conditions ( const RexScalar condition,
const RelAlgNode source,
const size_t  first_col_idx,
const size_t  last_col_idx 
)

Definition at line 1519 of file RelAlgOptimizer.cpp.

References RexAbstractInput::getIndex(), kAND, and kEQ.

Referenced by hoist_filter_cond_to_cross_join().

1522  {
1523  if (auto rex_op = dynamic_cast<const RexOperator*>(condition)) {
1524  switch (rex_op->getOperator()) {
1525  case kAND: {
1526  std::vector<const RexScalar*> subconditions;
1527  size_t complete_subcond_count = 0;
1528  for (size_t i = 0; i < rex_op->size(); ++i) {
1529  auto conds = find_hoistable_conditions(
1530  rex_op->getOperand(i), source, first_col_idx, last_col_idx);
1531  if (conds.size() == size_t(1)) {
1532  ++complete_subcond_count;
1533  }
1534  subconditions.insert(subconditions.end(), conds.begin(), conds.end());
1535  }
1536  if (complete_subcond_count == rex_op->size()) {
1537  return {rex_op};
1538  } else {
1539  return {subconditions};
1540  }
1541  break;
1542  }
1543  case kEQ: {
1544  const auto lhs_conds = find_hoistable_conditions(
1545  rex_op->getOperand(0), source, first_col_idx, last_col_idx);
1546  const auto rhs_conds = find_hoistable_conditions(
1547  rex_op->getOperand(1), source, first_col_idx, last_col_idx);
1548  const auto lhs_in = lhs_conds.size() == 1
1549  ? dynamic_cast<const RexInput*>(*lhs_conds.begin())
1550  : nullptr;
1551  const auto rhs_in = rhs_conds.size() == 1
1552  ? dynamic_cast<const RexInput*>(*rhs_conds.begin())
1553  : nullptr;
1554  if (lhs_in && rhs_in) {
1555  return {rex_op};
1556  }
1557  return {};
1558  break;
1559  }
1560  default:
1561  break;
1562  }
1563  return {};
1564  }
1565  if (auto rex_in = dynamic_cast<const RexInput*>(condition)) {
1566  if (rex_in->getSourceNode() == source) {
1567  const auto col_idx = rex_in->getIndex();
1568  return {col_idx >= first_col_idx && col_idx <= last_col_idx ? condition : nullptr};
1569  }
1570  return {};
1571  }
1572  return {};
1573 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
Definition: sqldefs.h:30
unsigned getIndex() const
Definition: sqldefs.h:37

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fold_filters ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1455 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), logger::INFO, kAND, kBOOLEAN, LOG, and anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages().

Referenced by RelAlgDagBuilder::build().

1455  {
1456  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1457  for (auto node : nodes) {
1458  deconst_mapping.insert(std::make_pair(node.get(), node));
1459  }
1460 
1461  auto web = build_du_web(nodes);
1462  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1463  auto& node = *node_it;
1464  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1465  CHECK_EQ(filter->inputCount(), size_t(1));
1466  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1467  if (!src_filter) {
1468  continue;
1469  }
1470  auto siblings_it = web.find(src_filter);
1471  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1472  continue;
1473  }
1474  auto src_it = deconst_mapping.find(src_filter);
1475  CHECK(src_it != deconst_mapping.end());
1476  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1477  CHECK(folded_filter);
1478  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1479  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1480  LOG(INFO) << "ID=" << filter->getId() << " " << filter->toString()
1481  << " folded into "
1482  << "ID=" << folded_filter->getId() << " " << folded_filter->toString()
1483  << std::endl;
1484  std::vector<std::unique_ptr<const RexScalar>> operands;
1485  operands.emplace_back(folded_filter->getAndReleaseCondition());
1486  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1487  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1488  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1489  operands.push_back(redirector.visit(rex_operator));
1490  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1491  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1492  const bool notnull = old_condition->getType().get_notnull() &&
1493  other_condition->getType().get_notnull();
1494  auto new_condition = std::unique_ptr<const RexScalar>(
1495  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1496  folded_filter->setCondition(new_condition);
1497  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1498  deconst_mapping.erase(filter.get());
1499  web.erase(filter.get());
1500  web[filter->getInput(0)].erase(filter.get());
1501  node.reset();
1502  }
1503  }
1504  }
1505 
1506  if (!nodes.empty()) {
1507  auto sink = nodes.back();
1508  for (auto node_it = std::next(nodes.rbegin()); node_it != nodes.rend(); ++node_it) {
1509  if (sink) {
1510  break;
1511  }
1512  sink = *node_it;
1513  }
1514  CHECK(sink);
1515  cleanup_dead_nodes(nodes);
1516  }
1517 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define LOG(tag)
Definition: Logger.h:188
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: sqldefs.h:37
#define CHECK(condition)
Definition: Logger.h:197
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void hoist_filter_cond_to_cross_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1617 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, find_hoistable_conditions(), RelFilter::getCondition(), RelAlgNode::getInput(), INNER, join(), kAND, kBOOLEAN, RelFilter::setCondition(), and RexVisitorBase< T >::visit().

Referenced by RelAlgDagBuilder::build().

1618  {
1619  std::unordered_set<const RelAlgNode*> visited;
1620  auto web = build_du_web(nodes);
1621  for (auto node : nodes) {
1622  if (visited.count(node.get())) {
1623  continue;
1624  }
1625  visited.insert(node.get());
1626  auto join = dynamic_cast<RelJoin*>(node.get());
1627  if (join && join->getJoinType() == JoinType::INNER) {
1628  // Only allow cross join for now.
1629  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1630  // Assume Calcite always generates an inner join on constant boolean true for
1631  // cross join.
1632  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1633  size_t first_col_idx = 0;
1634  const RelFilter* filter = nullptr;
1635  std::vector<const RelJoin*> join_seq{join};
1636  for (const RelJoin* curr_join = join; !filter;) {
1637  auto usrs_it = web.find(curr_join);
1638  CHECK(usrs_it != web.end());
1639  if (usrs_it->second.size() != size_t(1)) {
1640  break;
1641  }
1642  auto only_usr = *usrs_it->second.begin();
1643  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1644  if (join == usr_join->getInput(1)) {
1645  const auto src1_offset = usr_join->getInput(0)->size();
1646  first_col_idx += src1_offset;
1647  }
1648  join_seq.push_back(usr_join);
1649  curr_join = usr_join;
1650  continue;
1651  }
1652 
1653  filter = dynamic_cast<const RelFilter*>(only_usr);
1654  break;
1655  }
1656  if (!filter) {
1657  visited.insert(join_seq.begin(), join_seq.end());
1658  continue;
1659  }
1660  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1661  CHECK(src_join);
1662  auto modified_filter = const_cast<RelFilter*>(filter);
1663 
1664  if (src_join == join) {
1665  std::unique_ptr<const RexScalar> filter_condition(
1666  modified_filter->getAndReleaseCondition());
1667  std::unique_ptr<const RexScalar> true_condition =
1668  boost::make_unique<RexLiteral>(true,
1669  kBOOLEAN,
1670  kBOOLEAN,
1671  unsigned(-2147483648),
1672  1,
1673  unsigned(-2147483648),
1674  1);
1675  modified_filter->setCondition(true_condition);
1676  join->setCondition(filter_condition);
1677  continue;
1678  }
1679  const auto src1_base = src_join->getInput(0)->size();
1680  auto source =
1681  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1682  first_col_idx =
1683  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1684  auto join_conditions =
1686  source,
1687  first_col_idx,
1688  first_col_idx + join->size() - 1);
1689  if (join_conditions.empty()) {
1690  continue;
1691  }
1692 
1693  JoinTargetRebaser rebaser(join, first_col_idx);
1694  if (join_conditions.size() == 1) {
1695  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1696  join->setCondition(new_join_condition);
1697  } else {
1698  std::vector<std::unique_ptr<const RexScalar>> operands;
1699  bool notnull = true;
1700  for (size_t i = 0; i < join_conditions.size(); ++i) {
1701  operands.emplace_back(rebaser.visit(join_conditions[i]));
1702  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1703  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1704  notnull = notnull && old_subcond->getType().get_notnull();
1705  }
1706  auto new_join_condition = std::unique_ptr<const RexScalar>(
1707  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1708  join->setCondition(new_join_condition);
1709  }
1710 
1711  SubConditionRemover remover(join_conditions);
1712  auto new_filter_condition = remover.visit(filter->getCondition());
1713  modified_filter->setCondition(new_filter_condition);
1714  }
1715  }
1716  }
1717 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
const RexScalar * getCondition() const
std::string join(T const &container, std::string const &delim)
const RelAlgNode * getInput(const size_t idx) const
Definition: sqldefs.h:37
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool project_separates_sort ( const RelProject project,
const RelAlgNode next_node 
)

Return true if the input project separates two sort nodes, i.e. Sort -> Project -> Sort. This pattern often occurs in machine generated SQL, e.g. SELECT * FROM (SELECT * FROM t LIMIT 10) t0 LIMIT 1; Use this function to prevent optimizing out the intermediate project, as the project is required to ensure the first sort runs to completion prior to the second sort. Back to back sort nodes are not executable and will throw an error.

Definition at line 463 of file RelAlgOptimizer.cpp.

References CHECK, RelAlgNode::getInput(), RelAlgNode::inputCount(), and gpu_enabled::sort().

Referenced by eliminate_identical_copy().

463  {
464  CHECK(project);
465  if (!next_node) {
466  return false;
467  }
468 
469  auto sort = dynamic_cast<const RelSort*>(next_node);
470  if (!sort) {
471  return false;
472  }
473  if (!(project->inputCount() == 1)) {
474  return false;
475  }
476 
477  if (dynamic_cast<const RelSort*>(project->getInput(0))) {
478  return true;
479  }
480  return false;
481 }
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
const RelAlgNode * getInput(const size_t idx) const
#define CHECK(condition)
Definition: Logger.h:197
const size_t inputCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void simplify_sort ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1722 of file RelAlgOptimizer.cpp.

References RelAlgNode::replaceInput().

Referenced by RelAlgDagBuilder::build().

1722  {
1723  if (nodes.size() < 3) {
1724  return;
1725  }
1726  for (size_t i = 0; i <= nodes.size() - 3;) {
1727  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1728  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1729  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1730  if (first_sort && second_sort && project && project->isIdentity() &&
1731  *first_sort == *second_sort) {
1732  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1733  first_sort->getAndOwnInput(0));
1734  nodes[i].reset();
1735  nodes[i + 1].reset();
1736  i += 3;
1737  } else {
1738  ++i;
1739  }
1740  }
1741 
1742  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1743  for (auto node : nodes) {
1744  if (!node) {
1745  continue;
1746  }
1747  new_nodes.push_back(node);
1748  }
1749  nodes.swap(new_nodes);
1750 }
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void sink_projected_boolean_expr_to_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1307 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, join(), kBOOLEAN, and anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns().

Referenced by RelAlgDagBuilder::build().

1308  {
1309  auto web = build_du_web(nodes);
1310  auto liveouts = mark_live_columns(nodes);
1311  for (auto node : nodes) {
1312  auto project = std::dynamic_pointer_cast<RelProject>(node);
1313  // TODO(miyu): relax RelScan limitation
1314  if (!project || project->isSimple() ||
1315  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1316  continue;
1317  }
1318  auto usrs_it = web.find(project.get());
1319  CHECK(usrs_it != web.end());
1320  auto& usrs = usrs_it->second;
1321  if (usrs.size() != 1) {
1322  continue;
1323  }
1324  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1325  if (!join) {
1326  continue;
1327  }
1328  auto outs_it = liveouts.find(join);
1329  CHECK(outs_it != liveouts.end());
1330  std::unordered_map<size_t, size_t> in_to_out_index;
1331  std::unordered_set<size_t> boolean_expr_indicies;
1332  bool discarded = false;
1333  for (size_t i = 0; i < project->size(); ++i) {
1334  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1335  if (oper && oper->getType().get_type() == kBOOLEAN) {
1336  boolean_expr_indicies.insert(i);
1337  } else {
1338  // TODO(miyu): relax?
1339  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1340  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1341  } else {
1342  discarded = true;
1343  }
1344  }
1345  }
1346  if (discarded || boolean_expr_indicies.empty()) {
1347  continue;
1348  }
1349  const size_t index_base =
1350  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1351  for (auto i : boolean_expr_indicies) {
1352  auto join_idx = index_base + i;
1353  if (outs_it->second.count(join_idx)) {
1354  discarded = true;
1355  break;
1356  }
1357  }
1358  if (discarded) {
1359  continue;
1360  }
1361  RexInputCollector collector(project.get());
1362  std::vector<size_t> unloaded_input_indices;
1363  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1364  // Given all are dead right after join, safe to sink
1365  for (auto i : boolean_expr_indicies) {
1366  auto rex_ins = collector.visit(project->getProjectAt(i));
1367  for (auto& in : rex_ins) {
1368  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1369  if (!in_to_out_index.count(in.getIndex())) {
1370  auto curr_out_index = project->size() + unloaded_input_indices.size();
1371  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1372  unloaded_input_indices.push_back(in.getIndex());
1373  }
1374  RexInputSinker sinker(in_to_out_index, project.get());
1375  in_idx_to_new_subcond.insert(
1376  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1377  }
1378  }
1379  if (in_idx_to_new_subcond.empty()) {
1380  continue;
1381  }
1382  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1383  for (size_t i = 0; i < project->size(); ++i) {
1384  if (boolean_expr_indicies.count(i)) {
1385  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1386  } else {
1387  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1388  CHECK(rex_input != nullptr);
1389  new_projections.push_back(rex_input->deepCopy());
1390  }
1391  }
1392  for (auto i : unloaded_input_indices) {
1393  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1394  }
1395  project->setExpressions(new_projections);
1396 
1397  SubConditionReplacer replacer(in_idx_to_new_subcond);
1398  auto new_condition = replacer.visit(join->getCondition());
1399  join->setCondition(new_condition);
1400  }
1401 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string join(T const &container, std::string const &delim)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function: