OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{RelAlgDag.cpp} Namespace Reference

Namespaces

 anonymous_namespace{RelAlgDag.cpp}
 

Classes

class  RexRebindInputsVisitor
 
class  RexRebindReindexInputsVisitor
 
class  PushDownGenericExpressionInWindowFunction
 
class  RANodeIterator
 
class  WindowFunctionCollector
 
class  RexWindowFuncReplacementVisitor
 
class  RexInputCollector
 

Typedefs

using RexInputSet = std::unordered_set< RexInput >
 

Functions

std::vector< RexInput > n_outputs (const RelAlgNode *node, const size_t n)
 
bool isRenamedInput (const RelAlgNode *node, const size_t index, const std::string &new_name)
 
std::vector< std::unique_ptr
< const RexAgg > > 
copyAggExprs (std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
 
std::vector< std::unique_ptr
< const RexScalar > > 
copyRexScalars (std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
 
std::vector< const Rex * > remapTargetPointers (std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
 
void reset_table_function_inputs (std::vector< const Rex * > &column_inputs, const std::vector< std::unique_ptr< const RexScalar >> &old_table_func_inputs, const std::vector< std::unique_ptr< const RexScalar >> &new_table_func_inputs)
 
std::set< std::pair< const
RelAlgNode *, int > > 
get_equiv_cols (const RelAlgNode *node, const size_t which_col)
 
std::vector< bool > get_notnulls (std::vector< TargetMetaInfo > const &tmis0)
 
bool same_ignoring_notnull (SQLTypeInfo ti0, SQLTypeInfo ti1)
 
void set_notnulls (std::vector< TargetMetaInfo > *tmis0, std::vector< bool > const &notnulls)
 
unsigned node_id (const rapidjson::Value &ra_node) noexcept
 
std::string json_node_to_string (const rapidjson::Value &node) noexcept
 
std::unique_ptr< RexAbstractInput > parse_abstract_input (const rapidjson::Value &expr) noexcept
 
std::unique_ptr< RexLiteral > parse_literal (const rapidjson::Value &expr)
 
std::unique_ptr< const RexScalar > parse_scalar_expr (const rapidjson::Value &expr, RelAlgDag &root_dag)
 
SQLTypeInfo parse_type (const rapidjson::Value &type_obj)
 
std::vector< std::unique_ptr
< const RexScalar > > 
parse_expr_array (const rapidjson::Value &arr, RelAlgDag &root_dag)
 
SqlWindowFunctionKind parse_window_function_kind (const std::string &name)
 
std::vector< std::unique_ptr
< const RexScalar > > 
parse_window_order_exprs (const rapidjson::Value &arr, RelAlgDag &root_dag)
 
SortDirection parse_sort_direction (const rapidjson::Value &collation)
 
NullSortedPosition parse_nulls_position (const rapidjson::Value &collation)
 
std::vector< SortField > parse_window_order_collation (const rapidjson::Value &arr, RelAlgDag &root_dag)
 
RexWindowFunctionOperator::RexWindowBound parse_window_bound (const rapidjson::Value &window_bound_obj, RelAlgDag &root_dag)
 
std::unique_ptr< const
RexSubQuery >
parse_subquery (const rapidjson::Value &expr, RelAlgDag &root_dag)
 
std::unique_ptr< RexOperator > parse_operator (const rapidjson::Value &expr, RelAlgDag &root_dag)
 
std::unique_ptr< RexCase > parse_case (const rapidjson::Value &expr, RelAlgDag &root_dag)
 
std::vector< std::string > strings_from_json_array (const rapidjson::Value &json_str_arr) noexcept
 
std::vector< size_t > indices_from_json_array (const rapidjson::Value &json_idx_arr) noexcept
 
std::unique_ptr< const RexAgg > parse_aggregate_expr (const rapidjson::Value &expr)
 
JoinType to_join_type (const std::string &join_type_name)
 
std::unique_ptr< const RexScalar > disambiguate_rex (const RexScalar *, const RANodeOutput &)
 
std::unique_ptr< const
RexOperator >
disambiguate_operator (const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
 
std::unique_ptr< const RexCase > disambiguate_case (const RexCase *rex_case, const RANodeOutput &ra_output)
 
void bind_project_to_input (RelProject *project_node, const RANodeOutput &input) noexcept
 
void bind_table_func_to_input (RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
 
void bind_inputs (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void handle_query_hint (const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDag &rel_alg_dag) noexcept
 
void compute_node_hash (const std::vector< std::shared_ptr< RelAlgNode >> &nodes)
 
void mark_nops (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void create_compound (std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints) noexcept
 
void coalesce_nodes (std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
 
void propagate_hints_to_new_project (std::shared_ptr< RelProject > prev_node, std::shared_ptr< RelProject > new_node, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
 
void separate_window_function_expressions (std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
 
void add_window_function_pre_project (std::vector< std::shared_ptr< RelAlgNode >> &nodes, const bool always_add_project_if_first_project_is_window_expr, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
 
int64_t get_int_literal_field (const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
 
void check_empty_inputs_field (const rapidjson::Value &node) noexcept
 
const std::pair< const
Catalog_Namespace::Catalog
*, const TableDescriptor * > 
getCatalogAndTableFromScanNode (const rapidjson::Value &scan_ra)
 
std::vector< std::string > getFieldNamesFromScanNode (const rapidjson::Value &scan_ra)
 

Variables

const unsigned FIRST_RA_NODE_ID = 1
 

Typedef Documentation

using anonymous_namespace{RelAlgDag.cpp}::RexInputSet = typedef std::unordered_set<RexInput>

Definition at line 2475 of file RelAlgDag.cpp.

Function Documentation

void anonymous_namespace{RelAlgDag.cpp}::add_window_function_pre_project ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes,
const bool  always_add_project_if_first_project_is_window_expr,
std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &  query_hints 
)

Inserts a simple project before any project containing a window function node. Forces all window function inputs into a single contiguous buffer for centralized processing (e.g. in distributed mode). This is also needed when a window function node is preceded by a filter node, both for correctness (otherwise a window operator will be coalesced with its preceding filter node and be computed over unfiltered results) and for performance, as currently filter nodes that are not coalesced into projects keep all columns from the table as inputs, and hence bring everything into memory. Once the new project has been created, the inputs in the window function project must be rewritten to read from the new project, and to index off the projected exprs in the new project.

Definition at line 2555 of file RelAlgDag.cpp.

References anonymous_namespace{Utm.h}::a, CHECK, CHECK_EQ, RelAggregate::getGroupByCount(), anonymous_namespace{RelAlgDag.cpp}::anonymous_namespace{RelAlgDag.cpp}::need_pushdown_generic_expr(), propagate_hints_to_new_project(), gpu_enabled::sort(), and VLOG.

Referenced by RelAlgDagBuilder::optimizeDag().

2559  {
2560  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2561  size_t project_node_counter{0};
2562  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2563  const auto node = *node_itr;
2564 
2565  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2566  if (!window_func_project_node) {
2567  continue;
2568  }
2569  project_node_counter++;
2570  if (!window_func_project_node->hasWindowFunctionExpr()) {
2571  // this projection node does not have a window function
2572  // expression -- skip to the next node in the DAG.
2573  continue;
2574  }
2575 
2576  const auto prev_node_itr = std::prev(node_itr);
2577  const auto prev_node = *prev_node_itr;
2578  CHECK(prev_node);
2579 
2580  auto filter_node = std::dynamic_pointer_cast<RelFilter>(prev_node);
2581  auto join_node = std::dynamic_pointer_cast<RelJoin>(prev_node);
2582 
2583  auto scan_node = std::dynamic_pointer_cast<RelScan>(prev_node);
2584  const bool has_multi_fragment_scan_input =
2585  (scan_node &&
2586  (scan_node->getNumShards() > 0 || scan_node->getNumFragments() > 1));
2587  auto const [has_generic_expr_in_window_func, needs_expr_pushdown] =
2588  need_pushdown_generic_expr(window_func_project_node.get());
2589 
2590  // We currently add a preceding project node in any of the following conditions:
2591  // 1. always_add_project_if_first_project_is_window_expr = true, which
2592  // we currently only set for distributed, but could also be set to support
2593  // multi-frag window function inputs, either if we can detect that an input table
2594  // is multi-frag up front, or using a retry mechanism like we do for join filter
2595  // push down.
2596  // TODO(todd): Investigate a viable approach for the above.
2597  // 2. Regardless of #1, if the window function project node is preceded by a
2598  // filter node. This is required both for correctness and to avoid pulling
2599  // all source input columns into memory since non-coalesced filter node
2600  // inputs are currently not pruned or eliminated via dead column elimination.
2601  // Note that we expect any filter node followed by a project node to be coalesced
2602  // into a single compound node in RelAlgDag::coalesce_nodes, and that action
2603  // prunes unused inputs.
2604  // TODO(todd): Investigate whether the shotgun filter node issue affects other
2605  // query plans, i.e. filters before joins, and whether there is a more general
2606  // approach to solving this (will still need the preceding project node for
2607  // window functions preceded by filter nodes for correctness though)
2608  // 3. Similar to the above, when the window function project node is preceded
2609  // by a join node.
2610  // 4. when partition by / order by clauses have a general expression instead of
2611  // referencing column
2612 
2613  if (!((always_add_project_if_first_project_is_window_expr &&
2614  project_node_counter == 1) ||
2615  filter_node || join_node || has_multi_fragment_scan_input ||
2616  needs_expr_pushdown)) {
2617  continue;
2618  }
2619 
2620  if (needs_expr_pushdown || join_node) {
2621  // previous logic cannot cover join_node case well, so use the newly introduced
2622  // push-down expression logic to safely add pre_project node before processing
2623  // window function
2624  std::unordered_map<size_t, size_t> expr_offset_cache;
2625  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_new_project;
2626  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_window_project;
2627  std::vector<std::string> fields_for_window_project;
2628  std::vector<std::string> fields_for_new_project;
2629 
2630  // step 0. create new project node with an empty scalar expr to rebind target exprs
2631  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2632  std::vector<std::string> dummy_fields;
2633  auto new_project =
2634  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2635 
2636  // step 1 - 2
2637  PushDownGenericExpressionInWindowFunction visitor(new_project,
2638  scalar_exprs_for_new_project,
2639  fields_for_new_project,
2640  expr_offset_cache);
2641  for (size_t i = 0; i < window_func_project_node->size(); ++i) {
2642  auto projected_target = window_func_project_node->getProjectAt(i);
2643  auto new_projection_target = visitor.visit(projected_target);
2644  scalar_exprs_for_window_project.emplace_back(
2645  std::move(new_projection_target.release()));
2646  }
2647  new_project->setExpressions(scalar_exprs_for_new_project);
2648  new_project->setFields(std::move(fields_for_new_project));
2649  bool has_groupby = false;
2650  auto aggregate = std::dynamic_pointer_cast<RelAggregate>(prev_node);
2651  if (aggregate) {
2652  has_groupby = aggregate->getGroupByCount() > 0;
2653  }
2654  // force rowwise output to prevent computing incorrect query result
2655  if (has_groupby && visitor.hasPartitionExpression()) {
2656  // we currently may compute incorrect result with columnar output when
2657  // 1) the window function has partition expression, and
2658  // 2) a parent node of the window function projection node has group by expression
2659  // todo (yoonmin) : relax this
2660  VLOG(1)
2661  << "Query output overridden to row-wise format due to presence of a window "
2662  "function with partition expression and group-by expression.";
2663  new_project->forceRowwiseOutput();
2664  } else if (has_generic_expr_in_window_func) {
2665  VLOG(1) << "Query output overridden to row-wise format due to presence of a "
2666  "generic expression as an input expression of the window "
2667  "function.";
2668  new_project->forceRowwiseOutput();
2669  } else if (visitor.hasCaseExprAsWindowOperand()) {
2670  VLOG(1)
2671  << "Query output overridden to row-wise format due to presence of a window "
2672  "function with a case statement as its operand.";
2673  new_project->forceRowwiseOutput();
2674  }
2675 
2676  // step 3. finalize
2677  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2678  new_project->setPushedDownWindowExpr();
2679  node_list.insert(node_itr, new_project);
2680  window_func_project_node->replaceInput(prev_node, new_project);
2681  window_func_project_node->setExpressions(scalar_exprs_for_window_project);
2682  } else {
2683  // only push rex_inputs listed in the window function down to a new project node
2684  RexInputSet inputs;
2685  RexInputCollector input_collector;
2686  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2687  auto new_inputs =
2688  input_collector.visit(window_func_project_node->getProjectAt(i));
2689  inputs.insert(new_inputs.begin(), new_inputs.end());
2690  }
2691 
2692  // Note: Technically not required since we are mapping old inputs to new input
2693  // indices, but makes the re-mapping of inputs easier to follow.
2694  std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2695  std::sort(sorted_inputs.begin(),
2696  sorted_inputs.end(),
2697  [](const auto& a, const auto& b) { return a.getIndex() < b.getIndex(); });
2698 
2699  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2700  std::vector<std::string> fields;
2701  std::unordered_map<unsigned, unsigned> old_index_to_new_index;
2702  for (auto& input : sorted_inputs) {
2703  CHECK_EQ(input.getSourceNode(), prev_node.get());
2704  CHECK(old_index_to_new_index
2705  .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2706  .second);
2707  scalar_exprs.emplace_back(input.deepCopy());
2708  fields.emplace_back("");
2709  }
2710 
2711  auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2712  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2713  new_project->setPushedDownWindowExpr();
2714  node_list.insert(node_itr, new_project);
2715  window_func_project_node->replaceInput(
2716  prev_node, new_project, old_index_to_new_index);
2717  }
2718  }
2719  nodes.assign(node_list.begin(), node_list.end());
2720 }
const size_t getGroupByCount() const
Definition: RelAlgDag.h:1342
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::pair< bool, bool > need_pushdown_generic_expr(RelProject const *window_func_project_node)
Definition: RelAlgDag.cpp:2517
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
void propagate_hints_to_new_project(std::shared_ptr< RelProject > prev_node, std::shared_ptr< RelProject > new_node, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2317
constexpr double a
Definition: Utm.h:32
std::unordered_set< RexInput > RexInputSet
Definition: RelAlgDag.cpp:2475
#define CHECK(condition)
Definition: Logger.h:291
#define VLOG(n)
Definition: Logger.h:387

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::bind_inputs ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1498 of file RelAlgDag.cpp.

References bind_project_to_input(), bind_table_func_to_input(), CHECK_EQ, disambiguate_rex(), and get_node_output().

Referenced by RelAlgDagBuilder::build().

1498  {
1499  for (auto ra_node : nodes) {
1500  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
1501  if (filter_node) {
1502  CHECK_EQ(size_t(1), filter_node->inputCount());
1503  auto disambiguated_condition = disambiguate_rex(
1504  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
1505  filter_node->setCondition(disambiguated_condition);
1506  continue;
1507  }
1508  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
1509  if (join_node) {
1510  CHECK_EQ(size_t(2), join_node->inputCount());
1511  auto disambiguated_condition =
1512  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
1513  join_node->setCondition(disambiguated_condition);
1514  continue;
1515  }
1516  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
1517  if (project_node) {
1518  bind_project_to_input(project_node.get(),
1519  get_node_output(project_node->getInput(0)));
1520  continue;
1521  }
1522  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1523  if (table_func_node) {
1524  /*
1525  Collect all inputs from table function input (non-literal)
1526  arguments.
1527  */
1528  RANodeOutput input;
1529  input.reserve(table_func_node->inputCount());
1530  for (size_t i = 0; i < table_func_node->inputCount(); i++) {
1531  auto node_output = get_node_output(table_func_node->getInput(i));
1532  input.insert(input.end(), node_output.begin(), node_output.end());
1533  }
1534  bind_table_func_to_input(table_func_node.get(), input);
1535  }
1536  }
1537 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< RexInput > RANodeOutput
Definition: RelAlgDag.h:3066
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
Definition: RelAlgDag.cpp:1444
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1470
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1484
RANodeOutput get_node_output(const RelAlgNode *ra_node)
Definition: RelAlgDag.cpp:370

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::bind_project_to_input ( RelProject *  project_node,
const RANodeOutput &  input 
)
noexcept

Definition at line 1470 of file RelAlgDag.cpp.

References CHECK_EQ, and disambiguate_rex().

Referenced by bind_inputs(), and create_compound().

1470  {
1471  CHECK_EQ(size_t(1), project_node->inputCount());
1472  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1473  for (size_t i = 0; i < project_node->size(); ++i) {
1474  const auto projected_expr = project_node->getProjectAt(i);
1475  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1476  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1477  } else {
1478  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
1479  }
1480  }
1481  project_node->setExpressions(disambiguated_exprs);
1482 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
size_t size() const override
Definition: RelAlgDag.h:1172
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
Definition: RelAlgDag.cpp:1444
void setExpressions(std::vector< std::unique_ptr< const RexScalar >> &exprs) const
Definition: RelAlgDag.h:1152
const RexScalar * getProjectAtAndRelease(const size_t idx) const
Definition: RelAlgDag.h:1209
const RexScalar * getProjectAt(const size_t idx) const
Definition: RelAlgDag.h:1204
const size_t inputCount() const
Definition: RelAlgDag.h:890

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::bind_table_func_to_input ( RelTableFunction *  table_func_node,
const RANodeOutput &  input 
)
noexcept

Definition at line 1484 of file RelAlgDag.cpp.

References disambiguate_rex().

Referenced by bind_inputs().

1485  {
1486  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1487  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1488  const auto target_expr = table_func_node->getTableFuncInputAt(i);
1489  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1490  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1491  } else {
1492  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
1493  }
1494  }
1495  table_func_node->setTableFuncInputs(std::move(disambiguated_exprs));
1496 }
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
Definition: RelAlgDag.cpp:1444
const RexScalar * getTableFuncInputAtAndRelease(const size_t idx)
Definition: RelAlgDag.h:2295
size_t getTableFuncInputsSize() const
Definition: RelAlgDag.h:2284
void setTableFuncInputs(std::vector< std::unique_ptr< const RexScalar >> &&exprs)
Definition: RelAlgDag.cpp:729
const RexScalar * getTableFuncInputAt(const size_t idx) const
Definition: RelAlgDag.h:2290

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::check_empty_inputs_field ( const rapidjson::Value &  node)
noexcept

Definition at line 2736 of file RelAlgDag.cpp.

References CHECK, and field().

Referenced by details::RelAlgDispatcher::dispatchTableScan().

2736  {
2737  const auto& inputs_json = field(node, "inputs");
2738  CHECK(inputs_json.IsArray() && !inputs_json.Size());
2739 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::coalesce_nodes ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes,
const std::vector< const RelAlgNode * > &  left_deep_joins,
std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &  query_hints 
)

Definition at line 1991 of file RelAlgDag.cpp.

References anonymous_namespace{RelAlgDag.cpp}::RANodeIterator::allVisited(), CHECK, CHECK_GE, create_compound(), anonymous_namespace{RelAlgDag.cpp}::anonymous_namespace{RelAlgDag.cpp}::input_can_be_coalesced(), and gpu_enabled::swap().

Referenced by RelAlgDagBuilder::optimizeDag().

1995  {
1996  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
1997  std::vector<size_t> crt_pattern;
1998  CoalesceState crt_state{CoalesceState::Initial};
1999 
2000  auto reset_state = [&crt_pattern, &crt_state]() {
2001  crt_state = CoalesceState::Initial;
2002  std::vector<size_t>().swap(crt_pattern);
2003  };
2004 
2005  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
2006  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
2007  switch (crt_state) {
2008  case CoalesceState::Initial: {
2009  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
2010  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
2011  left_deep_joins.end()) {
2012  crt_pattern.push_back(size_t(nodeIt));
2013  crt_state = CoalesceState::Filter;
2014  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2015  } else if (auto project_node =
2016  std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2017  if (project_node->hasWindowFunctionExpr()) {
2018  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2019  } else {
2020  crt_pattern.push_back(size_t(nodeIt));
2021  crt_state = CoalesceState::FirstProject;
2022  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2023  }
2024  } else {
2025  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2026  }
2027  break;
2028  }
2029  case CoalesceState::Filter: {
2030  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2031  // Given we now add preceding projects for all window functions following
2032  // RelFilter nodes, the following should never occur
2033  CHECK(!project_node->hasWindowFunctionExpr());
2034  crt_pattern.push_back(size_t(nodeIt));
2035  crt_state = CoalesceState::FirstProject;
2036  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2037  } else {
2038  reset_state();
2039  }
2040  break;
2041  }
2042  case CoalesceState::FirstProject: {
2043  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
2044  crt_pattern.push_back(size_t(nodeIt));
2045  crt_state = CoalesceState::Aggregate;
2046  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2047  } else {
2048  if (crt_pattern.size() >= 2) {
2049  create_compound(nodes, crt_pattern, query_hints);
2050  }
2051  reset_state();
2052  }
2053  break;
2054  }
2055  case CoalesceState::Aggregate: {
2056  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2057  if (!project_node->hasWindowFunctionExpr()) {
2058  // TODO(adb): overloading the simple project terminology again here
2059  bool is_simple_project{true};
2060  for (size_t i = 0; i < project_node->size(); i++) {
2061  const auto scalar_rex = project_node->getProjectAt(i);
2062  // If the top level scalar rex is an input node, we can bypass the visitor
2063  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
2064  if (!input_can_be_coalesced(
2065  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
2066  is_simple_project = false;
2067  break;
2068  }
2069  continue;
2070  }
2071  CoalesceSecondaryProjectVisitor visitor;
2072  if (!visitor.visit(project_node->getProjectAt(i))) {
2073  is_simple_project = false;
2074  break;
2075  }
2076  }
2077  if (is_simple_project) {
2078  crt_pattern.push_back(size_t(nodeIt));
2079  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2080  }
2081  }
2082  }
2083  CHECK_GE(crt_pattern.size(), size_t(2));
2084  create_compound(nodes, crt_pattern, query_hints);
2085  reset_state();
2086  break;
2087  }
2088  default:
2089  CHECK(false);
2090  }
2091  }
2092  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
2093  if (crt_pattern.size() >= 2) {
2094  create_compound(nodes, crt_pattern, query_hints);
2095  }
2096  CHECK(!crt_pattern.empty());
2097  }
2098 }
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
Definition: RelAlgDag.cpp:1891
#define CHECK_GE(x, y)
Definition: Logger.h:306
#define CHECK(condition)
Definition: Logger.h:291
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints) noexcept
Definition: RelAlgDag.cpp:1651
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::compute_node_hash ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)

Definition at line 1577 of file RelAlgDag.cpp.

References CHECK_NE.

Referenced by RelAlgDagBuilder::optimizeDag().

1577  {
1578  // Compute each rel node's hash value in advance so that the value does not
1579  // depend on which caller invokes toHash() first.
1580  // Specifically, we manipulate the logical query plan before retrieving the query
1581  // step sequence, but a hash value is cached once it has been computed, so there is
1582  // no way to update it after the plan has changed. Starting from the top node, we
1583  // therefore compute the hash values in a top-down manner.
1584  std::for_each(
1585  nodes.rbegin(), nodes.rend(), [](const std::shared_ptr<RelAlgNode>& node) {
1586  auto node_hash = node->toHash();
1587  CHECK_NE(node_hash, static_cast<size_t>(0));
1588  });
1589 }
#define CHECK_NE(x, y)
Definition: Logger.h:302

+ Here is the caller graph for this function:

std::vector<std::unique_ptr<const RexAgg> > anonymous_namespace{RelAlgDag.cpp}::copyAggExprs ( std::vector< std::unique_ptr< const RexAgg >> const &  agg_exprs)

Definition at line 616 of file RelAlgDag.cpp.

617  {
618  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
619  agg_exprs_copy.reserve(agg_exprs.size());
620  for (auto const& agg_expr : agg_exprs) {
621  agg_exprs_copy.push_back(agg_expr->deepCopy());
622  }
623  return agg_exprs_copy;
624 }
std::vector<std::unique_ptr<const RexScalar> > anonymous_namespace{RelAlgDag.cpp}::copyRexScalars ( std::vector< std::unique_ptr< const RexScalar >> const &  scalar_sources)

Definition at line 626 of file RelAlgDag.cpp.

References RexVisitorBase< T >::visit().

627  {
628  std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
629  scalar_sources_copy.reserve(scalar_sources.size());
630  RexDeepCopyVisitor copier;
631  for (auto const& scalar_source : scalar_sources) {
632  scalar_sources_copy.push_back(copier.visit(scalar_source.get()));
633  }
634  return scalar_sources_copy;
635 }
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27

+ Here is the call graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::create_compound ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes,
const std::vector< size_t > &  pattern,
std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &  query_hints 
)
noexcept

Definition at line 1651 of file RelAlgDag.cpp.

References bind_project_to_input(), CHECK, CHECK_EQ, CHECK_GE, CHECK_LE, CHECK_LT, RegisteredQueryHint::defaults(), get_node_output(), RelProject::getFields(), anonymous_namespace{RelAlgExecutor.cpp}::is_agg(), node_id(), anonymous_namespace{RelAlgDag.cpp}::anonymous_namespace{RelAlgDag.cpp}::reproject_targets(), and run_benchmark_import::result.

Referenced by coalesce_nodes().

1655  {
1656  CHECK_GE(pattern.size(), size_t(2));
1657  CHECK_LE(pattern.size(), size_t(4));
1658 
1659  std::unique_ptr<const RexScalar> filter_rex;
1660  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1661  size_t groupby_count{0};
1662  std::vector<std::string> fields;
1663  std::vector<const RexAgg*> agg_exprs;
1664  std::vector<const Rex*> target_exprs;
1665  bool first_project{true};
1666  bool is_agg{false};
1667  RelAlgNode* last_node{nullptr};
1668 
1669  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1670  size_t node_hash{0};
1671  unsigned node_id{0};
1672  bool hint_registered{false};
1673  RegisteredQueryHint registered_query_hint = RegisteredQueryHint::defaults();
1674  for (const auto node_idx : pattern) {
1675  const auto ra_node = nodes[node_idx];
1676  auto registered_query_hint_map_it = query_hints.find(ra_node->toHash());
1677  if (registered_query_hint_map_it != query_hints.end()) {
1678  auto& registered_query_hint_map = registered_query_hint_map_it->second;
1679  auto registered_query_hint_it = registered_query_hint_map.find(ra_node->getId());
1680  if (registered_query_hint_it != registered_query_hint_map.end()) {
1681  hint_registered = true;
1682  node_hash = registered_query_hint_map_it->first;
1683  node_id = registered_query_hint_it->first;
1684  registered_query_hint = registered_query_hint_it->second;
1685  }
1686  }
1687  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1688  if (ra_filter) {
1689  CHECK(!filter_rex);
1690  filter_rex.reset(ra_filter->getAndReleaseCondition());
1691  CHECK(filter_rex);
1692  last_node = ra_node.get();
1693  continue;
1694  }
1695  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1696  if (ra_project) {
1697  fields = ra_project->getFields();
1698  manipulation_target = ra_project;
1699 
1700  if (first_project) {
1701  CHECK_EQ(size_t(1), ra_project->inputCount());
1702  // Rebind the input of the project to the input of the filter itself
1703  // since we know that we'll evaluate the filter on the fly, with no
1704  // intermediate buffer.
1705  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1706  if (filter_input) {
1707  CHECK_EQ(size_t(1), filter_input->inputCount());
1708  bind_project_to_input(ra_project.get(),
1709  get_node_output(filter_input->getInput(0)));
1710  }
1711  scalar_sources = ra_project->getExpressionsAndRelease();
1712  for (const auto& scalar_expr : scalar_sources) {
1713  target_exprs.push_back(scalar_expr.get());
1714  }
1715  first_project = false;
1716  } else {
1717  if (ra_project->isSimple()) {
1718  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1719  } else {
1720  // TODO(adb): This is essentially a more general case of simple project, we
1721  // could likely merge the two
1722  std::vector<const Rex*> result;
1723  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1724  for (size_t i = 0; i < ra_project->size(); ++i) {
1725  const auto rex = ra_project->getProjectAt(i);
1726  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1727  const auto index = rex_input->getIndex();
1728  CHECK_LT(index, target_exprs.size());
1729  result.push_back(target_exprs[index]);
1730  } else {
1731  scalar_sources.push_back(visitor.visit(rex));
1732  result.push_back(scalar_sources.back().get());
1733  }
1734  }
1735  target_exprs = result;
1736  }
1737  }
1738  last_node = ra_node.get();
1739  continue;
1740  }
1741  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1742  if (ra_aggregate) {
1743  is_agg = true;
1744  fields = ra_aggregate->getFields();
1745  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1746  groupby_count = ra_aggregate->getGroupByCount();
1747  decltype(target_exprs){}.swap(target_exprs);
1748  CHECK_LE(groupby_count, scalar_sources.size());
1749  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1750  const auto rex_ref = new RexRef(group_idx + 1);
1751  target_exprs.push_back(rex_ref);
1752  scalar_sources.emplace_back(rex_ref);
1753  }
1754  for (const auto rex_agg : agg_exprs) {
1755  target_exprs.push_back(rex_agg);
1756  }
1757  last_node = ra_node.get();
1758  continue;
1759  }
1760  }
1761 
1762  auto compound_node =
1763  std::make_shared<RelCompound>(filter_rex,
1764  target_exprs,
1765  groupby_count,
1766  agg_exprs,
1767  fields,
1768  scalar_sources,
1769  is_agg,
1770  manipulation_target->isUpdateViaSelect(),
1771  manipulation_target->isDeleteViaSelect(),
1772  manipulation_target->isVarlenUpdateRequired(),
1773  manipulation_target->getModifiedTableDescriptor(),
1774  manipulation_target->getTargetColumns(),
1775  manipulation_target->getModifiedTableCatalog());
1776  auto old_node = nodes[pattern.back()];
1777  nodes[pattern.back()] = compound_node;
1778  auto first_node = nodes[pattern.front()];
1779  CHECK_EQ(size_t(1), first_node->inputCount());
1780  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1781  if (hint_registered) {
1782  // pass the registered hint from the origin node to newly created compound node
1783  // where it is coalesced
1784  auto registered_query_hint_map_it = query_hints.find(node_hash);
1785  CHECK(registered_query_hint_map_it != query_hints.end());
1786  auto registered_query_hint_map = registered_query_hint_map_it->second;
1787  if (registered_query_hint_map.size() > 1) {
1788  registered_query_hint_map.erase(node_id);
1789  } else {
1790  CHECK_EQ(registered_query_hint_map.size(), static_cast<size_t>(1));
1791  query_hints.erase(node_hash);
1792  }
1793  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
1794  hint_map.emplace(compound_node->getId(), registered_query_hint);
1795  query_hints.emplace(compound_node->toHash(), hint_map);
1796  }
1797  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1798  nodes[pattern[i]].reset();
1799  }
1800  for (auto node : nodes) {
1801  if (!node) {
1802  continue;
1803  }
1804  node->replaceInput(old_node, compound_node);
1805  }
1806 }
bool is_agg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define CHECK_GE(x, y)
Definition: Logger.h:306
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1470
#define CHECK_LT(x, y)
Definition: Logger.h:303
static RegisteredQueryHint defaults()
Definition: QueryHint.h:329
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
Definition: RelAlgDag.cpp:1608
#define CHECK_LE(x, y)
Definition: Logger.h:304
const std::vector< std::string > & getFields() const
Definition: RelAlgDag.h:1218
#define CHECK(condition)
Definition: Logger.h:291
unsigned node_id(const rapidjson::Value &ra_node) noexcept
Definition: RelAlgDag.cpp:957
RANodeOutput get_node_output(const RelAlgNode *ra_node)
Definition: RelAlgDag.cpp:370

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<const RexCase> anonymous_namespace{RelAlgDag.cpp}::disambiguate_case ( const RexCase rex_case,
const RANodeOutput ra_output 
)

Definition at line 1423 of file RelAlgDag.cpp.

References RexCase::branchCount(), disambiguate_rex(), RexCase::getElse(), RexCase::getThen(), and RexCase::getWhen().

Referenced by disambiguate_rex().

1424  {
1425  std::vector<
1426  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1427  disambiguated_expr_pair_list;
1428  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1429  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
1430  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
1431  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1432  std::move(disambiguated_then));
1433  }
1434  std::unique_ptr<const RexScalar> disambiguated_else{
1435  disambiguate_rex(rex_case->getElse(), ra_output)};
1436  return std::unique_ptr<const RexCase>(
1437  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1438 }
const RexScalar * getThen(const size_t idx) const
Definition: RelAlgDag.h:443
const RexScalar * getElse() const
Definition: RelAlgDag.h:448
const RexScalar * getWhen(const size_t idx) const
Definition: RelAlgDag.h:438
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
Definition: RelAlgDag.cpp:1444
size_t branchCount() const
Definition: RelAlgDag.h:436

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<const RexOperator> anonymous_namespace{RelAlgDag.cpp}::disambiguate_operator ( const RexOperator rex_operator,
const RANodeOutput ra_output 
)
noexcept

Definition at line 1388 of file RelAlgDag.cpp.

References disambiguate_rex(), and RexWindowFunctionOperator::getPartitionKeys().

Referenced by disambiguate_rex().

1390  {
1391  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1392  for (size_t i = 0; i < rex_operator->size(); ++i) {
1393  auto operand = rex_operator->getOperand(i);
1394  if (dynamic_cast<const RexSubQuery*>(operand)) {
1395  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1396  } else {
1397  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
1398  }
1399  }
1400  const auto rex_window_function_operator =
1401  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1402  if (rex_window_function_operator) {
1403  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
1404  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1405  for (const auto& partition_key : partition_keys) {
1406  disambiguated_partition_keys.emplace_back(
1407  disambiguate_rex(partition_key.get(), ra_output));
1408  }
1409  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1410  const auto& order_keys = rex_window_function_operator->getOrderKeys();
1411  for (const auto& order_key : order_keys) {
1412  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
1413  }
1414  return rex_window_function_operator->disambiguatedOperands(
1415  disambiguated_operands,
1416  disambiguated_partition_keys,
1417  disambiguated_order_keys,
1418  rex_window_function_operator->getCollation());
1419  }
1420  return rex_operator->getDisambiguated(disambiguated_operands);
1421 }
size_t size() const
Definition: RelAlgDag.h:270
const RexScalar * getOperand(const size_t idx) const
Definition: RelAlgDag.h:272
const RexScalar * getOperandAndRelease(const size_t idx) const
Definition: RelAlgDag.h:277
virtual std::unique_ptr< const RexOperator > getDisambiguated(std::vector< std::unique_ptr< const RexScalar >> &operands) const
Definition: RelAlgDag.h:265
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
Definition: RelAlgDag.cpp:1444
const ConstRexScalarPtrVector & getPartitionKeys() const
Definition: RelAlgDag.h:627

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< const RexScalar > anonymous_namespace{RelAlgDag.cpp}::disambiguate_rex ( const RexScalar rex_scalar,
const RANodeOutput ra_output 
)

Definition at line 1444 of file RelAlgDag.cpp.

References CHECK_LT, disambiguate_case(), and disambiguate_operator().

Referenced by bind_inputs(), bind_project_to_input(), bind_table_func_to_input(), disambiguate_case(), and disambiguate_operator().

1445  {
1446  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
1447  if (rex_abstract_input) {
1448  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1449  return std::unique_ptr<const RexInput>(
1450  new RexInput(ra_output[rex_abstract_input->getIndex()]));
1451  }
1452  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
1453  if (rex_operator) {
1454  return disambiguate_operator(rex_operator, ra_output);
1455  }
1456  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
1457  if (rex_case) {
1458  return disambiguate_case(rex_case, ra_output);
1459  }
1460  if (auto const rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar)) {
1461  return rex_literal->deepCopy();
1462  } else if (auto const rex_subquery = dynamic_cast<const RexSubQuery*>(rex_scalar)) {
1463  return rex_subquery->deepCopy();
1464  } else {
1465  throw QueryNotSupported("Unable to disambiguate expression of type " +
1466  std::string(typeid(*rex_scalar).name()));
1467  }
1468 }
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
Definition: RelAlgDag.cpp:1388
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
Definition: RelAlgDag.cpp:1423
#define CHECK_LT(x, y)
Definition: Logger.h:303

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set<std::pair<const RelAlgNode*, int> > anonymous_namespace{RelAlgDag.cpp}::get_equiv_cols ( const RelAlgNode node,
const size_t  which_col 
)

Definition at line 763 of file RelAlgDag.cpp.

References CHECK, and CHECK_EQ.

Referenced by RelSort::hasEquivCollationOf().

764  {
765  std::set<std::pair<const RelAlgNode*, int>> work_set;
766  auto walker = node;
767  auto curr_col = which_col;
768  while (true) {
769  work_set.insert(std::make_pair(walker, curr_col));
770  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
771  break;
772  }
773  CHECK_EQ(size_t(1), walker->inputCount());
774  auto only_source = walker->getInput(0);
775  if (auto project = dynamic_cast<const RelProject*>(walker)) {
776  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
777  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
778  if (join_source) {
779  CHECK_EQ(size_t(2), join_source->inputCount());
780  auto lhs = join_source->getInput(0);
781  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
782  join_source->getInput(1) == input->getSourceNode());
783  } else {
784  CHECK_EQ(input->getSourceNode(), only_source);
785  }
786  curr_col = input->getIndex();
787  } else {
788  break;
789  }
790  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
791  if (curr_col >= aggregate->getGroupByCount()) {
792  break;
793  }
794  }
795  walker = only_source;
796  }
797  return work_set;
798 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

int64_t anonymous_namespace{RelAlgDag.cpp}::get_int_literal_field ( const rapidjson::Value &  obj,
const char  field[],
const int64_t  default_val 
)
noexcept

Definition at line 2722 of file RelAlgDag.cpp.

References CHECK_EQ, field(), kDECIMAL, and parse_literal().

Referenced by details::RelAlgDispatcher::dispatchSort().

2724  {
2725  const auto it = obj.FindMember(field);
2726  if (it == obj.MemberEnd()) {
2727  return default_val;
2728  }
2729  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
2730  CHECK_EQ(kDECIMAL, lit->getType());
2731  CHECK_EQ(unsigned(0), lit->getScale());
2732  CHECK_EQ(unsigned(0), lit->getTargetScale());
2733  return lit->getVal<int64_t>();
2734 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:979

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<bool> anonymous_namespace{RelAlgDag.cpp}::get_notnulls ( std::vector< TargetMetaInfo > const &  tmis0)

Definition at line 882 of file RelAlgDag.cpp.

Referenced by RelLogicalUnion::getCompatibleMetainfoTypes().

882  {
883  std::vector<bool> notnulls(tmis0.size());
884  for (size_t j = 0; j < tmis0.size(); ++j) {
885  notnulls[j] = tmis0[j].get_type_info().get_notnull();
886  }
887  return notnulls;
888 }

+ Here is the caller graph for this function:

const std::pair<const Catalog_Namespace::Catalog*, const TableDescriptor*> anonymous_namespace{RelAlgDag.cpp}::getCatalogAndTableFromScanNode ( const rapidjson::Value &  scan_ra)

Definition at line 2742 of file RelAlgDag.cpp.

References cat(), CHECK, CHECK_EQ, field(), Catalog_Namespace::SysCatalog::getCatalog(), and Catalog_Namespace::SysCatalog::instance().

Referenced by details::RelAlgDispatcher::dispatchModify(), and details::RelAlgDispatcher::dispatchTableScan().

2742  {
2743  const auto& table_json = field(scan_ra, "table");
2744  CHECK(table_json.IsArray());
2745  CHECK_EQ(unsigned(2), table_json.Size());
2746  const auto cat =
2747  Catalog_Namespace::SysCatalog::instance().getCatalog(table_json[0].GetString());
2748  CHECK(cat);
2749  const auto td = cat->getMetadataForTable(table_json[1].GetString());
2750  CHECK(td);
2751  return {cat.get(), td};
2752 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::string cat(Ts &&...args)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<std::string> anonymous_namespace{RelAlgDag.cpp}::getFieldNamesFromScanNode ( const rapidjson::Value &  scan_ra)

Definition at line 2754 of file RelAlgDag.cpp.

References field(), and strings_from_json_array().

Referenced by details::RelAlgDispatcher::dispatchTableScan().

2754  {
2755  const auto& fields_json = field(scan_ra, "fieldNames");
2756  return strings_from_json_array(fields_json);
2757 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
Definition: RelAlgDag.cpp:1305

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::handle_query_hint ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes,
RelAlgDag rel_alg_dag 
)
noexcept

Definition at line 1539 of file RelAlgDag.cpp.

References RelProject::getDeliveredHints(), RelAggregate::getDeliveredHints(), and RelCompound::getDeliveredHints().

Referenced by RelAlgDagBuilder::optimizeDag().

1540  {
1541  // query hint is delivered by the above three nodes
1542  // when a query block has top-sort node, a hint is registered to
1543  // one of the node which locates at the nearest from the sort node
1544  RegisteredQueryHint global_query_hint;
1545  for (auto node : nodes) {
1546  Hints* hint_delivered = nullptr;
1547  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1548  if (agg_node) {
1549  if (agg_node->hasDeliveredHint()) {
1550  hint_delivered = agg_node->getDeliveredHints();
1551  }
1552  }
1553  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
1554  if (project_node) {
1555  if (project_node->hasDeliveredHint()) {
1556  hint_delivered = project_node->getDeliveredHints();
1557  }
1558  }
1559  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
1560  if (compound_node) {
1561  if (compound_node->hasDeliveredHint()) {
1562  hint_delivered = compound_node->getDeliveredHints();
1563  }
1564  }
1565  if (hint_delivered && !hint_delivered->empty()) {
1566  rel_alg_dag.registerQueryHints(node, hint_delivered, global_query_hint);
1567  }
1568  }
1569  // the current rel_alg_dag may contain global query hints from the subquery
1570  // so we combine the current global hint we collected with the original one together
1571  // to propagate global query hints correctly
1572  const auto existing_global_query_hints = rel_alg_dag.getGlobalHints();
1573  const auto new_global_query_hints = existing_global_query_hints || global_query_hint;
1574  rel_alg_dag.setGlobalQueryHints(new_global_query_hints);
1575 }
void setGlobalQueryHints(const RegisteredQueryHint &global_hints)
Definition: RelAlgDag.h:2974
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1292
const RegisteredQueryHint & getGlobalHints() const
Definition: RelAlgDag.h:2972
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1439
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1911
std::unordered_map< QueryHint, ExplainedQueryHint > Hints
Definition: QueryHint.h:355
void registerQueryHints(std::shared_ptr< RelAlgNode > node, Hints *hints_delivered, RegisteredQueryHint &global_query_hint)
Definition: RelAlgDag.h:2542

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<size_t> anonymous_namespace{RelAlgDag.cpp}::indices_from_json_array ( const rapidjson::Value &  json_idx_arr)
noexcept

Definition at line 1317 of file RelAlgDag.cpp.

References CHECK, and CHECK_GE.

Referenced by details::RelAlgDispatcher::dispatchAggregate(), and parse_aggregate_expr().

1318  {
1319  CHECK(json_idx_arr.IsArray());
1320  std::vector<size_t> indices;
1321  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
1322  ++json_idx_arr_it) {
1323  CHECK(json_idx_arr_it->IsInt());
1324  CHECK_GE(json_idx_arr_it->GetInt(), 0);
1325  indices.emplace_back(json_idx_arr_it->GetInt());
1326  }
1327  return indices;
1328 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

bool anonymous_namespace{RelAlgDag.cpp}::isRenamedInput ( const RelAlgNode node,
const size_t  index,
const std::string &  new_name 
)

Definition at line 469 of file RelAlgDag.cpp.

References CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, RelAlgNode::getInput(), join(), and RelAlgNode::size().

Referenced by RelProject::isRenaming().

471  {
472  CHECK_LT(index, node->size());
473  if (auto join = dynamic_cast<const RelJoin*>(node)) {
474  CHECK_EQ(size_t(2), join->inputCount());
475  const auto lhs_size = join->getInput(0)->size();
476  if (index < lhs_size) {
477  return isRenamedInput(join->getInput(0), index, new_name);
478  }
479  CHECK_GE(index, lhs_size);
480  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
481  }
482 
483  if (auto scan = dynamic_cast<const RelScan*>(node)) {
484  return new_name != scan->getFieldName(index);
485  }
486 
487  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
488  return new_name != aggregate->getFieldName(index);
489  }
490 
491  if (auto project = dynamic_cast<const RelProject*>(node)) {
492  return new_name != project->getFieldName(index);
493  }
494 
495  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
496  return new_name != table_func->getFieldName(index);
497  }
498 
499  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
500  const auto& tuple_type = logical_values->getTupleType();
501  CHECK_LT(index, tuple_type.size());
502  return new_name != tuple_type[index].get_resname();
503  }
504 
505  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
506  dynamic_cast<const RelLogicalUnion*>(node));
507  return isRenamedInput(node->getInput(0), index, new_name);
508 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::string join(T const &container, std::string const &delim)
#define CHECK_GE(x, y)
Definition: Logger.h:306
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
Definition: RelAlgDag.cpp:469
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:892
#define CHECK_LT(x, y)
Definition: Logger.h:303
virtual size_t size() const =0
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{RelAlgDag.cpp}::json_node_to_string ( const rapidjson::Value &  node)
noexcept

Definition at line 962 of file RelAlgDag.cpp.

Referenced by details::RelAlgDispatcher::dispatchModify(), parse_scalar_expr(), and parse_type().

962  {
963  rapidjson::StringBuffer buffer;
964  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
965  node.Accept(writer);
966  return buffer.GetString();
967 }

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::mark_nops ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1591 of file RelAlgDag.cpp.

References CHECK_EQ, and RelAlgNode::markAsNop().

Referenced by RelAlgDagBuilder::optimizeDag().

1591  {
1592  for (auto node : nodes) {
1593  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1594  if (!agg_node || agg_node->getAggExprsCount()) {
1595  continue;
1596  }
1597  CHECK_EQ(size_t(1), node->inputCount());
1598  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1599  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1600  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1601  agg_node->markAsNop();
1602  }
1603  }
1604 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void markAsNop()
Definition: RelAlgDag.h:932

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<RexInput> anonymous_namespace{RelAlgDag.cpp}::n_outputs ( const RelAlgNode node,
const size_t  n 
)

Definition at line 95 of file RelAlgDag.cpp.

References anonymous_namespace{Utm.h}::n.

Referenced by get_node_output().

95  {
96  std::vector<RexInput> outputs;
97  outputs.reserve(n);
98  for (size_t i = 0; i < n; ++i) {
99  outputs.emplace_back(node, i);
100  }
101  return outputs;
102 }
constexpr double n
Definition: Utm.h:38

+ Here is the caller graph for this function:

unsigned anonymous_namespace{RelAlgDag.cpp}::node_id ( const rapidjson::Value &  ra_node)
noexcept

Definition at line 957 of file RelAlgDag.cpp.

References field(), and json_str().

Referenced by create_compound(), details::RelAlgDispatcher::dispatchFilter(), RaExecutionSequence::extractQueryStepSkippingInfo(), QueryPlanDagExtractor::handleTranslatedJoin(), details::RelAlgDispatcher::prev(), and details::RelAlgDispatcher::run().

957  {
958  const auto& id = field(ra_node, "id");
959  return std::stoi(json_str(id));
960 }
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<RexAbstractInput> anonymous_namespace{RelAlgDag.cpp}::parse_abstract_input ( const rapidjson::Value &  expr)
noexcept

Definition at line 973 of file RelAlgDag.cpp.

References field(), and json_i64().

Referenced by parse_scalar_expr().

974  {
975  const auto& input = field(expr, "input");
976  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
977 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<const RexAgg> anonymous_namespace{RelAlgDag.cpp}::parse_aggregate_expr ( const rapidjson::Value &  expr)

Definition at line 1330 of file RelAlgDag.cpp.

References field(), indices_from_json_array(), logger::INFO, json_bool(), json_str(), LOG, parse_type(), and to_agg_kind().

Referenced by details::RelAlgDispatcher::dispatchAggregate().

1330  {
1331  const auto agg_str = json_str(field(expr, "agg"));
1332  if (agg_str == "APPROX_QUANTILE") {
1333  LOG(INFO) << "APPROX_QUANTILE is deprecated. Please use APPROX_PERCENTILE instead.";
1334  }
1335  const auto agg = to_agg_kind(agg_str);
1336  const auto distinct = json_bool(field(expr, "distinct"));
1337  const auto agg_ti = parse_type(field(expr, "type"));
1338  const auto operands = indices_from_json_array(field(expr, "operands"));
1339  bool const allow_multiple_args =
1340  shared::is_any<kAPPROX_COUNT_DISTINCT, kAPPROX_QUANTILE, kSUM_IF>(agg);
1341  if (operands.size() > 1 && (operands.size() != 2 || !allow_multiple_args)) {
1342  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
1343  }
1344  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
1345 }
SQLAgg to_agg_kind(const std::string &agg_name)
#define LOG(tag)
Definition: Logger.h:285
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
Definition: RelAlgDag.cpp:1317
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
Definition: RelAlgDag.cpp:1064

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<RexCase> anonymous_namespace{RelAlgDag.cpp}::parse_case ( const rapidjson::Value &  expr,
RelAlgDag root_dag 
)

Definition at line 1285 of file RelAlgDag.cpp.

References CHECK, CHECK_GE, field(), and parse_scalar_expr().

Referenced by parse_scalar_expr().

1285  {
1286  const auto& operands = field(expr, "operands");
1287  CHECK(operands.IsArray());
1288  CHECK_GE(operands.Size(), unsigned(2));
1289  std::unique_ptr<const RexScalar> else_expr;
1290  std::vector<
1291  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1292  expr_pair_list;
1293  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
1294  auto when_expr = parse_scalar_expr(*operands_it++, root_dag);
1295  if (operands_it == operands.End()) {
1296  else_expr = std::move(when_expr);
1297  break;
1298  }
1299  auto then_expr = parse_scalar_expr(*operands_it++, root_dag);
1300  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
1301  }
1302  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
1303 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
#define CHECK(condition)
Definition: Logger.h:291
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1347

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<std::unique_ptr<const RexScalar> > anonymous_namespace{RelAlgDag.cpp}::parse_expr_array ( const rapidjson::Value &  arr,
RelAlgDag &  root_dag 
)

Definition at line 1083 of file RelAlgDag.cpp.

References parse_scalar_expr().

Referenced by parse_operator().

1085  {
1086  std::vector<std::unique_ptr<const RexScalar>> exprs;
1087  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1088  exprs.emplace_back(parse_scalar_expr(*it, root_dag));
1089  }
1090  return exprs;
1091 }
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1347

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<RexLiteral> anonymous_namespace{RelAlgDag.cpp}::parse_literal ( const rapidjson::Value &  expr)

Definition at line 979 of file RelAlgDag.cpp.

References CHECK, field(), json_bool(), json_double(), json_i64(), json_str(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kNULLT, kTEXT, kTIME, kTIMESTAMP, to_sql_type(), run_benchmark_import::type, and UNREACHABLE.

Referenced by details::RelAlgDispatcher::dispatchLogicalValues(), get_int_literal_field(), and parse_scalar_expr().

979  {
980  CHECK(expr.IsObject());
981  const auto& literal = field(expr, "literal");
982  const auto type = to_sql_type(json_str(field(expr, "type")));
983  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
984  const auto scale = json_i64(field(expr, "scale"));
985  const auto precision = json_i64(field(expr, "precision"));
986  const auto type_scale = json_i64(field(expr, "type_scale"));
987  const auto type_precision = json_i64(field(expr, "type_precision"));
988  if (literal.IsNull()) {
989  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
990  }
991  switch (type) {
992  case kINT:
993  case kBIGINT:
994  case kDECIMAL:
995  case kINTERVAL_DAY_TIME:
996  case kINTERVAL_YEAR_MONTH:
997  case kTIME:
998  case kTIMESTAMP:
999  case kDATE:
1000  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
1001  type,
1002  target_type,
1003  scale,
1004  precision,
1005  type_scale,
1006  type_precision));
1007  case kDOUBLE: {
1008  if (literal.IsDouble()) {
1009  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
1010  type,
1011  target_type,
1012  scale,
1013  precision,
1014  type_scale,
1015  type_precision));
1016  } else if (literal.IsInt64()) {
1017  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetInt64()),
1018  type,
1019  target_type,
1020  scale,
1021  precision,
1022  type_scale,
1023  type_precision);
1024 
1025  } else if (literal.IsUint64()) {
1026  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetUint64()),
1027  type,
1028  target_type,
1029  scale,
1030  precision,
1031  type_scale,
1032  type_precision);
1033  }
1034  UNREACHABLE() << "Unhandled type: " << literal.GetType();
1035  }
1036  case kTEXT:
1037  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
1038  type,
1039  target_type,
1040  scale,
1041  precision,
1042  type_scale,
1043  type_precision));
1044  case kBOOLEAN:
1045  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
1046  type,
1047  target_type,
1048  scale,
1049  precision,
1050  type_scale,
1051  type_precision));
1052  case kNULLT:
1053  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
1054  default:
1055  CHECK(false);
1056  }
1057  CHECK(false);
1058  return nullptr;
1059 }
SQLTypes to_sql_type(const std::string &type_name)
Definition: sqltypes.h:66
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
#define UNREACHABLE()
Definition: Logger.h:337
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:54
Definition: sqltypes.h:69
Definition: sqltypes.h:70
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqltypes.h:62

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

NullSortedPosition anonymous_namespace{RelAlgDag.cpp}::parse_nulls_position ( const rapidjson::Value &  collation)

Definition at line 1179 of file RelAlgDag.cpp.

References field(), First, json_str(), and Last.

Referenced by details::RelAlgDispatcher::dispatchSort(), and parse_window_order_collation().

1179  {
1180  return json_str(field(collation, "nulls")) == std::string("FIRST")
1181  ? NullSortedPosition::First
1182  : NullSortedPosition::Last;
1183 }
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<RexOperator> anonymous_namespace{RelAlgDag.cpp}::parse_operator ( const rapidjson::Value &  expr,
RelAlgDag &  root_dag 
)

Definition at line 1243 of file RelAlgDag.cpp.

References CHECK, field(), json_bool(), json_str(), kFUNCTION, kIN, gpu_enabled::lower_bound(), parse_expr_array(), parse_subquery(), parse_type(), parse_window_bound(), parse_window_function_kind(), parse_window_order_collation(), parse_window_order_exprs(), to_sql_op(), and gpu_enabled::upper_bound().

Referenced by parse_scalar_expr().

1244  {
1245  const auto op_name = json_str(field(expr, "op"));
1246  const bool is_quantifier =
1247  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
1248  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
1249  const auto& operators_json_arr = field(expr, "operands");
1250  CHECK(operators_json_arr.IsArray());
1251  auto operands = parse_expr_array(operators_json_arr, root_dag);
1252  const auto type_it = expr.FindMember("type");
1253  CHECK(type_it != expr.MemberEnd());
1254  auto ti = parse_type(type_it->value);
1255  if (op == kIN && expr.HasMember("subquery")) {
1256  auto subquery = parse_subquery(expr, root_dag);
1257  operands.emplace_back(std::move(subquery));
1258  }
1259  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
1260  const auto& partition_keys_arr = field(expr, "partition_keys");
1261  auto partition_keys = parse_expr_array(partition_keys_arr, root_dag);
1262  const auto& order_keys_arr = field(expr, "order_keys");
1263  auto order_keys = parse_window_order_exprs(order_keys_arr, root_dag);
1264  const auto collation = parse_window_order_collation(order_keys_arr, root_dag);
1265  const auto kind = parse_window_function_kind(op_name);
1266  const auto lower_bound = parse_window_bound(field(expr, "lower_bound"), root_dag);
1267  const auto upper_bound = parse_window_bound(field(expr, "upper_bound"), root_dag);
1268  bool is_rows = json_bool(field(expr, "is_rows"));
1269  ti.set_notnull(false);
1270  return std::make_unique<RexWindowFunctionOperator>(kind,
1271  operands,
1272  partition_keys,
1273  order_keys,
1274  collation,
1275  lower_bound,
1276  upper_bound,
1277  is_rows,
1278  ti);
1279  }
1280  return std::unique_ptr<RexOperator>(op == kFUNCTION
1281  ? new RexFunctionOperator(op_name, operands, ti)
1282  : new RexOperator(op, operands, ti));
1283 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1163
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
Definition: RelAlgDag.cpp:1093
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1216
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
SQLOps to_sql_op(const std::string &op_str)
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
Definition: RelAlgDag.cpp:1064
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1185
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1083
Definition: sqldefs.h:52
#define CHECK(condition)
Definition: Logger.h:291
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< const RexScalar > anonymous_namespace{RelAlgDag.cpp}::parse_scalar_expr ( const rapidjson::Value &  expr,
RelAlgDag &  root_dag 
)

Definition at line 1347 of file RelAlgDag.cpp.

References CHECK, field(), json_node_to_string(), json_str(), parse_abstract_input(), parse_case(), parse_literal(), parse_operator(), and parse_subquery().

Referenced by details::RelAlgDispatcher::dispatchFilter(), details::RelAlgDispatcher::dispatchJoin(), details::RelAlgDispatcher::dispatchProject(), details::RelAlgDispatcher::dispatchTableFunction(), parse_case(), parse_expr_array(), parse_window_bound(), and parse_window_order_exprs().

1348  {
1349  CHECK(expr.IsObject());
1350  if (expr.IsObject() && expr.HasMember("input")) {
1351  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
1352  }
1353  if (expr.IsObject() && expr.HasMember("literal")) {
1354  return std::unique_ptr<const RexScalar>(parse_literal(expr));
1355  }
1356  if (expr.IsObject() && expr.HasMember("op")) {
1357  const auto op_str = json_str(field(expr, "op"));
1358  if (op_str == std::string("CASE")) {
1359  return std::unique_ptr<const RexScalar>(parse_case(expr, root_dag));
1360  }
1361  if (op_str == std::string("$SCALAR_QUERY")) {
1362  return std::unique_ptr<const RexScalar>(parse_subquery(expr, root_dag));
1363  }
1364  return std::unique_ptr<const RexScalar>(parse_operator(expr, root_dag));
1365  }
1366  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
1367  " not supported");
1368 }
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1285
std::string json_node_to_string(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:962
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1216
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
Definition: RelAlgDag.cpp:973
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1243
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:979
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SortDirection anonymous_namespace{RelAlgDag.cpp}::parse_sort_direction ( const rapidjson::Value &  collation)

Definition at line 1173 of file RelAlgDag.cpp.

References Ascending, Descending, field(), and json_str().

Referenced by details::RelAlgDispatcher::dispatchSort(), and parse_window_order_collation().

1173  {
1174  return json_str(field(collation, "direction")) == std::string("DESCENDING")
1175  ? SortDirection::Descending
1176  : SortDirection::Ascending;
1177 }
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<const RexSubQuery> anonymous_namespace{RelAlgDag.cpp}::parse_subquery ( const rapidjson::Value &  expr,
RelAlgDag &  root_dag 
)

Definition at line 1216 of file RelAlgDag.cpp.

References RelAlgDagBuilder::buildDagForSubquery(), CHECK, CHECK_GE, field(), RelAlgDag::getGlobalHints(), RelAlgDag::registerQueryHint(), RelAlgDag::registerSubquery(), and RelAlgDag::setGlobalQueryHints().

Referenced by parse_operator(), and parse_scalar_expr().

1217  {
1218  const auto& operands = field(expr, "operands");
1219  CHECK(operands.IsArray());
1220  CHECK_GE(operands.Size(), unsigned(0));
1221  const auto& subquery_ast = field(expr, "subquery");
1222 
1223  auto subquery_dag = RelAlgDagBuilder::buildDagForSubquery(root_dag, subquery_ast);
1224  const auto subquery_root_node = subquery_dag->getRootNodeShPtr();
1225  auto subquery = std::make_shared<RexSubQuery>(subquery_root_node);
1226  auto query_hint = subquery_dag->getQueryHint(subquery_dag->getRootNodeShPtr().get());
1227  root_dag.registerSubquery(subquery);
1228  const auto subquery_global_hint = subquery_dag->getGlobalHints();
1229  if (subquery_global_hint.isAnyQueryHintDelivered()) {
1230  // we need to propagate global query hint found in this subquery to its parent
1231  const auto new_global_hint = root_dag.getGlobalHints() || subquery_global_hint;
1232  root_dag.setGlobalQueryHints(new_global_hint);
1233  }
1234  const auto subquery_local_hint = subquery_dag->getQueryHint(subquery_root_node.get());
1235  if (subquery_local_hint) {
1236  // register local query hint of this subquery to its parent to correctly
1237  // enables them when executing this subquery
1238  root_dag.registerQueryHint(subquery_root_node.get(), *subquery_local_hint);
1239  }
1240  return subquery->deepCopy();
1241 }
void setGlobalQueryHints(const RegisteredQueryHint &global_hints)
Definition: RelAlgDag.h:2974
#define CHECK_GE(x, y)
Definition: Logger.h:306
void registerQueryHint(const RelAlgNode *node, const RegisteredQueryHint &query_hint)
Definition: RelAlgDag.h:2932
const RegisteredQueryHint & getGlobalHints() const
Definition: RelAlgDag.h:2972
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
static std::unique_ptr< RelAlgDag > buildDagForSubquery(RelAlgDag &root_dag, const rapidjson::Value &query_ast)
Definition: RelAlgDag.cpp:3257
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
Definition: RelAlgDag.h:2530
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypeInfo anonymous_namespace{RelAlgDag.cpp}::parse_type ( const rapidjson::Value &  type_obj)

Definition at line 1064 of file RelAlgDag.cpp.

References CHECK, field(), json_bool(), json_i64(), json_node_to_string(), json_str(), SQLTypeInfo::set_precision(), SQLTypeInfo::set_scale(), to_sql_type(), and run_benchmark_import::type.

Referenced by details::RelAlgDispatcher::dispatchLogicalValues(), parse_aggregate_expr(), and parse_operator().

1064  {
1065  if (type_obj.IsArray()) {
1066  throw QueryNotSupported("Composite types are not currently supported.");
1067  }
1068  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
1069  << json_node_to_string(type_obj);
1070  const auto type = to_sql_type(json_str(field(type_obj, "type")));
1071  const auto nullable = json_bool(field(type_obj, "nullable"));
1072  const auto precision_it = type_obj.FindMember("precision");
1073  const int precision =
1074  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
1075  const auto scale_it = type_obj.FindMember("scale");
1076  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
1077  SQLTypeInfo ti(type, !nullable);
1078  ti.set_precision(precision);
1079  ti.set_scale(scale);
1080  return ti;
1081 }
SQLTypes to_sql_type(const std::string &type_name)
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
std::string json_node_to_string(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:962
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RexWindowFunctionOperator::RexWindowBound anonymous_namespace{RelAlgDag.cpp}::parse_window_bound ( const rapidjson::Value &  window_bound_obj,
RelAlgDag &  root_dag 
)

Definition at line 1197 of file RelAlgDag.cpp.

References CHECK, field(), json_bool(), json_i64(), parse_scalar_expr(), and RexWindowFunctionOperator::RexWindowBound::unbounded.

Referenced by parse_operator().

1199  {
1200  CHECK(window_bound_obj.IsObject());
1201  RexWindowFunctionOperator::RexWindowBound window_bound;
1202  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
1203  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
1204  window_bound.following = json_bool(field(window_bound_obj, "following"));
1205  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
1206  const auto& offset_field = field(window_bound_obj, "offset");
1207  if (offset_field.IsObject()) {
1208  window_bound.bound_expr = parse_scalar_expr(offset_field, root_dag);
1209  } else {
1210  CHECK(offset_field.IsNull());
1211  }
1212  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
1213  return window_bound;
1214 }
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
#define CHECK(condition)
Definition: Logger.h:291
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1347

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SqlWindowFunctionKind anonymous_namespace{RelAlgDag.cpp}::parse_window_function_kind ( const std::string &  name)

Definition at line 1093 of file RelAlgDag.cpp.

References AVG, COUNT, COUNT_IF, CUME_DIST, DENSE_RANK, FIRST_VALUE, LAG, LAG_IN_FRAME, LAST_VALUE, LEAD, LEAD_IN_FRAME, MAX, MIN, NTH_VALUE, NTH_VALUE_IN_FRAME, NTILE, PERCENT_RANK, RANK, ROW_NUMBER, SUM, SUM_IF, and SUM_INTERNAL.

Referenced by parse_operator().

1093  {
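1094  if (name == "ROW_NUMBER") {
1095  return SqlWindowFunctionKind::ROW_NUMBER;
1096  }
1097  if (name == "RANK") {
1098  return SqlWindowFunctionKind::RANK;
1099  }
1100  if (name == "DENSE_RANK") {
1101  return SqlWindowFunctionKind::DENSE_RANK;
1102  }
1103  if (name == "PERCENT_RANK") {
1104  return SqlWindowFunctionKind::PERCENT_RANK;
1105  }
1106  if (name == "CUME_DIST") {
1107  return SqlWindowFunctionKind::CUME_DIST;
1108  }
1109  if (name == "NTILE") {
1110  return SqlWindowFunctionKind::NTILE;
1111  }
1112  if (name == "LAG") {
1113  return SqlWindowFunctionKind::LAG;
1114  }
1115  if (name == "LAG_IN_FRAME") {
1116  return SqlWindowFunctionKind::LAG_IN_FRAME;
1117  }
1118  if (name == "LEAD") {
1119  return SqlWindowFunctionKind::LEAD;
1120  }
1121  if (name == "LEAD_IN_FRAME") {
1122  return SqlWindowFunctionKind::LEAD_IN_FRAME;
1123  }
1124  if (name == "FIRST_VALUE") {
1125  return SqlWindowFunctionKind::FIRST_VALUE;
1126  }
1127  if (name == "LAST_VALUE") {
1128  return SqlWindowFunctionKind::LAST_VALUE;
1129  }
1130  if (name == "NTH_VALUE") {
1131  return SqlWindowFunctionKind::NTH_VALUE;
1132  }
1133  if (name == "NTH_VALUE_IN_FRAME") {
1134  return SqlWindowFunctionKind::NTH_VALUE_IN_FRAME;
1135  }
1136  if (name == "AVG") {
1137  return SqlWindowFunctionKind::AVG;
1138  }
1139  if (name == "MIN") {
1140  return SqlWindowFunctionKind::MIN;
1141  }
1142  if (name == "MAX") {
1143  return SqlWindowFunctionKind::MAX;
1144  }
1145  if (name == "SUM") {
1146  return SqlWindowFunctionKind::SUM;
1147  }
1148  if (name == "COUNT") {
1149  return SqlWindowFunctionKind::COUNT;
1150  }
1151  if (name == "COUNT_IF") {
1152  return SqlWindowFunctionKind::COUNT_IF;
1153  }
1154  if (name == "SUM_IF") {
1155  return SqlWindowFunctionKind::SUM_IF;
1156  }
1157  if (name == "$SUM0") {
1158  return SqlWindowFunctionKind::SUM_INTERNAL;
1159  }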
1160  throw std::runtime_error("Unsupported window function: " + name);
1161 }
string name
Definition: setup.in.py:72

+ Here is the caller graph for this function:

std::vector<SortField> anonymous_namespace{RelAlgDag.cpp}::parse_window_order_collation ( const rapidjson::Value &  arr,
RelAlgDag &  root_dag 
)

Definition at line 1185 of file RelAlgDag.cpp.

References parse_nulls_position(), and parse_sort_direction().

Referenced by parse_operator().

1186  {
1187  std::vector<SortField> collation;
1188  size_t field_idx = 0;
1189  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
1190  const auto sort_dir = parse_sort_direction(*it);
1191  const auto null_pos = parse_nulls_position(*it);
1192  collation.emplace_back(field_idx, sort_dir, null_pos);
1193  }
1194  return collation;
1195 }
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1179
SortDirection parse_sort_direction(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1173

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<std::unique_ptr<const RexScalar> > anonymous_namespace{RelAlgDag.cpp}::parse_window_order_exprs ( const rapidjson::Value &  arr,
RelAlgDag &  root_dag 
)

Definition at line 1163 of file RelAlgDag.cpp.

References field(), and parse_scalar_expr().

Referenced by parse_operator().

1165  {
1166  std::vector<std::unique_ptr<const RexScalar>> exprs;
1167  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1168  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), root_dag));
1169  }
1170  return exprs;
1171 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1347

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::propagate_hints_to_new_project ( std::shared_ptr< RelProject >  prev_node,
std::shared_ptr< RelProject >  new_node,
std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &  query_hints 
)

Definition at line 2317 of file RelAlgDag.cpp.

References CHECK.

Referenced by add_window_function_pre_project(), and separate_window_function_expressions().

2321  {
2322  auto delivered_hints = prev_node->getDeliveredHints();
2323  bool needs_propagate_hints = !delivered_hints->empty();
2324  if (needs_propagate_hints) {
2325  for (auto& kv : *delivered_hints) {
2326  new_node->addHint(kv.second);
2327  }
2328  auto prev_it = query_hints.find(prev_node->toHash());
2329  // query hint for the prev projection node should be registered
2330  CHECK(prev_it != query_hints.end());
2331  auto prev_hint_it = prev_it->second.find(prev_node->getId());
2332  CHECK(prev_hint_it != prev_it->second.end());
2333  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
2334  hint_map.emplace(new_node->getId(), prev_hint_it->second);
2335  query_hints.emplace(new_node->toHash(), hint_map);
2336  }
2337 }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

std::vector<const Rex*> anonymous_namespace{RelAlgDag.cpp}::remapTargetPointers ( std::vector< std::unique_ptr< const RexAgg >> const &  agg_exprs_new,
std::vector< std::unique_ptr< const RexScalar >> const &  scalar_sources_new,
std::vector< std::unique_ptr< const RexAgg >> const &  agg_exprs_old,
std::vector< std::unique_ptr< const RexScalar >> const &  scalar_sources_old,
std::vector< const Rex * > const &  target_exprs_old 
)

Definition at line 637 of file RelAlgDag.cpp.

References CHECK.

642  {
643  std::vector<const Rex*> target_exprs(target_exprs_old);
644  std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
645  for (size_t i = 0; i < agg_exprs_new.size(); ++i) {
646  old_to_new_target.emplace(agg_exprs_old[i].get(), agg_exprs_new[i].get());
647  }
648  for (size_t i = 0; i < scalar_sources_new.size(); ++i) {
649  old_to_new_target.emplace(scalar_sources_old[i].get(), scalar_sources_new[i].get());
650  }
651  for (auto& target : target_exprs) {
652  auto target_it = old_to_new_target.find(target);
653  CHECK(target_it != old_to_new_target.end());
654  target = target_it->second;
655  }
656  return target_exprs;
657 }
#define CHECK(condition)
Definition: Logger.h:291
void anonymous_namespace{RelAlgDag.cpp}::reset_table_function_inputs ( std::vector< const Rex * > &  column_inputs,
const std::vector< std::unique_ptr< const RexScalar >> &  old_table_func_inputs,
const std::vector< std::unique_ptr< const RexScalar >> &  new_table_func_inputs 
)

Definition at line 710 of file RelAlgDag.cpp.

References CHECK, and CHECK_EQ.

Referenced by RelTableFunction::RelTableFunction(), and RelTableFunction::setTableFuncInputs().

713  {
714  CHECK_EQ(old_table_func_inputs.size(), new_table_func_inputs.size());
715  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
716  for (size_t i = 0; i < old_table_func_inputs.size(); ++i) {
717  old_to_new_input.emplace(old_table_func_inputs[i].get(),
718  new_table_func_inputs[i].get());
719  }
720  for (auto& target : column_inputs) {
721  auto target_it = old_to_new_input.find(target);
722  CHECK(target_it != old_to_new_input.end());
723  target = target_it->second;
724  }
725 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

bool anonymous_namespace{RelAlgDag.cpp}::same_ignoring_notnull ( SQLTypeInfo  ti0,
SQLTypeInfo  ti1 
)

Definition at line 890 of file RelAlgDag.cpp.

References SQLTypeInfo::set_notnull().

Referenced by RelLogicalUnion::getCompatibleMetainfoTypes().

890  {
891  ti0.set_notnull({}); // Actual value doesn't matter
892  ti1.set_notnull({}); // as long as they are the same.
893  return ti0 == ti1;
894 }
void set_notnull(bool n)
Definition: sqltypes.h:500

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::separate_window_function_expressions ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes,
std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &  query_hints 
)

Detect the presence of window function operators nested inside expressions. Separate the window function operator from the expression, computing the expression as a subsequent step by pushing the expression to a new project node, and replacing the nested window function operator with a RexInput. Also move all input nodes to the newly created project node.

Overall, we have the following query plan:
  from: Window_Project -> Child
  to:   Window_Project -> New_Project -> Child

In pseudocode:
  for each rex in project list:
    detect nested window function expression
    if nested window function expression:
      push the nested window function expression to the new project P
      create a new RexInput r_i which references the w_i in P and put it to M
      (M: a map between nested window function expression w_i and r_i)
    else
      push it down to the new project P
      create a new RexInput r_i which references the rex in P and put it to M
  for each rex in the project list:
    visit the rex and find a chance to replace it (or its operand) by using M

Definition at line 2361 of file RelAlgDag.cpp.

References CHECK, anonymous_namespace{RelAlgDag.cpp}::anonymous_namespace{RelAlgDag.cpp}::is_window_function_operator(), propagate_hints_to_new_project(), and RexVisitorBase< T >::visit().

Referenced by RelAlgDagBuilder::optimizeDag().

2364  {
2365  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2366  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2367  const auto node = *node_itr;
2368  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2369  if (!window_func_project_node) {
2370  continue;
2371  }
2372 
2373  const auto prev_node_itr = std::prev(node_itr);
2374  const auto prev_node = *prev_node_itr;
2375  CHECK(prev_node);
2376 
2377  // map scalar expression index in the project node to window function ptr
2378  std::unordered_map<size_t, const RexScalar*> collected_window_func;
2379  WindowFunctionCollector collector(collected_window_func, false);
2380  // Iterate the target exprs of the project node and check for window function
2381  // expressions. If an embedded expression exists, collect it
2382  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2383  const auto scalar_rex = window_func_project_node->getProjectAt(i);
2384  if (is_window_function_operator(scalar_rex)) {
2385  // top level window function exprs are fine
2386  continue;
2387  }
2388  collector.visit(scalar_rex);
2389  }
2390 
2391  if (!collected_window_func.empty()) {
2392  // we have a nested window function expression
2393  std::unordered_set<size_t> collected_window_func_hash;
2394  // the current window function needs a set of new rex input which references
2395  // expressions in the newly introduced projection node
2396  std::vector<std::unique_ptr<const RexScalar>> new_rex_input_for_window_func;
2397  // a target projection expression of the newly created projection node
2398  std::vector<std::unique_ptr<const RexScalar>> new_scalar_expr_for_window_project;
2399  // a map between nested window function (hash val) and
2400  // its rex index stored in the `new_rex_input_for_window_func`
2401  std::unordered_map<size_t, size_t> window_func_to_new_rex_input_idx_map;
2402  // a map between RexInput of the current window function projection node (hash val)
2403  // and its corresponding new RexInput which is pushed down to the new projection
2404  // node
2405  std::unordered_map<size_t, std::unique_ptr<const RexInput>>
2406  new_rex_input_from_child_node;
2407  RexDeepCopyVisitor copier;
2408 
2409  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2410  std::vector<std::string> dummy_fields;
2411  std::vector<std::string> new_project_field_names;
2412  // create a new project node, it will contain window function expressions
2413  auto new_project =
2414  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2415  // insert this new project node between the current window project node and its
2416  // child node
2417  node_list.insert(node_itr, new_project);
2418 
2419  // retrieve various information to replace expressions in the current window
2420  // function project node w/ considering scalar expressions in the new project node
2421  std::for_each(collected_window_func.begin(),
2422  collected_window_func.end(),
2423  [&new_project_field_names,
2424  &collected_window_func_hash,
2425  &new_rex_input_for_window_func,
2426  &new_scalar_expr_for_window_project,
2427  &copier,
2428  &new_project,
2429  &window_func_to_new_rex_input_idx_map](const auto& kv) {
2430  // compute window function expr's hash, and create a new rex_input
2431  // for it
2432  collected_window_func_hash.insert(kv.first);
2433 
2434  // map an old expression in the window function project node
2435  // to an index of the corresponding new RexInput
2436  const auto rex_idx = new_rex_input_for_window_func.size();
2437  window_func_to_new_rex_input_idx_map.emplace(kv.first, rex_idx);
2438 
2439  // create a new RexInput and make it as one of new expression of the
2440  // newly created project node
2441  new_rex_input_for_window_func.emplace_back(
2442  std::make_unique<const RexInput>(new_project.get(), rex_idx));
2443  new_scalar_expr_for_window_project.push_back(
2444  std::move(copier.visit(kv.second)));
2445  new_project_field_names.emplace_back("");
2446  });
2447  new_project->setExpressions(new_scalar_expr_for_window_project);
2448  new_project->setFields(std::move(new_project_field_names));
2449 
2450  auto window_func_scalar_exprs =
2451  window_func_project_node->getExpressionsAndRelease();
2452  RexWindowFuncReplacementVisitor replacer(collected_window_func_hash,
2453  new_rex_input_for_window_func,
2454  window_func_to_new_rex_input_idx_map,
2455  new_project.get(),
2456  new_rex_input_from_child_node);
2457  size_t rex_idx = 0;
2458  for (auto& scalar_expr : window_func_scalar_exprs) {
2459  // try to replace the old expressions in the window function project node
2460  // with expressions of the newly created project node
2461  auto new_parent_rex = replacer.visit(scalar_expr.get());
2462  window_func_scalar_exprs[rex_idx] = std::move(new_parent_rex);
2463  rex_idx++;
2464  }
2465  // Update the previous window project node
2466  window_func_project_node->setExpressions(window_func_scalar_exprs);
2467  window_func_project_node->replaceInput(prev_node, new_project);
2468  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2469  new_project->setPushedDownWindowExpr();
2470  }
2471  }
2472  nodes.assign(node_list.begin(), node_list.end());
2473 }
void propagate_hints_to_new_project(std::shared_ptr< RelProject > prev_node, std::shared_ptr< RelProject > new_node, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2317
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{RelAlgDag.cpp}::set_notnulls ( std::vector< TargetMetaInfo > *  tmis0,
std::vector< bool > const &  notnulls 
)

Definition at line 896 of file RelAlgDag.cpp.

References SQLTypeInfo::set_notnull().

Referenced by RelLogicalUnion::getCompatibleMetainfoTypes().

896  {
897  for (size_t j = 0; j < tmis0->size(); ++j) {
898  SQLTypeInfo ti = (*tmis0)[j].get_type_info();
899  SQLTypeInfo physical_ti = (*tmis0)[j].get_physical_type_info();
900  ti.set_notnull(notnulls[j]);
901  physical_ti.set_notnull(notnulls[j]);
902  (*tmis0)[j] = TargetMetaInfo((*tmis0)[j].get_resname(), ti, physical_ti);
903  }
904 }
void set_notnull(bool n)
Definition: sqltypes.h:500

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<std::string> anonymous_namespace{RelAlgDag.cpp}::strings_from_json_array ( const rapidjson::Value &  json_str_arr)
noexcept

Definition at line 1305 of file RelAlgDag.cpp.

References CHECK.

Referenced by details::RelAlgDispatcher::dispatchAggregate(), details::RelAlgDispatcher::dispatchProject(), getFieldNamesFromScanNode(), and details::RelAlgDispatcher::getRelAlgInputs().

1306  {
1307  CHECK(json_str_arr.IsArray());
1308  std::vector<std::string> fields;
1309  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
1310  ++json_str_arr_it) {
1311  CHECK(json_str_arr_it->IsString());
1312  fields.emplace_back(json_str_arr_it->GetString());
1313  }
1314  return fields;
1315 }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

JoinType anonymous_namespace{RelAlgDag.cpp}::to_join_type ( const std::string &  join_type_name)

Definition at line 1370 of file RelAlgDag.cpp.

References ANTI, INNER, LEFT, and SEMI.

Referenced by details::RelAlgDispatcher::dispatchJoin().

1370  {
1371  if (join_type_name == "inner") {
1372  return JoinType::INNER;
1373  }
1374  if (join_type_name == "left") {
1375  return JoinType::LEFT;
1376  }
1377  if (join_type_name == "semi") {
1378  return JoinType::SEMI;
1379  }
1380  if (join_type_name == "anti") {
1381  return JoinType::ANTI;
1382  }
1383  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
1384 }

+ Here is the caller graph for this function:

Variable Documentation

const unsigned anonymous_namespace{RelAlgDag.cpp}::FIRST_RA_NODE_ID = 1

Definition at line 40 of file RelAlgDag.cpp.

Referenced by RelAlgNode::resetRelAlgFirstId().