OmniSciDB  d2f719934e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryPlanDagCache.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <boost/graph/adjacency_list.hpp>
20 #include <boost/graph/graph_utility.hpp>
21 #include <boost/graph/graphviz.hpp>
22 #include <boost/graph/labeled_graph.hpp>
23 
24 #include <iostream>
25 #include <memory>
26 #include <vector>
27 
28 #include "RelAlgDagBuilder.h"
29 #include "RelAlgExecutionUnit.h"
30 #include "ScalarExprVisitor.h"
31 
32 constexpr size_t MAX_NODE_CACHE_SIZE = 1e9; // set ~1GB as cache size threshold
33 
34 // we manage the uniqueness of node ID by its explained contents that each rel node has
35 using RelNodeMap = std::unordered_map<RelNodeExplainedHash, RelNodeId>;
36 // we also maintain labeled graph to manage extracted query plan DAG
37 // this can be used in a future to support advanced features such as partial resultset
38 // reuse and compiled kernel reuse by exploiting graph-centric computation like subgraph
39 // matching and graph isomorphism
40 using QueryPlanDag = boost::labeled_graph<AdjacentList, RelNodeId, boost::hash_mapS>;
41 
43  : public ScalarExprVisitor<std::vector<const Analyzer::ColumnVar*>> {
44  protected:
45  std::vector<const Analyzer::ColumnVar*> visitColumnVar(
46  const Analyzer::ColumnVar* column) const override {
47  return {column};
48  }
49 
50  std::vector<const Analyzer::ColumnVar*> visitColumnVarTuple(
51  const Analyzer::ExpressionTuple* expr_tuple) const override {
52  ColumnVarsVisitor visitor;
53  std::vector<const Analyzer::ColumnVar*> result;
54  for (size_t i = 0; i < expr_tuple->getTuple().size(); ++i) {
55  const auto col_vars = visitor.visit(expr_tuple->getTuple()[i].get());
56  for (const auto col_var : col_vars) {
57  result.push_back(col_var);
58  }
59  }
60  return result;
61  }
62 
63  std::vector<const Analyzer::ColumnVar*> aggregateResult(
64  const std::vector<const Analyzer::ColumnVar*>& aggregate,
65  const std::vector<const Analyzer::ColumnVar*>& next_result) const override {
66  auto result = aggregate;
67  for (const auto col_var : next_result) {
68  result.push_back(col_var);
69  }
70  return result;
71  }
72 };
73 
74 // This is one of main data structure for data recycling which manages a query plan shape
75 // as a DAG representation
76 // A query plan DAG is a sequence of unique node ID, and it means that we can assign the
77 // same node ID to a node iff we already saw that node in a different query plan that we
78 // extracted to retrieve a query plan DAG we visit each rel node of an input query plan
79 // starting from the root to the bottom (left-to-right child visiting), and check whether
80 // it is valid for DAG extraction and its usage (we do not allow dag extraction if a query
81 // plan has not supported rel node for data recycling such as logical value) and if that
82 // visited node is valid then we check its uniqueness against DAG cache and assign the
83 // unique ID once it is unique one (otherwise we reuse node id) after visiting a query
84 // plan we have a sequence of node IDs and return it as an extracted query plan DAG
86  public:
87  QueryPlanDagCache(size_t max_node_cache_size = MAX_NODE_CACHE_SIZE)
88  : max_node_map_size_(max_node_cache_size) {}
89 
90  QueryPlanDagCache(QueryPlanDagCache&& other) = delete;
92  QueryPlanDagCache(const QueryPlanDagCache&) = delete;
94 
95  std::optional<RelNodeId> addNodeIfAbsent(const RelAlgNode*);
96 
97  void connectNodes(const RelNodeId parent_id, const RelNodeId child_id);
98 
99  std::vector<const Analyzer::ColumnVar*> collectColVars(const Analyzer::Expr* target);
100 
101  size_t getCurrentNodeMapSize() const;
102 
103  void setNodeMapMaxSize(const size_t map_size);
104 
105  size_t getCurrentNodeMapCardinality() const;
106 
108  JoinColumnSide target_side,
109  bool extract_only_col_id);
110 
112  std::vector<const Analyzer::ColumnVar*>& col_vars,
113  bool col_id_only) const;
114 
115  void clearQueryPlanCache();
116 
117  void printDag();
118 
119  private:
120  // a map btw. rel node and its unique node id
122  // a graph structure that represents relationships among extracted query plan DAGs
124  // a limitation of the maximum size of DAG cache (to prevent unlimited usage of memory
125  // for DAG maintanence)
127  // a lock to protect contentions while accessing internal data structure of DAG cache
128  std::mutex cache_lock_;
130 };
std::string JoinColumnsInfo
JoinColumnsInfo getJoinColumnsInfoString(const Analyzer::Expr *join_expr, JoinColumnSide target_side, bool extract_only_col_id)
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
void connectNodes(const RelNodeId parent_id, const RelNodeId child_id)
JoinColumnSide
T visit(const Analyzer::Expr *expr) const
ColumnVarsVisitor col_var_visitor_
QueryPlanDag cached_query_plan_dag_
const std::vector< std::shared_ptr< Analyzer::Expr > > & getTuple() const
Definition: Analyzer.h:243
std::vector< const Analyzer::ColumnVar * > visitColumnVarTuple(const Analyzer::ExpressionTuple *expr_tuple) const override
size_t getCurrentNodeMapCardinality() const
std::unordered_map< RelNodeExplainedHash, RelNodeId > RelNodeMap
std::vector< const Analyzer::ColumnVar * > aggregateResult(const std::vector< const Analyzer::ColumnVar * > &aggregate, const std::vector< const Analyzer::ColumnVar * > &next_result) const override
JoinColumnsInfo translateColVarsToInfoString(std::vector< const Analyzer::ColumnVar * > &col_vars, bool col_id_only) const
QueryPlanDagCache(size_t max_node_cache_size=MAX_NODE_CACHE_SIZE)
boost::labeled_graph< AdjacentList, RelNodeId, boost::hash_mapS > QueryPlanDag
constexpr size_t MAX_NODE_CACHE_SIZE
void setNodeMapMaxSize(const size_t map_size)
std::vector< const Analyzer::ColumnVar * > visitColumnVar(const Analyzer::ColumnVar *column) const override
size_t getCurrentNodeMapSize() const
Execution unit for relational algebra. It&#39;s a low-level description of any relational algebra operati...
std::vector< const Analyzer::ColumnVar * > collectColVars(const Analyzer::Expr *target)
size_t RelNodeId
QueryPlanDagCache & operator=(QueryPlanDagCache &&other)=delete