OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryPlanDagCache.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "QueryPlanDagCache.h"
18 #include "RexVisitor.h"
19 
20 #include <unordered_set>
21 
22 // Approximate per-node footprint, in bytes, charged for each cached rel node:
23 // two size_t values, one for its unique ID in node_map_ and one for the DAG graph.
24 constexpr size_t elem_size_ = 2 * sizeof(size_t);
25 
// Registers `node` in node_map_ under its hash (node->toHash()) unless that hash is
// already present. A newly inserted hash is mapped to the value returned by
// getCurrentNodeMapCardinality() at the time of the emplace call.
// cache_lock_ is held for the whole call.
//
// Returns the id stored for the node's hash (pre-existing or freshly assigned), or
// std::nullopt when the insertion takes the reset path below, in which case both
// node_map_ and the cached DAG graph are cleared.
26 std::optional<RelNodeId> QueryPlanDagCache::addNodeIfAbsent(const RelAlgNode* node) {
27  std::lock_guard<std::mutex> cache_lock(cache_lock_);
28  auto key = node->toHash();
29  auto const result = node_map_.emplace(key, getCurrentNodeMapCardinality());
30  if (result.second) {
31  // key did not already exist in node_map_. Check max size wasn't hit.
// NOTE(review): the condition that guards the reset below is not visible in this
// listing (listing lines 32-33 are absent, leaving one closing brace unmatched) --
// restore it from the original file before relying on this block.
34  // the DAG cache would become full if this node were added to it,
35  // so we skip caching this plan DAG and clear the cache map,
36  // because this can happen in the middle of DAG extraction
37  node_map_.clear();
38  cached_query_plan_dag_.graph().clear();
39  // assume we cannot keep 'InvalidQueryPlanHash' nodes for our DAG cache
40  return std::nullopt;
41  }
42  }
// the emplace result points at the entry for `key`, whether it was just inserted or
// already existed; hand back the id stored there
43  return result.first->second;
44 }
45 
47  const RelNodeId child_id) {
48  std::lock_guard<std::mutex> cache_lock(cache_lock_);
49  boost::add_vertex(parent_id, cached_query_plan_dag_);
50  boost::add_vertex(child_id, cached_query_plan_dag_);
51  add_edge_by_label(parent_id, child_id, cached_query_plan_dag_);
52 }
53 
54 void QueryPlanDagCache::setNodeMapMaxSize(const size_t map_size) {
55  std::lock_guard<std::mutex> cache_lock(cache_lock_);
56  max_node_map_size_ = map_size;
57 }
58 
60  std::vector<const Analyzer::ColumnVar*>& col_vars,
61  bool col_id_only) const {
62  // we need to sort col ids to prevent missing data reuse case in multi column qual
63  // scenarios like a) A.a = B.b and A.c = B.c and b) A.c = B.c and A.a = B.a
64  std::sort(col_vars.begin(),
65  col_vars.end(),
66  [](const Analyzer::ColumnVar* lhs, const Analyzer::ColumnVar* rhs) {
67  return lhs->get_column_id() < rhs->get_column_id();
68  });
69  if (col_id_only) {
70  std::vector<int> sorted_col_ids;
71  for (auto cv : col_vars) {
72  sorted_col_ids.push_back(cv->get_column_id());
73  }
74  return ::toString(sorted_col_ids);
75  } else {
76  return ::toString(col_vars);
77  }
78 }
79 
81  const Analyzer::Expr* join_expr,
82  JoinColumnSide target_side,
83  bool extract_only_col_id) {
84  // this function returns qual_bin_oper's info depending on the requested context
85  // such as extracted col_id of inner join cols
86  // (target_side = JoinColumnSide::kInner, extract_only_col_id = true)
87  // and extract all infos of an entire join quals
88  // (target_side = JoinColumnSide::kQual, extract_only_col_id = false)
89  // todo (yoonmin): we may need to use a whole "EXPR" contents in a future
90  // to support a join qual with more general expression like A.a + 1 = (B.b * 2) / 2
91  if (!join_expr) {
92  return "";
93  }
94  auto get_sorted_col_info = [&](const Analyzer::Expr* join_cols) -> JoinColumnsInfo {
95  auto join_col_vars = collectColVars(join_cols);
96  if (join_col_vars.empty()) {
97  return "";
98  }
99  return translateColVarsToInfoString(join_col_vars, extract_only_col_id);
100  };
101 
102  if (target_side == JoinColumnSide::kQual) {
103  auto qual_bin_oper = reinterpret_cast<const Analyzer::BinOper*>(join_expr);
104  CHECK(qual_bin_oper);
105  auto inner_join_col_info = get_sorted_col_info(qual_bin_oper->get_left_operand());
106  auto outer_join_col_info = get_sorted_col_info(qual_bin_oper->get_right_operand());
107  return outer_join_col_info + "|" + inner_join_col_info;
108  } else if (target_side == JoinColumnSide::kInner) {
109  auto qual_bin_oper = reinterpret_cast<const Analyzer::BinOper*>(join_expr);
110  CHECK(qual_bin_oper);
111  return get_sorted_col_info(qual_bin_oper->get_left_operand());
112  } else if (target_side == JoinColumnSide::kOuter) {
113  auto qual_bin_oper = reinterpret_cast<const Analyzer::BinOper*>(join_expr);
114  CHECK(qual_bin_oper);
115  return get_sorted_col_info(qual_bin_oper->get_right_operand());
116  } else {
117  CHECK(target_side == JoinColumnSide::kDirect);
118  return get_sorted_col_info(join_expr);
119  }
120 }
121 
123  std::cout << "Edge list:" << std::endl;
124  boost::print_graph(cached_query_plan_dag_.graph());
125  std::ostringstream os;
126  os << "\n\nNodeMap:\n";
127  for (auto& kv : node_map_) {
128  os << "[" << kv.second << "] " << kv.first << "\n";
129  }
130  std::cout << os.str() << std::endl;
131 }
132 
134  return node_map_.size() * elem_size_;
135 }
136 
138  return node_map_.size();
139 }
140 
142  std::lock_guard<std::mutex> cache_lock(cache_lock_);
143  node_map_.clear();
144  cached_query_plan_dag_.graph().clear();
145 }
146 
147 std::vector<const Analyzer::ColumnVar*> QueryPlanDagCache::collectColVars(
148  const Analyzer::Expr* target) {
149  if (target) {
150  return col_var_visitor_.visit(target);
151  }
152  return {};
153 }
std::string JoinColumnsInfo
std::string toString(const ExtArgumentType &sig_type)
JoinColumnsInfo getJoinColumnsInfoString(const Analyzer::Expr *join_expr, JoinColumnSide target_side, bool extract_only_col_id)
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
void connectNodes(const RelNodeId parent_id, const RelNodeId child_id)
#define SIZE_MAX
JoinColumnSide
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
T visit(const Analyzer::Expr *expr) const
ColumnVarsVisitor col_var_visitor_
QueryPlanDag cached_query_plan_dag_
size_t getCurrentNodeMapCardinality() const
JoinColumnsInfo translateColVarsToInfoString(std::vector< const Analyzer::ColumnVar * > &col_vars, bool col_id_only) const
void setNodeMapMaxSize(const size_t map_size)
constexpr size_t elem_size_
#define CHECK(condition)
Definition: Logger.h:209
size_t getCurrentNodeMapSize() const
virtual size_t toHash() const =0
int get_column_id() const
Definition: Analyzer.h:194
std::vector< const Analyzer::ColumnVar * > collectColVars(const Analyzer::Expr *target)
size_t RelNodeId