OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryPlanDagCache.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "QueryPlanDagCache.h"
18 #include "RexVisitor.h"
19 
20 #include <unordered_set>
21 
22 // an approximation of DAG cache's size by considering an edge between two nodes
23 constexpr size_t elem_size_ = 2 * sizeof(size_t);
24 
25 std::optional<RelNodeId> QueryPlanDagCache::addNodeIfAbsent(const RelAlgNode* node) {
26  std::lock_guard<std::mutex> cache_lock(cache_lock_);
27  auto key = node->toHash();
28  auto const result = node_map_.emplace(key, getCurrentNodeMapCardinality());
29  if (result.second) {
31  getCurrentNodeMapCardinality() == SIZE_MAX) {
32  // unfortunately our DAG cache becomes full
33  // so clear the internal status and skip the query plan DAG extraction
34  node_map_.clear();
35  cached_query_plan_dag_.graph().clear();
36  return std::nullopt;
37  }
38  }
39  return result.first->second;
40 }
41 
43  const RelNodeId child_id) {
44  std::lock_guard<std::mutex> cache_lock(cache_lock_);
45  boost::add_vertex(parent_id, cached_query_plan_dag_);
46  boost::add_vertex(child_id, cached_query_plan_dag_);
47  add_edge_by_label(parent_id, child_id, cached_query_plan_dag_);
48 }
49 
50 void QueryPlanDagCache::setNodeMapMaxSize(const size_t map_size) {
51  std::lock_guard<std::mutex> cache_lock(cache_lock_);
52  max_node_map_size_ = map_size;
53 }
54 
56  std::vector<const Analyzer::ColumnVar*>& col_vars,
57  bool col_id_only) const {
58  // we need to sort col ids to prevent missing data reuse case in multi column qual
59  // scenarios like a) A.a = B.b and A.c = B.c and b) A.c = B.c and A.a = B.a
60  std::sort(col_vars.begin(),
61  col_vars.end(),
62  [](const Analyzer::ColumnVar* lhs, const Analyzer::ColumnVar* rhs) {
63  return lhs->get_column_id() < rhs->get_column_id();
64  });
65  size_t col_vars_info_hash = EMPTY_HASHED_PLAN_DAG_KEY;
66  using Hasher = std::function<void(Analyzer::ColumnVar const*)>;
67  Hasher hash_col_id = [&col_vars_info_hash](auto const* cv) {
68  boost::hash_combine(col_vars_info_hash, cv->get_column_id());
69  };
70  Hasher hash_cv_string = [&col_vars_info_hash](auto const* cv) {
71  boost::hash_combine(col_vars_info_hash, cv->toString());
72  };
73  std::for_each(
74  col_vars.begin(), col_vars.end(), col_id_only ? hash_col_id : hash_cv_string);
75  return col_vars_info_hash;
76 }
77 
79  JoinColumnSide target_side,
80  bool extract_only_col_id) {
81  // this function returns qual_bin_oper's info depending on the requested context
82  // such as extracted col_id of inner join cols
83  // (target_side = JoinColumnSide::kInner, extract_only_col_id = true)
84  // and extract all infos of an entire join quals
85  // (target_side = JoinColumnSide::kQual, extract_only_col_id = false)
86  // todo (yoonmin): we may need to use a whole "EXPR" contents in a future
87  // to support a join qual with more general expression like A.a + 1 = (B.b * 2) / 2
88  if (!join_expr) {
90  }
91  auto get_sorted_col_info = [=](const Analyzer::Expr* join_cols) -> size_t {
92  auto join_col_vars = collectColVars(join_cols);
93  CHECK(!join_col_vars.empty())
94  << "Join expression should have at least one join column variable";
95  return translateColVarsToInfoHash(join_col_vars, extract_only_col_id);
96  };
97  auto hashed_join_col_info = EMPTY_HASHED_PLAN_DAG_KEY;
98  if (target_side == JoinColumnSide::kQual) {
99  auto qual_bin_oper = dynamic_cast<const Analyzer::BinOper*>(join_expr);
100  CHECK(qual_bin_oper);
101  boost::hash_combine(hashed_join_col_info,
102  get_sorted_col_info(qual_bin_oper->get_left_operand()));
103  boost::hash_combine(hashed_join_col_info,
104  get_sorted_col_info(qual_bin_oper->get_right_operand()));
105  } else if (target_side == JoinColumnSide::kInner) {
106  auto qual_bin_oper = dynamic_cast<const Analyzer::BinOper*>(join_expr);
107  CHECK(qual_bin_oper);
108  boost::hash_combine(hashed_join_col_info,
109  get_sorted_col_info(qual_bin_oper->get_left_operand()));
110  } else if (target_side == JoinColumnSide::kOuter) {
111  auto qual_bin_oper = dynamic_cast<const Analyzer::BinOper*>(join_expr);
112  CHECK(qual_bin_oper);
113  boost::hash_combine(hashed_join_col_info,
114  get_sorted_col_info(qual_bin_oper->get_right_operand()));
115  } else {
116  CHECK(target_side == JoinColumnSide::kDirect);
117  boost::hash_combine(hashed_join_col_info, get_sorted_col_info(join_expr));
118  }
119  return hashed_join_col_info;
120 }
121 
123  std::cout << "Edge list:" << std::endl;
124  boost::print_graph(cached_query_plan_dag_.graph());
125  std::ostringstream os;
126  os << "\n\nNodeMap:\n";
127  for (auto& kv : node_map_) {
128  os << "[" << kv.second << "] " << kv.first << "\n";
129  }
130  std::cout << os.str() << std::endl;
131 }
132 
134  return node_map_.size() * elem_size_;
135 }
136 
138  return node_map_.size();
139 }
140 
142  std::lock_guard<std::mutex> cache_lock(cache_lock_);
143  node_map_.clear();
144  cached_query_plan_dag_.graph().clear();
145 }
146 
147 std::vector<const Analyzer::ColumnVar*> QueryPlanDagCache::collectColVars(
148  const Analyzer::Expr* target) {
149  if (target) {
150  return col_var_visitor_.visit(target);
151  }
152  return {};
153 }
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
void connectNodes(const RelNodeId parent_id, const RelNodeId child_id)
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
JoinColumnSide
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
T visit(const Analyzer::Expr *expr) const
size_t getJoinColumnsInfoHash(const Analyzer::Expr *join_expr, JoinColumnSide target_side, bool extract_only_col_id)
ColumnVarsVisitor col_var_visitor_
QueryPlanDag cached_query_plan_dag_
size_t translateColVarsToInfoHash(std::vector< const Analyzer::ColumnVar * > &col_vars, bool col_id_only) const
size_t getCurrentNodeMapCardinality() const
void setNodeMapMaxSize(const size_t map_size)
constexpr size_t elem_size_
#define CHECK(condition)
Definition: Logger.h:222
size_t getCurrentNodeMapSize() const
virtual size_t toHash() const =0
int get_column_id() const
Definition: Analyzer.h:201
std::vector< const Analyzer::ColumnVar * > collectColVars(const Analyzer::Expr *target)
size_t RelNodeId