OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryPlanDagExtractor.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "QueryPlanDagExtractor.h"
18 #include "RexVisitor.h"
20 
21 #include <boost/algorithm/cxx11/any_of.hpp>
22 
23 extern bool g_is_test_env;
24 
25 namespace {
26 struct IsEquivBinOp {
27  bool operator()(std::shared_ptr<Analyzer::Expr> const& qual) {
28  if (auto oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual)) {
29  return IS_EQUIVALENCE(oper->get_optype());
30  }
31  return false;
32  }
33 };
34 } // namespace
35 
36 std::vector<InnerOuterOrLoopQual> QueryPlanDagExtractor::normalizeColumnsPair(
37  const Analyzer::BinOper* condition) {
38  std::vector<InnerOuterOrLoopQual> result;
39  const auto lhs_tuple_expr =
40  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
41  const auto rhs_tuple_expr =
42  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
43 
44  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
45  auto do_normalize_inner_outer_pair = [&result, &condition, this](
46  const Analyzer::Expr* lhs,
47  const Analyzer::Expr* rhs,
48  const TemporaryTables* temporary_table) {
49  try {
50  auto inner_outer_pair =
52  rhs,
53  *executor_->getCatalog(),
54  temporary_table,
55  condition->is_overlaps_oper());
56  InnerOuterOrLoopQual valid_qual{
57  std::make_pair(inner_outer_pair.first, inner_outer_pair.second), false};
58  result.push_back(valid_qual);
59  } catch (HashJoinFail& e) {
60  InnerOuterOrLoopQual invalid_qual{std::make_pair(lhs, rhs), true};
61  result.push_back(invalid_qual);
62  }
63  };
64  if (lhs_tuple_expr) {
65  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
66  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
67  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
68  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
69  do_normalize_inner_outer_pair(
70  lhs_tuple[i].get(), rhs_tuple[i].get(), executor_->getTemporaryTables());
71  }
72  } else {
73  do_normalize_inner_outer_pair(condition->get_left_operand(),
74  condition->get_right_operand(),
75  executor_->getTemporaryTables());
76  }
77  return result;
78 }
79 
80 // To extract query plan DAG, we call this function with root node of the query plan
81 // and some objects required while extracting DAG
82 // We consider a DAG representation of a query plan as a series of "unique" rel node ids
83 // We decide each rel node's node id by searching the cached plan DAG first,
84 // and assign a new id iff there exists no duplicated rel node that can reuse
86  const RelAlgNode* top_node,
87  Executor* executor) {
88  auto dag_checker_res = QueryPlanDagChecker::hasNonSupportedNodeInDag(top_node);
89  if (dag_checker_res.first) {
90  VLOG(1) << "Stop DAG extraction (" << dag_checker_res.second << ")";
91  return {EMPTY_QUERY_PLAN, true};
92  }
93  mapd_unique_lock<mapd_shared_mutex> lock(executor->getDataRecyclerLock());
94  auto& cached_dag = executor->getQueryPlanDagCache();
95  QueryPlanDagExtractor dag_extractor(cached_dag, {}, executor, false);
96  extractQueryPlanDagImpl(top_node, dag_extractor);
97  auto extracted_query_plan_dag = dag_extractor.getExtractedQueryPlanDagStr();
98  top_node->setQueryPlanDag(extracted_query_plan_dag);
99  if (auto sort_node = dynamic_cast<RelSort const*>(top_node)) {
100  // we evaluate sort node based on the resultset of its child node
101  // so we need to mark the extracted query plan of the child node
102  // for the resultset recycling
103  auto child_dag = dag_extractor.getExtractedQueryPlanDagStr(1);
104  sort_node->getInput(0)->setQueryPlanDag(child_dag);
105  }
106  VLOG(1) << "Extracted query plan DAG: " << extracted_query_plan_dag
107  << ", current DAG cache size: " << cached_dag.getCurrentNodeMapSize();
108  return {extracted_query_plan_dag, dag_extractor.isDagExtractionAvailable()};
109 }
110 
112  const RelAlgNode* top_node,
113  std::optional<unsigned> left_deep_tree_id,
114  std::unordered_map<unsigned, JoinQualsPerNestingLevel> left_deep_tree_infos,
115  Executor* executor) {
116  // we already extract a query plan dag for the input query which is stored at top_node
117  if (top_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
118  return {};
119  }
120  mapd_unique_lock<mapd_shared_mutex> lock(executor->getDataRecyclerLock());
121  auto& cached_dag = executor->getQueryPlanDagCache();
122  QueryPlanDagExtractor dag_extractor(cached_dag, left_deep_tree_infos, executor, true);
123  extractQueryPlanDagImpl(top_node, dag_extractor);
124  return {dag_extractor.getHashTableBuildDag(), dag_extractor.getTableIdToNodeMap()};
125 }
126 
128  const RelAlgNode* top_node,
129  QueryPlanDagExtractor& dag_extractor) {
130  // add the root node of this query plan DAG
131  auto res = dag_extractor.global_dag_.addNodeIfAbsent(top_node);
132  if (!res) {
133  VLOG(1) << "Stop DAG extraction (Query plan dag cache reaches the maximum capacity)";
134  dag_extractor.contain_not_supported_rel_node_ = true;
135  return;
136  }
137  CHECK(res.has_value());
138  top_node->setRelNodeDagId(res.value());
139  dag_extractor.extracted_dag_.push_back(::toString(res.value()));
140 
141  // visit child node if necessary
142  if (auto table_func_node = dynamic_cast<const RelTableFunction*>(top_node)) {
143  for (size_t i = 0; i < table_func_node->inputCount(); ++i) {
144  dag_extractor.visit(table_func_node, table_func_node->getInput(i));
145  }
146  } else {
147  auto num_child_node = top_node->inputCount();
148  switch (num_child_node) {
149  case 1: // unary op
150  dag_extractor.visit(top_node, top_node->getInput(0));
151  break;
152  case 2: // binary op
153  if (auto trans_join_node = dynamic_cast<const RelTranslatedJoin*>(top_node)) {
154  dag_extractor.visit(trans_join_node, trans_join_node->getLHS());
155  dag_extractor.visit(trans_join_node, trans_join_node->getRHS());
156  break;
157  }
158  VLOG(1) << "Visit an invalid rel node while extracting query plan DAG: "
159  << ::toString(top_node);
160  return;
161  case 0: // leaf node
162  break;
163  default:
164  // since we replace RelLeftDeepJoin as a set of RelTranslatedJoin
165  // which is a binary op, # child nodes for every rel node should be <= 2
166  UNREACHABLE();
167  }
168  }
169 
170  // check whether extracted DAG is available to use
171  if (dag_extractor.extracted_dag_.empty() || dag_extractor.isDagExtractionAvailable()) {
172  dag_extractor.contain_not_supported_rel_node_ = true;
173  return;
174  }
175 
176  if (g_is_test_env) {
177  dag_extractor.executor_->registerExtractedQueryPlanDag(
178  dag_extractor.getExtractedQueryPlanDagStr());
179  }
180  return;
181 }
182 
184  std::ostringstream oss;
185  size_t cnt = 0;
186  if (start_pos > extracted_dag_.size()) {
187  return EMPTY_QUERY_PLAN;
188  }
189  for (auto& dag_node_id : extracted_dag_) {
190  if (cnt >= start_pos) {
191  oss << dag_node_id << "|";
192  }
193  ++cnt;
194  }
195  return oss.str();
196 }
197 
199  std::optional<RelNodeId> retrieved_node_id) {
200  if (!retrieved_node_id) {
201  VLOG(1) << "Stop DAG extraction (Detect an invalid dag id)";
203  return false;
204  }
205  CHECK(retrieved_node_id.has_value());
206  node->setRelNodeDagId(retrieved_node_id.value());
207  return true;
208 }
209 
211  const RelAlgNode* parent_node,
212  const RelAlgNode* child_node,
213  std::optional<RelNodeId> retrieved_node_id) {
214  CHECK(parent_node);
215  CHECK(child_node);
216  CHECK(retrieved_node_id.has_value());
217  auto parent_node_id = parent_node->getRelNodeDagId();
218  global_dag_.connectNodes(parent_node_id, retrieved_node_id.value());
219  extracted_dag_.push_back(::toString(retrieved_node_id.value()));
220  return true;
221 }
222 
224  const RelAlgNode* child_node) {
225  // This function takes a responsibility for all rel nodes
226  // except 1) RelLeftDeepJoinTree and 2) RelTranslatedJoin
227  auto res = global_dag_.addNodeIfAbsent(child_node);
228  if (validateNodeId(child_node, res) &&
229  registerNodeToDagCache(parent_node, child_node, res)) {
230  for (size_t i = 0; i < child_node->inputCount(); i++) {
231  visit(child_node, child_node->getInput(i));
232  }
233  }
234 }
235 
236 // we recursively visit each rel node starting from the root
237 // and collect assigned rel node ids and return them as query plan DAG
238 // for join operations we additionally generate additional information
239 // to recycle each hashtable that needs to process a given query
240 void QueryPlanDagExtractor::visit(const RelAlgNode* parent_node,
241  const RelAlgNode* child_node) {
242  if (!child_node || contain_not_supported_rel_node_) {
243  return;
244  }
245  bool child_visited = false;
246  if (analyze_join_ops_) {
247  if (auto left_deep_joins = dynamic_cast<const RelLeftDeepInnerJoin*>(child_node)) {
248  if (left_deep_tree_infos_.empty()) {
249  // we should have left_deep_tree_info for input left deep tree node
250  VLOG(1) << "Stop DAG extraction (Detect non-supported join pattern)";
252  return;
253  }
254  auto true_parent_node = parent_node;
255  std::shared_ptr<RelFilter> dummy_filter_node{nullptr};
256  const auto inner_cond = left_deep_joins->getInnerCondition();
257  // we analyze left-deep join tree as per-join qual level, so
258  // when visiting RelLeftDeepInnerJoin we decompose it into individual join node
259  // (RelTranslatedJoin).
260  // Thus, this RelLeftDeepInnerJoin object itself is useless when recycling data
261  // but sometimes it has inner condition that has to consider so we add an extra
262  // RelFilter node containing the condition to keep query semantic correctly
263  if (auto cond = dynamic_cast<const RexOperator*>(inner_cond)) {
264  RexDeepCopyVisitor copier;
265  auto copied_inner_cond = copier.visit(cond);
266  dummy_filter_node = std::make_shared<RelFilter>(copied_inner_cond);
267  register_and_visit(parent_node, dummy_filter_node.get());
268  true_parent_node = dummy_filter_node.get();
269  }
270  handleLeftDeepJoinTree(true_parent_node, left_deep_joins);
271  child_visited = true;
272  } else if (auto translated_join_node =
273  dynamic_cast<const RelTranslatedJoin*>(child_node)) {
274  handleTranslatedJoin(parent_node, translated_join_node);
275  child_visited = true;
276  }
277  }
278  if (!child_visited) {
279  register_and_visit(parent_node, child_node);
280  }
281 }
282 
284  const RelAlgNode* parent_node,
285  const RelTranslatedJoin* rel_trans_join) {
286  // when left-deep tree has multiple joins this rel_trans_join can be revisited
287  // but we need to mark the child query plan to accurately catch the query plan dag
288  // here we do not create new dag id since all rel nodes are visited already
289  CHECK(parent_node);
290  CHECK(rel_trans_join);
291 
292  auto res = global_dag_.addNodeIfAbsent(rel_trans_join);
293  if (!validateNodeId(rel_trans_join, res) ||
294  !registerNodeToDagCache(parent_node, rel_trans_join, res)) {
295  return;
296  }
297 
298  // To extract an access path (query plan DAG) for hashtable is to use a difference of
299  // two query plan DAGs 1) query plan DAG after visiting RHS node and 2) query plan DAG
300  // after visiting LHS node so by comparing 1) and 2) we can extract which query plan DAG
301  // is necessary to project join cols that are used to build a hashtable and we use it as
302  // hashtable access path
303 
304  // this lambda function deals with the case of recycled query resultset
305  // specifically, we can avoid unnecessary visiting of child tree(s) when we already have
306  // the extracted query plan DAG for the given child rel node
307  // instead, we just fill the node id vector (a sequence of node ids we visited) by using
308  // the dag of the child node
309  auto fill_node_ids_to_dag_vec = [&](const std::string& node_ids) {
310  auto node_ids_vec = split(node_ids, "|");
311  // the last elem is an empty one
312  std::for_each(node_ids_vec.begin(),
313  std::prev(node_ids_vec.end()),
314  [&](const std::string& node_id) { extracted_dag_.push_back(node_id); });
315  };
316  QueryPlanDAG current_plan_dag, after_rhs_visited, after_lhs_visited;
317  current_plan_dag = getExtractedQueryPlanDagStr();
318  auto rhs_node = rel_trans_join->getRHS();
319  std::unordered_set<size_t> rhs_input_keys, lhs_input_keys;
320  if (rhs_node) {
321  if (rhs_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
322  visit(rel_trans_join, rhs_node);
323  } else {
324  fill_node_ids_to_dag_vec(rhs_node->getQueryPlanDag());
325  }
326  after_rhs_visited = getExtractedQueryPlanDagStr();
327  addTableIdToNodeLink(rhs_node->getId(), rhs_node);
328  rhs_input_keys = ScanNodeTableKeyCollector::getScanNodeTableKey(rhs_node);
329  }
330  auto lhs_node = rel_trans_join->getLHS();
331  if (rel_trans_join->getLHS()) {
332  if (lhs_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
333  visit(rel_trans_join, lhs_node);
334  } else {
335  fill_node_ids_to_dag_vec(lhs_node->getQueryPlanDag());
336  }
337  after_lhs_visited = getExtractedQueryPlanDagStr();
338  addTableIdToNodeLink(lhs_node->getId(), lhs_node);
339  lhs_input_keys = ScanNodeTableKeyCollector::getScanNodeTableKey(lhs_node);
340  }
341  if (isEmptyQueryPlanDag(after_lhs_visited) || isEmptyQueryPlanDag(after_rhs_visited)) {
342  VLOG(1) << "Stop DAG extraction (Detect invalid query plan dag of join col(s))";
344  return;
345  }
346  // after visiting new node, we have added node id(s) which can be used as an access path
347  // so, we extract that node id(s) by splitting the new plan dag by the current plan dag
348  auto outer_table_identifier = split(after_rhs_visited, current_plan_dag)[1];
349  auto hash_table_identfier = split(after_lhs_visited, after_rhs_visited)[1];
350  auto join_qual_info = EMPTY_HASHED_PLAN_DAG_KEY;
351  if (!rel_trans_join->isNestedLoopQual()) {
352  auto inner_join_cols = rel_trans_join->getJoinCols(true);
353  auto inner_join_col_info =
354  global_dag_.translateColVarsToInfoHash(inner_join_cols, false);
355  boost::hash_combine(join_qual_info, inner_join_col_info);
356  auto outer_join_cols = rel_trans_join->getJoinCols(false);
357  auto outer_join_col_info =
358  global_dag_.translateColVarsToInfoHash(outer_join_cols, false);
359  boost::hash_combine(join_qual_info, outer_join_col_info);
360  // collect table keys from both rhs and lhs side
361  std::unordered_set<size_t> collected_table_keys;
362  collected_table_keys.insert(lhs_input_keys.begin(), lhs_input_keys.end());
363  if (!inner_join_cols.empty() &&
364  inner_join_cols[0]->get_type_info().is_dict_encoded_type()) {
365  collected_table_keys.insert(rhs_input_keys.begin(), rhs_input_keys.end());
366  }
367 
368  auto it = hash_table_query_plan_dag_.find(join_qual_info);
369  if (it == hash_table_query_plan_dag_.end()) {
370  VLOG(2) << "Add hashtable access path"
371  << ", inner join col info: " << inner_join_col_info
372  << " (access path: " << hash_table_identfier << ")"
373  << ", outer join col info: " << outer_join_col_info
374  << " (access path: " << outer_table_identifier << ")";
376  join_qual_info,
377  HashTableBuildDag(inner_join_col_info,
378  outer_join_col_info,
379  boost::hash_value(hash_table_identfier),
380  boost::hash_value(outer_table_identifier),
381  std::move(collected_table_keys)));
382  }
383  } else {
384  VLOG(2) << "Add loop join access path, for LHS: " << outer_table_identifier
385  << ", for RHS: " << hash_table_identfier << "\n";
386  }
387 }
388 
389 namespace {
390 struct OpInfo {
391  std::string type_;
392  std::string qualifier_;
393  std::string typeinfo_;
394 };
395 
396 // Return the input index whose tableId matches the given tbl_id.
397 // If none then return -1.
398 int get_input_idx(const RelLeftDeepInnerJoin* rel_left_deep_join, int const tbl_id) {
399  for (size_t input_idx = 0; input_idx < rel_left_deep_join->inputCount(); ++input_idx) {
400  auto const input_node = rel_left_deep_join->getInput(input_idx);
401  auto const scan_node = dynamic_cast<const RelScan*>(input_node);
402  int const target_table_id = scan_node ? scan_node->getTableDescriptor()->tableId
403  : -1 * input_node->getId(); // temporary table
404  if (target_table_id == tbl_id) {
405  return input_idx;
406  }
407  }
408  return -1;
409 }
410 } // namespace
411 
412 std::vector<Analyzer::ColumnVar const*> QueryPlanDagExtractor::getColVar(
413  Analyzer::Expr const* col_info) {
414  if (auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(col_info)) {
415  return {col_var};
416  } else {
417  return global_dag_.collectColVars(col_info);
418  }
419 }
420 
421 // we coalesce join quals and related filter conditions into a single RelLeftDeepInnerJoin
422 // node when converting calcite AST to logical query plan, but to recycle hashtable(s) we
423 // need to know access path of each hashtable, so we disassemble it into a set of join
424 // qual and collect hashtable info from there
426  const RelAlgNode* parent_node,
427  const RelLeftDeepInnerJoin* rel_left_deep_join) {
428  CHECK(parent_node);
429  CHECK(rel_left_deep_join);
430 
431  // RelLeftDeepInnerJoin node does not need to be added to DAG since
432  // RelLeftDeepInnerJoin is a logical node and
433  // we add all join nodes of this `RelLeftDeepInnerJoin`
434  // thus, the below `left_deep_tree_id` is not the same as its DAG id
435  // (we do not have a DAG node id for this `RelLeftDeepInnerJoin`)
436  auto left_deep_tree_id = rel_left_deep_join->getId();
437  auto left_deep_join_info = getPerNestingJoinQualInfo(left_deep_tree_id);
438  if (!left_deep_join_info) {
439  // we should have left_deep_tree_info for input left deep tree node
440  VLOG(1) << "Stop DAG extraction (Detect Non-supported join pattern)";
442  return;
443  }
444 
445  // gathering per-join qual level info to correctly recycle each hashtable (and necessary
446  // info) that we created
447  // Here we visit each join qual in bottom-up manner to distinct DAGs among join quals
448  // Let say we have three joins- #1: R.a = S.a / #2: R.b = T.b / #3. R.c = X.c
449  // When we start to visit #1, we cannot determine outer col's dag clearly
450  // since we need to visit #2 and #3 due to the current visitor's behavior
451  // In contrast, when starting from #3, we clearly retrieve both inputs' dag
452  // by skipping already visited nodes
453  // So when we visit #2 after visiting #3, we can skip to consider nodes beloning to
454  // qual #3 so we clearly retrieve DAG only corresponding to #2's
455  for (size_t level_idx = 0; level_idx < left_deep_join_info->size(); ++level_idx) {
456  const auto& current_level_join_conditions = left_deep_join_info->at(level_idx);
457  std::vector<const Analyzer::ColumnVar*> inner_join_cols;
458  std::vector<const Analyzer::ColumnVar*> outer_join_cols;
459  std::vector<std::shared_ptr<const Analyzer::Expr>> filter_ops;
460  int inner_input_idx{-1};
461  int outer_input_idx{-1};
462  OpInfo op_info{"UNDEFINED", "UNDEFINED", "UNDEFINED"};
463  std::unordered_set<std::string> visited_filter_ops;
464 
465  // we first check whether this qual needs nested loop
466  const bool found_eq_join_qual =
467  current_level_join_conditions.type != JoinType::INVALID &&
468  boost::algorithm::any_of(current_level_join_conditions.quals, IsEquivBinOp{});
469  const bool nested_loop = !found_eq_join_qual;
470 
471  RexScalar const* const outer_join_cond =
472  current_level_join_conditions.type == JoinType::LEFT
473  ? rel_left_deep_join->getOuterCondition(level_idx + 1)
474  : nullptr;
475 
476  // collect join col, filter ops, and detailed info of join operation, i.e., op_type,
477  // qualifier, ...
478  // when we have more than one quals, i.e., current_level_join_conditions.quals.size()
479  // > 1, we consider the first qual is used as hashtable building
480  bool is_geo_join{false};
481  for (const auto& join_qual : current_level_join_conditions.quals) {
482  auto qual_bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(join_qual);
483  auto join_qual_str = ::toString(join_qual);
484  if (qual_bin_oper) {
485  is_geo_join = qual_bin_oper->is_overlaps_oper();
486  if (join_qual == current_level_join_conditions.quals.front()) {
487  // set op_info based on the first qual
488  op_info = OpInfo{::toString(qual_bin_oper->get_optype()),
489  ::toString(qual_bin_oper->get_qualifier()),
490  qual_bin_oper->get_type_info().to_string()};
491  }
492  for (auto& col_pair_info : normalizeColumnsPair(qual_bin_oper.get())) {
493  if (col_pair_info.loop_join_qual && !found_eq_join_qual) {
494  // we only consider that cur level's join is loop join if we have no
495  // equi-join qual and both lhs and rhs are not col_var,
496  // i.e., lhs: col_var / rhs: constant / bin_op: kGE
497  if (visited_filter_ops.insert(join_qual_str).second) {
498  filter_ops.push_back(join_qual);
499  }
500  } else {
501  // a qual_bin_oper becomes an inner join qual iff both lhs and rhs are col_var
502  // otherwise it becomes a filter qual
503  bool found_valid_col_vars = false;
504  std::vector<const Analyzer::ColumnVar*> lhs_cvs, rhs_cvs;
505  if (col_pair_info.inner_outer.first && col_pair_info.inner_outer.second) {
506  // we need to modify lhs and rhs if range_oper is detected
507  if (auto range_oper = dynamic_cast<const Analyzer::RangeOper*>(
508  col_pair_info.inner_outer.second)) {
509  lhs_cvs = getColVar(range_oper->get_left_operand());
510  rhs_cvs = getColVar(col_pair_info.inner_outer.first);
511  is_geo_join = true;
512  } else {
513  // this qual is valid and used for typical hash join op
514  lhs_cvs = getColVar(col_pair_info.inner_outer.first);
515  rhs_cvs = getColVar(col_pair_info.inner_outer.second);
516  }
517  if (!lhs_cvs.empty() && !rhs_cvs.empty()) {
518  found_valid_col_vars = true;
519  if (inner_input_idx == -1) {
520  inner_input_idx =
521  get_input_idx(rel_left_deep_join, lhs_cvs.front()->get_table_id());
522  }
523  if (outer_input_idx == -1) {
524  outer_input_idx =
525  get_input_idx(rel_left_deep_join, rhs_cvs.front()->get_table_id());
526  }
527  std::copy(
528  lhs_cvs.begin(), lhs_cvs.end(), std::back_inserter(inner_join_cols));
529  std::copy(
530  rhs_cvs.begin(), rhs_cvs.end(), std::back_inserter(outer_join_cols));
531  }
532  }
533  if (!found_valid_col_vars &&
534  visited_filter_ops.insert(join_qual_str).second) {
535  filter_ops.push_back(join_qual);
536  }
537  }
538  }
539  } else {
540  if (visited_filter_ops.insert(join_qual_str).second) {
541  filter_ops.push_back(join_qual);
542  }
543  }
544  }
545  if (!is_geo_join && (inner_join_cols.size() != outer_join_cols.size())) {
546  VLOG(1) << "Stop DAG extraction (Detect inner/outer col mismatch)";
548  return;
549  }
550 
551  // create RelTranslatedJoin based on the collected info from the join qual
552  // there are total seven types of join query pattern
553  // 1. INNER HASH ONLY
554  // 2. INNER LOOP ONLY (!)
555  // 3. LEFT LOOP ONLY
556  // 4. INNER HASH + INNER LOOP (!)
557  // 5. LEFT LOOP + INNER HASH
558  // 6. LEFT LOOP + INNER LOOP (!)
559  // 7. LEFT LOOP + INNER HASH + INNER LOOP (!)
560  // here, if a query contains INNER LOOP join, its qual has nothing
561  // so, some patterns do not have bin_oper at the specific join nest level
562  // if we find INNER LOOP, corresponding RelTranslatedJoin has nulled LHS and RHS
563  // to mark it as loop join
564  const RelAlgNode* lhs;
565  const RelAlgNode* rhs;
566  if (inner_input_idx != -1 && outer_input_idx != -1) {
567  lhs = rel_left_deep_join->getInput(inner_input_idx);
568  rhs = rel_left_deep_join->getInput(outer_input_idx);
569  } else {
570  if (level_idx == 0) {
571  lhs = rel_left_deep_join->getInput(0);
572  rhs = rel_left_deep_join->getInput(1);
573  } else {
574  lhs = translated_join_info_->rbegin()->get();
575  rhs = rel_left_deep_join->getInput(level_idx + 1);
576  }
577  }
578  CHECK(lhs);
579  CHECK(rhs);
580  auto cur_translated_join_node =
581  std::make_shared<RelTranslatedJoin>(lhs,
582  rhs,
583  inner_join_cols,
584  outer_join_cols,
585  filter_ops,
586  outer_join_cond,
587  nested_loop,
588  current_level_join_conditions.type,
589  op_info.type_,
590  op_info.qualifier_,
591  op_info.typeinfo_);
592  CHECK(cur_translated_join_node);
593  handleTranslatedJoin(parent_node, cur_translated_join_node.get());
594  translated_join_info_->push_back(std::move(cur_translated_join_node));
595  }
596 }
bool isNestedLoopQual() const
#define CHECK_EQ(x, y)
Definition: Logger.h:219
QueryPlanDagCache & global_dag_
TableIdToNodeMap getTableIdToNodeMap()
static ExtractedQueryPlanDag extractQueryPlanDag(const RelAlgNode *top_node, Executor *executor)
static ExtractedJoinInfo extractJoinInfo(const RelAlgNode *top_node, std::optional< unsigned > left_deep_tree_id, std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos, Executor *executor)
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:67
void connectNodes(const RelNodeId parent_id, const RelNodeId child_id)
std::vector< InnerOuterOrLoopQual > normalizeColumnsPair(const Analyzer::BinOper *condition)
const RexScalar * getOuterCondition(const size_t nesting_level) const
void setQueryPlanDag(const std::string &extracted_query_plan_dag) const
std::string QueryPlanDAG
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
const Expr * get_right_operand() const
Definition: Analyzer.h:442
void setRelNodeDagId(const size_t id) const
#define UNREACHABLE()
Definition: Logger.h:255
static std::pair< bool, std::string > hasNonSupportedNodeInDag(const RelAlgNode *rel_alg_node)
std::vector< std::string > extracted_dag_
std::vector< const Analyzer::ColumnVar * > getJoinCols(bool lhs) const
bool g_is_test_env
Definition: Execute.cpp:136
static std::unordered_set< size_t > getScanNodeTableKey(RelAlgNode const *rel_alg_node)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
size_t getQueryPlanDagHash() const
void register_and_visit(const RelAlgNode *parent_node, const RelAlgNode *child_node)
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
const RelAlgNode * getRHS() const
bool validateNodeId(const RelAlgNode *node, std::optional< RelNodeId > retrieved_node_id)
unsigned getId() const
size_t translateColVarsToInfoHash(std::vector< const Analyzer::ColumnVar * > &col_vars, bool col_id_only) const
bool isEmptyQueryPlanDag(const std::string &dag)
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
void handleTranslatedJoin(const RelAlgNode *, const RelTranslatedJoin *)
const RelAlgNode * getInput(const size_t idx) const
HashTableBuildDagMap getHashTableBuildDag()
bool registerNodeToDagCache(const RelAlgNode *parent_node, const RelAlgNode *child_node, std::optional< RelNodeId > retrieved_node_id)
static void extractQueryPlanDagImpl(const RelAlgNode *top_npde, QueryPlanDagExtractor &dag_extractor)
std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos_
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1379
HashTableBuildDagMap hash_table_query_plan_dag_
void handleLeftDeepJoinTree(const RelAlgNode *, const RelLeftDeepInnerJoin *)
void addTableIdToNodeLink(const int table_id, const RelAlgNode *node)
const RelAlgNode * getLHS() const
void visit(const RelAlgNode *, const RelAlgNode *)
std::shared_ptr< TranslatedJoinInfo > translated_join_info_
static InnerOuter normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
Definition: HashJoin.cpp:649
constexpr char const * EMPTY_QUERY_PLAN
#define CHECK(condition)
Definition: Logger.h:211
unsigned node_id(const rapidjson::Value &ra_node) noexcept
const Expr * get_left_operand() const
Definition: Analyzer.h:441
std::string getExtractedQueryPlanDagStr(size_t start_pos=0)
std::vector< Analyzer::ColumnVar const * > getColVar(const Analyzer::Expr *col_info)
const size_t inputCount() const
size_t getRelNodeDagId() const
const TableDescriptor * getTableDescriptor() const
#define VLOG(n)
Definition: Logger.h:305
std::vector< const Analyzer::ColumnVar * > collectColVars(const Analyzer::Expr *target)
const JoinQualsPerNestingLevel * getPerNestingJoinQualInfo(std::optional< unsigned > left_deep_join_tree_id)
bool operator()(std::shared_ptr< Analyzer::Expr > const &qual)
int get_input_idx(RelAlgExecutionUnit const &ra_exe_unit, int const outer_table_id)