OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryPlanDagExtractor.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "QueryPlanDagExtractor.h"
18 #include "RexVisitor.h"
20 
21 #include <boost/algorithm/cxx11/any_of.hpp>
22 
23 extern bool g_is_test_env;
24 
25 namespace {
26 struct IsEquivBinOp {
27  bool operator()(std::shared_ptr<Analyzer::Expr> const& qual) {
28  if (auto oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual)) {
29  return IS_EQUIVALENCE(oper->get_optype());
30  }
31  return false;
32  }
33 };
34 } // namespace
35 
36 std::vector<InnerOuterOrLoopQual> QueryPlanDagExtractor::normalizeColumnsPair(
37  const Analyzer::BinOper* condition) {
38  std::vector<InnerOuterOrLoopQual> result;
39  const auto lhs_tuple_expr =
40  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
41  const auto rhs_tuple_expr =
42  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
43 
44  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
45  auto do_normalize_inner_outer_pair = [&result, &condition, this](
46  const Analyzer::Expr* lhs,
47  const Analyzer::Expr* rhs,
48  const TemporaryTables* temporary_table) {
49  try {
50  auto inner_outer_pair = HashJoin::normalizeColumnPair(lhs,
51  rhs,
52  *executor_->getCatalog(),
53  temporary_table,
54  condition->is_overlaps_oper())
55  .first;
56  InnerOuterOrLoopQual valid_qual{
57  std::make_pair(inner_outer_pair.first, inner_outer_pair.second), false};
58  result.push_back(valid_qual);
59  } catch (HashJoinFail& e) {
60  InnerOuterOrLoopQual invalid_qual{std::make_pair(lhs, rhs), true};
61  result.push_back(invalid_qual);
62  }
63  };
64  if (lhs_tuple_expr) {
65  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
66  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
67  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
68  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
69  do_normalize_inner_outer_pair(
70  lhs_tuple[i].get(), rhs_tuple[i].get(), executor_->getTemporaryTables());
71  }
72  } else {
73  do_normalize_inner_outer_pair(condition->get_left_operand(),
74  condition->get_right_operand(),
75  executor_->getTemporaryTables());
76  }
77  return result;
78 }
79 
80 // To extract query plan DAG, we call this function with root node of the query plan
81 // and some objects required while extracting DAG
82 // We consider a DAG representation of a query plan as a series of "unique" rel node ids
83 // We decide each rel node's node id by searching the cached plan DAG first,
84 // and assign a new id iff there exists no duplicated rel node that can reuse
86  const RelAlgNode* top_node,
87  Executor* executor) {
88  auto dag_checker_res = QueryPlanDagChecker::hasNonSupportedNodeInDag(top_node);
89  if (dag_checker_res.first) {
90  VLOG(1) << "Stop DAG extraction (" << dag_checker_res.second << ")";
91  return {EMPTY_QUERY_PLAN, true};
92  }
93  heavyai::unique_lock<heavyai::shared_mutex> lock(executor->getDataRecyclerLock());
94  auto& cached_dag = executor->getQueryPlanDagCache();
95  QueryPlanDagExtractor dag_extractor(cached_dag, {}, executor, false);
96  extractQueryPlanDagImpl(top_node, dag_extractor);
97  auto extracted_query_plan_dag = dag_extractor.getExtractedQueryPlanDagStr();
98  top_node->setQueryPlanDag(extracted_query_plan_dag);
99  if (auto sort_node = dynamic_cast<RelSort const*>(top_node)) {
100  // we evaluate sort node based on the resultset of its child node
101  // so we need to mark the extracted query plan of the child node
102  // for the resultset recycling
103  auto child_dag = dag_extractor.getExtractedQueryPlanDagStr(1);
104  sort_node->getInput(0)->setQueryPlanDag(child_dag);
105  }
106  return {extracted_query_plan_dag, dag_extractor.isDagExtractionAvailable()};
107 }
108 
110  const RelAlgNode* top_node,
111  std::optional<unsigned> left_deep_tree_id,
112  std::unordered_map<unsigned, JoinQualsPerNestingLevel> left_deep_tree_infos,
113  Executor* executor) {
114  // we already extract a query plan dag for the input query which is stored at top_node
115  if (top_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
116  return {};
117  }
118  heavyai::unique_lock<heavyai::shared_mutex> lock(executor->getDataRecyclerLock());
119  auto& cached_dag = executor->getQueryPlanDagCache();
120  QueryPlanDagExtractor dag_extractor(cached_dag, left_deep_tree_infos, executor, true);
121  extractQueryPlanDagImpl(top_node, dag_extractor);
122  return {dag_extractor.getHashTableBuildDag(), dag_extractor.getTableIdToNodeMap()};
123 }
124 
126  const RelAlgNode* top_node,
127  QueryPlanDagExtractor& dag_extractor) {
128  // add the root node of this query plan DAG
129  auto res = dag_extractor.global_dag_.addNodeIfAbsent(top_node);
130  if (!res) {
131  VLOG(1) << "Stop DAG extraction (Query plan dag cache reaches the maximum capacity)";
132  dag_extractor.contain_not_supported_rel_node_ = true;
133  return;
134  }
135  CHECK(res.has_value());
136  top_node->setRelNodeDagId(res.value());
137  dag_extractor.extracted_dag_.push_back(::toString(res.value()));
138 
139  // visit child node if necessary
140  if (auto table_func_node = dynamic_cast<const RelTableFunction*>(top_node)) {
141  for (size_t i = 0; i < table_func_node->inputCount(); ++i) {
142  dag_extractor.visit(table_func_node, table_func_node->getInput(i));
143  }
144  } else {
145  auto num_child_node = top_node->inputCount();
146  switch (num_child_node) {
147  case 1: // unary op
148  dag_extractor.visit(top_node, top_node->getInput(0));
149  break;
150  case 2: // binary op
151  if (auto trans_join_node = dynamic_cast<const RelTranslatedJoin*>(top_node)) {
152  dag_extractor.visit(trans_join_node, trans_join_node->getLHS());
153  dag_extractor.visit(trans_join_node, trans_join_node->getRHS());
154  break;
155  }
156  VLOG(1) << "Visit an invalid rel node while extracting query plan DAG: "
157  << ::toString(top_node);
158  return;
159  case 0: // leaf node
160  break;
161  default:
162  // since we replace RelLeftDeepJoin as a set of RelTranslatedJoin
163  // which is a binary op, # child nodes for every rel node should be <= 2
164  UNREACHABLE();
165  }
166  }
167 
168  // check whether extracted DAG is available to use
169  if (dag_extractor.extracted_dag_.empty() || dag_extractor.isDagExtractionAvailable()) {
170  dag_extractor.contain_not_supported_rel_node_ = true;
171  return;
172  }
173 
174  if (g_is_test_env) {
175  dag_extractor.executor_->registerExtractedQueryPlanDag(
176  dag_extractor.getExtractedQueryPlanDagStr());
177  }
178  return;
179 }
180 
182  std::ostringstream oss;
183  size_t cnt = 0;
184  if (start_pos > extracted_dag_.size()) {
185  return EMPTY_QUERY_PLAN;
186  }
187  for (auto& dag_node_id : extracted_dag_) {
188  if (cnt >= start_pos) {
189  oss << dag_node_id << "|";
190  }
191  ++cnt;
192  }
193  return oss.str();
194 }
195 
197  std::optional<RelNodeId> retrieved_node_id) {
198  if (!retrieved_node_id) {
199  VLOG(1) << "Stop DAG extraction (Detect an invalid dag id)";
201  return false;
202  }
203  CHECK(retrieved_node_id.has_value());
204  node->setRelNodeDagId(retrieved_node_id.value());
205  return true;
206 }
207 
209  const RelAlgNode* parent_node,
210  const RelAlgNode* child_node,
211  std::optional<RelNodeId> retrieved_node_id) {
212  CHECK(parent_node);
213  CHECK(child_node);
214  CHECK(retrieved_node_id.has_value());
215  auto parent_node_id = parent_node->getRelNodeDagId();
216  global_dag_.connectNodes(parent_node_id, retrieved_node_id.value());
217  extracted_dag_.push_back(::toString(retrieved_node_id.value()));
218  return true;
219 }
220 
222  const RelAlgNode* child_node) {
223  // This function takes a responsibility for all rel nodes
224  // except 1) RelLeftDeepJoinTree and 2) RelTranslatedJoin
225  auto res = global_dag_.addNodeIfAbsent(child_node);
226  if (validateNodeId(child_node, res) &&
227  registerNodeToDagCache(parent_node, child_node, res)) {
228  for (size_t i = 0; i < child_node->inputCount(); i++) {
229  visit(child_node, child_node->getInput(i));
230  }
231  }
232 }
233 
234 // we recursively visit each rel node starting from the root
235 // and collect assigned rel node ids and return them as query plan DAG
236 // for join operations we additionally generate additional information
237 // to recycle each hashtable that needs to process a given query
238 void QueryPlanDagExtractor::visit(const RelAlgNode* parent_node,
239  const RelAlgNode* child_node) {
240  if (!child_node || contain_not_supported_rel_node_) {
241  return;
242  }
243  bool child_visited = false;
244  if (analyze_join_ops_) {
245  if (auto left_deep_joins = dynamic_cast<const RelLeftDeepInnerJoin*>(child_node)) {
246  if (left_deep_tree_infos_.empty()) {
247  // we should have left_deep_tree_info for input left deep tree node
248  VLOG(1) << "Stop DAG extraction (Detect non-supported join pattern)";
250  return;
251  }
252  auto true_parent_node = parent_node;
253  std::shared_ptr<RelFilter> dummy_filter_node{nullptr};
254  const auto inner_cond = left_deep_joins->getInnerCondition();
255  // we analyze left-deep join tree as per-join qual level, so
256  // when visiting RelLeftDeepInnerJoin we decompose it into individual join node
257  // (RelTranslatedJoin).
258  // Thus, this RelLeftDeepInnerJoin object itself is useless when recycling data
259  // but sometimes it has inner condition that has to consider so we add an extra
260  // RelFilter node containing the condition to keep query semantic correctly
261  if (auto cond = dynamic_cast<const RexOperator*>(inner_cond)) {
262  RexDeepCopyVisitor copier;
263  auto copied_inner_cond = copier.visit(cond);
264  dummy_filter_node = std::make_shared<RelFilter>(copied_inner_cond);
265  register_and_visit(parent_node, dummy_filter_node.get());
266  true_parent_node = dummy_filter_node.get();
267  }
268  handleLeftDeepJoinTree(true_parent_node, left_deep_joins);
269  child_visited = true;
270  } else if (auto translated_join_node =
271  dynamic_cast<const RelTranslatedJoin*>(child_node)) {
272  handleTranslatedJoin(parent_node, translated_join_node);
273  child_visited = true;
274  }
275  }
276  if (!child_visited) {
277  register_and_visit(parent_node, child_node);
278  }
279 }
280 
282  const RelAlgNode* parent_node,
283  const RelTranslatedJoin* rel_trans_join) {
284  // when left-deep tree has multiple joins this rel_trans_join can be revisited
285  // but we need to mark the child query plan to accurately catch the query plan dag
286  // here we do not create new dag id since all rel nodes are visited already
287  CHECK(parent_node);
288  CHECK(rel_trans_join);
289 
290  auto res = global_dag_.addNodeIfAbsent(rel_trans_join);
291  if (!validateNodeId(rel_trans_join, res) ||
292  !registerNodeToDagCache(parent_node, rel_trans_join, res)) {
293  return;
294  }
295 
296  // To extract an access path (query plan DAG) for hashtable is to use a difference of
297  // two query plan DAGs 1) query plan DAG after visiting RHS node and 2) query plan DAG
298  // after visiting LHS node so by comparing 1) and 2) we can extract which query plan DAG
299  // is necessary to project join cols that are used to build a hashtable and we use it as
300  // hashtable access path
301 
302  // this lambda function deals with the case of recycled query resultset
303  // specifically, we can avoid unnecessary visiting of child tree(s) when we already have
304  // the extracted query plan DAG for the given child rel node
305  // instead, we just fill the node id vector (a sequence of node ids we visited) by using
306  // the dag of the child node
307  auto fill_node_ids_to_dag_vec = [&](const std::string& node_ids) {
308  auto node_ids_vec = split(node_ids, "|");
309  // the last elem is an empty one
310  std::for_each(node_ids_vec.begin(),
311  std::prev(node_ids_vec.end()),
312  [&](const std::string& node_id) { extracted_dag_.push_back(node_id); });
313  };
314  QueryPlanDAG current_plan_dag, after_rhs_visited, after_lhs_visited;
315  current_plan_dag = getExtractedQueryPlanDagStr();
316  auto rhs_node = rel_trans_join->getRHS();
317  std::unordered_set<size_t> rhs_input_keys, lhs_input_keys;
318  if (rhs_node) {
319  if (rhs_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
320  visit(rel_trans_join, rhs_node);
321  } else {
322  fill_node_ids_to_dag_vec(rhs_node->getQueryPlanDag());
323  }
324  after_rhs_visited = getExtractedQueryPlanDagStr();
325  addTableIdToNodeLink(rhs_node->getId(), rhs_node);
326  rhs_input_keys = ScanNodeTableKeyCollector::getScanNodeTableKey(rhs_node);
327  }
328  auto lhs_node = rel_trans_join->getLHS();
329  if (rel_trans_join->getLHS()) {
330  if (lhs_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
331  visit(rel_trans_join, lhs_node);
332  } else {
333  fill_node_ids_to_dag_vec(lhs_node->getQueryPlanDag());
334  }
335  after_lhs_visited = getExtractedQueryPlanDagStr();
336  addTableIdToNodeLink(lhs_node->getId(), lhs_node);
337  lhs_input_keys = ScanNodeTableKeyCollector::getScanNodeTableKey(lhs_node);
338  }
339  if (isEmptyQueryPlanDag(after_lhs_visited) || isEmptyQueryPlanDag(after_rhs_visited)) {
340  VLOG(1) << "Stop DAG extraction (Detect invalid query plan dag of join col(s))";
342  return;
343  }
344  // after visiting new node, we have added node id(s) which can be used as an access path
345  // so, we extract that node id(s) by splitting the new plan dag by the current plan dag
346  auto outer_table_identifier = split(after_rhs_visited, current_plan_dag)[1];
347  auto hash_table_identfier = split(after_lhs_visited, after_rhs_visited)[1];
348  auto join_qual_info = EMPTY_HASHED_PLAN_DAG_KEY;
349  if (!rel_trans_join->isNestedLoopQual()) {
350  auto inner_join_cols = rel_trans_join->getJoinCols(true);
351  auto inner_join_col_info =
352  global_dag_.translateColVarsToInfoHash(inner_join_cols, false);
353  boost::hash_combine(join_qual_info, inner_join_col_info);
354  auto outer_join_cols = rel_trans_join->getJoinCols(false);
355  auto outer_join_col_info =
356  global_dag_.translateColVarsToInfoHash(outer_join_cols, false);
357  boost::hash_combine(join_qual_info, outer_join_col_info);
358  // collect table keys from both rhs and lhs side
359  std::unordered_set<size_t> collected_table_keys;
360  collected_table_keys.insert(lhs_input_keys.begin(), lhs_input_keys.end());
361  if (!inner_join_cols.empty() &&
362  inner_join_cols[0]->get_type_info().is_dict_encoded_type()) {
363  collected_table_keys.insert(rhs_input_keys.begin(), rhs_input_keys.end());
364  }
365 
366  auto it = hash_table_query_plan_dag_.find(join_qual_info);
367  if (it == hash_table_query_plan_dag_.end()) {
368  VLOG(2) << "Add hashtable access path"
369  << ", inner join col info: " << inner_join_col_info
370  << " (access path: " << hash_table_identfier << ")"
371  << ", outer join col info: " << outer_join_col_info
372  << " (access path: " << outer_table_identifier << ")";
374  join_qual_info,
375  HashTableBuildDag(inner_join_col_info,
376  outer_join_col_info,
377  boost::hash_value(hash_table_identfier),
378  boost::hash_value(outer_table_identifier),
379  std::move(collected_table_keys)));
380  }
381  } else {
382  VLOG(2) << "Add loop join access path, for LHS: " << outer_table_identifier
383  << ", for RHS: " << hash_table_identfier << "\n";
384  }
385 }
386 
387 namespace {
388 struct OpInfo {
389  std::string type_;
390  std::string qualifier_;
391  std::string typeinfo_;
392 };
393 
394 // Return the input index whose tableId matches the given tbl_id.
395 // If none then return -1.
396 int get_input_idx(const RelLeftDeepInnerJoin* rel_left_deep_join, int const tbl_id) {
397  for (size_t input_idx = 0; input_idx < rel_left_deep_join->inputCount(); ++input_idx) {
398  auto const input_node = rel_left_deep_join->getInput(input_idx);
399  auto const scan_node = dynamic_cast<const RelScan*>(input_node);
400  int const target_table_id = scan_node ? scan_node->getTableDescriptor()->tableId
401  : -1 * input_node->getId(); // temporary table
402  if (target_table_id == tbl_id) {
403  return input_idx;
404  }
405  }
406  return -1;
407 }
408 } // namespace
409 
410 std::vector<Analyzer::ColumnVar const*> QueryPlanDagExtractor::getColVar(
411  Analyzer::Expr const* col_info) {
412  if (auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(col_info)) {
413  return {col_var};
414  } else {
415  return global_dag_.collectColVars(col_info);
416  }
417 }
418 
419 // we coalesce join quals and related filter conditions into a single RelLeftDeepInnerJoin
420 // node when converting calcite AST to logical query plan, but to recycle hashtable(s) we
421 // need to know access path of each hashtable, so we disassemble it into a set of join
422 // qual and collect hashtable info from there
424  const RelAlgNode* parent_node,
425  const RelLeftDeepInnerJoin* rel_left_deep_join) {
426  CHECK(parent_node);
427  CHECK(rel_left_deep_join);
428 
429  // RelLeftDeepInnerJoin node does not need to be added to DAG since
430  // RelLeftDeepInnerJoin is a logical node and
431  // we add all join nodes of this `RelLeftDeepInnerJoin`
432  // thus, the below `left_deep_tree_id` is not the same as its DAG id
433  // (we do not have a DAG node id for this `RelLeftDeepInnerJoin`)
434  auto left_deep_tree_id = rel_left_deep_join->getId();
435  auto left_deep_join_info = getPerNestingJoinQualInfo(left_deep_tree_id);
436  if (!left_deep_join_info) {
437  // we should have left_deep_tree_info for input left deep tree node
438  VLOG(1) << "Stop DAG extraction (Detect Non-supported join pattern)";
440  return;
441  }
442 
443  // gathering per-join qual level info to correctly recycle each hashtable (and necessary
444  // info) that we created
445  // Here we visit each join qual in bottom-up manner to distinct DAGs among join quals
446  // Let say we have three joins- #1: R.a = S.a / #2: R.b = T.b / #3. R.c = X.c
447  // When we start to visit #1, we cannot determine outer col's dag clearly
448  // since we need to visit #2 and #3 due to the current visitor's behavior
449  // In contrast, when starting from #3, we clearly retrieve both inputs' dag
450  // by skipping already visited nodes
451  // So when we visit #2 after visiting #3, we can skip to consider nodes beloning to
452  // qual #3 so we clearly retrieve DAG only corresponding to #2's
453  for (size_t level_idx = 0; level_idx < left_deep_join_info->size(); ++level_idx) {
454  const auto& current_level_join_conditions = left_deep_join_info->at(level_idx);
455  std::vector<const Analyzer::ColumnVar*> inner_join_cols;
456  std::vector<const Analyzer::ColumnVar*> outer_join_cols;
457  std::vector<std::shared_ptr<const Analyzer::Expr>> filter_ops;
458  int inner_input_idx{-1};
459  int outer_input_idx{-1};
460  OpInfo op_info{"UNDEFINED", "UNDEFINED", "UNDEFINED"};
461  std::unordered_set<std::string> visited_filter_ops;
462 
463  // we first check whether this qual needs nested loop
464  const bool found_eq_join_qual =
465  current_level_join_conditions.type != JoinType::INVALID &&
466  boost::algorithm::any_of(current_level_join_conditions.quals, IsEquivBinOp{});
467  const bool nested_loop = !found_eq_join_qual;
468  const bool is_left_join = current_level_join_conditions.type == JoinType::LEFT;
469  RexScalar const* const outer_join_cond =
470  is_left_join ? rel_left_deep_join->getOuterCondition(level_idx + 1) : nullptr;
471 
472  // collect join col, filter ops, and detailed info of join operation, i.e., op_type,
473  // qualifier, ...
474  // when we have more than one quals, i.e., current_level_join_conditions.quals.size()
475  // > 1, we consider the first qual is used as hashtable building
476  bool is_geo_join{false};
477  for (const auto& join_qual : current_level_join_conditions.quals) {
478  auto qual_bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(join_qual);
479  auto join_qual_str = ::toString(join_qual);
480  if (qual_bin_oper) {
481  is_geo_join = qual_bin_oper->is_overlaps_oper();
482  if (join_qual == current_level_join_conditions.quals.front()) {
483  // set op_info based on the first qual
484  op_info = OpInfo{::toString(qual_bin_oper->get_optype()),
485  ::toString(qual_bin_oper->get_qualifier()),
486  qual_bin_oper->get_type_info().to_string()};
487  }
488  for (auto& col_pair_info : normalizeColumnsPair(qual_bin_oper.get())) {
489  // even though we fall back to loop join when left outer join has
490  // non-eq comparator so we additionally check it here to classify the qual
491  // properly todo(yoonmin): relax left join case once we have an improved logic
492  // for this
493  if (!found_eq_join_qual && (is_left_join || col_pair_info.loop_join_qual)) {
494  // we only consider that cur level's join is loop join if we have no
495  // equi-join qual and both lhs and rhs are not col_var,
496  // i.e., lhs: col_var / rhs: constant / bin_op: kGE
497  // also, currently we fallback to loop-join when left outer join
498  // has non-eq join qual
499  if (visited_filter_ops.insert(join_qual_str).second) {
500  filter_ops.push_back(join_qual);
501  }
502  } else {
503  // a qual_bin_oper becomes an inner join qual iff both lhs and rhs are col_var
504  // otherwise it becomes a filter qual
505  bool found_valid_col_vars = false;
506  std::vector<const Analyzer::ColumnVar*> lhs_cvs, rhs_cvs;
507  if (col_pair_info.inner_outer.first && col_pair_info.inner_outer.second) {
508  // we need to modify lhs and rhs if range_oper is detected
509  if (auto range_oper = dynamic_cast<const Analyzer::RangeOper*>(
510  col_pair_info.inner_outer.second)) {
511  lhs_cvs = getColVar(range_oper->get_left_operand());
512  rhs_cvs = getColVar(col_pair_info.inner_outer.first);
513  is_geo_join = true;
514  } else {
515  // this qual is valid and used for typical hash join op
516  lhs_cvs = getColVar(col_pair_info.inner_outer.first);
517  rhs_cvs = getColVar(col_pair_info.inner_outer.second);
518  }
519  if (!lhs_cvs.empty() && !rhs_cvs.empty()) {
520  found_valid_col_vars = true;
521  if (inner_input_idx == -1) {
522  inner_input_idx =
523  get_input_idx(rel_left_deep_join, lhs_cvs.front()->get_table_id());
524  }
525  if (outer_input_idx == -1) {
526  outer_input_idx =
527  get_input_idx(rel_left_deep_join, rhs_cvs.front()->get_table_id());
528  }
529  std::copy(
530  lhs_cvs.begin(), lhs_cvs.end(), std::back_inserter(inner_join_cols));
531  std::copy(
532  rhs_cvs.begin(), rhs_cvs.end(), std::back_inserter(outer_join_cols));
533  }
534  }
535  if (!found_valid_col_vars &&
536  visited_filter_ops.insert(join_qual_str).second) {
537  filter_ops.push_back(join_qual);
538  }
539  }
540  }
541  } else {
542  if (visited_filter_ops.insert(join_qual_str).second) {
543  filter_ops.push_back(join_qual);
544  }
545  }
546  }
547  if (!is_geo_join && (inner_join_cols.size() != outer_join_cols.size())) {
548  VLOG(1) << "Stop DAG extraction (Detect inner/outer col mismatch)";
550  return;
551  }
552 
553  // create RelTranslatedJoin based on the collected info from the join qual
554  // there are total seven types of join query pattern
555  // 1. INNER HASH ONLY
556  // 2. INNER LOOP ONLY (!)
557  // 3. LEFT LOOP ONLY
558  // 4. INNER HASH + INNER LOOP (!)
559  // 5. LEFT LOOP + INNER HASH
560  // 6. LEFT LOOP + INNER LOOP (!)
561  // 7. LEFT LOOP + INNER HASH + INNER LOOP (!)
562  // here, if a query contains INNER LOOP join, its qual has nothing
563  // so, some patterns do not have bin_oper at the specific join nest level
564  // if we find INNER LOOP, corresponding RelTranslatedJoin has nulled LHS and RHS
565  // to mark it as loop join
566  const RelAlgNode* lhs;
567  const RelAlgNode* rhs;
568  if (inner_input_idx != -1 && outer_input_idx != -1) {
569  lhs = rel_left_deep_join->getInput(inner_input_idx);
570  rhs = rel_left_deep_join->getInput(outer_input_idx);
571  } else {
572  if (level_idx == 0) {
573  lhs = rel_left_deep_join->getInput(0);
574  rhs = rel_left_deep_join->getInput(1);
575  } else {
576  lhs = translated_join_info_->rbegin()->get();
577  rhs = rel_left_deep_join->getInput(level_idx + 1);
578  }
579  }
580  CHECK(lhs);
581  CHECK(rhs);
582  auto cur_translated_join_node =
583  std::make_shared<RelTranslatedJoin>(lhs,
584  rhs,
585  inner_join_cols,
586  outer_join_cols,
587  filter_ops,
588  outer_join_cond,
589  nested_loop,
590  current_level_join_conditions.type,
591  op_info.type_,
592  op_info.qualifier_,
593  op_info.typeinfo_);
594  CHECK(cur_translated_join_node);
595  handleTranslatedJoin(parent_node, cur_translated_join_node.get());
596  translated_join_info_->push_back(std::move(cur_translated_join_node));
597  }
598 }
bool isNestedLoopQual() const
Definition: RelAlgDag.h:1507
#define CHECK_EQ(x, y)
Definition: Logger.h:230
QueryPlanDagCache & global_dag_
TableIdToNodeMap getTableIdToNodeMap()
static ExtractedQueryPlanDag extractQueryPlanDag(const RelAlgNode *top_node, Executor *executor)
static ExtractedJoinInfo extractJoinInfo(const RelAlgNode *top_node, std::optional< unsigned > left_deep_tree_id, std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos, Executor *executor)
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:68
void connectNodes(const RelNodeId parent_id, const RelNodeId child_id)
std::vector< InnerOuterOrLoopQual > normalizeColumnsPair(const Analyzer::BinOper *condition)
const RexScalar * getOuterCondition(const size_t nesting_level) const
void setQueryPlanDag(const std::string &extracted_query_plan_dag) const
Definition: RelAlgDag.h:799
std::string QueryPlanDAG
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
const Expr * get_right_operand() const
Definition: Analyzer.h:450
void setRelNodeDagId(const size_t id) const
Definition: RelAlgDag.h:860
#define UNREACHABLE()
Definition: Logger.h:266
static std::pair< bool, std::string > hasNonSupportedNodeInDag(const RelAlgNode *rel_alg_node)
std::vector< std::string > extracted_dag_
std::vector< const Analyzer::ColumnVar * > getJoinCols(bool lhs) const
Definition: RelAlgDag.h:1501
bool g_is_test_env
Definition: Execute.cpp:141
static std::unordered_set< size_t > getScanNodeTableKey(RelAlgNode const *rel_alg_node)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
size_t getQueryPlanDagHash() const
Definition: RelAlgDag.h:808
void register_and_visit(const RelAlgNode *parent_node, const RelAlgNode *child_node)
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
const RelAlgNode * getRHS() const
Definition: RelAlgDag.h:1472
bool validateNodeId(const RelAlgNode *node, std::optional< RelNodeId > retrieved_node_id)
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
Definition: HashJoin.cpp:797
unsigned getId() const
Definition: RelAlgDag.h:814
size_t translateColVarsToInfoHash(std::vector< const Analyzer::ColumnVar * > &col_vars, bool col_id_only) const
bool isEmptyQueryPlanDag(const std::string &dag)
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
void handleTranslatedJoin(const RelAlgNode *, const RelTranslatedJoin *)
std::unique_lock< T > unique_lock
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:826
HashTableBuildDagMap getHashTableBuildDag()
bool registerNodeToDagCache(const RelAlgNode *parent_node, const RelAlgNode *child_node, std::optional< RelNodeId > retrieved_node_id)
static void extractQueryPlanDagImpl(const RelAlgNode *top_npde, QueryPlanDagExtractor &dag_extractor)
std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos_
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1448
HashTableBuildDagMap hash_table_query_plan_dag_
void handleLeftDeepJoinTree(const RelAlgNode *, const RelLeftDeepInnerJoin *)
void addTableIdToNodeLink(const int table_id, const RelAlgNode *node)
const RelAlgNode * getLHS() const
Definition: RelAlgDag.h:1471
void visit(const RelAlgNode *, const RelAlgNode *)
std::shared_ptr< TranslatedJoinInfo > translated_join_info_
constexpr char const * EMPTY_QUERY_PLAN
#define CHECK(condition)
Definition: Logger.h:222
const Expr * get_left_operand() const
Definition: Analyzer.h:449
std::string getExtractedQueryPlanDagStr(size_t start_pos=0)
std::vector< Analyzer::ColumnVar const * > getColVar(const Analyzer::Expr *col_info)
const size_t inputCount() const
Definition: RelAlgDag.h:824
size_t getRelNodeDagId() const
Definition: RelAlgDag.h:862
unsigned node_id(const rapidjson::Value &ra_node) noexcept
Definition: RelAlgDag.cpp:899
const TableDescriptor * getTableDescriptor() const
Definition: RelAlgDag.h:911
#define VLOG(n)
Definition: Logger.h:316
std::vector< const Analyzer::ColumnVar * > collectColVars(const Analyzer::Expr *target)
const JoinQualsPerNestingLevel * getPerNestingJoinQualInfo(std::optional< unsigned > left_deep_join_tree_id)
bool operator()(std::shared_ptr< Analyzer::Expr > const &qual)
int get_input_idx(RelAlgExecutionUnit const &ra_exe_unit, int const outer_table_id)