OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryPlanDagExtractor Class Reference

#include <QueryPlanDagExtractor.h>

+ Collaboration diagram for QueryPlanDagExtractor:

Public Member Functions

 QueryPlanDagExtractor (QueryPlanDagCache &global_dag, const Catalog_Namespace::Catalog &catalog, std::unordered_map< unsigned, JoinQualsPerNestingLevel > &left_deep_tree_infos, const TemporaryTables &temporary_tables, Executor *executor)
 
HashTableBuildDagMap & getHashTableBuildDag ()
 
std::shared_ptr
< TranslatedJoinInfo >
getTranslatedJoinInfo ()
 
const JoinQualsPerNestingLevel * getPerNestingJoinQualInfo (std::optional< unsigned > left_deep_join_tree_id)
 
bool isDagExtractionAvailable ()
 
std::string getExtractedQueryPlanDagStr ()
 
std::vector< InnerOuterOrLoopQual > normalizeColumnsPair (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat)
 
bool isEmptyQueryPlanDag (const std::string &dag)
 
TableIdToNodeMap & getTableIdToNodeMap ()
 
void addTableIdToNodeLink (const int table_id, const RelAlgNode *node)
 
void clearInternaStatus ()
 

Static Public Member Functions

static ExtractedPlanDag extractQueryPlanDag (const RelAlgNode *node, const Catalog_Namespace::Catalog &catalog, std::optional< unsigned > left_deep_tree_id, std::unordered_map< unsigned, JoinQualsPerNestingLevel > &left_deep_tree_infos, const TemporaryTables &temporary_tables, Executor *executor, const RelAlgTranslator &rel_alg_translator)
 

Private Member Functions

void visit (const RelAlgNode *, const RelAlgNode *)
 
Analyzer::ColumnVar const * getColVar (const Analyzer::Expr *col_info)
 
void handleLeftDeepJoinTree (const RelAlgNode *, const RelLeftDeepInnerJoin *)
 
void handleTranslatedJoin (const RelAlgNode *, const RelTranslatedJoin *)
 
bool validateNodeId (const RelAlgNode *node, std::optional< RelNodeId > retrieved_node_id)
 
bool registerNodeToDagCache (const RelAlgNode *parent_node, const RelAlgNode *child_node, std::optional< RelNodeId > retrieved_node_id)
 

Static Private Member Functions

static ExtractedPlanDag extractQueryPlanDagImpl (const RelAlgNode *node, const Catalog_Namespace::Catalog &catalog, std::optional< unsigned > left_deep_tree_id, std::unordered_map< unsigned, JoinQualsPerNestingLevel > &left_deep_tree_infos, const TemporaryTables &temporary_tables, Executor *executor)
 

Private Attributes

QueryPlanDagCache & global_dag_
 
const Catalog_Namespace::Catalog & catalog_
 
bool contain_not_supported_rel_node_
 
std::unordered_map< unsigned,
JoinQualsPerNestingLevel > & 
left_deep_tree_infos_
 
const TemporaryTables & temporary_tables_
 
Executor * executor_
 
std::shared_ptr
< TranslatedJoinInfo >
translated_join_info_
 
HashTableBuildDagMap hash_table_query_plan_dag_
 
TableIdToNodeMap table_id_to_node_map_
 
std::vector< size_t > extracted_dag_
 

Detailed Description

Definition at line 55 of file QueryPlanDagExtractor.h.

Constructor & Destructor Documentation

QueryPlanDagExtractor::QueryPlanDagExtractor ( QueryPlanDagCache &  global_dag,
const Catalog_Namespace::Catalog &  catalog,
std::unordered_map< unsigned, JoinQualsPerNestingLevel > &  left_deep_tree_infos,
const TemporaryTables &  temporary_tables,
Executor *  executor
)
inline

Definition at line 57 of file QueryPlanDagExtractor.h.

References translated_join_info_.

63  : global_dag_(global_dag)
64  , catalog_(catalog)
66  , left_deep_tree_infos_(left_deep_tree_infos)
67  , temporary_tables_(temporary_tables)
68  , executor_(executor) {
69  translated_join_info_ = std::make_shared<TranslatedJoinInfo>();
70  }
QueryPlanDagCache & global_dag_
const Catalog_Namespace::Catalog & catalog_
const TemporaryTables & temporary_tables_
std::shared_ptr< TranslatedJoinInfo > translated_join_info_
std::unordered_map< unsigned, JoinQualsPerNestingLevel > & left_deep_tree_infos_

Member Function Documentation

void QueryPlanDagExtractor::addTableIdToNodeLink ( const int  table_id,
const RelAlgNode *  node
)
inline

Definition at line 113 of file QueryPlanDagExtractor.h.

References table_id_to_node_map_.

Referenced by handleTranslatedJoin().

113  {
114  auto it = table_id_to_node_map_.find(table_id);
115  if (it == table_id_to_node_map_.end()) {
116  table_id_to_node_map_.emplace(table_id, node);
117  }
118  }
TableIdToNodeMap table_id_to_node_map_

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::clearInternaStatus ( )
inline

Definition at line 120 of file QueryPlanDagExtractor.h.

References contain_not_supported_rel_node_, extracted_dag_, and table_id_to_node_map_.

Referenced by handleLeftDeepJoinTree(), handleTranslatedJoin(), validateNodeId(), and visit().

120  {
122  extracted_dag_.clear();
123  table_id_to_node_map_.clear();
124  }
TableIdToNodeMap table_id_to_node_map_
std::vector< size_t > extracted_dag_

+ Here is the caller graph for this function:

ExtractedPlanDag QueryPlanDagExtractor::extractQueryPlanDag ( const RelAlgNode *  node,
const Catalog_Namespace::Catalog &  catalog,
std::optional< unsigned >  left_deep_tree_id,
std::unordered_map< unsigned, JoinQualsPerNestingLevel > &  left_deep_tree_infos,
const TemporaryTables &  temporary_tables,
Executor *  executor,
const RelAlgTranslator &  rel_alg_translator
)
static

Definition at line 80 of file QueryPlanDagExtractor.cpp.

References EMPTY_QUERY_PLAN, extractQueryPlanDagImpl(), QueryPlanDagChecker::isNotSupportedDag(), RelAlgNode::toString(), and VLOG.

Referenced by RelAlgExecutor::createAggregateWorkUnit(), RelAlgExecutor::createCompoundWorkUnit(), RelAlgExecutor::createFilterWorkUnit(), RelAlgExecutor::createProjectWorkUnit(), and QueryRunner::QueryRunner::extractQueryPlanDag().

88  {
89  // check if this plan tree has not supported pattern for DAG extraction
90  if (QueryPlanDagChecker::isNotSupportedDag(node, rel_alg_translator)) {
91  VLOG(1) << "Stop DAG extraction due to not supproed node: " << node->toString();
92  return {node, EMPTY_QUERY_PLAN, nullptr, nullptr, {}, {}, true};
93  }
94 
95  return extractQueryPlanDagImpl(
96  node, catalog, left_deep_tree_id, left_deep_tree_infos, temporary_tables, executor);
97 }
static bool isNotSupportedDag(const RelAlgNode *rel_alg_node, const RelAlgTranslator &rel_alg_translator)
virtual std::string toString() const =0
constexpr char const * EMPTY_QUERY_PLAN
static ExtractedPlanDag extractQueryPlanDagImpl(const RelAlgNode *node, const Catalog_Namespace::Catalog &catalog, std::optional< unsigned > left_deep_tree_id, std::unordered_map< unsigned, JoinQualsPerNestingLevel > &left_deep_tree_infos, const TemporaryTables &temporary_tables, Executor *executor)
#define VLOG(n)
Definition: Logger.h:303

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ExtractedPlanDag QueryPlanDagExtractor::extractQueryPlanDagImpl ( const RelAlgNode *  node,
const Catalog_Namespace::Catalog &  catalog,
std::optional< unsigned >  left_deep_tree_id,
std::unordered_map< unsigned, JoinQualsPerNestingLevel > &  left_deep_tree_infos,
const TemporaryTables &  temporary_tables,
Executor *  executor
)
staticprivate

Definition at line 99 of file QueryPlanDagExtractor.cpp.

References CHECK, EMPTY_QUERY_PLAN, extracted_dag_, getExtractedQueryPlanDagStr(), getHashTableBuildDag(), RelAlgNode::getInput(), getPerNestingJoinQualInfo(), getTableIdToNodeMap(), getTranslatedJoinInfo(), RelAlgNode::inputCount(), isDagExtractionAvailable(), run_benchmark_import::res, RelAlgNode::setRelNodeDagId(), toString(), RelAlgNode::toString(), UNREACHABLE, visit(), and VLOG.

Referenced by extractQueryPlanDag().

106  {
107  mapd_unique_lock<mapd_shared_mutex> lock(executor->getDataRecyclerLock());
108 
109  auto& cached_dag = executor->getQueryPlanDagCache();
110  QueryPlanDagExtractor dag_extractor(
111  cached_dag, catalog, left_deep_tree_infos, temporary_tables, executor);
112 
113  // add the root node of this query plan DAG
114  auto res = cached_dag.addNodeIfAbsent(node);
115  if (!res) {
116  VLOG(1) << "Stop DAG extraction while adding node to the DAG node cache: "
117  << node->toString();
118  return {node, EMPTY_QUERY_PLAN, nullptr, nullptr, {}, {}, true};
119  }
120  CHECK(res.has_value());
121  node->setRelNodeDagId(res.value());
122  dag_extractor.extracted_dag_.push_back(res.value());
123 
124  // visit child node if necessary
125  auto num_child_node = node->inputCount();
126  switch (num_child_node) {
127  case 1: // unary op
128  dag_extractor.visit(node, node->getInput(0));
129  break;
130  case 2: // binary op
131  if (auto trans_join_node = dynamic_cast<const RelTranslatedJoin*>(node)) {
132  dag_extractor.visit(trans_join_node, trans_join_node->getLHS());
133  dag_extractor.visit(trans_join_node, trans_join_node->getRHS());
134  break;
135  }
136  VLOG(1) << "Visit an invalid rel node while extracting query plan DAG: "
137  << ::toString(node);
138  return {node, EMPTY_QUERY_PLAN, nullptr, nullptr, {}, {}, true};
139  case 0: // leaf node
140  break;
141  default:
142  // since we replace RelLeftDeepJoin as a set of RelTranslatedJoin
143  // which is a binary op, # child nodes for every rel node should be <= 2
144  UNREACHABLE();
145  }
146 
147  // check whether extracted DAG is available to use
148  if (dag_extractor.extracted_dag_.empty() || dag_extractor.isDagExtractionAvailable()) {
149  return {node, EMPTY_QUERY_PLAN, nullptr, nullptr, {}, {}, true};
150  }
151 
152  return {node,
153  dag_extractor.getExtractedQueryPlanDagStr(),
154  dag_extractor.getTranslatedJoinInfo(),
155  dag_extractor.getPerNestingJoinQualInfo(left_deep_tree_id),
156  dag_extractor.getHashTableBuildDag(),
157  dag_extractor.getTableIdToNodeMap(),
158  false};
159 }
std::string toString(const ExtArgumentType &sig_type)
void setRelNodeDagId(const size_t id) const
#define UNREACHABLE()
Definition: Logger.h:253
const RelAlgNode * getInput(const size_t idx) const
virtual std::string toString() const =0
constexpr char const * EMPTY_QUERY_PLAN
#define CHECK(condition)
Definition: Logger.h:209
const size_t inputCount() const
#define VLOG(n)
Definition: Logger.h:303

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Analyzer::ColumnVar const * QueryPlanDagExtractor::getColVar ( const Analyzer::Expr *  col_info)
private

Definition at line 343 of file QueryPlanDagExtractor.cpp.

References QueryPlanDagCache::collectColVars(), and global_dag_.

Referenced by handleLeftDeepJoinTree().

344  {
345  auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(col_info);
346  if (!col_var) {
347  auto visited_cols = global_dag_.collectColVars(col_info);
348  if (visited_cols.size() == 1) {
349  col_var = dynamic_cast<const Analyzer::ColumnVar*>(visited_cols[0]);
350  }
351  }
352  return col_var;
353 }
QueryPlanDagCache & global_dag_
std::vector< const Analyzer::ColumnVar * > collectColVars(const Analyzer::Expr *target)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string QueryPlanDagExtractor::getExtractedQueryPlanDagStr ( )

Definition at line 161 of file QueryPlanDagExtractor.cpp.

References contain_not_supported_rel_node_, and extracted_dag_.

Referenced by extractQueryPlanDagImpl(), and handleTranslatedJoin().

161  {
162  std::ostringstream oss;
164  oss << "N/A";
165  } else {
166  for (auto& dag_node_id : extracted_dag_) {
167  oss << dag_node_id << "|";
168  }
169  }
170  return oss.str();
171 }
std::vector< size_t > extracted_dag_

+ Here is the caller graph for this function:

HashTableBuildDagMap& QueryPlanDagExtractor::getHashTableBuildDag ( )
inline

Definition at line 82 of file QueryPlanDagExtractor.h.

References hash_table_query_plan_dag_.

Referenced by extractQueryPlanDagImpl().

HashTableBuildDagMap hash_table_query_plan_dag_

+ Here is the caller graph for this function:

const JoinQualsPerNestingLevel* QueryPlanDagExtractor::getPerNestingJoinQualInfo ( std::optional< unsigned >  left_deep_join_tree_id)
inline

Definition at line 88 of file QueryPlanDagExtractor.h.

References CHECK, and left_deep_tree_infos_.

Referenced by extractQueryPlanDagImpl(), and handleLeftDeepJoinTree().

89  {
90  if (left_deep_tree_infos_.empty() || !left_deep_join_tree_id) {
91  return nullptr;
92  }
93  CHECK(left_deep_join_tree_id.has_value());
94  auto it = left_deep_tree_infos_.find(left_deep_join_tree_id.value());
95  if (it == left_deep_tree_infos_.end()) {
96  return nullptr;
97  }
98  return &it->second;
99  }
#define CHECK(condition)
Definition: Logger.h:209
std::unordered_map< unsigned, JoinQualsPerNestingLevel > & left_deep_tree_infos_

+ Here is the caller graph for this function:

TableIdToNodeMap& QueryPlanDagExtractor::getTableIdToNodeMap ( )
inline

Definition at line 111 of file QueryPlanDagExtractor.h.

References table_id_to_node_map_.

Referenced by extractQueryPlanDagImpl().

111 { return table_id_to_node_map_; }
TableIdToNodeMap table_id_to_node_map_

+ Here is the caller graph for this function:

std::shared_ptr<TranslatedJoinInfo> QueryPlanDagExtractor::getTranslatedJoinInfo ( )
inline

Definition at line 84 of file QueryPlanDagExtractor.h.

References translated_join_info_.

Referenced by extractQueryPlanDagImpl().

84  {
85  return translated_join_info_;
86  }
std::shared_ptr< TranslatedJoinInfo > translated_join_info_

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::handleLeftDeepJoinTree ( const RelAlgNode *  parent_node,
const RelLeftDeepInnerJoin *  rel_left_deep_join
)
private

Definition at line 359 of file QueryPlanDagExtractor.cpp.

References catalog_, CHECK, clearInternaStatus(), anonymous_namespace{QueryPlanDagExtractor.cpp}::get_input_idx(), getColVar(), RelAlgNode::getId(), RelAlgNode::getInput(), RelLeftDeepInnerJoin::getOuterCondition(), getPerNestingJoinQualInfo(), handleTranslatedJoin(), INVALID, LEFT, normalizeColumnsPair(), toString(), translated_join_info_, and VLOG.

Referenced by visit().

361  {
362  CHECK(parent_node);
363  CHECK(rel_left_deep_join);
364 
365  // RelLeftDeepInnerJoin node does not need to be added to DAG since
366  // RelLeftDeepInnerJoin is a logical node and
367  // we add all join nodes of this `RelLeftDeepInnerJoin`
368  // thus, the below `left_deep_tree_id` is not the same as its DAG id
369  // (we do not have a DAG node id for this `RelLeftDeepInnerJoin`)
370  auto left_deep_tree_id = rel_left_deep_join->getId();
371  auto left_deep_join_info = getPerNestingJoinQualInfo(left_deep_tree_id);
372  if (!left_deep_join_info) {
373  // we should have left_deep_tree_info for input left deep tree node
374  VLOG(1) << "Stop DAG extraction due to not supported join pattern";
375  clearInternaStatus();
376  return;
377  }
378 
379  // gathering per-join qual level info to correctly recycle each hashtable (and necessary
380  // info) that we created
381  // Here we visit each join qual in bottom-up manner to distinct DAGs among join quals
382  // Let say we have three joins- #1: R.a = S.a / #2: R.b = T.b / #3. R.c = X.c
383  // When we start to visit #1, we cannot determine outer col's dag clearly
384  // since we need to visit #2 and #3 due to the current visitor's behavior
385  // In contrast, when starting from #3, we clearly retrieve both inputs' dag
386  // by skipping already visited nodes
387  // So when we visit #2 after visiting #3, we can skip to consider nodes beloning to
388  // qual #3 so we clearly retrieve DAG only corresponding to #2's
389  for (size_t level_idx = 0; level_idx < left_deep_join_info->size(); ++level_idx) {
390  const auto& current_level_join_conditions = left_deep_join_info->at(level_idx);
391  std::vector<const Analyzer::ColumnVar*> inner_join_cols;
392  std::vector<const Analyzer::ColumnVar*> outer_join_cols;
393  std::vector<std::shared_ptr<const Analyzer::Expr>> filter_ops;
394  int inner_input_idx{-1};
395  int outer_input_idx{-1};
396  OpInfo op_info{"UNDEFINED", "UNDEFINED", "UNDEFINED"};
397  std::unordered_set<std::string> visited_filter_ops;
398 
399  // we first check whether this qual needs nested loop
400  const bool found_eq_join_qual =
401  current_level_join_conditions.type != JoinType::INVALID &&
402  boost::algorithm::any_of(current_level_join_conditions.quals, IsEquivBinOp{});
403  const bool nested_loop = !found_eq_join_qual;
404 
405  RexScalar const* const outer_join_cond =
406  current_level_join_conditions.type == JoinType::LEFT
407  ? rel_left_deep_join->getOuterCondition(level_idx + 1)
408  : nullptr;
409 
410  // collect join col, filter ops, and detailed info of join operation, i.e., op_type,
411  // qualifier, ...
412  // when we have more than one quals, i.e., current_level_join_conditions.quals.size()
413  // > 1, we consider the first qual is used as hashtable building
414  for (const auto& join_qual : current_level_join_conditions.quals) {
415  auto qual_bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(join_qual);
416  auto join_qual_str = ::toString(join_qual);
417  if (qual_bin_oper) {
418  if (join_qual == current_level_join_conditions.quals.front()) {
419  // set op_info based on the first qual
420  op_info = OpInfo{::toString(qual_bin_oper->get_optype()),
421  ::toString(qual_bin_oper->get_qualifier()),
422  qual_bin_oper->get_type_info().to_string()};
423  }
424  for (auto& col_pair_info : normalizeColumnsPair(qual_bin_oper.get(), catalog_)) {
425  if (col_pair_info.loop_join_qual && !found_eq_join_qual) {
426  // we only consider that cur level's join is loop join if we have no
427  // equi-join qual and both lhs and rhs are not col_var,
428  // i.e., lhs: col_var / rhs: constant / bin_op: kGE
429  if (visited_filter_ops.emplace(std::move(join_qual_str)).second) {
430  filter_ops.push_back(join_qual);
431  }
432  } else {
433  // a qual_bin_oper becomes an inner join qual iff both lhs and rhs are col_var
434  // otherwise it becomes a filter qual
435  bool found_valid_col_vars = false;
436  if (col_pair_info.inner_outer.first && col_pair_info.inner_outer.second) {
437  auto const* lhs_col_var = getColVar(col_pair_info.inner_outer.first);
438  auto const* rhs_col_var = getColVar(col_pair_info.inner_outer.second);
439  // this qual is valid and used for join op
440  if (lhs_col_var && rhs_col_var) {
441  found_valid_col_vars = true;
442  if (inner_input_idx == -1) {
443  inner_input_idx =
444  get_input_idx(rel_left_deep_join, lhs_col_var->get_table_id());
445  }
446  if (outer_input_idx == -1) {
447  outer_input_idx =
448  get_input_idx(rel_left_deep_join, rhs_col_var->get_table_id());
449  }
450  inner_join_cols.push_back(lhs_col_var);
451  outer_join_cols.push_back(rhs_col_var);
452  }
453  }
454  if (!found_valid_col_vars &&
455  visited_filter_ops.emplace(std::move(join_qual_str)).second) {
456  filter_ops.push_back(join_qual);
457  }
458  }
459  }
460  } else {
461  if (visited_filter_ops.emplace(std::move(join_qual_str)).second) {
462  filter_ops.push_back(join_qual);
463  }
464  }
465  }
466  if (inner_join_cols.size() != outer_join_cols.size()) {
467  VLOG(1) << "Stop DAG extraction due to inner/outer col mismatch";
468  clearInternaStatus();
469  return;
470  }
471 
472  // create RelTranslatedJoin based on the collected info from the join qual
473  // there are total seven types of join query pattern
474  // 1. INNER HASH ONLY
475  // 2. INNER LOOP ONLY (!)
476  // 3. LEFT LOOP ONLY
477  // 4. INNER HASH + INNER LOOP (!)
478  // 5. LEFT LOOP + INNER HASH
479  // 6. LEFT LOOP + INNER LOOP (!)
480  // 7. LEFT LOOP + INNER HASH + INNER LOOP (!)
481  // here, if a query contains INNER LOOP join, its qual has nothing
482  // so, some patterns do not have bin_oper at the specific join nest level
483  // if we find INNER LOOP, corresponding RelTranslatedJoin has nulled LHS and RHS
484  // to mark it as loop join
485  const RelAlgNode* lhs;
486  const RelAlgNode* rhs;
487  if (inner_input_idx != -1 && outer_input_idx != -1) {
488  lhs = rel_left_deep_join->getInput(inner_input_idx);
489  rhs = rel_left_deep_join->getInput(outer_input_idx);
490  } else {
491  if (level_idx == 0) {
492  lhs = rel_left_deep_join->getInput(0);
493  rhs = rel_left_deep_join->getInput(1);
494  } else {
495  lhs = translated_join_info_->rbegin()->get();
496  rhs = rel_left_deep_join->getInput(level_idx + 1);
497  }
498  }
499  CHECK(lhs);
500  CHECK(rhs);
501  auto cur_translated_join_node =
502  std::make_shared<RelTranslatedJoin>(lhs,
503  rhs,
504  inner_join_cols,
505  outer_join_cols,
506  filter_ops,
507  outer_join_cond,
508  nested_loop,
509  current_level_join_conditions.type,
510  op_info.type_,
511  op_info.qualifier_,
512  op_info.typeinfo_);
513  CHECK(cur_translated_join_node);
514  handleTranslatedJoin(parent_node, cur_translated_join_node.get());
515  translated_join_info_->push_back(std::move(cur_translated_join_node));
516  }
517 }
std::string toString(const ExtArgumentType &sig_type)
std::vector< InnerOuterOrLoopQual > normalizeColumnsPair(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat)
const Catalog_Namespace::Catalog & catalog_
const RexScalar * getOuterCondition(const size_t nesting_level) const
unsigned getId() const
void handleTranslatedJoin(const RelAlgNode *, const RelTranslatedJoin *)
const RelAlgNode * getInput(const size_t idx) const
std::shared_ptr< TranslatedJoinInfo > translated_join_info_
#define CHECK(condition)
Definition: Logger.h:209
int get_input_idx(const RelLeftDeepInnerJoin *rel_left_deep_join, int const tbl_id)
#define VLOG(n)
Definition: Logger.h:303
const JoinQualsPerNestingLevel * getPerNestingJoinQualInfo(std::optional< unsigned > left_deep_join_tree_id)
Analyzer::ColumnVar const * getColVar(const Analyzer::Expr *col_info)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::handleTranslatedJoin ( const RelAlgNode *  parent_node,
const RelTranslatedJoin *  rel_trans_join
)
private

Definition at line 251 of file QueryPlanDagExtractor.cpp.

References QueryPlanDagCache::addNodeIfAbsent(), addTableIdToNodeLink(), CHECK, clearInternaStatus(), getExtractedQueryPlanDagStr(), RelTranslatedJoin::getJoinCols(), RelTranslatedJoin::getLHS(), RelTranslatedJoin::getRHS(), global_dag_, hash_table_query_plan_dag_, isEmptyQueryPlanDag(), RelTranslatedJoin::isNestedLoopQual(), registerNodeToDagCache(), run_benchmark_import::res, split(), QueryPlanDagCache::translateColVarsToInfoString(), validateNodeId(), visit(), and VLOG.

Referenced by handleLeftDeepJoinTree(), and visit().

253  {
254  // when left-deep tree has multiple joins this rel_trans_join can be revisited
255  // but we need to mark the child query plan to accurately catch the query plan dag
256  // here we do not create new dag id since all rel nodes are visited already
257  CHECK(parent_node);
258  CHECK(rel_trans_join);
259 
260  auto res = global_dag_.addNodeIfAbsent(rel_trans_join);
261  if (!validateNodeId(rel_trans_join, res) ||
262  !registerNodeToDagCache(parent_node, rel_trans_join, res)) {
263  return;
264  }
265 
266  // To extract an access path (query plan DAG) for hashtable is to use a difference of
267  // two query plan DAGs 1) query plan DAG after visiting RHS node and 2) query plan DAG
268  // after visiting LHS node so by comparing 1) and 2) we can extract which query plan DAG
269  // is necessary to project join cols that are used to build a hashtable and we use it as
270  // hashtable access path
271  QueryPlan current_plan_dag, after_rhs_visited, after_lhs_visited;
272  current_plan_dag = getExtractedQueryPlanDagStr();
273  auto rhs_node = rel_trans_join->getRHS();
274  if (rhs_node) {
275  visit(rel_trans_join, rhs_node);
276  after_rhs_visited = getExtractedQueryPlanDagStr();
277  addTableIdToNodeLink(rhs_node->getId(), rhs_node);
278  }
279  auto lhs_node = rel_trans_join->getLHS();
280  if (rel_trans_join->getLHS()) {
281  visit(rel_trans_join, lhs_node);
282  after_lhs_visited = getExtractedQueryPlanDagStr();
283  addTableIdToNodeLink(lhs_node->getId(), lhs_node);
284  }
285  if (isEmptyQueryPlanDag(after_lhs_visited) || isEmptyQueryPlanDag(after_rhs_visited)) {
286  VLOG(1) << "Stop DAG extraction while extracting query plan DAG for join qual";
287  clearInternaStatus();
288  return;
289  }
290  // after visiting new node, we have added node id(s) which can be used as an access path
291  // so, we extract that node id(s) by splitting the new plan dag by the current plan dag
292  auto outer_table_identifier = split(after_rhs_visited, current_plan_dag)[1];
293  auto hash_table_identfier = split(after_lhs_visited, after_rhs_visited)[1];
294 
295  if (!rel_trans_join->isNestedLoopQual()) {
296  std::ostringstream oss;
297  auto inner_join_cols = rel_trans_join->getJoinCols(true);
298  oss << global_dag_.translateColVarsToInfoString(inner_join_cols, false);
299  auto hash_table_cols_info = oss.str();
300  oss << "|";
301  auto outer_join_cols = rel_trans_join->getJoinCols(false);
302  oss << global_dag_.translateColVarsToInfoString(outer_join_cols, false);
303  auto join_qual_info = oss.str();
304  // hash table join cols info | hash table build plan dag (hashtable identifier or
305  // hashtable access path)
306  auto it = hash_table_query_plan_dag_.find(join_qual_info);
307  if (it == hash_table_query_plan_dag_.end()) {
308  VLOG(2) << "Add hashtable access path"
309  << ", join col info: " << hash_table_cols_info
310  << ", access path: " << hash_table_identfier << "\n";
312  join_qual_info, std::make_pair(hash_table_cols_info, hash_table_identfier));
313  }
314  } else {
315  VLOG(2) << "Add loop join access path, for LHS: " << outer_table_identifier
316  << ", for RHS: " << hash_table_identfier << "\n";
317  }
318 }
bool isNestedLoopQual() const
QueryPlanDagCache & global_dag_
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
std::vector< const Analyzer::ColumnVar * > getJoinCols(bool lhs) const
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
const RelAlgNode * getRHS() const
bool validateNodeId(const RelAlgNode *node, std::optional< RelNodeId > retrieved_node_id)
std::string getExtractedQueryPlanDagStr()
bool isEmptyQueryPlanDag(const std::string &dag)
bool registerNodeToDagCache(const RelAlgNode *parent_node, const RelAlgNode *child_node, std::optional< RelNodeId > retrieved_node_id)
JoinColumnsInfo translateColVarsToInfoString(std::vector< const Analyzer::ColumnVar * > &col_vars, bool col_id_only) const
HashTableBuildDagMap hash_table_query_plan_dag_
void addTableIdToNodeLink(const int table_id, const RelAlgNode *node)
const RelAlgNode * getLHS() const
void visit(const RelAlgNode *, const RelAlgNode *)
#define CHECK(condition)
Definition: Logger.h:209
std::string QueryPlan
#define VLOG(n)
Definition: Logger.h:303

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryPlanDagExtractor::isDagExtractionAvailable ( )
inline

Definition at line 101 of file QueryPlanDagExtractor.h.

References contain_not_supported_rel_node_.

Referenced by extractQueryPlanDagImpl().

+ Here is the caller graph for this function:

bool QueryPlanDagExtractor::isEmptyQueryPlanDag ( const std::string &  dag)
inline

Definition at line 109 of file QueryPlanDagExtractor.h.

Referenced by handleTranslatedJoin().

109 { return dag.compare("N/A") == 0; }

+ Here is the caller graph for this function:

std::vector< InnerOuterOrLoopQual > QueryPlanDagExtractor::normalizeColumnsPair ( const Analyzer::BinOper *  condition,
const Catalog_Namespace::Catalog &  cat
)

Definition at line 34 of file QueryPlanDagExtractor.cpp.

References cat(), CHECK_EQ, Analyzer::BinOper::get_left_operand(), Analyzer::BinOper::get_right_operand(), i, Analyzer::BinOper::is_overlaps_oper(), HashJoin::normalizeColumnPair(), run_benchmark_import::result, and temporary_tables_.

Referenced by handleLeftDeepJoinTree().

36  {
37  std::vector<InnerOuterOrLoopQual> result;
38  const auto lhs_tuple_expr =
39  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
40  const auto rhs_tuple_expr =
41  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
42 
43  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
44  auto do_normalize_inner_outer_pair = [&result, &cat, &condition](
45  const Analyzer::Expr* lhs,
46  const Analyzer::Expr* rhs,
47  const TemporaryTables* temporary_table) {
48  try {
49  auto inner_outer_pair = HashJoin::normalizeColumnPair(
50  lhs, rhs, cat, temporary_table, condition->is_overlaps_oper());
51  InnerOuterOrLoopQual valid_qual{
52  std::make_pair(inner_outer_pair.first, inner_outer_pair.second), false};
53  result.push_back(valid_qual);
54  } catch (HashJoinFail& e) {
55  InnerOuterOrLoopQual invalid_qual{std::make_pair(lhs, rhs), true};
56  result.push_back(invalid_qual);
57  }
58  };
59  if (lhs_tuple_expr) {
60  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
61  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
62  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
63  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
64  do_normalize_inner_outer_pair(
65  lhs_tuple[i].get(), rhs_tuple[i].get(), &temporary_tables_);
66  }
67  } else {
68  do_normalize_inner_outer_pair(condition->get_left_operand(),
69  condition->get_right_operand(),
70  &temporary_tables_);
71  }
72  return result;
73 }
#define CHECK_EQ(x, y)
Definition: Logger.h:217
std::string cat(Ts &&...args)
const Expr * get_right_operand() const
Definition: Analyzer.h:442
const TemporaryTables & temporary_tables_
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
static InnerOuter normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
Definition: HashJoin.cpp:603
const Expr * get_left_operand() const
Definition: Analyzer.h:441
bool is_overlaps_oper() const
Definition: Analyzer.h:439

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryPlanDagExtractor::registerNodeToDagCache ( const RelAlgNode *  parent_node,
const RelAlgNode *  child_node,
std::optional< RelNodeId >  retrieved_node_id
)
private

Definition at line 186 of file QueryPlanDagExtractor.cpp.

References CHECK, QueryPlanDagCache::connectNodes(), extracted_dag_, RelAlgNode::getRelNodeDagId(), and global_dag_.

Referenced by handleTranslatedJoin(), and visit().

189  {
190  CHECK(parent_node);
191  CHECK(child_node);
192  CHECK(retrieved_node_id.has_value());
193  auto parent_node_id = parent_node->getRelNodeDagId();
194  global_dag_.connectNodes(parent_node_id, retrieved_node_id.value());
195  extracted_dag_.push_back(retrieved_node_id.value());
196  return true;
197 }
QueryPlanDagCache & global_dag_
void connectNodes(const RelNodeId parent_id, const RelNodeId child_id)
std::vector< size_t > extracted_dag_
#define CHECK(condition)
Definition: Logger.h:209
size_t getRelNodeDagId() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryPlanDagExtractor::validateNodeId ( const RelAlgNode *  node,
std::optional< RelNodeId >  retrieved_node_id
)
private

Definition at line 173 of file QueryPlanDagExtractor.cpp.

References CHECK, clearInternaStatus(), RelAlgNode::setRelNodeDagId(), RelAlgNode::toString(), and VLOG.

Referenced by handleTranslatedJoin(), and visit().

174  {
175  if (!retrieved_node_id) {
176  VLOG(1) << "Stop DAG extraction while adding node to the DAG node cache: "
177  << node->toString();
178  clearInternaStatus();
179  return false;
180  }
181  CHECK(retrieved_node_id.has_value());
182  node->setRelNodeDagId(retrieved_node_id.value());
183  return true;
184 }
void setRelNodeDagId(const size_t id) const
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:209
#define VLOG(n)
Definition: Logger.h:303

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::visit ( const RelAlgNode *  parent_node,
const RelAlgNode *  child_node
)
private

Definition at line 203 of file QueryPlanDagExtractor.cpp.

References QueryPlanDagCache::addNodeIfAbsent(), clearInternaStatus(), contain_not_supported_rel_node_, RelAlgNode::getInput(), global_dag_, handleLeftDeepJoinTree(), handleTranslatedJoin(), i, RelAlgNode::inputCount(), left_deep_tree_infos_, registerNodeToDagCache(), run_benchmark_import::res, validateNodeId(), RexVisitorBase< T >::visit(), and VLOG.

Referenced by extractQueryPlanDagImpl(), and handleTranslatedJoin().

204  {
205  if (!child_node || contain_not_supported_rel_node_) {
206  return;
207  }
208  auto register_and_visit = [this](const RelAlgNode* parent_node,
209  const RelAlgNode* child_node) {
210  // This function takes a responsibility for all rel nodes
211  // except 1) RelLeftDeepJoinTree and 2) RelTranslatedJoin
212  auto res = global_dag_.addNodeIfAbsent(child_node);
213  if (validateNodeId(child_node, res) &&
214  registerNodeToDagCache(parent_node, child_node, res)) {
215  for (size_t i = 0; i < child_node->inputCount(); i++) {
216  visit(child_node, child_node->getInput(i));
217  }
218  }
219  };
220  if (auto left_deep_joins = dynamic_cast<const RelLeftDeepInnerJoin*>(child_node)) {
221  if (left_deep_tree_infos_.empty()) {
222  // we should have left_deep_tree_info for input left deep tree node
223  VLOG(1) << "Stop DAG extraction due to not supported join pattern";
224  clearInternaStatus();
225  return;
226  }
227  const auto inner_cond = left_deep_joins->getInnerCondition();
228  // we analyze left-deep join tree as per-join qual level, so
229  // when visiting RelLeftDeepInnerJoin we decompose it into individual join node
230  // (RelTranslatedJoin).
231  // Thus, this RelLeftDeepInnerJoin object itself is useless when recycling data
232  // but sometimes it has inner condition that has to consider so we add an extra
233  // RelFilter node containing the condition to keep query semantic correctly
234  if (auto cond = dynamic_cast<const RexOperator*>(inner_cond)) {
235  RexDeepCopyVisitor copier;
236  auto copied_inner_cond = copier.visit(cond);
237  auto dummy_filter = std::make_shared<RelFilter>(copied_inner_cond);
238  register_and_visit(parent_node, dummy_filter.get());
239  handleLeftDeepJoinTree(dummy_filter.get(), left_deep_joins);
240  } else {
241  handleLeftDeepJoinTree(parent_node, left_deep_joins);
242  }
243  } else if (auto translated_join_node =
244  dynamic_cast<const RelTranslatedJoin*>(child_node)) {
245  handleTranslatedJoin(parent_node, translated_join_node);
246  } else {
247  register_and_visit(parent_node, child_node);
248  }
249 }
QueryPlanDagCache & global_dag_
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
bool validateNodeId(const RelAlgNode *node, std::optional< RelNodeId > retrieved_node_id)
void handleTranslatedJoin(const RelAlgNode *, const RelTranslatedJoin *)
const RelAlgNode * getInput(const size_t idx) const
bool registerNodeToDagCache(const RelAlgNode *parent_node, const RelAlgNode *child_node, std::optional< RelNodeId > retrieved_node_id)
void handleLeftDeepJoinTree(const RelAlgNode *, const RelLeftDeepInnerJoin *)
void visit(const RelAlgNode *, const RelAlgNode *)
std::unordered_map< unsigned, JoinQualsPerNestingLevel > & left_deep_tree_infos_
const size_t inputCount() const
#define VLOG(n)
Definition: Logger.h:303

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

const Catalog_Namespace::Catalog& QueryPlanDagExtractor::catalog_
private

Definition at line 144 of file QueryPlanDagExtractor.h.

Referenced by handleLeftDeepJoinTree().

bool QueryPlanDagExtractor::contain_not_supported_rel_node_
private
Executor* QueryPlanDagExtractor::executor_
private

Definition at line 148 of file QueryPlanDagExtractor.h.

std::vector<size_t> QueryPlanDagExtractor::extracted_dag_
private
QueryPlanDagCache& QueryPlanDagExtractor::global_dag_
private
HashTableBuildDagMap QueryPlanDagExtractor::hash_table_query_plan_dag_
private

Definition at line 150 of file QueryPlanDagExtractor.h.

Referenced by getHashTableBuildDag(), and handleTranslatedJoin().

std::unordered_map<unsigned, JoinQualsPerNestingLevel>& QueryPlanDagExtractor::left_deep_tree_infos_
private

Definition at line 146 of file QueryPlanDagExtractor.h.

Referenced by getPerNestingJoinQualInfo(), and visit().

TableIdToNodeMap QueryPlanDagExtractor::table_id_to_node_map_
private
const TemporaryTables& QueryPlanDagExtractor::temporary_tables_
private

Definition at line 147 of file QueryPlanDagExtractor.h.

Referenced by normalizeColumnsPair().

std::shared_ptr<TranslatedJoinInfo> QueryPlanDagExtractor::translated_join_info_
private

The documentation for this class was generated from the following files: