OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgExecutionUnit.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #pragma once
25 
27 #include "QueryHint.h"
28 #include "RelAlgDag.h"
29 #include "Shared/DbObjectKeys.h"
30 #include "Shared/sqldefs.h"
31 #include "Shared/toString.h"
35 
36 #include <boost/graph/adjacency_list.hpp>
37 
38 #include <list>
39 #include <memory>
40 #include <optional>
41 #include <vector>
42 
43 using AdjacentList = boost::adjacency_list<boost::setS, boost::vecS, boost::directedS>;
44 // node ID used when extracting query plan DAG
45 // note this ID is different from RelNode's id since the query plan DAG extractor assigns
46 // a unique node ID only to a rel node which is included in the extracted DAG (if we cannot
47 // extract a DAG from the query plan, the DAG extractor skips assigning unique IDs to the
48 // rel nodes in that query plan)
49 using RelNodeId = size_t;
50 // hash value of explained rel node
51 using RelNodeExplainedHash = size_t;
52 // a string representation of a query plan that is collected by visiting query plan DAG
53 // starting from root to leaf and concatenate each rel node's id
54 // where two adjacent rel nodes in a QueryPlanDAG are connected via '|' delimiter
55 // i.e., 1|2|3|4|
56 using QueryPlanDAG = std::string;
57 // hashed value of QueryPlanNodeIds
58 using QueryPlanHash = size_t;
59 // hold query plan dag and column info of join columns
60 // used to detect a correct cached hashtable
62  public:
// Constructor: stores the inner/outer join column info values and the inner/outer
// access-path hashes, and moves the given set of input table keys into the
// inputTableKeys member (the parameter deliberately shares the member's name).
63  HashTableBuildDag(size_t in_inner_cols_info,
64  size_t in_outer_cols_info,
65  QueryPlanHash in_inner_cols_access_path,
66  QueryPlanHash in_outer_cols_access_path,
67  std::unordered_set<size_t>&& inputTableKeys)
68  : inner_cols_info(in_inner_cols_info)
69  , outer_cols_info(in_outer_cols_info)
70  , inner_cols_access_path(in_inner_cols_access_path)
71  , outer_cols_access_path(in_outer_cols_access_path)
72  , inputTableKeys(std::move(inputTableKeys)) {}
77  std::unordered_set<size_t>
78  inputTableKeys; // table keys of input(s), e.g., scan node or subquery's DAG
79 };
80 // A map between a join qual's column info and its hashtable access path as a query plan
81 // DAG, e.g., for A.a = B.b with the hashtable built on B.b: <(A.a = B.b) --> query plan
82 // DAG of projecting B.b>. This two-level mapping (join qual -> inner join col ->
83 // hashtable access plan DAG) is required since we have to extract the query plan before
84 // deciding which join col becomes inner: rel alg related metadata is required to extract
85 // the query plan, and the actual decision happens at the time of building the hashtable.
86 using HashTableBuildDagMap = std::unordered_map<size_t, HashTableBuildDag>;
87 // A map from a join column's input table id to its corresponding rel node.
88 // For each hash join operation, we can determine whether its input source
89 // has inconsistency in its source data, e.g., row ordering,
90 // by looking at the type of the input node, e.g., RelSort.
91 // Note that disabling DAG extraction when we find a sort node in a join's input
92 // is too restrictive when a query becomes complex (and so has multiple joins)
93 // since it eliminates a chance of data recycling.
94 using TableIdToNodeMap = std::unordered_map<shared::TableKey, const RelAlgNode*>;
95 
99  kQual, // INNER + OUTER
100  kDirect // set target directly (i.e., put Analyzer::Expr* instead of
101  // Analyzer::BinOper*)
102 };
103 constexpr char const* EMPTY_QUERY_PLAN = "";
105 
107 
// Forward declarations of the Analyzer types that declarations in this header refer to
// (e.g. Analyzer::Expr, Analyzer::ColumnVar, Analyzer::Estimator, Analyzer::OrderEntry).
108 namespace Analyzer {
109 class Expr;
110 class ColumnVar;
111 class Estimator;
112 struct OrderEntry;
113 
114 } // namespace Analyzer
115 
// Bundles sort parameters: the order entries, a sort algorithm, an optional limit and an
// offset.
// NOTE(review): this listing skips source lines 117, 119, 130, 139 and 152 (see the gaps
// in the embedded line numbers). Presumably those hold the default-constructor head, the
// `algorithm` initializer, the `order_entries` assignment, the algorithm argument of the
// factory and the `algorithm` member declaration -- confirm against the original header.
116 struct SortInfo {
// Initializer list of a constructor whose head is not shown in this listing: starts with
// an empty entry list, no limit and a zero offset.
118  : order_entries({})
120  , limit(std::nullopt)
121  , offset(0) {}
122 
// Builds a SortInfo from explicit order entries, sort algorithm, optional limit and offset.
123  SortInfo(const std::list<Analyzer::OrderEntry>& oe,
124  const SortAlgorithm sa,
125  std::optional<size_t> l,
126  size_t o)
127  : order_entries(oe), algorithm(sa), limit(l), offset(o) {}
128 
// Copy assignment: copies algorithm, limit and offset from `other` and returns *this
// (the statement on source line 130 is not shown in this listing).
129  SortInfo& operator=(const SortInfo& other) {
131  algorithm = other.algorithm;
132  limit = other.limit;
133  offset = other.offset;
134  return *this;
135  }
136 
// Builds a SortInfo from a RelSort node's order entries, limit and offset
// (the argument on source line 139 is not shown in this listing).
137  static SortInfo createFromSortNode(const RelSort* sort_node) {
138  return {sort_node->getOrderEntries(),
140  sort_node->getLimit(),
141  sort_node->getOffset()};
142  }
143 
// Returns a hash of the limit: combines whether a limit is present with its value,
// using 0 as the value when no limit is set.
144  size_t hashLimit() const {
145  size_t hash{0};
146  boost::hash_combine(hash, limit.has_value());
147  boost::hash_combine(hash, limit.value_or(0));
148  return hash;
149  }
150 
// Order entries describing the sort.
151  std::list<Analyzer::OrderEntry> order_entries;
// Optional limit; empty when no limit is set.
153  std::optional<size_t> limit;
// Offset; initialized to 0 by the initializer list at the top of the struct.
154  size_t offset;
155 };
156 
158  std::list<std::shared_ptr<Analyzer::Expr>> quals;
160 };
161 
162 using JoinQualsPerNestingLevel = std::vector<JoinCondition>;
163 
165  std::vector<InputDescriptor> input_descs;
166  std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
167  std::list<std::shared_ptr<Analyzer::Expr>> simple_quals;
168  std::list<std::shared_ptr<Analyzer::Expr>> quals;
170  const std::list<std::shared_ptr<Analyzer::Expr>> groupby_exprs;
171  std::vector<Analyzer::Expr*> target_exprs;
172  std::unordered_map<size_t, SQLTypeInfo> target_exprs_original_type_infos;
173  const std::shared_ptr<Analyzer::Estimator> estimator;
175  size_t scan_limit;
180  bool use_bump_allocator{false};
181  // empty if not a UNION, true if UNION ALL, false if regular UNION
182  const std::optional<bool> union_all;
183  std::shared_ptr<const query_state::QueryState> query_state;
184  std::vector<Analyzer::Expr*> target_exprs_union; // targets in second subquery of UNION
185  mutable std::vector<std::pair<std::vector<size_t>, size_t>> per_device_cardinality;
186 
187  RelAlgExecutionUnit createNdvExecutionUnit(const int64_t range) const;
189  Analyzer::Expr* replacement_target) const;
190 };
191 
192 std::ostream& operator<<(std::ostream& os, const RelAlgExecutionUnit& ra_exe_unit);
193 
195  const std::vector<InputDescriptor> input_descs;
196  std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
197  std::vector<Analyzer::Expr*> input_exprs;
198  std::vector<Analyzer::ColumnVar*> table_func_inputs;
199  std::vector<Analyzer::Expr*> target_exprs;
200  mutable size_t output_buffer_size_param;
203 
204  public:
// Returns a debug string: the type name followed by the input expressions, table-function
// inputs, target expressions, output buffer size parameter and table function, each
// rendered with ::toString. Note that the field labelled "query_plan_dag" prints
// query_plan_dag_hash.
205  std::string toString() const {
206  return typeName(this) + "(" + "input_exprs=" + ::toString(input_exprs) +
207  ", table_func_inputs=" + ::toString(table_func_inputs) +
208  ", target_exprs=" + ::toString(target_exprs) +
209  ", output_buffer_size_param=" + ::toString(output_buffer_size_param) +
210  ", table_func=" + ::toString(table_func) +
211  ", query_plan_dag=" + ::toString(query_plan_dag_hash) + ")";
212  }
213 };
214 
// Forward declaration of ResultSet; ResultSetPtr is the std::shared_ptr alias for it.
215 class ResultSet;
216 using ResultSetPtr = std::shared_ptr<ResultSet>;
std::vector< Analyzer::Expr * > target_exprs
size_t getOffset() const
Definition: RelAlgDag.h:2228
JoinType
Definition: sqldefs.h:174
std::list< Analyzer::OrderEntry > getOrderEntries() const
Definition: RelAlgDag.h:2264
std::vector< Analyzer::Expr * > input_exprs
std::vector< Analyzer::ColumnVar * > table_func_inputs
QueryPlanHash query_plan_dag_hash
const std::optional< bool > union_all
const table_functions::TableFunction table_func
std::string QueryPlanDAG
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
QueryPlanHash outer_cols_access_path
JoinColumnSide
const std::vector< InputDescriptor > input_descs
std::vector< InputDescriptor > input_descs
boost::adjacency_list< boost::setS, boost::vecS, boost::directedS > AdjacentList
std::vector< JoinCondition > JoinQualsPerNestingLevel
std::shared_ptr< ResultSet > ResultSetPtr
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
static SortInfo createFromSortNode(const RelSort *sort_node)
SortAlgorithm algorithm
std::unordered_set< size_t > inputTableKeys
std::vector< Analyzer::Expr * > target_exprs_union
std::vector< std::pair< std::vector< size_t >, size_t > > per_device_cardinality
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
SortInfo & operator=(const SortInfo &other)
const JoinQualsPerNestingLevel join_quals
std::optional< size_t > limit
TableIdToNodeMap table_id_to_node_map
std::list< Analyzer::OrderEntry > order_entries
RelAlgExecutionUnit createCountAllExecutionUnit(Analyzer::Expr *replacement_target) const
const std::shared_ptr< Analyzer::Estimator > estimator
std::string toString() const
HashTableBuildDag(size_t in_inner_cols_info, size_t in_outer_cols_info, QueryPlanHash in_inner_cols_access_path, QueryPlanHash in_outer_cols_access_path, std::unordered_set< size_t > &&inputTableKeys)
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
QueryPlanHash inner_cols_access_path
SortInfo(const std::list< Analyzer::OrderEntry > &oe, const SortAlgorithm sa, std::optional< size_t > l, size_t o)
size_t RelNodeExplainedHash
std::unordered_map< size_t, SQLTypeInfo > target_exprs_original_type_infos
RelAlgExecutionUnit createNdvExecutionUnit(const int64_t range) const
size_t hashLimit() const
size_t QueryPlanHash
std::string typeName(const T *v)
Definition: toString.h:106
std::list< std::shared_ptr< Analyzer::Expr > > quals
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
constexpr char const * EMPTY_QUERY_PLAN
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::shared_ptr< const query_state::QueryState > query_state
Common Enum definitions for SQL processing.
std::vector< Analyzer::Expr * > target_exprs
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::optional< size_t > getLimit() const
Definition: RelAlgDag.h:2226
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const unsigned block_size, const unsigned grid_size)
Definition: ResultSet.cpp:64
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
size_t RelNodeId
HashTableBuildDagMap hash_table_build_plan_dag