OmniSciDB  a667adc9c8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FromTableReordering.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2018, OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "FromTableReordering.h"
18 #include "../Analyzer/Analyzer.h"
19 #include "Execute.h"
20 #include "RangeTableIndexVisitor.h"
21 
22 #include <numeric>
23 #include <queue>
24 #include <regex>
25 
26 namespace {
27 
28 using cost_t = unsigned;
29 using node_t = size_t;
30 
31 static std::unordered_map<SQLTypes, cost_t> GEO_TYPE_COSTS{{kPOINT, 60},
32  {kARRAY, 60},
33  {kLINESTRING, 70},
34  {kPOLYGON, 80},
35  {kMULTIPOLYGON, 90}};
36 
37 // Returns a lhs/rhs cost for the given qualifier. Must be strictly greater than 0.
38 std::pair<cost_t, cost_t> get_join_qual_cost(const Analyzer::Expr* qual,
39  const Executor* executor) {
40  const auto func_oper = dynamic_cast<const Analyzer::FunctionOper*>(qual);
41  if (func_oper) {
42  std::vector<SQLTypes> geo_types_for_func;
43  for (size_t i = 0; i < func_oper->getArity(); i++) {
44  const auto arg_expr = func_oper->getArg(i);
45  const auto& ti = arg_expr->get_type_info();
46  if (ti.is_geometry() || is_constructed_point(arg_expr)) {
47  geo_types_for_func.push_back(ti.get_type());
48  }
49  }
50  std::regex geo_func_regex("ST_[\\w]*");
51  std::smatch geo_func_match;
52  const auto& func_name = func_oper->getName();
53  if (geo_types_for_func.size() == 2 &&
54  std::regex_match(func_name, geo_func_match, geo_func_regex)) {
55  const auto rhs_cost = GEO_TYPE_COSTS[geo_types_for_func[0]];
56  const auto lhs_cost = GEO_TYPE_COSTS[geo_types_for_func[1]];
57  return {lhs_cost, rhs_cost};
58  }
59  return {200, 200};
60  }
61  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(qual);
62  if (!bin_oper || !IS_EQUIVALENCE(bin_oper->get_optype())) {
63  return {200, 200};
64  }
65  if (executor) {
66  try {
68  bin_oper, *executor->getCatalog(), executor->getTemporaryTables());
69  } catch (...) {
70  return {200, 200};
71  }
72  }
73  return {100, 100};
74 }
75 
76 // Builds a graph with nesting levels as nodes and join condition costs as edges.
77 std::vector<std::map<node_t, cost_t>> build_join_cost_graph(
78  const JoinQualsPerNestingLevel& left_deep_join_quals,
79  const std::vector<InputTableInfo>& table_infos,
80  const Executor* executor) {
81  CHECK_EQ(left_deep_join_quals.size() + 1, table_infos.size());
82  std::vector<std::map<node_t, cost_t>> join_cost_graph(table_infos.size());
84  // Build the constraints graph: nodes are nest levels, edges are the existence of
85  // qualifiers between levels.
86  for (const auto& current_level_join_conditions : left_deep_join_quals) {
87  for (const auto& qual : current_level_join_conditions.quals) {
88  std::set<int> qual_nest_levels = visitor.visit(qual.get());
89  if (qual_nest_levels.size() != 2) {
90  continue;
91  }
92  int lhs_nest_level = *qual_nest_levels.begin();
93  CHECK_GE(lhs_nest_level, 0);
94  qual_nest_levels.erase(qual_nest_levels.begin());
95  int rhs_nest_level = *qual_nest_levels.begin();
96  CHECK_GE(rhs_nest_level, 0);
97 
98  // Get the {lhs, rhs} cost for the qual
99  const auto cost_pair = get_join_qual_cost(qual.get(), executor);
100  const auto edge_it = join_cost_graph[lhs_nest_level].find(rhs_nest_level);
101  if (edge_it == join_cost_graph[lhs_nest_level].end() ||
102  edge_it->second > cost_pair.second) {
103  join_cost_graph[lhs_nest_level][rhs_nest_level] = cost_pair.second;
104  join_cost_graph[rhs_nest_level][lhs_nest_level] = cost_pair.first;
105  }
106  }
107  }
108  return join_cost_graph;
109 }
110 
111 // Tracks dependencies between nodes.
113  public:
114  SchedulingDependencyTracking(const size_t node_count) : inbound_(node_count) {}
115 
116  // Add a from -> to dependency.
117  void addEdge(const node_t from, const node_t to) { inbound_[to].insert(from); }
118 
119  // Removes from's outbound dependencies.
120  void removeNode(const node_t from) {
121  for (auto& inbound_for_node : inbound_) {
122  inbound_for_node.erase(from);
123  }
124  }
125 
126  // Returns the set of all nodes without dependencies.
127  std::unordered_set<node_t> getRoots() const {
128  std::unordered_set<node_t> roots;
129  for (node_t candidate = 0; candidate < inbound_.size(); ++candidate) {
130  if (inbound_[candidate].empty()) {
131  roots.insert(candidate);
132  }
133  }
134  return roots;
135  }
136 
137  private:
138  std::vector<std::unordered_set<node_t>> inbound_;
139 };
140 
141 // The tree edge for traversal of the cost graph.
145 };
146 
147 // Builds dependency tracking based on left joins and based on geo costs.
149  const JoinQualsPerNestingLevel& left_deep_join_quals,
150  const std::vector<std::map<node_t, cost_t>>& join_cost_graph) {
151  SchedulingDependencyTracking dependency_tracking(left_deep_join_quals.size() + 1);
152  // Add directed graph edges for left join dependencies.
153  // See also start_it inside traverse_join_cost_graph(). These
154  // edges prevent start_it from pointing to a table with a
155  // left join dependency on another table.
156  for (size_t level_idx = 0; level_idx < left_deep_join_quals.size(); ++level_idx) {
157  if (left_deep_join_quals[level_idx].type == JoinType::LEFT) {
158  dependency_tracking.addEdge(level_idx, level_idx + 1);
159  }
160  }
161  // Add directed graph edges for geojoin type dependencies.
162  // See also start_it inside traverse_join_cost_graph(). These
163  // edges prevent start_it from pointing to a table with a
164  // geojoin type dependency as defined by GEO_TYPE_COSTS.
165  for (size_t idx1 = 0; idx1 < join_cost_graph.size(); ++idx1) {
166  for (auto& inner_map : join_cost_graph[idx1]) {
167  auto& idx2 = inner_map.first;
168  auto& cost_forward = inner_map.second;
169  auto reverse_it = join_cost_graph[idx2].find(idx1);
170  CHECK(reverse_it != join_cost_graph[idx2].end());
171  auto& cost_backward = reverse_it->second;
172  if (cost_forward > cost_backward) {
173  dependency_tracking.addEdge(idx1, idx2);
174  }
175  }
176  }
177  return dependency_tracking;
178 }
179 
180 // Do a breadth-first traversal of the cost graph. This avoids scheduling a nest level
181 // before the ones which constraint it are scheduled and it favors equi joins over loop
182 // joins.
183 std::vector<node_t> traverse_join_cost_graph(
184  const std::vector<std::map<node_t, cost_t>>& join_cost_graph,
185  const std::vector<InputTableInfo>& table_infos,
186  const std::function<bool(const node_t lhs_nest_level, const node_t rhs_nest_level)>&
187  compare_node,
188  const std::function<bool(const TraversalEdge&, const TraversalEdge&)>& compare_edge,
189  const JoinQualsPerNestingLevel& left_deep_join_quals) {
190  std::vector<node_t> all_nest_levels(table_infos.size());
191  std::iota(all_nest_levels.begin(), all_nest_levels.end(), 0);
192  std::vector<node_t> input_permutation;
193  std::unordered_set<node_t> visited;
194  auto dependency_tracking =
195  build_dependency_tracking(left_deep_join_quals, join_cost_graph);
196  auto schedulable_node = [&dependency_tracking, &visited](const node_t node) {
197  const auto nodes_ready = dependency_tracking.getRoots();
198  return nodes_ready.find(node) != nodes_ready.end() &&
199  visited.find(node) == visited.end();
200  };
201  while (visited.size() < table_infos.size()) {
202  // Filter out nest levels which are already visited or have pending dependencies.
203  std::vector<node_t> remaining_nest_levels;
204  std::copy_if(all_nest_levels.begin(),
205  all_nest_levels.end(),
206  std::back_inserter(remaining_nest_levels),
207  schedulable_node);
208  CHECK(!remaining_nest_levels.empty());
209  // Start with the table with most tuples.
210  const auto start_it = std::max_element(
211  remaining_nest_levels.begin(), remaining_nest_levels.end(), compare_node);
212  CHECK(start_it != remaining_nest_levels.end());
213  std::priority_queue<TraversalEdge, std::vector<TraversalEdge>, decltype(compare_edge)>
214  worklist(compare_edge);
215  worklist.push(TraversalEdge{*start_it, 0});
216  const auto it_ok = visited.insert(*start_it);
217  CHECK(it_ok.second);
218  while (!worklist.empty()) {
219  // Extract a node and add it to the permutation.
220  TraversalEdge crt = worklist.top();
221  worklist.pop();
222  input_permutation.push_back(crt.nest_level);
223  dependency_tracking.removeNode(crt.nest_level);
224  // Add successors which are ready and not visited yet to the queue.
225  for (const auto& graph_edge : join_cost_graph[crt.nest_level]) {
226  const node_t succ = graph_edge.first;
227  if (!schedulable_node(succ)) {
228  continue;
229  }
230  worklist.push(TraversalEdge{succ, graph_edge.second});
231  const auto it_ok = visited.insert(succ);
232  CHECK(it_ok.second);
233  }
234  }
235  }
236  return input_permutation;
237 }
238 
239 } // namespace
240 
241 std::vector<node_t> get_node_input_permutation(
242  const JoinQualsPerNestingLevel& left_deep_join_quals,
243  const std::vector<InputTableInfo>& table_infos,
244  const Executor* executor) {
245  const auto join_cost_graph =
246  build_join_cost_graph(left_deep_join_quals, table_infos, executor);
247  // Use the number of tuples in each table to break ties in BFS.
248  const auto compare_node = [&table_infos](const node_t lhs_nest_level,
249  const node_t rhs_nest_level) {
250  return table_infos[lhs_nest_level].info.getNumTuplesUpperBound() <
251  table_infos[rhs_nest_level].info.getNumTuplesUpperBound();
252  };
253  const auto compare_edge = [&compare_node](const TraversalEdge& lhs_edge,
254  const TraversalEdge& rhs_edge) {
255  // Only use the number of tuples as a tie-breaker, if costs are equal.
256  if (lhs_edge.join_cost == rhs_edge.join_cost) {
257  return compare_node(lhs_edge.nest_level, rhs_edge.nest_level);
258  }
259  return lhs_edge.join_cost < rhs_edge.join_cost;
260  };
262  join_cost_graph, table_infos, compare_node, compare_edge, left_deep_join_quals);
263 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:67
SchedulingDependencyTracking build_dependency_tracking(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< std::map< node_t, cost_t >> &join_cost_graph)
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1176
#define CHECK_GE(x, y)
Definition: Logger.h:210
std::vector< JoinCondition > JoinQualsPerNestingLevel
T visit(const Analyzer::Expr *expr) const
std::vector< node_t > get_node_input_permutation(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< InputTableInfo > &table_infos, const Executor *executor)
static std::unordered_map< SQLTypes, cost_t > GEO_TYPE_COSTS
static std::unordered_map< SQLTypes, cost_t > std::pair< cost_t, cost_t > get_join_qual_cost(const Analyzer::Expr *qual, const Executor *executor)
std::vector< InnerOuter > normalize_column_pairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:705
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
char * to
std::vector< node_t > traverse_join_cost_graph(const std::vector< std::map< node_t, cost_t >> &join_cost_graph, const std::vector< InputTableInfo > &table_infos, const std::function< bool(const node_t lhs_nest_level, const node_t rhs_nest_level)> &compare_node, const std::function< bool(const TraversalEdge &, const TraversalEdge &)> &compare_edge, const JoinQualsPerNestingLevel &left_deep_join_quals)
const Analyzer::Expr * getArg(const size_t i) const
Definition: Analyzer.h:1362
DEVICE void iota(ARGS &&...args)
Definition: gpu_enabled.h:69
#define CHECK(condition)
Definition: Logger.h:197
std::vector< std::map< node_t, cost_t > > build_join_cost_graph(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< InputTableInfo > &table_infos, const Executor *executor)