OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FromTableReordering.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2018, OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "FromTableReordering.h"
18 #include "../Analyzer/Analyzer.h"
19 #include "Execute.h"
20 #include "RangeTableIndexVisitor.h"
21 
22 #include <numeric>
23 #include <queue>
24 #include <regex>
25 
26 namespace {
27 
28 using cost_t = unsigned;
29 using node_t = size_t;
30 
31 static std::unordered_map<SQLTypes, cost_t> GEO_TYPE_COSTS{{kPOINT, 60},
32  {kARRAY, 60},
33  {kLINESTRING, 70},
34  {kPOLYGON, 80},
35  {kMULTIPOLYGON, 90}};
36 
37 // Returns a lhs/rhs cost for the given qualifier. Must be strictly greater than 0.
38 std::pair<cost_t, cost_t> get_join_qual_cost(const Analyzer::Expr* qual,
39  const Executor* executor) {
40  const auto func_oper = dynamic_cast<const Analyzer::FunctionOper*>(qual);
41  if (func_oper) {
42  std::vector<SQLTypes> geo_types_for_func;
43  for (size_t i = 0; i < func_oper->getArity(); i++) {
44  const auto arg_expr = func_oper->getArg(i);
45  const auto& ti = arg_expr->get_type_info();
46  if (ti.is_geometry() || is_constructed_point(arg_expr)) {
47  geo_types_for_func.push_back(ti.get_type());
48  }
49  }
50  std::regex geo_func_regex("ST_[\\w]*");
51  std::smatch geo_func_match;
52  const auto& func_name = func_oper->getName();
53  if (geo_types_for_func.size() == 2 &&
54  std::regex_match(func_name, geo_func_match, geo_func_regex)) {
55  const auto rhs_cost = GEO_TYPE_COSTS[geo_types_for_func[0]];
56  const auto lhs_cost = GEO_TYPE_COSTS[geo_types_for_func[1]];
57  return {lhs_cost, rhs_cost};
58  }
59  return {200, 200};
60  }
61  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(qual);
62  if (!bin_oper || !IS_EQUIVALENCE(bin_oper->get_optype())) {
63  return {200, 200};
64  }
65  if (executor) {
66  try {
68  bin_oper, *executor->getCatalog(), executor->getTemporaryTables());
69  } catch (...) {
70  return {200, 200};
71  }
72  }
73  return {100, 100};
74 }
75 
76 // Builds a graph with nesting levels as nodes and join condition costs as edges.
77 std::vector<std::map<node_t, cost_t>> build_join_cost_graph(
78  const JoinQualsPerNestingLevel& left_deep_join_quals,
79  const std::vector<InputTableInfo>& table_infos,
80  const Executor* executor) {
81  CHECK_EQ(left_deep_join_quals.size() + 1, table_infos.size());
82  std::vector<std::map<node_t, cost_t>> join_cost_graph(table_infos.size());
84  // Build the constraints graph: nodes are nest levels, edges are the existence of
85  // qualifiers between levels.
86  for (const auto& current_level_join_conditions : left_deep_join_quals) {
87  for (const auto& qual : current_level_join_conditions.quals) {
88  std::set<int> qual_nest_levels = visitor.visit(qual.get());
89  if (qual_nest_levels.size() != 2) {
90  continue;
91  }
92  int lhs_nest_level = *qual_nest_levels.begin();
93  CHECK_GE(lhs_nest_level, 0);
94  qual_nest_levels.erase(qual_nest_levels.begin());
95  int rhs_nest_level = *qual_nest_levels.begin();
96  CHECK_GE(rhs_nest_level, 0);
97 
98  // Get the {lhs, rhs} cost for the qual
99  const auto cost_pair = get_join_qual_cost(qual.get(), executor);
100  const auto edge_it = join_cost_graph[lhs_nest_level].find(rhs_nest_level);
101  if (edge_it == join_cost_graph[lhs_nest_level].end() ||
102  edge_it->second > cost_pair.second) {
103  join_cost_graph[lhs_nest_level][rhs_nest_level] = cost_pair.second;
104  join_cost_graph[rhs_nest_level][lhs_nest_level] = cost_pair.first;
105  }
106  }
107  }
108  return join_cost_graph;
109 }
110 
111 // Tracks dependencies between nodes.
113  public:
114  SchedulingDependencyTracking(const size_t node_count) : inbound_(node_count) {}
115 
116  // Add a from -> to dependency.
117  void addEdge(const node_t from, const node_t to) { inbound_[to].insert(from); }
118 
119  // Removes from's outbound dependencies.
120  void removeNode(const node_t from) {
121  for (auto& inbound_for_node : inbound_) {
122  inbound_for_node.erase(from);
123  }
124  }
125 
126  // Returns the set of all nodes without dependencies.
127  std::unordered_set<node_t> getRoots() const {
128  std::unordered_set<node_t> roots;
129  for (node_t candidate = 0; candidate < inbound_.size(); ++candidate) {
130  if (inbound_[candidate].empty()) {
131  roots.insert(candidate);
132  }
133  }
134  return roots;
135  }
136 
137  private:
138  std::vector<std::unordered_set<node_t>> inbound_;
139 };
140 
141 // The tree edge for traversal of the cost graph.
145 };
146 
147 // Builds dependency tracking based on left joins
149  const JoinQualsPerNestingLevel& left_deep_join_quals,
150  const std::vector<std::map<node_t, cost_t>>& join_cost_graph) {
151  SchedulingDependencyTracking dependency_tracking(left_deep_join_quals.size() + 1);
152  // Add directed graph edges for left join dependencies.
153  // See also start_it inside traverse_join_cost_graph(). These
154  // edges prevent start_it from pointing to a table with a
155  // left join dependency on another table.
156  for (size_t level_idx = 0; level_idx < left_deep_join_quals.size(); ++level_idx) {
157  if (left_deep_join_quals[level_idx].type == JoinType::LEFT) {
158  dependency_tracking.addEdge(level_idx, level_idx + 1);
159  }
160  }
161  return dependency_tracking;
162 }
163 
164 // Do a breadth-first traversal of the cost graph. This avoids scheduling a nest level
165 // before the ones which constraint it are scheduled and it favors equi joins over loop
166 // joins.
167 std::vector<node_t> traverse_join_cost_graph(
168  const std::vector<std::map<node_t, cost_t>>& join_cost_graph,
169  const std::vector<InputTableInfo>& table_infos,
170  const std::function<bool(const node_t lhs_nest_level, const node_t rhs_nest_level)>&
171  compare_node,
172  const std::function<bool(const TraversalEdge&, const TraversalEdge&)>& compare_edge,
173  const JoinQualsPerNestingLevel& left_deep_join_quals) {
174  std::vector<node_t> all_nest_levels(table_infos.size());
175  std::iota(all_nest_levels.begin(), all_nest_levels.end(), 0);
176  std::vector<node_t> input_permutation;
177  std::unordered_set<node_t> visited;
178  auto dependency_tracking =
179  build_dependency_tracking(left_deep_join_quals, join_cost_graph);
180  auto schedulable_node = [&dependency_tracking, &visited](const node_t node) {
181  const auto nodes_ready = dependency_tracking.getRoots();
182  return nodes_ready.find(node) != nodes_ready.end() &&
183  visited.find(node) == visited.end();
184  };
185  while (visited.size() < table_infos.size()) {
186  // Filter out nest levels which are already visited or have pending dependencies.
187  std::vector<node_t> remaining_nest_levels;
188  std::copy_if(all_nest_levels.begin(),
189  all_nest_levels.end(),
190  std::back_inserter(remaining_nest_levels),
191  schedulable_node);
192  CHECK(!remaining_nest_levels.empty());
193  // Start with the table with most tuples.
194  const auto start_it = std::max_element(
195  remaining_nest_levels.begin(), remaining_nest_levels.end(), compare_node);
196  CHECK(start_it != remaining_nest_levels.end());
197  std::priority_queue<TraversalEdge, std::vector<TraversalEdge>, decltype(compare_edge)>
198  worklist(compare_edge);
199  // look at all edges, compare the
200  // cost of our edge vs theirs, and pick the best start edge
201  node_t start = *start_it;
202  TraversalEdge start_edge{start, 0};
203  VLOG(2) << "Table reordering starting with nest level " << start;
204  for (const auto& graph_edge : join_cost_graph[*start_it]) {
205  const node_t succ = graph_edge.first;
206  if (!schedulable_node(succ)) {
207  continue;
208  }
209  const TraversalEdge succ_edge{succ, graph_edge.second};
210  for (const auto& successor_edge : join_cost_graph[succ]) {
211  if (successor_edge.first == start) {
212  start_edge.join_cost = successor_edge.second;
213  // lhs cost / num tuples less than rhs cost if compare edge is true, swap nest
214  // levels
215  if (compare_edge(start_edge, succ_edge)) {
216  VLOG(2) << "Table reordering changing start nest level from " << start
217  << " to " << succ;
218  start = succ;
219  start_edge = succ_edge;
220  }
221  }
222  }
223  }
224  VLOG(2) << "Table reordering picked start nest level " << start << " with cost "
225  << start_edge.join_cost;
226  CHECK_EQ(start, start_edge.nest_level);
227  worklist.push(start_edge);
228  const auto it_ok = visited.insert(start);
229  CHECK(it_ok.second);
230  while (!worklist.empty()) {
231  // Extract a node and add it to the permutation.
232  TraversalEdge crt = worklist.top();
233  worklist.pop();
234  input_permutation.push_back(crt.nest_level);
235  dependency_tracking.removeNode(crt.nest_level);
236  // Add successors which are ready and not visited yet to the queue.
237  for (const auto& graph_edge : join_cost_graph[crt.nest_level]) {
238  const node_t succ = graph_edge.first;
239  if (!schedulable_node(succ)) {
240  continue;
241  }
242  worklist.push(TraversalEdge{succ, graph_edge.second});
243  const auto it_ok = visited.insert(succ);
244  CHECK(it_ok.second);
245  }
246  }
247  }
248  return input_permutation;
249 }
250 
251 } // namespace
252 
253 std::vector<node_t> get_node_input_permutation(
254  const JoinQualsPerNestingLevel& left_deep_join_quals,
255  const std::vector<InputTableInfo>& table_infos,
256  const Executor* executor) {
257  const auto join_cost_graph =
258  build_join_cost_graph(left_deep_join_quals, table_infos, executor);
259  // Use the number of tuples in each table to break ties in BFS.
260  const auto compare_node = [&table_infos](const node_t lhs_nest_level,
261  const node_t rhs_nest_level) {
262  return table_infos[lhs_nest_level].info.getNumTuplesUpperBound() <
263  table_infos[rhs_nest_level].info.getNumTuplesUpperBound();
264  };
265  const auto compare_edge = [&compare_node](const TraversalEdge& lhs_edge,
266  const TraversalEdge& rhs_edge) {
267  // Only use the number of tuples as a tie-breaker, if costs are equal.
268  if (lhs_edge.join_cost == rhs_edge.join_cost) {
269  return compare_node(lhs_edge.nest_level, rhs_edge.nest_level);
270  }
271  return lhs_edge.join_cost > rhs_edge.join_cost;
272  };
274  join_cost_graph, table_infos, compare_node, compare_edge, left_deep_join_quals);
275 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:67
SchedulingDependencyTracking build_dependency_tracking(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< std::map< node_t, cost_t >> &join_cost_graph)
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1176
#define CHECK_GE(x, y)
Definition: Logger.h:216
std::vector< JoinCondition > JoinQualsPerNestingLevel
T visit(const Analyzer::Expr *expr) const
std::vector< node_t > get_node_input_permutation(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< InputTableInfo > &table_infos, const Executor *executor)
static std::unordered_map< SQLTypes, cost_t > GEO_TYPE_COSTS
static std::unordered_map< SQLTypes, cost_t > std::pair< cost_t, cost_t > get_join_qual_cost(const Analyzer::Expr *qual, const Executor *executor)
std::vector< InnerOuter > normalize_column_pairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:705
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
char * to
std::vector< node_t > traverse_join_cost_graph(const std::vector< std::map< node_t, cost_t >> &join_cost_graph, const std::vector< InputTableInfo > &table_infos, const std::function< bool(const node_t lhs_nest_level, const node_t rhs_nest_level)> &compare_node, const std::function< bool(const TraversalEdge &, const TraversalEdge &)> &compare_edge, const JoinQualsPerNestingLevel &left_deep_join_quals)
const Analyzer::Expr * getArg(const size_t i) const
Definition: Analyzer.h:1362
DEVICE void iota(ARGS &&...args)
Definition: gpu_enabled.h:69
#define CHECK(condition)
Definition: Logger.h:203
std::vector< std::map< node_t, cost_t > > build_join_cost_graph(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< InputTableInfo > &table_infos, const Executor *executor)
#define VLOG(n)
Definition: Logger.h:297