OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FromTableReordering.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "FromTableReordering.h"
18 #include "Execute.h"
19 #include "ExpressionRewrite.h"
20 #include "RangeTableIndexVisitor.h"
22 
23 #include <numeric>
24 #include <queue>
25 #include <regex>
26 
27 namespace {
28 
// Edge weight (join-qual cost) and node id (nest level) types used by the
// cost-graph construction and traversal helpers below.
29 using cost_t = unsigned;
30 using node_t = size_t;
31 
// Returns the first column var in geo_args that belongs to table_key, or
// nullptr when no argument references that table.
// NOTE(review): the function's return-type/name line (doxygen line 32,
// `get_geo_cv`) is missing from this extract — confirm against the original.
33  std::vector<const Analyzer::ColumnVar*> const& geo_args,
34  shared::TableKey const& table_key) {
35  auto it = std::find_if(
36  geo_args.begin(), geo_args.end(), [&table_key](const Analyzer::ColumnVar* cv) {
37  return cv->getTableKey() == table_key;
38  });
39  return it == geo_args.end() ? nullptr : *it;
40 }
41 
// Relative edge costs per geometry type (larger = more expensive join side).
// Used as a tie-breaking cost factor in get_join_qual_cost's fallback path.
// NOTE: deliberately non-const — it is read via operator[] elsewhere in this
// file, which requires a mutable map (missing keys would be value-initialized).
42 static std::unordered_map<SQLTypes, cost_t> GEO_TYPE_COSTS{{kPOINT, 60},
43  {kARRAY, 60},
44  {kLINESTRING, 70},
45  {kPOLYGON, 80},
46  {kMULTIPOLYGON, 90}};
47 
// True when target_func_name is one of the ST_Contains-family functions for
// which table reordering should be forced (membership test against the
// ST_CONTAIN_FORCE_TABLE_REORDERING_TARGET_FUNC allow-list).
// NOTE(review): doxygen lines 49 and 51-52 (presumably the `return
// std::any_of(` call and the `.end()` iterator argument) are missing from
// this extract — confirm against the original source.
48 static bool force_table_reordering_st_contain_func(std::string_view target_func_name) {
50  ST_CONTAIN_FORCE_TABLE_REORDERING_TARGET_FUNC.begin(),
53  [target_func_name](std::string_view func_name) {
54  return target_func_name == func_name;
55  });
56 }
57 
// True when target_func_name is one of the ST_Intersects-family functions for
// which table reordering should be forced (membership test against the
// ST_INTERSECTS_FORCE_TABLE_REORDERING_TARGET_FUNC allow-list).
// NOTE(review): doxygen lines 59 and 61-62 (presumably the `return
// std::any_of(` call and the `.end()` iterator argument) are missing from
// this extract — confirm against the original source.
58 static bool force_table_reordering_st_intersects_func(std::string_view target_func_name) {
60  ST_INTERSECTS_FORCE_TABLE_REORDERING_TARGET_FUNC.begin(),
63  [target_func_name](std::string_view func_name) {
64  return target_func_name == func_name;
65  });
66 }
67 
// Decides whether to force a swap of the two join tables (ignoring pure
// cardinality ordering) so that a hash table can be built on the polygon side
// of a point-in-polygon geo join. Returns true only when the table listed
// first in the FROM clause is the larger one (cases 1 and 2 below).
// NOTE(review): the signature line (doxygen line 68, `bool
// should_force_table_reordering(shared::TableKey const& inner_arg_key,`) and
// the initializers on doxygen lines 84/86 (presumably calls to
// force_table_reordering_st_contain_func / _st_intersects_func with
// geo_func_name) are missing from this extract — confirm against the original.
69  SQLTypes const inner_type,
70  shared::TableKey const& outer_arg_key,
71  SQLTypes const outer_type,
72  std::string const& geo_func_name,
73  const std::vector<InputTableInfo>& table_infos) {
74  // if, |R| > |S|
75  // case-1: SELECT ... FROM R, S WHERE ST_...(R.c, S.c);
76  // case-2: SELECT ... FROM R, S WHERE ST_...(S.c, R.c);
77  // case-3: SELECT ... FROM S, R WHERE ST_...(R.c, S.c);
78  // case-4: SELECT ... FROM S, R WHERE ST_...(S.c, R.c);
79  auto const inner_poly_outer_pt_pair =
80  shared::is_any<kPOLYGON, kMULTIPOLYGON>(inner_type) && outer_type == kPOINT;
81  auto const outer_poly_inner_pt_pair =
82  shared::is_any<kPOLYGON, kMULTIPOLYGON>(outer_type) && inner_type == kPOINT;
83  auto const force_swap_st_contains =
85  auto const force_swap_st_intersects =
// Locate the nest-level index of each join argument's table in the FROM list.
87  size_t inner_idx = 0;
88  size_t outer_idx = 0;
89  for (size_t i = 0; i < table_infos.size(); i++) {
90  if (table_infos[i].table_key == inner_arg_key) {
91  inner_idx = i;
92  } else if (table_infos[i].table_key == outer_arg_key) {
93  outer_idx = i;
94  }
95  }
96  size_t first_listed_idx = std::min(inner_idx, outer_idx);
97  size_t first_listed_card = table_infos[first_listed_idx].info.getNumTuples();
98  size_t last_listed_idx = std::max(inner_idx, outer_idx);
99  size_t last_listed_card = table_infos[last_listed_idx].info.getNumTuples();
// Only force a swap when the first-listed table is the larger one.
100  if (first_listed_card > last_listed_card) {
101  if (inner_arg_key == table_infos[first_listed_idx].table_key) {
102  // case 1
103  return inner_poly_outer_pt_pair &&
104  (force_swap_st_contains || force_swap_st_intersects);
105  } else {
106  // case 2
107  CHECK_EQ(outer_arg_key, table_infos[first_listed_idx].table_key);
108  return outer_poly_inner_pt_pair && force_swap_st_intersects;
109  }
110  }
// Cases 3 and 4: smaller table already listed first, keep the ordering.
111  return false;
112 }
113 
114 // Returns a lhs/rhs cost for the given qualifier. Must be strictly greater than 0.
115 // todo (yoonmin): compute the cost of inner join edge and outer join edge
116 // Currently, we set 100 for inner join and 200 for loop join
117 // for geometries, we use types of geometries as its cost factor
// Returns {lhs_cost, rhs_cost, decision}: the first two feed the join cost
// graph edges; the decision records which side (if any) was identified as the
// inner (hash-table build) side of the qual.
// NOTE(review): doxygen lines 137-138 (the two branches of the
// inner_qual_decision ternary) and 149/151 (the conditions calling
// force_table_reordering_st_contain_func / _st_intersects_func) are missing
// from this extract — confirm against the original source.
118 std::tuple<cost_t, cost_t, InnerQualDecision> get_join_qual_cost(
119  const Analyzer::Expr* qual,
120  const std::vector<InputTableInfo>& table_infos,
121  const Executor* executor) {
122  if (executor) {
123  GeospatialFunctionFinder geo_func_finder;
124  geo_func_finder.visit(qual);
// A geo function joining two distinct tables was found in this qual.
125  if (auto table_key_pair = geo_func_finder.getJoinTableKeyPair()) {
126  auto const inner_table_key = (*table_key_pair).inner_table_key;
127  auto const outer_table_key = (*table_key_pair).outer_table_key;
128  // try to find a chance to swap tables in the binary join
129  // note that self-join does not need to be swapped
130  CHECK_NE(inner_table_key, outer_table_key);
131  const auto& target_geo_func_name = geo_func_finder.getGeoFunctionName();
132  auto const inner_table_cardinality =
133  get_table_cardinality(inner_table_key, executor);
134  auto const outer_table_cardinality =
135  get_table_cardinality(outer_table_key, executor);
136  auto inner_qual_decision = inner_table_cardinality > outer_table_cardinality
139  // detect the case when table reordering by cardinality incurs unexpected overhead
140  // i.e., SELECT ... FROM R, S where ST_Intersects(S.poly, R.pt) where |R| > |S|
141  // but |R| is not that larger than |S|, i.e., |R| / |S| < 10.0
142  // in this case, it might be better if keeping the existing ordering
143  // to exploit bounding-box intersection w/ hash join framework instead of loop join
144  const auto inner_cv = get_geo_cv(geo_func_finder.getGeoArgCvs(), inner_table_key);
145  CHECK(inner_cv);
146  bool needs_table_reordering = inner_table_cardinality < outer_table_cardinality;
147  const auto outer_inner_card_ratio =
148  outer_table_cardinality / static_cast<double>(inner_table_cardinality);
150  target_geo_func_name) ||
152  target_geo_func_name)) {
153  // the goal of this is to maximize the chance of using bounding-box intersection
154  // (aka. bbox-intersection) to filter out unnecessary pair of geometries before
155  // computing geo function to achieve this, point column has zero for its rte_idx
156  // (so we build a hash table based on poly column which has rte_idx = 1) but if
157  // its cardinality is smaller than that of polygon table more than 10x we try to
158  // fall back to loop join to avoid too expensive bbox-intersection
159  if (inner_cv->get_rte_idx() == 0 &&
160  (inner_cv->get_type_info().get_type() == kPOINT)) {
161  // outer is poly, and we can use hash join w/ bbox-intersection
162  if (needs_table_reordering && outer_inner_card_ratio > 10.0 &&
163  inner_table_cardinality < 10000) {
164  // but current pt table is small enough and hash table is larger than
165  // the pt table at least 10 times, then we fall back to loop join
166  // to avoid too expensive hash join
167  // so let's try to set inner table as poly table to invalidate
168  // rte index requirement
169  VLOG(1) << "Force loop-join to avoid unexpected overhead of building large "
170  "hash table";
171  return {200, 200, InnerQualDecision::RHS};
172  } else {
173  // otherwise, try to keep the existing ordering
174  return {180, 190, InnerQualDecision::IGNORE};
175  }
176  } else {
177  // poly is the inner table, so we need to reorder tables to use
178  // bbox-intersection
179  auto const geo_func_name = geo_func_finder.getGeoFunctionName();
180  const auto outer_cv =
181  get_geo_cv(geo_func_finder.getGeoArgCvs(), outer_table_key);
182  CHECK(outer_cv);
183  auto const inner_type = inner_cv->get_type_info().get_type();
184  auto const outer_type = outer_cv->get_type_info().get_type();
185  if (!needs_table_reordering && should_force_table_reordering(inner_table_key,
186  inner_type,
187  outer_table_key,
188  outer_type,
189  geo_func_name,
190  table_infos)) {
191  VLOG(1) << "Force reordering tables to enable a hash join for "
192  << geo_func_name;
193  // let's reorder them regardless of table cardinality to build a hash table on
194  // polygon side which can exploit bounding-box intersection
195  return {190, 180, InnerQualDecision::RHS};
196  }
197  if (needs_table_reordering) {
198  // outer point table is larger than inner poly table, so let's reorder them
199  // by table cardinality
200  VLOG(1) << "Try to reorder tables based on table cardinality";
201  return {200, 200, InnerQualDecision::RHS};
202  } else {
203  // otherwise, try to keep the existing ordering
204  return {180, 190, InnerQualDecision::IGNORE};
205  }
206  }
207  }
208  // rest of bbox-intersection-available and unavailable geo functions
209  // can reach here, and they are reordered by table cardinality
210  // specifically, bbox-intersection-available geo join functions are satisfied one of
211  // followings: ST_INTERSECTSBOX_sv and is_poly_mpoly_rewrite_target_func we can use
212  // bbox-intersection hash join for those functions regardless of table ordering see
213  // rewrite_bbox_intersection function in ExpressionRewrite.cpp
214  VLOG(2) << "Detect geo join operator, initial_inner_table(db_id: "
215  << inner_table_key.db_id << ", table_id: " << inner_table_key.table_id
216  << "), cardinality: " << inner_table_cardinality
217  << "), initial_outer_table(db_id: " << outer_table_key.db_id
218  << ", table_id: " << outer_table_key.table_id
219  << "), cardinality: " << outer_table_cardinality
220  << "), inner_qual_decision: " << inner_qual_decision;
221  return {200, 200, inner_qual_decision};
222 
223  } else {
224  // let's fall back to the old strategy by ordering tables by types of geometries
225  const auto func_oper = dynamic_cast<const Analyzer::FunctionOper*>(qual);
226  if (func_oper) {
// Collect the geometry types of the function's arguments; a two-argument
// ST_* function gets type-based costs from GEO_TYPE_COSTS.
227  std::vector<SQLTypes> geo_types_for_func;
228  for (size_t i = 0; i < func_oper->getArity(); i++) {
229  const auto arg_expr = func_oper->getArg(i);
230  const auto& ti = arg_expr->get_type_info();
231  if (ti.is_geometry() || is_constructed_point(arg_expr)) {
232  geo_types_for_func.push_back(ti.get_type());
233  }
234  }
235  std::regex geo_func_regex("ST_[\\w]*");
236  std::smatch geo_func_match;
237  const auto& func_name = func_oper->getName();
238  if (geo_types_for_func.size() == 2 &&
239  std::regex_match(func_name, geo_func_match, geo_func_regex)) {
240  const auto rhs_cost = GEO_TYPE_COSTS[geo_types_for_func[0]];
241  const auto lhs_cost = GEO_TYPE_COSTS[geo_types_for_func[1]];
242  return {lhs_cost, rhs_cost, InnerQualDecision::IGNORE};
243  }
244  return {200, 200, InnerQualDecision::IGNORE};
245  }
246  }
247  }
248 
// Non-geo path: only equivalence (equi-join) quals get the cheap cost of 100;
// anything else is treated as a loop join (200).
249  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(qual);
250  if (!bin_oper || !IS_EQUIVALENCE(bin_oper->get_optype())) {
251  return {200, 200, InnerQualDecision::IGNORE};
252  }
253  InnerQualDecision inner_qual_decision = InnerQualDecision::UNKNOWN;
254  if (executor) {
255  try {
256  const auto normalized_bin_oper =
257  HashJoin::normalizeColumnPairs(bin_oper, executor->getTemporaryTables());
258  const auto& inner_outer = normalized_bin_oper.first;
259  // normalization success, so we need to figure out which cv becomes an inner
260  auto lhs = bin_oper->get_left_operand();
261  if (auto lhs_tuple = dynamic_cast<const Analyzer::ExpressionTuple*>(
262  bin_oper->get_left_operand())) {
263  lhs = lhs_tuple->getTuple().front().get();
264  }
265  CHECK(lhs);
266  if (lhs == inner_outer.front().first) {
267  inner_qual_decision = InnerQualDecision::LHS;
268  } else if (lhs == inner_outer.front().second) {
269  inner_qual_decision = InnerQualDecision::RHS;
270  }
271  } catch (const HashJoinFail& e) {
// Normalization failed but carries its own decision; keep loop-join cost.
272  return {200, 200, e.inner_qual_decision};
273  } catch (...) {
274  return {200, 200, inner_qual_decision};
275  }
276  }
277  return {100, 100, inner_qual_decision};
278 }
279 
280 // Builds a graph with nesting levels as nodes and join condition costs as edges.
// For every two-table qual, stores the cheaper {lhs, rhs} costs as a pair of
// directed edges and records the qual's InnerQualDecision for both directions
// in qual_detection_res.
// NOTE(review): doxygen line 288 (the declaration of `visitor`, presumably a
// range-table-index visitor from RangeTableIndexVisitor.h) is missing from
// this extract — confirm against the original source.
281 std::vector<std::map<node_t, cost_t>> build_join_cost_graph(
282  const JoinQualsPerNestingLevel& left_deep_join_quals,
283  const std::vector<InputTableInfo>& table_infos,
284  const Executor* executor,
285  std::vector<std::map<node_t, InnerQualDecision>>& qual_detection_res) {
286  CHECK_EQ(left_deep_join_quals.size() + 1, table_infos.size());
287  std::vector<std::map<node_t, cost_t>> join_cost_graph(table_infos.size());
289  // Build the constraints graph: nodes are nest levels, edges are the existence of
290  // qualifiers between levels.
291  for (const auto& current_level_join_conditions : left_deep_join_quals) {
292  for (const auto& qual : current_level_join_conditions.quals) {
// Only binary (exactly two nest levels) quals contribute edges.
293  std::set<int> qual_nest_levels = visitor.visit(qual.get());
294  if (qual_nest_levels.size() != 2) {
295  continue;
296  }
297  int lhs_nest_level = *qual_nest_levels.begin();
298  CHECK_GE(lhs_nest_level, 0);
299  qual_nest_levels.erase(qual_nest_levels.begin());
300  int rhs_nest_level = *qual_nest_levels.begin();
301  CHECK_GE(rhs_nest_level, 0);
302  // Get the {lhs, rhs} cost for the qual
303  const auto qual_costing = get_join_qual_cost(qual.get(), table_infos, executor);
304  qual_detection_res[lhs_nest_level][rhs_nest_level] = std::get<2>(qual_costing);
305  qual_detection_res[rhs_nest_level][lhs_nest_level] = std::get<2>(qual_costing);
306  const auto edge_it = join_cost_graph[lhs_nest_level].find(rhs_nest_level);
307  auto rhs_cost = std::get<1>(qual_costing);
// Keep only the cheapest qual between a pair of levels.
308  if (edge_it == join_cost_graph[lhs_nest_level].end() ||
309  edge_it->second > rhs_cost) {
310  auto lhs_cost = std::get<0>(qual_costing);
311  join_cost_graph[lhs_nest_level][rhs_nest_level] = rhs_cost;
312  join_cost_graph[rhs_nest_level][lhs_nest_level] = lhs_cost;
313  }
314  }
315  }
316  return join_cost_graph;
317 }
318 
319 // Tracks dependencies between nodes.
// Simple inbound-edge bookkeeping used to topologically gate which nest levels
// may be scheduled: a node is a "root" (schedulable) once it has no remaining
// inbound dependencies.
// NOTE(review): the class header line (doxygen line 320, `class
// SchedulingDependencyTracking {`) is missing from this extract — confirm
// against the original source.
321  public:
// Pre-sizes one inbound-edge set per node.
322  SchedulingDependencyTracking(const size_t node_count) : inbound_(node_count) {}
323 
324  // Add a from -> to dependency.
325  void addEdge(const node_t from, const node_t to) { inbound_[to].insert(from); }
326 
327  // Removes from's outbound dependencies.
// I.e., erases `from` from every node's inbound set once `from` is scheduled.
328  void removeNode(const node_t from) {
329  for (auto& inbound_for_node : inbound_) {
330  inbound_for_node.erase(from);
331  }
332  }
333 
334  // Returns the set of all nodes without dependencies.
335  std::unordered_set<node_t> getRoots() const {
336  std::unordered_set<node_t> roots;
337  for (node_t candidate = 0; candidate < inbound_.size(); ++candidate) {
338  if (inbound_[candidate].empty()) {
339  roots.insert(candidate);
340  }
341  }
342  return roots;
343  }
344 
345  private:
// inbound_[n] holds the set of nodes that must be scheduled before node n.
346  std::vector<std::unordered_set<node_t>> inbound_;
347 };
348 
349 // The tree edge for traversal of the cost graph.
// NOTE(review): the struct body (doxygen lines 350-352, declaring
// `TraversalEdge` — per its uses elsewhere in this file it has `nest_level`
// and `join_cost` members) is missing from this extract; only the closing
// brace survived. Confirm against the original source.
353 };
354 
355 // Builds dependency tracking based on left joins
// Produces a tracker with one node per nest level; each LEFT join adds an
// edge level_idx -> level_idx + 1 so the right side cannot be scheduled
// (or chosen as the traversal start) before its left side.
// NOTE(review): the signature line (doxygen line 356,
// `SchedulingDependencyTracking build_dependency_tracking(`) is missing from
// this extract — confirm against the original source.
357  const JoinQualsPerNestingLevel& left_deep_join_quals,
358  const std::vector<std::map<node_t, cost_t>>& join_cost_graph) {
359  SchedulingDependencyTracking dependency_tracking(left_deep_join_quals.size() + 1);
360  // Add directed graph edges for left join dependencies.
361  // See also start_it inside traverse_join_cost_graph(). These
362  // edges prevent start_it from pointing to a table with a
363  // left join dependency on another table.
364  for (size_t level_idx = 0; level_idx < left_deep_join_quals.size(); ++level_idx) {
365  if (left_deep_join_quals[level_idx].type == JoinType::LEFT) {
366  dependency_tracking.addEdge(level_idx, level_idx + 1);
367  }
368  }
369  return dependency_tracking;
370 }
371 
372 // Do a breadth-first traversal of the cost graph. This avoids scheduling a nest level
373 // before the ones which constraint it are scheduled and it favors equi joins over loop
374 // joins.
// Returns the permutation of nest levels in scheduling order. For the special
// single-binary-join case it may also mutate column vars' rte indices (via
// const_cast in a helper below) to swap inner/outer before traversal.
375 std::vector<node_t> traverse_join_cost_graph(
376  const std::vector<std::map<node_t, cost_t>>& join_cost_graph,
377  const std::vector<InputTableInfo>& table_infos,
378  const std::function<bool(const node_t lhs_nest_level, const node_t rhs_nest_level)>&
379  compare_node,
380  const std::function<bool(const TraversalEdge&, const TraversalEdge&)>& compare_edge,
381  const JoinQualsPerNestingLevel& left_deep_join_quals,
382  std::vector<std::map<node_t, InnerQualDecision>>& qual_normalization_res) {
383  std::vector<node_t> all_nest_levels(table_infos.size());
384  std::iota(all_nest_levels.begin(), all_nest_levels.end(), 0);
385  std::vector<node_t> input_permutation;
386  std::unordered_set<node_t> visited;
387  auto dependency_tracking =
388  build_dependency_tracking(left_deep_join_quals, join_cost_graph);
// A node may be scheduled when all its dependencies are satisfied (it is a
// root) and it has not been visited yet.
389  auto schedulable_node = [&dependency_tracking, &visited](const node_t node) {
390  const auto nodes_ready = dependency_tracking.getRoots();
391  return nodes_ready.find(node) != nodes_ready.end() &&
392  visited.find(node) == visited.end();
393  };
// Outer loop: one BFS per connected component / dependency wave.
394  while (visited.size() < table_infos.size()) {
395  // Filter out nest levels which are already visited or have pending dependencies.
396  std::vector<node_t> remaining_nest_levels;
397  std::copy_if(all_nest_levels.begin(),
398  all_nest_levels.end(),
399  std::back_inserter(remaining_nest_levels),
400  schedulable_node);
401  CHECK(!remaining_nest_levels.empty());
402  // Start with the table with most tuples.
403  const auto start_it = std::max_element(
404  remaining_nest_levels.begin(), remaining_nest_levels.end(), compare_node);
405  CHECK(start_it != remaining_nest_levels.end());
406  std::priority_queue<TraversalEdge, std::vector<TraversalEdge>, decltype(compare_edge)>
407  worklist(compare_edge);
408  // look at all edges, compare the
409  // cost of our edge vs theirs, and pick the best start edge
410  node_t start = *start_it;
411  // we adaptively switch the inner and outer when we have a chance to exploit
412  // hash join framework for a query with a single binary join
413  TraversalEdge start_edge{start, 0};
414 
415  // when we have a single binary join in the query, we can analyze the qual and apply
416  // more smart table reordering logic that maximizes the chance of exploiting hash join
417  // todo (yoonmin) : generalize this for an arbitrary join pipeline
418  if (remaining_nest_levels.size() == 2 && qual_normalization_res[start].size() == 1) {
419  auto inner_qual_decision = qual_normalization_res[start].begin()->second;
420  auto join_qual = left_deep_join_quals.begin()->quals;
421  using ColvarSet =
422  std::set<const Analyzer::ColumnVar*,
423  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>;
424 
// Rewrites the rte index of every column var in the set (const_cast: the
// analyzer nodes are shared, so this is an in-place plan mutation).
425  auto set_new_rte_idx = [](ColvarSet& cv_set, int new_rte) {
426  std::for_each(
427  cv_set.begin(), cv_set.end(), [new_rte](const Analyzer::ColumnVar* cv) {
428  const_cast<Analyzer::ColumnVar*>(cv)->set_rte_idx(new_rte);
429  });
430  };
431 
432  // IGNORE: use the existing table reordering logic
433  // KEEP: return the existing table permutation and related cvs (column variables)
434  // SWAP: change the starting table of the table reordering logic and relevant
435  // columns' rte index
436  enum class Decision { IGNORE, KEEP, SWAP };
437 
// Inspects one lhs/rhs pair of the join qual and decides whether to keep,
// swap, or fall through to the default reordering. Returns {decision,
// nest level to start from}.
438  auto analyze_join_qual = [&start,
439  &remaining_nest_levels,
440  &inner_qual_decision,
441  &table_infos,
442  compare_node](const std::shared_ptr<Analyzer::Expr>& lhs,
443  ColvarSet& lhs_colvar_set,
444  const std::shared_ptr<Analyzer::Expr>& rhs,
445  ColvarSet& rhs_colvar_set) {
446  if (!lhs || !rhs || lhs_colvar_set.empty() || rhs_colvar_set.empty()) {
447  return std::make_pair(Decision::IGNORE, start);
448  }
449 
// The only other schedulable nest level (binary join => exactly one).
450  auto alternative_it = std::find_if(
451  remaining_nest_levels.begin(),
452  remaining_nest_levels.end(),
453  [start](const size_t nest_level) { return start != nest_level; });
454  CHECK(alternative_it != remaining_nest_levels.end());
455  auto alternative_rte = *alternative_it;
456 
457  Decision decision = Decision::IGNORE;
458  // inner col's rte should be larger than outer col
459  int inner_rte = -1;
460  int outer_rte = -1;
461  bool is_outer_col_valid = false;
// Valid means every expression on the outer side resolves to a hash-join
// column var (tuple case checks each element).
462  auto check_expr_is_valid_col = [&is_outer_col_valid](const Analyzer::Expr* expr) {
463  if (auto expr_tuple = dynamic_cast<const Analyzer::ExpressionTuple*>(expr)) {
464  for (auto& inner_expr : expr_tuple->getTuple()) {
465  auto cv_from_expr =
466  HashJoin::getHashJoinColumn<Analyzer::ColumnVar>(inner_expr.get());
467  if (!cv_from_expr) {
468  is_outer_col_valid = false;
469  return;
470  }
471  }
472  } else {
473  auto cv_from_expr = HashJoin::getHashJoinColumn<Analyzer::ColumnVar>(expr);
474  if (!cv_from_expr) {
475  is_outer_col_valid = false;
476  return;
477  }
478  }
479  is_outer_col_valid = true;
480  };
481  if (inner_qual_decision == InnerQualDecision::LHS) {
482  inner_rte = (*lhs_colvar_set.begin())->get_rte_idx();
483  outer_rte = (*rhs_colvar_set.begin())->get_rte_idx();
484  check_expr_is_valid_col(rhs.get());
485  } else if (inner_qual_decision == InnerQualDecision::RHS) {
486  inner_rte = (*rhs_colvar_set.begin())->get_rte_idx();
487  outer_rte = (*lhs_colvar_set.begin())->get_rte_idx();
488  check_expr_is_valid_col(lhs.get());
489  }
490  if (inner_rte >= 0 && outer_rte >= 0) {
491  const auto inner_cardinality =
492  table_infos[inner_rte].info.getNumTuplesUpperBound();
493  const auto outer_cardinality =
494  table_infos[outer_rte].info.getNumTuplesUpperBound();
495  if (inner_cardinality > g_trivial_loop_join_threshold) {
496  if (inner_rte == static_cast<int>(start)) {
497  // inner is driving the join loop but also has a valid join column
498  // which is available for building a hash table
499  // but ignore swapping when inner's cardinality is larger than that of
500  // outer's / otherwise swap inner and outer (to use the valid inner)
501  decision = is_outer_col_valid && inner_cardinality > outer_cardinality
502  ? Decision::IGNORE
503  : Decision::SWAP;
504  } else {
505  CHECK_EQ(inner_rte, static_cast<int>(alternative_rte));
506  // now, a valid inner column is outer table
507  if (compare_node(inner_rte, start)) {
508  // but outer table is larger than the current inner
509  // so we can exploit the existing table reordering logic
510  decision = Decision::IGNORE;
511  } else {
512  // and outer table is smaller than the current inner
513  // so we do not need to reorder the table starting from the inner
514  decision = Decision::KEEP;
515  }
516  }
517  }
518  }
519 
520  if (decision == Decision::KEEP) {
521  return std::make_pair(decision, start);
522  } else if (decision == Decision::SWAP) {
523  return std::make_pair(decision, alternative_rte);
524  }
525  return std::make_pair(Decision::IGNORE, start);
526  };
527 
528  auto collect_colvars = [](const std::shared_ptr<Analyzer::Expr> expr,
529  ColvarSet& cv_set) {
530  expr->collect_column_var(cv_set, false);
531  };
532 
// Applies a SWAP: retargets start/start_edge and rewrites both sides' rte
// indices so the traversal begins from the alternative table.
533  auto adjust_reordering_logic = [&start, &start_edge, &start_it, set_new_rte_idx](
534  Decision decision,
535  int alternative_rte,
536  ColvarSet& lhs_colvar_set,
537  ColvarSet& rhs_colvar_set) {
538  CHECK(decision == Decision::SWAP);
539  start = alternative_rte;
540  set_new_rte_idx(lhs_colvar_set, alternative_rte);
541  set_new_rte_idx(rhs_colvar_set, *start_it);
542  start_edge.join_cost = 0;
543  start_edge.nest_level = start;
544  };
545 
546  auto bin_op = dynamic_cast<Analyzer::BinOper*>(join_qual.begin()->get());
547  if (bin_op) {
548  auto lhs = bin_op->get_own_left_operand();
549  auto rhs = bin_op->get_own_right_operand();
// Composite (tuple) equi-join keys: analyze once using the first pair,
// then apply the same decision to the remaining pairs.
550  if (auto lhs_exp = dynamic_cast<Analyzer::ExpressionTuple*>(lhs.get())) {
551  // retrieve the decision and info for adjusting reordering by referring the
552  // first cv and apply them to the rest of cvs
553  auto rhs_exp = dynamic_cast<Analyzer::ExpressionTuple*>(rhs.get());
554  CHECK(rhs_exp);
555  auto& lhs_exprs = lhs_exp->getTuple();
556  auto& rhs_exprs = rhs_exp->getTuple();
557  CHECK_EQ(lhs_exprs.size(), rhs_exprs.size());
558  for (size_t i = 0; i < lhs_exprs.size(); ++i) {
559  Decision decision{Decision::IGNORE};
560  int alternative_rte_idx = -1;
561  ColvarSet lhs_colvar_set(Analyzer::ColumnVar::colvar_comp);
562  ColvarSet rhs_colvar_set(Analyzer::ColumnVar::colvar_comp);
563  collect_colvars(lhs_exprs.at(i), lhs_colvar_set);
564  collect_colvars(rhs_exprs.at(i), rhs_colvar_set);
565  if (i == 0) {
566  auto investigation_res =
567  analyze_join_qual(lhs, lhs_colvar_set, rhs, rhs_colvar_set);
568  decision = investigation_res.first;
569  if (decision == Decision::KEEP) {
570  return remaining_nest_levels;
571  }
572  alternative_rte_idx = investigation_res.second;
573  }
// NOTE(review): `decision`/`alternative_rte_idx` are re-initialized each
// iteration, so SWAP adjustments only ever run for i == 0 here — confirm
// whether that matches the "apply them to the rest of cvs" intent above.
574  if (decision == Decision::SWAP) {
575  adjust_reordering_logic(
576  decision, alternative_rte_idx, lhs_colvar_set, rhs_colvar_set);
577  }
578  }
579  } else {
// Single-column join key.
580  ColvarSet lhs_colvar_set(Analyzer::ColumnVar::colvar_comp);
581  ColvarSet rhs_colvar_set(Analyzer::ColumnVar::colvar_comp);
582  collect_colvars(lhs, lhs_colvar_set);
583  collect_colvars(rhs, rhs_colvar_set);
584  auto investigation_res =
585  analyze_join_qual(lhs, lhs_colvar_set, rhs, rhs_colvar_set);
586  if (investigation_res.first == Decision::KEEP) {
587  return remaining_nest_levels;
588  } else if (investigation_res.first == Decision::SWAP) {
589  adjust_reordering_logic(investigation_res.first,
590  investigation_res.second,
591  lhs_colvar_set,
592  rhs_colvar_set);
593  }
594  }
595  }
596  }
597 
598  VLOG(2) << "Table reordering starting with nest level " << start;
// Compare the cost of starting from start_it against each of its neighbors;
// adopt the neighbor as the start when its edge compares better.
599  for (const auto& graph_edge : join_cost_graph[*start_it]) {
600  const node_t succ = graph_edge.first;
601  if (!schedulable_node(succ)) {
602  continue;
603  }
604  const TraversalEdge succ_edge{succ, graph_edge.second};
605  for (const auto& successor_edge : join_cost_graph[succ]) {
606  if (successor_edge.first == start) {
607  start_edge.join_cost = successor_edge.second;
608  // lhs cost / num tuples less than rhs cost if compare edge is true, swap nest
609  // levels
610  if (compare_edge(start_edge, succ_edge)) {
611  VLOG(2) << "Table reordering changing start nest level from " << start
612  << " to " << succ;
613  start = succ;
614  start_edge = succ_edge;
615  }
616  }
617  }
618  }
619  VLOG(2) << "Table reordering picked start nest level " << start << " with cost "
620  << start_edge.join_cost;
621  CHECK_EQ(start, start_edge.nest_level);
// Standard BFS over the cost graph with a cheapest-edge-first worklist.
622  worklist.push(start_edge);
623  const auto it_ok = visited.insert(start);
624  CHECK(it_ok.second);
625  while (!worklist.empty()) {
626  // Extract a node and add it to the permutation.
627  TraversalEdge crt = worklist.top();
628  worklist.pop();
629  VLOG(1) << "Insert input permutation, idx: " << input_permutation.size()
630  << ", nest_level: " << crt.nest_level;
631  input_permutation.push_back(crt.nest_level);
632  dependency_tracking.removeNode(crt.nest_level);
633  // Add successors which are ready and not visited yet to the queue.
634  for (const auto& graph_edge : join_cost_graph[crt.nest_level]) {
635  const node_t succ = graph_edge.first;
636  if (!schedulable_node(succ)) {
637  continue;
638  }
639  worklist.push(TraversalEdge{succ, graph_edge.second});
640  const auto it_ok = visited.insert(succ);
641  CHECK(it_ok.second);
642  }
643  }
644  }
645  return input_permutation;
646 }
647 
648 } // namespace
649 
650 std::vector<node_t> get_node_input_permutation(
651  const JoinQualsPerNestingLevel& left_deep_join_quals,
652  const std::vector<InputTableInfo>& table_infos,
653  const Executor* executor) {
654  std::vector<std::map<node_t, InnerQualDecision>> qual_normalization_res(
655  table_infos.size());
656  const auto join_cost_graph = build_join_cost_graph(
657  left_deep_join_quals, table_infos, executor, qual_normalization_res);
658  // Use the number of tuples in each table to break ties in BFS.
659  const auto compare_node = [&table_infos](const node_t lhs_nest_level,
660  const node_t rhs_nest_level) {
661  return table_infos[lhs_nest_level].info.getNumTuplesUpperBound() <
662  table_infos[rhs_nest_level].info.getNumTuplesUpperBound();
663  };
664  const auto compare_edge = [&compare_node](const TraversalEdge& lhs_edge,
665  const TraversalEdge& rhs_edge) {
666  // Only use the number of tuples as a tie-breaker, if costs are equal.
667  if (lhs_edge.join_cost == rhs_edge.join_cost) {
668  return compare_node(lhs_edge.nest_level, rhs_edge.nest_level);
669  }
670  return lhs_edge.join_cost > rhs_edge.join_cost;
671  };
672  return traverse_join_cost_graph(join_cost_graph,
673  table_infos,
674  compare_node,
675  compare_edge,
676  left_deep_join_quals,
677  qual_normalization_res);
678 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
static bool is_point_poly_rewrite_target_func(std::string_view target_func_name)
static constexpr std::array< std::string_view, 6 > ST_INTERSECTS_FORCE_TABLE_REORDERING_TARGET_FUNC
static bool colvar_comp(const ColumnVar *l, const ColumnVar *r)
Definition: Analyzer.h:215
SQLTypes
Definition: sqltypes.h:65
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:72
SchedulingDependencyTracking build_dependency_tracking(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< std::map< node_t, cost_t >> &join_cost_graph)
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1682
size_t get_table_cardinality(shared::TableKey const &table_key, Executor const *executor)
#define CHECK_GE(x, y)
Definition: Logger.h:306
const std::vector< const Analyzer::ColumnVar * > & getGeoArgCvs() const
std::vector< JoinCondition > JoinQualsPerNestingLevel
T visit(const Analyzer::Expr *expr) const
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:96
const Analyzer::ColumnVar * get_geo_cv(std::vector< const Analyzer::ColumnVar * > const &geo_args, shared::TableKey const &table_key)
std::tuple< cost_t, cost_t, InnerQualDecision > get_join_qual_cost(const Analyzer::Expr *qual, const std::vector< InputTableInfo > &table_infos, const Executor *executor)
std::vector< node_t > get_node_input_permutation(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< InputTableInfo > &table_infos, const Executor *executor)
static std::unordered_map< SQLTypes, cost_t > GEO_TYPE_COSTS
#define CHECK_NE(x, y)
Definition: Logger.h:302
std::vector< std::map< node_t, cost_t > > build_join_cost_graph(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< InputTableInfo > &table_infos, const Executor *executor, std::vector< std::map< node_t, InnerQualDecision >> &qual_detection_res)
const std::string & getGeoFunctionName() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
const Analyzer::Expr * getArg(const size_t i) const
Definition: Analyzer.h:2748
DEVICE void iota(ARGS &&...args)
Definition: gpu_enabled.h:69
const std::optional< GeoJoinOperandsTableKeyPair > getJoinTableKeyPair() const
bool should_force_table_reordering(shared::TableKey const &inner_arg_key, SQLTypes const inner_type, shared::TableKey const &outer_arg_key, SQLTypes const outer_type, std::string const &geo_func_name, const std::vector< InputTableInfo > &table_infos)
static std::unordered_map< SQLTypes, cost_t > static bool force_table_reordering_st_contain_func(std::string_view target_func_name)
static constexpr std::array< std::string_view, 4 > ST_CONTAIN_FORCE_TABLE_REORDERING_TARGET_FUNC
#define CHECK(condition)
Definition: Logger.h:291
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs(const Analyzer::BinOper *condition, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:1015
static bool is_poly_point_rewrite_target_func(std::string_view target_func_name)
InnerQualDecision
Definition: HashJoin.h:61
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
static bool force_table_reordering_st_intersects_func(std::string_view target_func_name)
std::vector< node_t > traverse_join_cost_graph(const std::vector< std::map< node_t, cost_t >> &join_cost_graph, const std::vector< InputTableInfo > &table_infos, const std::function< bool(const node_t lhs_nest_level, const node_t rhs_nest_level)> &compare_node, const std::function< bool(const TraversalEdge &, const TraversalEdge &)> &compare_edge, const JoinQualsPerNestingLevel &left_deep_join_quals, std::vector< std::map< node_t, InnerQualDecision >> &qual_normalization_res)
AccessManager::Decision Decision
InnerQualDecision inner_qual_decision
Definition: HashJoin.h:77
const std::shared_ptr< Analyzer::Expr > get_own_left_operand() const
Definition: Analyzer.h:457
#define VLOG(n)
Definition: Logger.h:388