OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
anonymous_namespace{FromTableReordering.cpp} Namespace Reference

Classes

class  SchedulingDependencyTracking
 
struct  TraversalEdge
 

Typedefs

using cost_t = unsigned
 
using node_t = size_t
 

Functions

static std::unordered_map
< SQLTypes, cost_t > std::pair
< cost_t, cost_t
get_join_qual_cost (const Analyzer::Expr *qual, const Executor *executor)
 
std::vector< std::map< node_t,
cost_t > > 
build_join_cost_graph (const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< InputTableInfo > &table_infos, const Executor *executor)
 
SchedulingDependencyTracking build_dependency_tracking (const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< std::map< node_t, cost_t >> &join_cost_graph)
 
std::vector< node_ttraverse_join_cost_graph (const std::vector< std::map< node_t, cost_t >> &join_cost_graph, const std::vector< InputTableInfo > &table_infos, const std::function< bool(const node_t lhs_nest_level, const node_t rhs_nest_level)> &compare_node, const std::function< bool(const TraversalEdge &, const TraversalEdge &)> &compare_edge, const JoinQualsPerNestingLevel &left_deep_join_quals)
 

Variables

static std::unordered_map
< SQLTypes, cost_t
GEO_TYPE_COSTS {kPOINT, 60}
 

Typedef Documentation

using anonymous_namespace{FromTableReordering.cpp}::cost_t = typedef unsigned

Definition at line 28 of file FromTableReordering.cpp.

using anonymous_namespace{FromTableReordering.cpp}::node_t = typedef size_t

Definition at line 29 of file FromTableReordering.cpp.

Function Documentation

SchedulingDependencyTracking anonymous_namespace{FromTableReordering.cpp}::build_dependency_tracking ( const JoinQualsPerNestingLevel left_deep_join_quals,
const std::vector< std::map< node_t, cost_t >> &  join_cost_graph 
)

Definition at line 148 of file FromTableReordering.cpp.

References anonymous_namespace{FromTableReordering.cpp}::SchedulingDependencyTracking::addEdge(), LEFT, and run_benchmark_import::type.

Referenced by traverse_join_cost_graph().

150  {
151  SchedulingDependencyTracking dependency_tracking(left_deep_join_quals.size() + 1);
152  // Add directed graph edges for left join dependencies.
153  // See also start_it inside traverse_join_cost_graph(). These
154  // edges prevent start_it from pointing to a table with a
155  // left join dependency on another table.
156  for (size_t level_idx = 0; level_idx < left_deep_join_quals.size(); ++level_idx) {
157  if (left_deep_join_quals[level_idx].type == JoinType::LEFT) {
158  dependency_tracking.addEdge(level_idx, level_idx + 1);
159  }
160  }
161  return dependency_tracking;
162 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<std::map<node_t, cost_t> > anonymous_namespace{FromTableReordering.cpp}::build_join_cost_graph ( const JoinQualsPerNestingLevel left_deep_join_quals,
const std::vector< InputTableInfo > &  table_infos,
const Executor executor 
)

Definition at line 77 of file FromTableReordering.cpp.

References CHECK_EQ, CHECK_GE, get_join_qual_cost(), and ScalarExprVisitor< T >::visit().

Referenced by get_node_input_permutation().

80  {
81  CHECK_EQ(left_deep_join_quals.size() + 1, table_infos.size());
82  std::vector<std::map<node_t, cost_t>> join_cost_graph(table_infos.size());
84  // Build the constraints graph: nodes are nest levels, edges are the existence of
85  // qualifiers between levels.
86  for (const auto& current_level_join_conditions : left_deep_join_quals) {
87  for (const auto& qual : current_level_join_conditions.quals) {
88  std::set<int> qual_nest_levels = visitor.visit(qual.get());
89  if (qual_nest_levels.size() != 2) {
90  continue;
91  }
92  int lhs_nest_level = *qual_nest_levels.begin();
93  CHECK_GE(lhs_nest_level, 0);
94  qual_nest_levels.erase(qual_nest_levels.begin());
95  int rhs_nest_level = *qual_nest_levels.begin();
96  CHECK_GE(rhs_nest_level, 0);
97 
98  // Get the {lhs, rhs} cost for the qual
99  const auto cost_pair = get_join_qual_cost(qual.get(), executor);
100  const auto edge_it = join_cost_graph[lhs_nest_level].find(rhs_nest_level);
101  if (edge_it == join_cost_graph[lhs_nest_level].end() ||
102  edge_it->second > cost_pair.second) {
103  join_cost_graph[lhs_nest_level][rhs_nest_level] = cost_pair.second;
104  join_cost_graph[rhs_nest_level][lhs_nest_level] = cost_pair.first;
105  }
106  }
107  }
108  return join_cost_graph;
109 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
#define CHECK_GE(x, y)
Definition: Logger.h:216
T visit(const Analyzer::Expr *expr) const
static std::unordered_map< SQLTypes, cost_t > std::pair< cost_t, cost_t > get_join_qual_cost(const Analyzer::Expr *qual, const Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static std::unordered_map<SQLTypes, cost_t> std::pair<cost_t, cost_t> anonymous_namespace{FromTableReordering.cpp}::get_join_qual_cost ( const Analyzer::Expr qual,
const Executor executor 
)

Definition at line 38 of file FromTableReordering.cpp.

References GEO_TYPE_COSTS, Analyzer::Expr::get_type_info(), Analyzer::FunctionOper::getArg(), i, is_constructed_point(), IS_EQUIVALENCE, and normalize_column_pairs().

Referenced by build_join_cost_graph().

39  {
40  const auto func_oper = dynamic_cast<const Analyzer::FunctionOper*>(qual);
41  if (func_oper) {
42  std::vector<SQLTypes> geo_types_for_func;
43  for (size_t i = 0; i < func_oper->getArity(); i++) {
44  const auto arg_expr = func_oper->getArg(i);
45  const auto& ti = arg_expr->get_type_info();
46  if (ti.is_geometry() || is_constructed_point(arg_expr)) {
47  geo_types_for_func.push_back(ti.get_type());
48  }
49  }
50  std::regex geo_func_regex("ST_[\\w]*");
51  std::smatch geo_func_match;
52  const auto& func_name = func_oper->getName();
53  if (geo_types_for_func.size() == 2 &&
54  std::regex_match(func_name, geo_func_match, geo_func_regex)) {
55  const auto rhs_cost = GEO_TYPE_COSTS[geo_types_for_func[0]];
56  const auto lhs_cost = GEO_TYPE_COSTS[geo_types_for_func[1]];
57  return {lhs_cost, rhs_cost};
58  }
59  return {200, 200};
60  }
61  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(qual);
62  if (!bin_oper || !IS_EQUIVALENCE(bin_oper->get_optype())) {
63  return {200, 200};
64  }
65  if (executor) {
66  try {
68  bin_oper, *executor->getCatalog(), executor->getTemporaryTables());
69  } catch (...) {
70  return {200, 200};
71  }
72  }
73  return {100, 100};
74 }
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:67
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1180
static std::unordered_map< SQLTypes, cost_t > GEO_TYPE_COSTS
std::vector< InnerOuter > normalize_column_pairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:705
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
const Analyzer::Expr * getArg(const size_t i) const
Definition: Analyzer.h:1362

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<node_t> anonymous_namespace{FromTableReordering.cpp}::traverse_join_cost_graph ( const std::vector< std::map< node_t, cost_t >> &  join_cost_graph,
const std::vector< InputTableInfo > &  table_infos,
const std::function< bool(const node_t lhs_nest_level, const node_t rhs_nest_level)> &  compare_node,
const std::function< bool(const TraversalEdge &, const TraversalEdge &)> &  compare_edge,
const JoinQualsPerNestingLevel left_deep_join_quals 
)

Definition at line 167 of file FromTableReordering.cpp.

References build_dependency_tracking(), CHECK, CHECK_EQ, gpu_enabled::iota(), anonymous_namespace{FromTableReordering.cpp}::TraversalEdge::join_cost, anonymous_namespace{FromTableReordering.cpp}::TraversalEdge::nest_level, and VLOG.

Referenced by get_node_input_permutation().

173  {
174  std::vector<node_t> all_nest_levels(table_infos.size());
175  std::iota(all_nest_levels.begin(), all_nest_levels.end(), 0);
176  std::vector<node_t> input_permutation;
177  std::unordered_set<node_t> visited;
178  auto dependency_tracking =
179  build_dependency_tracking(left_deep_join_quals, join_cost_graph);
180  auto schedulable_node = [&dependency_tracking, &visited](const node_t node) {
181  const auto nodes_ready = dependency_tracking.getRoots();
182  return nodes_ready.find(node) != nodes_ready.end() &&
183  visited.find(node) == visited.end();
184  };
185  while (visited.size() < table_infos.size()) {
186  // Filter out nest levels which are already visited or have pending dependencies.
187  std::vector<node_t> remaining_nest_levels;
188  std::copy_if(all_nest_levels.begin(),
189  all_nest_levels.end(),
190  std::back_inserter(remaining_nest_levels),
191  schedulable_node);
192  CHECK(!remaining_nest_levels.empty());
193  // Start with the table with most tuples.
194  const auto start_it = std::max_element(
195  remaining_nest_levels.begin(), remaining_nest_levels.end(), compare_node);
196  CHECK(start_it != remaining_nest_levels.end());
197  std::priority_queue<TraversalEdge, std::vector<TraversalEdge>, decltype(compare_edge)>
198  worklist(compare_edge);
199  // look at all edges, compare the
200  // cost of our edge vs theirs, and pick the best start edge
201  node_t start = *start_it;
202  TraversalEdge start_edge{start, 0};
203  VLOG(2) << "Table reordering starting with nest level " << start;
204  for (const auto& graph_edge : join_cost_graph[*start_it]) {
205  const node_t succ = graph_edge.first;
206  if (!schedulable_node(succ)) {
207  continue;
208  }
209  const TraversalEdge succ_edge{succ, graph_edge.second};
210  for (const auto& successor_edge : join_cost_graph[succ]) {
211  if (successor_edge.first == start) {
212  start_edge.join_cost = successor_edge.second;
213  // lhs cost / num tuples less than rhs cost if compare edge is true, swap nest
214  // levels
215  if (compare_edge(start_edge, succ_edge)) {
216  VLOG(2) << "Table reordering changing start nest level from " << start
217  << " to " << succ;
218  start = succ;
219  start_edge = succ_edge;
220  }
221  }
222  }
223  }
224  VLOG(2) << "Table reordering picked start nest level " << start << " with cost "
225  << start_edge.join_cost;
226  CHECK_EQ(start, start_edge.nest_level);
227  worklist.push(start_edge);
228  const auto it_ok = visited.insert(start);
229  CHECK(it_ok.second);
230  while (!worklist.empty()) {
231  // Extract a node and add it to the permutation.
232  TraversalEdge crt = worklist.top();
233  worklist.pop();
234  input_permutation.push_back(crt.nest_level);
235  dependency_tracking.removeNode(crt.nest_level);
236  // Add successors which are ready and not visited yet to the queue.
237  for (const auto& graph_edge : join_cost_graph[crt.nest_level]) {
238  const node_t succ = graph_edge.first;
239  if (!schedulable_node(succ)) {
240  continue;
241  }
242  worklist.push(TraversalEdge{succ, graph_edge.second});
243  const auto it_ok = visited.insert(succ);
244  CHECK(it_ok.second);
245  }
246  }
247  }
248  return input_permutation;
249 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
SchedulingDependencyTracking build_dependency_tracking(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< std::map< node_t, cost_t >> &join_cost_graph)
DEVICE void iota(ARGS &&...args)
Definition: gpu_enabled.h:69
#define CHECK(condition)
Definition: Logger.h:203
#define VLOG(n)
Definition: Logger.h:297

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Variable Documentation

std::unordered_map<SQLTypes, cost_t> anonymous_namespace{FromTableReordering.cpp}::GEO_TYPE_COSTS {kPOINT, 60}
static

Definition at line 31 of file FromTableReordering.cpp.

Referenced by get_join_qual_cost().