OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgDagViewer.h
Go to the documentation of this file.
1 /*
2  * Copyright 2023 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // RelAlgDagViewer: Converts a RelRexDag tree or container into a human-readable plan
18 // string. Use by calling accept() on the RelAlgDagViewer from the root node of the tree,
19 // then outputting the RelAlgDagViewer using operator<<. Unless you have a
20 // std::vector<std::shared_ptr<RelAlgNode>> from the Query Engine, in which case you'll
21 // get better results by calling handleQueryEngineVector() on that vector instead of
22 // accept.
23 
24 #pragma once
25 
26 #include <functional>
27 #include <optional>
28 #include <sstream>
29 
31  public:
33  RelAlgDagViewer(bool showing_steps, bool verbose)
34  : showing_steps_(showing_steps), verbose_(verbose) {}
35 
36  virtual void clear() { *this = RelAlgDagViewer(showing_steps_, verbose_); }
37 
38  virtual std::string str() const {
39  std::string ret{out_.str()};
40  if (!ret.empty()) {
41  ret += '\n';
42  }
43  return ret;
44  }
45 
46  typedef struct {
47  size_t id;
48  bool top_node; // A top node is directly emplaced or directly visited by the user.
49  bool used_node; // A used node is a child of any other node (or is the root node).
50  } NodeInfo;
51  using id_map = std::unordered_map<std::uintptr_t, NodeInfo>;
52 
53  virtual id_map::iterator emplace(RelAlgDagNode const* n) {
54  // Ensures we always emplace RelAlgDagNode* to give a consistent uintptr_t key.
55  auto it = emplaceVoid(n);
56  n->setIdInPlanTree(it->second.id);
57  return it;
58  }
59 
60  // Helper function needed because the Query Engine uses this vector of nodes to store
61  // only some of the most important plan nodes from what is conceptually a DAG tree with
62  // a single root node. The root node is always the last node in the vector.
63  virtual inline void handleQueryEngineVector(
64  std::vector<std::shared_ptr<RelAlgNode>> const nodes);
65 
67 
68  protected:
69  virtual id_map::iterator emplaceVoid(void const* n) {
70  CHECK(n);
71  auto [it, id_is_new] = ids_.emplace(
72  std::uintptr_t(n), NodeInfo{next_id_, top_call_, /*used_node=*/false});
73  if (id_is_new) {
74  ++next_id_;
75  }
76  return it;
77  }
78 
79  void beginNextLine(std::optional<size_t> step = {}) {
80  if (!already_indented_) {
81  if (needs_colon_) {
82  out_ << ":";
83  }
84  if (needs_newline_) {
85  out_ << "\n";
86  } else {
87  needs_newline_ = true;
88  }
89  if (showing_steps_) {
90  if (!step || *step == 0) {
91  out_ << " ";
92  } else {
93  out_ << "STEP " << std::to_string(*step) << ": ";
94  }
95  }
96  out_ << std::string(indent_level_ * indent_spaces_, ' ');
97  } else {
98  already_indented_ = false;
99  }
100  ++line_number_;
101  }
102 
104 
106  BreadthFirstSearch(std::function<void(RelAlgDagNode const*)> callback)
107  : callback(callback) {}
108 
109  virtual void search(RelAlgDagNode const* root) {
110  nodes.clear();
111  root->accept(*this, {});
112  while (!nodes.empty()) {
113  auto temp = std::move(nodes);
114  for (auto& node : temp) {
115  if (node) {
116  node->acceptChildren(*this);
117  }
118  }
119  }
120  }
121 
122  std::vector<RelAlgDagNode const*> nodes;
123 
124  protected:
125  virtual bool visitAny(RelAlgDagNode const* n, std::string s) override {
126  nodes.push_back(n);
127  callback(n);
128  return /*recurse=*/false;
129  }
130  std::function<void(RelAlgDagNode const*)> callback;
131  }; // struct BreadthFirstSearch
132 
134  std::vector<std::pair<std::string, RelAlgDagNode const*>> nodes;
135 
136  protected:
137  virtual bool visitAny(RelAlgDagNode const* n, std::string s) override {
138  nodes.emplace_back(s, n);
139  return /*recurse=*/false;
140  }
141  }; // struct CollectImmediateChildren
142 
144 
145  template <typename T>
146  void visitChild(T child, std::string name) {
147  static_assert(std::is_base_of_v<RelAlgDagNode, std::remove_pointer_t<T>>);
148  auto it = ids_.find(std::uintptr_t(static_cast<RelAlgDagNode const*>(child)));
149  CHECK(it != ids_.end());
150  it->second.used_node = true; // Node is a child node.
151  already_indented_ = true;
152  needs_colon_ = true;
153  child->accept(*this, name);
154  needs_colon_ = false;
155  }
156 
157  template <typename T>
158  bool innerVisit(T t, std::string name, bool recurse = true) {
159  static_assert(std::is_base_of_v<RelAlgDagNode, std::remove_pointer_t<T>>);
160 
161  // Top nodes are directly emplaced by or directly visited by the user and must start
162  // with all the default settings, except that we want to preserve any node ID's that
163  // were assigned during the user's previous calls.
164  bool const top_call{top_call_};
165  if (top_call_) {
166  auto ids = std::move(ids_);
167  auto next_id = std::move(next_id_);
168  auto line_number = std::move(line_number_);
169  clear();
170  ids_ = std::move(ids);
171  next_id_ = std::move(next_id);
172  line_number_ = line_number;
173  }
174 
175  if (!t) {
176  return /*recurse=*/false;
177  }
178  id_map::iterator it = emplace(t); // Needs the original top_call_ setting.
179  CHECK(it != ids_.end());
180 
181  // Output some info about this node.
182  beginNextLine(t->getStepNumber());
183  std::string key_text{!name.empty() ? ("[" + name + "] ") : ""};
184  out_ << "#" << it->second.id << " " << key_text;
185 
187  cfg.skip_input_nodes = true;
188  cfg.attributes_only = true;
189  out_ << t->toString(cfg);
190 
191  // Recurse to child nodes.
192  // Non-top child nodes always will be recursed.
193  // Top child nodes normally won't be recursed because they'll be visited with their
194  // own direct call from the user (or from operator<< for the user). If we're here from
195  // a direct user call (top_call is true), the node will be recursed.
196  if (recurse) {
197  top_call_ = false; // Must turn off top_call_ before recursing to any children.
198  ++indent_level_;
199  CollectImmediateChildren visitor;
200  t->acceptChildren(visitor);
201  for (auto [key, child] : visitor.nodes) {
202  if (child) {
203  if (!verbose_) {
204  // Only show RelScan or RexInput nodes when verbose_ is true.
205  if (dynamic_cast<RelScan const*>(child) ||
206  dynamic_cast<RexInput const*>(child)) {
207  continue;
208  }
209  }
210  beginNextLine(child->getStepNumber());
211  visitChild(child, key);
212  }
213  }
214  --indent_level_;
215  }
216 
217  // Resets top_call_ to clear the out_ buffer if user directly calls visit again.
218  top_call_ = top_call;
219 
220  return /*recurse=*/false;
221  }
222 
224 
225  bool visit(RelAggregate const* n, std::string s) override { return innerVisit(n, s); }
226 
227  bool visit(RelCompound const* n, std::string s) override { return innerVisit(n, s); }
228 
229  bool visit(RelFilter const* n, std::string s) override { return innerVisit(n, s); }
230 
231  bool visit(RelJoin const* n, std::string s) override { return innerVisit(n, s); }
232 
233  bool visit(RelLeftDeepInnerJoin const* n, std::string s) override {
234  return innerVisit(n, s);
235  }
236 
237  bool visit(RelLogicalUnion const* n, std::string s) override {
238  return innerVisit(n, s);
239  }
240 
241  bool visit(RelLogicalValues const* n, std::string s) override {
242  return innerVisit(n, s);
243  }
244 
245  bool visit(RelModify const* n, std::string s) override { return innerVisit(n, s); }
246 
247  bool visit(RelProject const* n, std::string s) override { return innerVisit(n, s); }
248 
249  bool visit(RelScan const* n, std::string s) override { return innerVisit(n, s); }
250 
251  bool visit(RelSort const* n, std::string s) override { return innerVisit(n, s); }
252 
253  bool visit(RelTableFunction const* n, std::string s) override {
254  return innerVisit(n, s);
255  }
256 
257  bool visit(RelTranslatedJoin const* n, std::string s) override {
258  return innerVisit(n, s);
259  }
260 
262 
263  bool visit(RexAbstractInput const* n, std::string s) override {
264  return innerVisit(n, s);
265  }
266 
267  bool visit(RexCase const* n, std::string s) override { return innerVisit(n, s); }
268 
269  bool visit(RexFunctionOperator const* n, std::string s) override {
270  return innerVisit(n, s);
271  }
272 
273  bool visit(RexInput const* n, std::string s) override {
274  // As a special case, don't print out the addtional lines for the source child under a
275  // RexInput node. The information is already shown inline elsewhere in the plan.
276  auto ret = innerVisit(n, s, /*recurse=*/false);
277  auto it =
278  ids_.find(std::uintptr_t(static_cast<RelAlgDagNode const*>(n->getSourceNode())));
279  if (it != ids_.end()) {
280  out_ << ": source #" << it->second.id;
281  }
282  return ret;
283  }
284 
285  bool visit(RexLiteral const* n, std::string s) override { return innerVisit(n, s); }
286 
287  bool visit(RexOperator const* n, std::string s) override { return innerVisit(n, s); }
288 
289  bool visit(RexRef const* n, std::string s) override { return innerVisit(n, s); }
290 
291  bool visit(RexAgg const* n, std::string s) override { return innerVisit(n, s); }
292 
293  bool visit(RexSubQuery const* n, std::string s) override { return innerVisit(n, s); }
294 
295  bool visit(RexWindowFunctionOperator const* n, std::string s) override {
296  return innerVisit(n, s);
297  }
298 
300 
301  std::ostringstream out_;
302  size_t indent_level_{0};
303  size_t line_number_{1};
304  bool showing_steps_{false};
306  size_t next_id_{1};
307  bool top_call_{true};
308  bool already_indented_{false};
309  bool needs_newline_{false};
310  bool needs_colon_{false};
311  bool verbose_{false};
312 
313  static constexpr size_t indent_spaces_{4};
314 }; // class RelAlgDagViewer
315 
317  std::vector<std::shared_ptr<RelAlgNode>> const nodes) {
318  if (nodes.empty()) {
319  return;
320  }
321 
322  // Determines if a steps column needs to be displayed.
323  // TODO(sy): Do this recursively from the root node?
324  for (auto const& node : nodes) {
325  if (node && node->getStepNumber() > 1) {
326  showing_steps_ = true;
327  break;
328  }
329  }
330 
331  // Ensure that the root node is always node #1.
332  emplace(nodes.back().get()); // The last node in the vector is the root node.
333 
334  // Breadth-first search starting at the root node to assign node ID's to all used nodes,
335  // in order of importance. BFS is done instead of recursing which would give a
336  // depth-first search and a less helpful ordering of node ID's.
337  top_call_ = false;
338  BreadthFirstSearch bfs([this](RelAlgDagNode const* n) {
339  auto it = emplace(n);
340  CHECK(it != ids_.end());
341  it->second.used_node = true;
342  });
343  bfs.search(nodes.back().get());
344  top_call_ = true;
345 
346  // Visit all nodes, building the text output.
347  nodes.back()->accept(*this, {});
348 }
349 
350 inline std::ostream& operator<<(std::ostream& os, RelAlgDagViewer const& dagv) {
351  os << dagv.str();
352  return os;
353 }
std::unordered_map< std::uintptr_t, NodeInfo > id_map
bool visit(RelLogicalValues const *n, std::string s) override
virtual bool visitAny(RelAlgDagNode const *n, std::string s) override
virtual id_map::iterator emplace(RelAlgDagNode const *n)
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
RelAlgDagViewer(bool showing_steps, bool verbose)
bool visit(RelLogicalUnion const *n, std::string s) override
bool visit(RelModify const *n, std::string s) override
tuple root
Definition: setup.in.py:14
bool visit(RelJoin const *n, std::string s) override
bool visit(RexSubQuery const *n, std::string s) override
virtual void handleQueryEngineVector(std::vector< std::shared_ptr< RelAlgNode >> const nodes)
bool visit(RexCase const *n, std::string s) override
bool visit(RelProject const *n, std::string s) override
std::string to_string(char const *&&v)
bool visit(RelCompound const *n, std::string s) override
static constexpr size_t indent_spaces_
bool visit(RelTableFunction const *n, std::string s) override
virtual id_map::iterator emplaceVoid(void const *n)
bool visit(RelScan const *n, std::string s) override
bool visit(RexInput const *n, std::string s) override
std::ostringstream out_
bool visit(RexWindowFunctionOperator const *n, std::string s) override
bool visit(RelLeftDeepInnerJoin const *n, std::string s) override
std::function< void(RelAlgDagNode const *)> callback
bool visit(RexLiteral const *n, std::string s) override
std::vector< std::pair< std::string, RelAlgDagNode const * > > nodes
virtual void clear()
virtual void accept(Visitor &v, std::string name) const =0
const RelAlgNode * getSourceNode() const
Definition: RelAlgDag.h:1056
bool innerVisit(T t, std::string name, bool recurse=true)
virtual bool visitAny(RelAlgDagNode const *n, std::string s) override
bool visit(RexFunctionOperator const *n, std::string s) override
void setIdInPlanTree(size_t id) const
Definition: RelAlgDag.h:135
bool visit(RelFilter const *n, std::string s) override
#define CHECK(condition)
Definition: Logger.h:291
bool visit(RelTranslatedJoin const *n, std::string s) override
bool visit(RexRef const *n, std::string s) override
string name
Definition: setup.in.py:72
constexpr double n
Definition: Utm.h:38
void visitChild(T child, std::string name)
bool visit(RelAggregate const *n, std::string s) override
bool visit(RexAgg const *n, std::string s) override
bool visit(RexAbstractInput const *n, std::string s) override
bool visit(RexOperator const *n, std::string s) override
void beginNextLine(std::optional< size_t > step={})
virtual std::string str() const
std::vector< RelAlgDagNode const * > nodes
bool visit(RelSort const *n, std::string s) override