OmniSciDB  c07336695a
RelAlgAbstractInterpreter.cpp
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgAbstractInterpreter.h"
18 #include "../Shared/sqldefs.h"
19 #include "CalciteDeserializerUtils.h"
20 #include "JsonAccessors.h"
21 #include "RelAlgExecutor.h"
22 #include "RelAlgOptimizer.h"
23 #include "RelLeftDeepInnerJoin.h"
25 #include "RexVisitor.h"
26 
27 #include <rapidjson/stringbuffer.h>
28 #include <rapidjson/writer.h>
29 
30 #include <string>
31 #include <unordered_set>
32 
33 namespace {
34 
35 const unsigned FIRST_RA_NODE_ID = 1;
36 
37 } // namespace
38 
39 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
40 
41 void RelAlgNode::resetRelAlgFirstId() noexcept {
42   crt_id_ = FIRST_RA_NODE_ID;
43 }
44 
45 void RexSubQuery::setExecutionResult(
46     const std::shared_ptr<const ExecutionResult> result) {
47  auto row_set = result->getRows();
48  CHECK(row_set);
49  CHECK_EQ(size_t(1), row_set->colCount());
50  *(type_.get()) = row_set->getColType(0);
51  (*(result_.get())) = result;
52 }
53 
54 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
55  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
56 }
57 
58 namespace {
59 
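// Rewrites every RexInput which points at old_input so that it points at new_input
// instead; when the new input is a RelLeftDeepInnerJoin, the rebinding is delegated to
// rebind_inputs_from_left_deep_join so the index lands on the correct side of the join.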
60 class RexRebindInputsVisitor : public RexVisitor<void*> {
61  public:
62  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
63  : old_input_(old_input), new_input_(new_input) {}
64 
65  void* visitInput(const RexInput* rex_input) const override {
66  const auto old_source = rex_input->getSourceNode();
67  if (old_source == old_input_) {
68  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
69  if (left_deep_join) {
70  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
71  return nullptr;
72  }
73  rex_input->setSourceNode(new_input_);
74  }
75  return nullptr;
76  };
77 
78  private:
79  const RelAlgNode* old_input_;
80   const RelAlgNode* new_input_;
81 };
82 
83 // Creates an output with n columns.
84 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
85  std::vector<RexInput> outputs;
86  for (size_t i = 0; i < n; ++i) {
87  outputs.emplace_back(node, i);
88  }
89  return outputs;
90 }
91 
92 } // namespace
93 
94 void RelProject::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
95  std::shared_ptr<const RelAlgNode> input) {
96  RelAlgNode::replaceInput(old_input, input);
97  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
98  for (const auto& scalar_expr : scalar_exprs_) {
99  rebind_inputs.visit(scalar_expr.get());
100  }
101 }
102 
103 void RelProject::appendInput(std::string new_field_name,
104  std::unique_ptr<const RexScalar> new_input) {
105  fields_.emplace_back(std::move(new_field_name));
106  scalar_exprs_.emplace_back(std::move(new_input));
107 }
108 
109 RANodeOutput get_node_output(const RelAlgNode* ra_node) {
110   RANodeOutput outputs;
111  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
112  if (scan_node) {
113  // Scan node has no inputs, output contains all columns in the table.
114  CHECK_EQ(size_t(0), scan_node->inputCount());
115  return n_outputs(scan_node, scan_node->size());
116  }
117  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
118  if (project_node) {
119  // Project output count doesn't depend on the input
120  CHECK_EQ(size_t(1), project_node->inputCount());
121  return n_outputs(project_node, project_node->size());
122  }
123  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
124  if (filter_node) {
125  // Filter preserves shape
126  CHECK_EQ(size_t(1), filter_node->inputCount());
127  const auto prev_out = get_node_output(filter_node->getInput(0));
128  return n_outputs(filter_node, prev_out.size());
129  }
130  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
131  if (aggregate_node) {
132  // Aggregate output count doesn't depend on the input
133  CHECK_EQ(size_t(1), aggregate_node->inputCount());
134  return n_outputs(aggregate_node, aggregate_node->size());
135  }
136  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
137  if (compound_node) {
138  // Compound output count doesn't depend on the input
139  CHECK_EQ(size_t(1), compound_node->inputCount());
140  return n_outputs(compound_node, compound_node->size());
141  }
142  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
143  if (join_node) {
144  // Join concatenates the outputs from the inputs and the output
145  // directly references the nodes in the input.
146  CHECK_EQ(size_t(2), join_node->inputCount());
147  auto lhs_out =
148  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
149  const auto rhs_out =
150  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
151  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
152  return lhs_out;
153  }
154  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
155  if (sort_node) {
156  // Sort preserves shape
157  CHECK_EQ(size_t(1), sort_node->inputCount());
158  const auto prev_out = get_node_output(sort_node->getInput(0));
159  return n_outputs(sort_node, prev_out.size());
160  }
161  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
162  if (logical_values_node) {
163  CHECK_EQ(size_t(0), logical_values_node->inputCount());
164  return n_outputs(logical_values_node, logical_values_node->size());
165  }
166  CHECK(false);
167  return outputs;
168 }
169 
170 bool RelProject::isIdentity() const {
171   if (!isSimple()) {
172  return false;
173  }
174  CHECK_EQ(size_t(1), inputCount());
175  const auto source = getInput(0);
176  if (dynamic_cast<const RelJoin*>(source)) {
177  return false;
178  }
179  const auto source_shape = get_node_output(source);
180  if (source_shape.size() != scalar_exprs_.size()) {
181  return false;
182  }
183  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
184  const auto& scalar_expr = scalar_exprs_[i];
185  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
186  CHECK(input);
187  CHECK_EQ(source, input->getSourceNode());
188  // We should add the additional check that input->getIndex() !=
189  // source_shape[i].getIndex(), but Calcite doesn't generate the right
190  // Sort-Project-Sort sequence when joins are involved.
191  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
192  return false;
193  }
194  }
195  return true;
196 }
197 
198 namespace {
199 
200 bool isRenamedInput(const RelAlgNode* node,
201  const size_t index,
202  const std::string& new_name) {
203  CHECK_LT(index, node->size());
204  if (auto join = dynamic_cast<const RelJoin*>(node)) {
205  CHECK_EQ(size_t(2), join->inputCount());
206  const auto lhs_size = join->getInput(0)->size();
207  if (index < lhs_size) {
208  return isRenamedInput(join->getInput(0), index, new_name);
209  }
210  CHECK_GE(index, lhs_size);
211  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
212  }
213 
214  if (auto scan = dynamic_cast<const RelScan*>(node)) {
215  return new_name != scan->getFieldName(index);
216  }
217 
218  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
219  return new_name != aggregate->getFieldName(index);
220  }
221 
222  if (auto project = dynamic_cast<const RelProject*>(node)) {
223  return new_name != project->getFieldName(index);
224  }
225 
226  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
227  const auto& tuple_type = logical_values->getTupleType();
228  CHECK_LT(index, tuple_type.size());
229  return new_name != tuple_type[index].get_resname();
230  }
231 
232  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
233  return isRenamedInput(node->getInput(0), index, new_name);
234 }
235 
236 } // namespace
237 
238 bool RelProject::isRenaming() const {
239   if (!isSimple()) {
240  return false;
241  }
242  CHECK_EQ(scalar_exprs_.size(), fields_.size());
243  for (size_t i = 0; i < fields_.size(); ++i) {
244  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
245  CHECK(rex_in);
246  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
247  return true;
248  }
249  }
250  return false;
251 }
252 
253 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
254  std::shared_ptr<const RelAlgNode> input) {
255  RelAlgNode::replaceInput(old_input, input);
256  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
257  if (condition_) {
258  rebind_inputs.visit(condition_.get());
259  }
260 }
261 
262 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
263  std::shared_ptr<const RelAlgNode> input) {
264  RelAlgNode::replaceInput(old_input, input);
265  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
266  rebind_inputs.visit(filter_.get());
267 }
268 
269 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
270  std::shared_ptr<const RelAlgNode> input) {
271  RelAlgNode::replaceInput(old_input, input);
272  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
273  for (const auto& scalar_source : scalar_sources_) {
274  rebind_inputs.visit(scalar_source.get());
275  }
276  if (filter_expr_) {
277  rebind_inputs.visit(filter_expr_.get());
278  }
279 }
280 
281 std::shared_ptr<RelAlgNode> RelProject::deepCopy() const {
282  RexDeepCopyVisitor copier;
283  std::vector<std::unique_ptr<const RexScalar>> exprs_copy;
284  for (auto& expr : scalar_exprs_) {
285  exprs_copy.push_back(copier.visit(expr.get()));
286  }
287  return std::make_shared<RelProject>(exprs_copy, fields_, inputs_[0]);
288 }
289 
290 std::shared_ptr<RelAlgNode> RelFilter::deepCopy() const {
291  RexDeepCopyVisitor copier;
292  auto filter_copy = copier.visit(filter_.get());
293  return std::make_shared<RelFilter>(filter_copy, inputs_[0]);
294 }
295 
296 std::shared_ptr<RelAlgNode> RelAggregate::deepCopy() const {
297  std::vector<std::unique_ptr<const RexAgg>> aggs_copy;
298  for (auto& agg : agg_exprs_) {
299  auto copy = agg->deepCopy();
300  aggs_copy.push_back(std::move(copy));
301  }
302  return std::make_shared<RelAggregate>(groupby_count_, aggs_copy, fields_, inputs_[0]);
303 }
304 
305 std::shared_ptr<RelAlgNode> RelJoin::deepCopy() const {
306  RexDeepCopyVisitor copier;
307  auto condition_copy = copier.visit(condition_.get());
308  return std::make_shared<RelJoin>(inputs_[0], inputs_[1], condition_copy, join_type_);
309 }
310 
311 std::shared_ptr<RelAlgNode> RelCompound::deepCopy() const {
312  RexDeepCopyVisitor copier;
313  auto filter_copy = filter_expr_ ? copier.visit(filter_expr_.get()) : nullptr;
314  std::unordered_map<const Rex*, const Rex*> old_to_new_target;
315  std::vector<const RexAgg*> aggs_copy;
316  for (auto& agg : agg_exprs_) {
317  auto copy = agg->deepCopy();
318  old_to_new_target.insert(std::make_pair(agg.get(), copy.get()));
319  aggs_copy.push_back(copy.release());
320  }
321  std::vector<std::unique_ptr<const RexScalar>> sources_copy;
322  for (size_t i = 0; i < scalar_sources_.size(); ++i) {
323  auto copy = copier.visit(scalar_sources_[i].get());
324  old_to_new_target.insert(std::make_pair(scalar_sources_[i].get(), copy.get()));
325  sources_copy.push_back(std::move(copy));
326  }
327  std::vector<const Rex*> target_exprs_copy;
328  for (auto target : target_exprs_) {
329  auto target_it = old_to_new_target.find(target);
330  CHECK(target_it != old_to_new_target.end());
331  target_exprs_copy.push_back(target_it->second);
332  }
333  auto new_compound = std::make_shared<RelCompound>(filter_copy,
334  target_exprs_copy,
335  groupby_count_,
336  aggs_copy,
337  fields_,
338  sources_copy,
339  is_agg_);
340  new_compound->addManagedInput(inputs_[0]);
341  return new_compound;
342 }
343 
344 std::shared_ptr<RelAlgNode> RelSort::deepCopy() const {
345  return std::make_shared<RelSort>(collation_, limit_, offset_, inputs_[0]);
346 }
347 
348 namespace std {
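// Hash support for (relational algebra node, column index) pairs: XORs the pointer bits
// with the column index so such pairs can be used as keys in unordered containers.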
349 template <>
350 struct hash<std::pair<const RelAlgNode*, int>> {
351  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
352  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
353  return static_cast<int64_t>(*ptr_val) ^ input_col.second;
354  }
355 };
356 } // namespace std
357 
358 namespace {
359 
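// Walks up the single-input chain starting at the given column of the given node and
// collects every (node, column) pair guaranteed to carry the same values, stopping at
// scans, joins, non-trivial projected expressions and aggregate expressions.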
360 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
361  const size_t which_col) {
362  std::set<std::pair<const RelAlgNode*, int>> work_set;
363  auto walker = node;
364  auto curr_col = which_col;
365  while (true) {
366  work_set.insert(std::make_pair(walker, curr_col));
367  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
368  break;
369  }
370  CHECK_EQ(size_t(1), walker->inputCount());
371  auto only_source = walker->getInput(0);
372  if (auto project = dynamic_cast<const RelProject*>(walker)) {
373  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
374  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
375  if (join_source) {
376  CHECK_EQ(size_t(2), join_source->inputCount());
377  auto lhs = join_source->getInput(0);
378  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
379  join_source->getInput(1) == input->getSourceNode());
380  } else {
381  CHECK_EQ(input->getSourceNode(), only_source);
382  }
383  curr_col = input->getIndex();
384  } else {
385  break;
386  }
387  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
388  if (curr_col >= aggregate->getGroupByCount()) {
389  break;
390  }
391  }
392  walker = only_source;
393  }
394  return work_set;
395 }
396 
397 } // namespace
398 
399 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
400  if (collation_.size() != that.collation_.size()) {
401  return false;
402  }
403 
404  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
405  auto this_sort_key = collation_[i];
406  auto that_sort_key = that.collation_[i];
407  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
408  return false;
409  }
410  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
411  return false;
412  }
413  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
414  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
415  std::vector<std::pair<const RelAlgNode*, int>> intersect;
416  std::set_intersection(this_equiv_keys.begin(),
417  this_equiv_keys.end(),
418  that_equiv_keys.begin(),
419  that_equiv_keys.end(),
420  std::back_inserter(intersect));
421  if (intersect.empty()) {
422  return false;
423  }
424  }
425  return true;
426 }
427 
428 namespace {
429 
430 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
431  const auto& id = field(ra_node, "id");
432  return std::stoi(json_str(id));
433 }
434 
435 // The parse_* functions below de-serialize expressions as they come from Calcite.
436 // RelAlgAbstractInterpreter will take care of making the representation easy to
437 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
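// As a purely illustrative sketch (hand-written here, not captured from a real Calcite
// plan), an input reference and a literal arrive as JSON objects shaped roughly like
//   {"input": 2}
//   {"literal": 7, "type": "DECIMAL", "target_type": "INTEGER",
//    "scale": 0, "precision": 1, "type_scale": 0, "type_precision": 10}
// which parse_abstract_input and parse_literal below turn into a RexAbstractInput and a
// RexLiteral, respectively.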
438 
439 std::unique_ptr<RexAbstractInput> parse_abstract_input(
440  const rapidjson::Value& expr) noexcept {
441  const auto& input = field(expr, "input");
442  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
443 }
444 
445 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
446  CHECK(expr.IsObject());
447  const auto& literal = field(expr, "literal");
448  const auto type = to_sql_type(json_str(field(expr, "type")));
449  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
450  const auto scale = json_i64(field(expr, "scale"));
451  const auto precision = json_i64(field(expr, "precision"));
452  const auto type_scale = json_i64(field(expr, "type_scale"));
453  const auto type_precision = json_i64(field(expr, "type_precision"));
454  switch (type) {
455  case kDECIMAL:
456  case kINTERVAL_DAY_TIME:
457     case kINTERVAL_YEAR_MONTH:
458     case kTIME:
459  case kTIMESTAMP:
460  case kDATE:
461  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
462  type,
463  target_type,
464  scale,
465  precision,
466  type_scale,
467  type_precision));
468  case kDOUBLE: {
469  if (literal.IsDouble()) {
470  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
471  type,
472  target_type,
473  scale,
474  precision,
475  type_scale,
476  type_precision));
477  }
478  CHECK(literal.IsInt64());
479  return std::unique_ptr<RexLiteral>(
480  new RexLiteral(static_cast<double>(json_i64(literal)),
481  type,
482  target_type,
483  scale,
484  precision,
485  type_scale,
486  type_precision));
487  }
488  case kTEXT:
489  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
490  type,
491  target_type,
492  scale,
493  precision,
494  type_scale,
495  type_precision));
496  case kBOOLEAN:
497  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
498  type,
499  target_type,
500  scale,
501  precision,
502  type_scale,
503  type_precision));
504  case kNULLT:
505  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
506  default:
507  CHECK(false);
508  }
509  CHECK(false);
510  return nullptr;
511 }
512 
513 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
514  const Catalog_Namespace::Catalog& cat,
515  RelAlgExecutor* ra_executor);
516 
517 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
518  const Catalog_Namespace::Catalog& cat,
519  RelAlgExecutor* ra_executor);
520 
521 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
522  CHECK(type_obj.IsObject() &&
523  (type_obj.MemberCount() >= 2 && type_obj.MemberCount() <= 4));
524  const auto type = to_sql_type(json_str(field(type_obj, "type")));
525  const auto nullable = json_bool(field(type_obj, "nullable"));
526  const auto precision_it = type_obj.FindMember("precision");
527  const int precision =
528  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
529  const auto scale_it = type_obj.FindMember("scale");
530  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
531  SQLTypeInfo ti(type, !nullable);
532  ti.set_precision(precision);
533  ti.set_scale(scale);
534  return ti;
535 }
536 
537 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
538  const rapidjson::Value& arr,
539  const Catalog_Namespace::Catalog& cat,
540  RelAlgExecutor* ra_executor) {
541  std::vector<std::unique_ptr<const RexScalar>> exprs;
542  for (auto it = arr.Begin(); it != arr.End(); ++it) {
543  exprs.emplace_back(parse_scalar_expr(*it, cat, ra_executor));
544  }
545  return exprs;
546 }
547 
548 SqlWindowFunctionKind parse_window_function_kind(const std::string& name) {
549  if (name == "ROW_NUMBER") {
550    return SqlWindowFunctionKind::ROW_NUMBER;
551  }
552  if (name == "RANK") {
553    return SqlWindowFunctionKind::RANK;
554  }
555  if (name == "DENSE_RANK") {
556    return SqlWindowFunctionKind::DENSE_RANK;
557  }
558  if (name == "PERCENT_RANK") {
559    return SqlWindowFunctionKind::PERCENT_RANK;
560  }
561  if (name == "CUME_DIST") {
562    return SqlWindowFunctionKind::CUME_DIST;
563  }
564  if (name == "NTILE") {
565    return SqlWindowFunctionKind::NTILE;
566  }
567  if (name == "LAG") {
568    return SqlWindowFunctionKind::LAG;
569  }
570  if (name == "LEAD") {
571    return SqlWindowFunctionKind::LEAD;
572  }
573  if (name == "FIRST_VALUE") {
574    return SqlWindowFunctionKind::FIRST_VALUE;
575  }
576  if (name == "LAST_VALUE") {
577    return SqlWindowFunctionKind::LAST_VALUE;
578  }
579  if (name == "AVG") {
580    return SqlWindowFunctionKind::AVG;
581  }
582  if (name == "MIN") {
583    return SqlWindowFunctionKind::MIN;
584  }
585  if (name == "MAX") {
586    return SqlWindowFunctionKind::MAX;
587  }
588  if (name == "SUM") {
589    return SqlWindowFunctionKind::SUM;
590  }
591  if (name == "COUNT") {
592    return SqlWindowFunctionKind::COUNT;
593  }
594  if (name == "$SUM0") {
595    return SqlWindowFunctionKind::SUM_INTERNAL;
596  }
597  throw std::runtime_error("Unsupported window function: " + name);
598 }
599 
600 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
601  const rapidjson::Value& arr,
602  const Catalog_Namespace::Catalog& cat,
603  RelAlgExecutor* ra_executor) {
604  std::vector<std::unique_ptr<const RexScalar>> exprs;
605  for (auto it = arr.Begin(); it != arr.End(); ++it) {
606  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), cat, ra_executor));
607  }
608  return exprs;
609 }
610 
611 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
612  return json_str(field(collation, "direction")) == std::string("DESCENDING")
613              ? SortDirection::Descending
614              : SortDirection::Ascending;
615 }
616 
617 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
618  return json_str(field(collation, "nulls")) == std::string("FIRST")
619              ? NullSortedPosition::First
620              : NullSortedPosition::Last;
621 }
622 
623 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
624  const Catalog_Namespace::Catalog& cat,
625  RelAlgExecutor* ra_executor) {
626  std::vector<SortField> collation;
627  size_t field_idx = 0;
628  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
629  const auto sort_dir = parse_sort_direction(*it);
630  const auto null_pos = parse_nulls_position(*it);
631  collation.emplace_back(field_idx, sort_dir, null_pos);
632  }
633  return collation;
634 }
635 
636 RexWindowFunctionOperator::RexWindowBound parse_window_bound(
637     const rapidjson::Value& window_bound_obj,
638  const Catalog_Namespace::Catalog& cat,
639  RelAlgExecutor* ra_executor) {
640  CHECK(window_bound_obj.IsObject());
641   RexWindowFunctionOperator::RexWindowBound window_bound;
642   window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
643  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
644  window_bound.following = json_bool(field(window_bound_obj, "following"));
645  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
646  const auto& offset_field = field(window_bound_obj, "offset");
647  if (offset_field.IsObject()) {
648  window_bound.offset = parse_scalar_expr(offset_field, cat, ra_executor);
649  } else {
650  CHECK(offset_field.IsNull());
651  }
652  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
653  return window_bound;
654 }
655 
656 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
657  const Catalog_Namespace::Catalog& cat,
658  RelAlgExecutor* ra_executor) {
659  const auto op_name = json_str(field(expr, "op"));
660  const bool is_quantifier =
661  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
662  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
663  const auto& operators_json_arr = field(expr, "operands");
664  CHECK(operators_json_arr.IsArray());
665  auto operands = parse_expr_array(operators_json_arr, cat, ra_executor);
666  const auto type_it = expr.FindMember("type");
667  CHECK(type_it != expr.MemberEnd());
668  auto ti = parse_type(type_it->value);
669  if (op == kIN && expr.HasMember("subquery")) {
670  auto subquery = parse_subquery(expr, cat, ra_executor);
671  operands.emplace_back(std::move(subquery));
672  }
673  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
674  const auto& partition_keys_arr = field(expr, "partition_keys");
675  auto partition_keys = parse_expr_array(partition_keys_arr, cat, ra_executor);
676  const auto& order_keys_arr = field(expr, "order_keys");
677  auto order_keys = parse_window_order_exprs(order_keys_arr, cat, ra_executor);
678  const auto collation = parse_window_order_collation(order_keys_arr, cat, ra_executor);
679  const auto kind = parse_window_function_kind(op_name);
680  const auto lower_bound =
681  parse_window_bound(field(expr, "lower_bound"), cat, ra_executor);
682  const auto upper_bound =
683  parse_window_bound(field(expr, "upper_bound"), cat, ra_executor);
684  bool is_rows = json_bool(field(expr, "is_rows"));
685  ti.set_notnull(false);
686  return std::make_unique<RexWindowFunctionOperator>(kind,
687  operands,
688  partition_keys,
689  order_keys,
690  collation,
691  lower_bound,
692  upper_bound,
693  is_rows,
694  ti);
695  }
696  return std::unique_ptr<RexOperator>(op == kFUNCTION
697  ? new RexFunctionOperator(op_name, operands, ti)
698  : new RexOperator(op, operands, ti));
699 }
700 
701 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr,
702  const Catalog_Namespace::Catalog& cat,
703  RelAlgExecutor* ra_executor) {
704  const auto& operands = field(expr, "operands");
705  CHECK(operands.IsArray());
706  CHECK_GE(operands.Size(), unsigned(2));
707  std::unique_ptr<const RexScalar> else_expr;
708  std::vector<
709  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
710  expr_pair_list;
711  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
712  auto when_expr = parse_scalar_expr(*operands_it++, cat, ra_executor);
713  if (operands_it == operands.End()) {
714  else_expr = std::move(when_expr);
715  break;
716  }
717  auto then_expr = parse_scalar_expr(*operands_it++, cat, ra_executor);
718  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
719  }
720  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
721 }
722 
723 std::vector<std::string> strings_from_json_array(
724  const rapidjson::Value& json_str_arr) noexcept {
725  CHECK(json_str_arr.IsArray());
726  std::vector<std::string> fields;
727  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
728  ++json_str_arr_it) {
729  CHECK(json_str_arr_it->IsString());
730  fields.emplace_back(json_str_arr_it->GetString());
731  }
732  return fields;
733 }
734 
735 std::vector<size_t> indices_from_json_array(
736  const rapidjson::Value& json_idx_arr) noexcept {
737  CHECK(json_idx_arr.IsArray());
738  std::vector<size_t> indices;
739  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
740  ++json_idx_arr_it) {
741  CHECK(json_idx_arr_it->IsInt());
742  CHECK_GE(json_idx_arr_it->GetInt(), 0);
743  indices.emplace_back(json_idx_arr_it->GetInt());
744  }
745  return indices;
746 }
747 
748 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
749  rapidjson::StringBuffer buffer;
750  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
751  node.Accept(writer);
752  return buffer.GetString();
753 }
754 
755 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
756  const auto agg = to_agg_kind(json_str(field(expr, "agg")));
757  const auto distinct = json_bool(field(expr, "distinct"));
758  const auto agg_ti = parse_type(field(expr, "type"));
759  const auto operands = indices_from_json_array(field(expr, "operands"));
760  if (operands.size() > 1 && (operands.size() != 2 || agg != kAPPROX_COUNT_DISTINCT)) {
761  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
762  }
763  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
764 }
765 
766 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
767  const Catalog_Namespace::Catalog& cat,
768  RelAlgExecutor* ra_executor) {
769  CHECK(expr.IsObject());
770  if (expr.IsObject() && expr.HasMember("input")) {
771  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
772  }
773  if (expr.IsObject() && expr.HasMember("literal")) {
774  return std::unique_ptr<const RexScalar>(parse_literal(expr));
775  }
776  if (expr.IsObject() && expr.HasMember("op")) {
777  const auto op_str = json_str(field(expr, "op"));
778  if (op_str == std::string("CASE")) {
779  return std::unique_ptr<const RexScalar>(parse_case(expr, cat, ra_executor));
780  }
781  if (op_str == std::string("$SCALAR_QUERY")) {
782  return std::unique_ptr<const RexScalar>(parse_subquery(expr, cat, ra_executor));
783  }
784  return std::unique_ptr<const RexScalar>(parse_operator(expr, cat, ra_executor));
785  }
786  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
787  " not supported");
788 }
789 
790 JoinType to_join_type(const std::string& join_type_name) {
791  if (join_type_name == "inner") {
792  return JoinType::INNER;
793  }
794  if (join_type_name == "left") {
795  return JoinType::LEFT;
796  }
797  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
798 }
799 
800 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
801 
802 std::unique_ptr<const RexOperator> disambiguate_operator(
803  const RexOperator* rex_operator,
804  const RANodeOutput& ra_output) noexcept {
805  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
806  for (size_t i = 0; i < rex_operator->size(); ++i) {
807  auto operand = rex_operator->getOperand(i);
808  if (dynamic_cast<const RexSubQuery*>(operand)) {
809  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
810  } else {
811  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
812  }
813  }
814  const auto rex_window_function_operator =
815  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
816  if (rex_window_function_operator) {
817  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
818  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
819  for (const auto& partition_key : partition_keys) {
820  disambiguated_partition_keys.emplace_back(
821  disambiguate_rex(partition_key.get(), ra_output));
822  }
823  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
824  const auto& order_keys = rex_window_function_operator->getOrderKeys();
825  for (const auto& order_key : order_keys) {
826  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
827  }
828  return rex_window_function_operator->disambiguatedOperands(
829  disambiguated_operands,
830  disambiguated_partition_keys,
831  disambiguated_order_keys,
832  rex_window_function_operator->getCollation());
833  }
834  return rex_operator->getDisambiguated(disambiguated_operands);
835 }
836 
837 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
838  const RANodeOutput& ra_output) {
839  std::vector<
840  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
841  disambiguated_expr_pair_list;
842  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
843  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
844  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
845  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
846  std::move(disambiguated_then));
847  }
848  std::unique_ptr<const RexScalar> disambiguated_else{
849  disambiguate_rex(rex_case->getElse(), ra_output)};
850  return std::unique_ptr<const RexCase>(
851  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
852 }
853 
854 // The inputs used by scalar expressions are given as indices in the serialized
855 // representation of the query. This is hard to navigate; make the relationship
856 // explicit by creating RexInput expressions which hold a pointer to the source
857 // relational algebra node and the index relative to the output of that node.
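// A brief sketch, assuming a plan as simple as Scan -> Project: the scan's output is
// [RexInput(scan, 0), RexInput(scan, 1), ...], so a serialized RexAbstractInput with
// index 1 inside the project becomes RexInput(scan, 1), i.e. "column 1 of that specific
// scan node" rather than "the second column of whatever the input happens to be".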
858 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
859  const RANodeOutput& ra_output) {
860  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
861  if (rex_abstract_input) {
862  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
863  return std::unique_ptr<const RexInput>(
864  new RexInput(ra_output[rex_abstract_input->getIndex()]));
865  }
866  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
867  if (rex_operator) {
868  return disambiguate_operator(rex_operator, ra_output);
869  }
870  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
871  if (rex_case) {
872  return disambiguate_case(rex_case, ra_output);
873  }
874  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar);
875  CHECK(rex_literal);
876  return std::unique_ptr<const RexLiteral>(new RexLiteral(*rex_literal));
877 }
878 
879 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
880  CHECK_EQ(size_t(1), project_node->inputCount());
881  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
882  for (size_t i = 0; i < project_node->size(); ++i) {
883  const auto projected_expr = project_node->getProjectAt(i);
884  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
885  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
886  } else {
887  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
888  }
889  }
890  project_node->setExpressions(disambiguated_exprs);
891 }
892 
893 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
894  for (auto ra_node : nodes) {
895  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
896  if (filter_node) {
897  CHECK_EQ(size_t(1), filter_node->inputCount());
898  auto disambiguated_condition = disambiguate_rex(
899  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
900  filter_node->setCondition(disambiguated_condition);
901  continue;
902  }
903  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
904  if (join_node) {
905  CHECK_EQ(size_t(2), join_node->inputCount());
906  auto disambiguated_condition =
907  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
908  join_node->setCondition(disambiguated_condition);
909  continue;
910  }
911  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
912  if (project_node) {
913  bind_project_to_input(project_node.get(),
914  get_node_output(project_node->getInput(0)));
915  continue;
916  }
917  }
918 }
919 
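// An aggregate with no aggregate expressions whose input is another aggregate with no
// aggregate expressions and the same group-by arity produces exactly the rows of its
// input; mark such nodes as no-ops so they can be skipped.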
920 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
921  for (auto node : nodes) {
922  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
923  if (!agg_node || agg_node->getAggExprsCount()) {
924  continue;
925  }
926  CHECK_EQ(size_t(1), node->inputCount());
927  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
928  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
929  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
930  agg_node->markAsNop();
931  }
932  }
933 }
934 
935 namespace {
936 
937 std::vector<const Rex*> reproject_targets(
938  const RelProject* simple_project,
939  const std::vector<const Rex*>& target_exprs) noexcept {
940  std::vector<const Rex*> result;
941  for (size_t i = 0; i < simple_project->size(); ++i) {
942  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
943  CHECK(input_rex);
944  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
945  result.push_back(target_exprs[input_rex->getIndex()]);
946  }
947  return result;
948 }
949 
955 class RexInputReplacementVisitor : public RexDeepCopyVisitor {
956  public:
957   RexInputReplacementVisitor(
958       const RelAlgNode* node_to_keep,
959  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
960  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
961 
962  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
963  RetType visitInput(const RexInput* input) const final {
964  if (input->getSourceNode() == node_to_keep_) {
965  const auto index = input->getIndex();
966  CHECK_LT(index, scalar_sources_.size());
967  return visit(scalar_sources_[index].get());
968  } else {
969  return input->deepCopy();
970  }
971  }
972 
973  private:
974   const RelAlgNode* node_to_keep_;
975   const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
976 };
977 
978 } // namespace
979 
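// Collapses the nodes at the positions listed in `pattern` (a Filter / Project /
// Aggregate / Project chain detected by coalesce_nodes below) into a single RelCompound
// and rewires every consumer of the last node in the pattern to the new compound node.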
980 void create_compound(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
981  const std::vector<size_t>& pattern) noexcept {
982  CHECK_GE(pattern.size(), size_t(2));
983  CHECK_LE(pattern.size(), size_t(4));
984 
985  std::unique_ptr<const RexScalar> filter_rex;
986  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
987  size_t groupby_count{0};
988  std::vector<std::string> fields;
989  std::vector<const RexAgg*> agg_exprs;
990  std::vector<const Rex*> target_exprs;
991  bool first_project{true};
992  bool is_agg{false};
993  RelAlgNode* last_node{nullptr};
994 
995  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
996 
997  for (const auto node_idx : pattern) {
998  const auto ra_node = nodes[node_idx];
999  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1000  if (ra_filter) {
1001  CHECK(!filter_rex);
1002  filter_rex.reset(ra_filter->getAndReleaseCondition());
1003  CHECK(filter_rex);
1004  last_node = ra_node.get();
1005  continue;
1006  }
1007  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1008  if (ra_project) {
1009  fields = ra_project->getFields();
1010  manipulation_target = ra_project;
1011 
1012  if (first_project) {
1013  CHECK_EQ(size_t(1), ra_project->inputCount());
1014  // Rebind the input of the project to the input of the filter itself
1015  // since we know that we'll evaluate the filter on the fly, with no
1016  // intermediate buffer.
1017  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1018  if (filter_input) {
1019  CHECK_EQ(size_t(1), filter_input->inputCount());
1020  bind_project_to_input(ra_project.get(),
1021  get_node_output(filter_input->getInput(0)));
1022  }
1023  scalar_sources = ra_project->getExpressionsAndRelease();
1024  for (const auto& scalar_expr : scalar_sources) {
1025  target_exprs.push_back(scalar_expr.get());
1026  }
1027  first_project = false;
1028  } else {
1029  if (ra_project->isSimple()) {
1030  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1031  } else {
1032  // TODO(adb): This is essentially a more general case of simple project, we
1033  // could likely merge the two
1034  std::vector<const Rex*> result;
1035  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1036  for (size_t i = 0; i < ra_project->size(); ++i) {
1037  const auto rex = ra_project->getProjectAt(i);
1038  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1039  const auto index = rex_input->getIndex();
1040  CHECK_LT(index, target_exprs.size());
1041  result.push_back(target_exprs[index]);
1042  } else {
1043  scalar_sources.push_back(visitor.visit(rex));
1044  result.push_back(scalar_sources.back().get());
1045  }
1046  }
1047  target_exprs = result;
1048  }
1049  }
1050  last_node = ra_node.get();
1051  continue;
1052  }
1053  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1054  if (ra_aggregate) {
1055  is_agg = true;
1056  fields = ra_aggregate->getFields();
1057  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1058  groupby_count = ra_aggregate->getGroupByCount();
1059  decltype(target_exprs){}.swap(target_exprs);
1060  CHECK_LE(groupby_count, scalar_sources.size());
1061  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1062  const auto rex_ref = new RexRef(group_idx + 1);
1063  target_exprs.push_back(rex_ref);
1064  scalar_sources.emplace_back(rex_ref);
1065  }
1066  for (const auto rex_agg : agg_exprs) {
1067  target_exprs.push_back(rex_agg);
1068  }
1069  last_node = ra_node.get();
1070  continue;
1071  }
1072  }
1073 
1074  auto compound_node =
1075  std::make_shared<RelCompound>(filter_rex,
1076  target_exprs,
1077  groupby_count,
1078  agg_exprs,
1079  fields,
1080  scalar_sources,
1081  is_agg,
1082  manipulation_target->isUpdateViaSelect(),
1083  manipulation_target->isDeleteViaSelect(),
1084  manipulation_target->isVarlenUpdateRequired(),
1085  manipulation_target->getModifiedTableDescriptor(),
1086  manipulation_target->getTargetColumns());
1087  auto old_node = nodes[pattern.back()];
1088  nodes[pattern.back()] = compound_node;
1089  auto first_node = nodes[pattern.front()];
1090  CHECK_EQ(size_t(1), first_node->inputCount());
1091  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1092  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1093  nodes[pattern[i]].reset();
1094  }
1095  for (auto node : nodes) {
1096  if (!node) {
1097  continue;
1098  }
1099  node->replaceInput(old_node, compound_node);
1100  }
1101 }
1102 
1103 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1104  using ElementType = std::shared_ptr<RelAlgNode>;
1105  using Super = std::vector<ElementType>::const_iterator;
1106  using Container = std::vector<ElementType>;
1107 
1108  public:
1109  enum class AdvancingMode { DUChain, InOrder };
1110 
1111  explicit RANodeIterator(const Container& nodes)
1112  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1113  size_t non_zero_count = 0;
1114  for (const auto& node : nodes) {
1115  if (node) {
1116  ++non_zero_count;
1117  }
1118  }
1119  return non_zero_count;
1120  }()) {}
1121 
1122  explicit operator size_t() {
1123  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1124  }
1125 
1126  RANodeIterator operator++() = delete;
1127 
1128  void advance(AdvancingMode mode) {
1129  Super& super = *this;
1130  switch (mode) {
1131  case AdvancingMode::DUChain: {
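        // Def-use chain mode: advance to the unique node which consumes the current
        // node as an input; with no consumer, or more than one, move to end().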
1132  size_t use_count = 0;
1133  Super only_use = owner_.end();
1134  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1135  if (!*nodeIt) {
1136  continue;
1137  }
1138  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1139  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1140  ++use_count;
1141  if (1 == use_count) {
1142  only_use = nodeIt;
1143  } else {
1144  super = owner_.end();
1145  return;
1146  }
1147  }
1148  }
1149  }
1150  super = only_use;
1151  break;
1152  }
1153  case AdvancingMode::InOrder:
1154  for (size_t i = 0; i != owner_.size(); ++i) {
1155  if (!visited_.count(i)) {
1156  super = owner_.begin();
1157  std::advance(super, i);
1158  return;
1159  }
1160  }
1161  super = owner_.end();
1162  break;
1163  default:
1164  CHECK(false);
1165  }
1166  }
1167 
1168  bool allVisited() { return visited_.size() == nodeCount_; }
1169 
1170   const ElementType& operator*() {
1171     visited_.insert(size_t(*this));
1172  Super& super = *this;
1173  return *super;
1174  }
1175 
1176  const ElementType* operator->() { return &(operator*()); }
1177 
1178  private:
1179   const Container& owner_;
1180   const size_t nodeCount_;
1181  std::unordered_set<size_t> visited_;
1182 };
1183 
1184 namespace {
1185 
1186 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1187  const size_t index,
1188  const bool first_rex_is_input) {
1189  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1190  if (index == 0 && agg_node->getGroupByCount() > 0) {
1191  return true;
1192  } else {
1193  // Is an aggregated target, only allow the project to be elided if the aggregate
1194  // target is simply passed through (i.e. if the top level expression attached to
1195  // the project node is a RexInput expression)
1196  return first_rex_is_input;
1197  }
1198  }
1199  return first_rex_is_input;
1200 }
1201 
1207 class CoalesceSecondaryProjectVisitor : public RexVisitor<bool> {
1208  public:
1209  bool visitInput(const RexInput* input) const final {
1210  // The top level expression node is checked before we apply the visitor. If we get
1211  // here, this input rex is a child of another rex node, and we handle the can be
1212  // coalesced check slightly differently
1213  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1214  }
1215 
1216  bool visitLiteral(const RexLiteral*) const final { return false; }
1217 
1218  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1219 
1220  bool visitRef(const RexRef*) const final { return false; }
1221 
1222  protected:
1223  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1224  return aggregate && next_result;
1225  }
1226 
1227  bool defaultResult() const final { return true; }
1228 };
1229 
1230 inline bool project_has_window_function_input(const RelProject* ra_project) {
1231  for (size_t i = 0; i < ra_project->size(); i++) {
1232  if (dynamic_cast<const RexWindowFunctionOperator*>(ra_project->getProjectAt(i))) {
1233  return true;
1234  }
1235  }
1236  return false;
1237 }
1238 
1239 } // namespace
1240 
1241 void coalesce_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1242  const std::vector<const RelAlgNode*>& left_deep_joins) {
1243  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
1244  std::vector<size_t> crt_pattern;
1245  CoalesceState crt_state{CoalesceState::Initial};
1246 
1247  auto reset_state = [&crt_pattern, &crt_state]() {
1248  crt_state = CoalesceState::Initial;
1249  decltype(crt_pattern)().swap(crt_pattern);
1250  };
1251 
1252  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
1253  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
1254  switch (crt_state) {
1255  case CoalesceState::Initial: {
1256  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
1257  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
1258  left_deep_joins.end()) {
1259  crt_pattern.push_back(size_t(nodeIt));
1260  crt_state = CoalesceState::Filter;
1261  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1262  } else if (std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1263  crt_pattern.push_back(size_t(nodeIt));
1264  crt_state = CoalesceState::FirstProject;
1265  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1266  } else {
1267  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1268  }
1269  break;
1270  }
1271  case CoalesceState::Filter: {
1272  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1273  if (project_has_window_function_input(project_node.get())) {
1274  reset_state();
1275  break;
1276  }
1277  crt_pattern.push_back(size_t(nodeIt));
1278  crt_state = CoalesceState::FirstProject;
1279  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1280  } else {
1281  reset_state();
1282  }
1283  break;
1284  }
1285  case CoalesceState::FirstProject: {
1286  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
1287  crt_pattern.push_back(size_t(nodeIt));
1288  crt_state = CoalesceState::Aggregate;
1289  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1290  } else {
1291  if (crt_pattern.size() >= 2) {
1292  create_compound(nodes, crt_pattern);
1293  }
1294  reset_state();
1295  }
1296  break;
1297  }
1298  case CoalesceState::Aggregate: {
1299  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1300  // TODO(adb): overloading the simple project terminology again here
1301  bool is_simple_project{true};
1302  for (size_t i = 0; i < project_node->size(); i++) {
1303  const auto scalar_rex = project_node->getProjectAt(i);
1304  // If the top level scalar rex is an input node, we can bypass the visitor
1305  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
1306             if (!input_can_be_coalesced(
1307                     input_rex->getSourceNode(), input_rex->getIndex(), true)) {
1308  is_simple_project = false;
1309  break;
1310  }
1311  continue;
1312  }
1313  CoalesceSecondaryProjectVisitor visitor;
1314  if (!visitor.visit(project_node->getProjectAt(i))) {
1315  is_simple_project = false;
1316  break;
1317  }
1318  }
1319  if (is_simple_project) {
1320  crt_pattern.push_back(size_t(nodeIt));
1321  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1322  }
1323  }
1324  CHECK_GE(crt_pattern.size(), size_t(2));
1325  create_compound(nodes, crt_pattern);
1326  reset_state();
1327  break;
1328  }
1329  default:
1330  CHECK(false);
1331  }
1332  }
1333  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
1334  if (crt_pattern.size() >= 2) {
1335  create_compound(nodes, crt_pattern);
1336  }
1337  CHECK(!crt_pattern.empty());
1338  }
1339 }
1340 
1341 int64_t get_int_literal_field(const rapidjson::Value& obj,
1342  const char field[],
1343  const int64_t default_val) noexcept {
1344  const auto it = obj.FindMember(field);
1345  if (it == obj.MemberEnd()) {
1346  return default_val;
1347  }
1348  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
1349  CHECK_EQ(kDECIMAL, lit->getType());
1350  CHECK_EQ(unsigned(0), lit->getScale());
1351  CHECK_EQ(unsigned(0), lit->getTypeScale());
1352  return lit->getVal<int64_t>();
1353 }
1354 
1355 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
1356  const auto& inputs_json = field(node, "inputs");
1357  CHECK(inputs_json.IsArray() && !inputs_json.Size());
1358 }
1359 
1360 // Create an in-memory, easy to navigate relational algebra DAG from its serialized,
1361 // JSON representation. Also, apply high level optimizations which can be expressed
1362 // through relational algebra extended with RelCompound. The RelCompound node is an
1363 // equivalent representation for sequences of RelFilter, RelProject and RelAggregate
1364 // nodes. This coalescing minimizes the amount of intermediate buffers required to
1365 // evaluate a query. Lower level optimizations are taken care of by lower levels, mainly
1366 // RelAlgTranslator and the IR code generation.
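// As a rough illustration (simplified, not an actual serialized plan): a query such as
//   SELECT COUNT(*) FROM t WHERE x > 10 GROUP BY y
// arrives from Calcite as Scan -> Filter -> Project -> Aggregate, and coalesce_nodes()
// folds the Filter/Project/Aggregate suffix into one RelCompound so the filter and the
// projected expressions are evaluated on the fly during aggregation instead of being
// materialized in intermediate buffers.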
1367 class RelAlgAbstractInterpreter {
1368  public:
1369  RelAlgAbstractInterpreter(const rapidjson::Value& query_ast,
1370  const Catalog_Namespace::Catalog& cat,
1371  RelAlgExecutor* ra_executor,
1372  const RenderQueryOptions* render_opts)
1373  : query_ast_(query_ast)
1374  , cat_(cat)
1375  , ra_executor_(ra_executor)
1376  , render_opts_(render_opts) {}
1377 
1378  std::shared_ptr<const RelAlgNode> run() {
1379  const auto& rels = field(query_ast_, "rels");
1380  CHECK(rels.IsArray());
1381  try {
1382  dispatchNodes(rels);
1383  } catch (const QueryNotSupported&) {
1384  throw;
1385  }
1386  CHECK(!nodes_.empty());
1387  bind_inputs(nodes_);
1388 
1389  if (render_opts_) {
1390  // Alter the RA for render. Do this before any flattening/optimizations are done to
1391  // the tree.
1392  alterRAForRender(nodes_, *render_opts_);
1393  }
1394 
1395  mark_nops(nodes_);
1396  simplify_sort(nodes_);
1398  eliminate_identical_copy(nodes_);
1399  fold_filters(nodes_);
1400  std::vector<const RelAlgNode*> filtered_left_deep_joins;
1401  std::vector<const RelAlgNode*> left_deep_joins;
1402  for (const auto& node : nodes_) {
1403  const auto left_deep_join_root = get_left_deep_join_root(node);
1404  // The filter which starts a left-deep join pattern must not be coalesced
1405  // since it contains (part of) the join condition.
1406  if (left_deep_join_root) {
1407  left_deep_joins.push_back(left_deep_join_root.get());
1408  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
1409  filtered_left_deep_joins.push_back(left_deep_join_root.get());
1410  }
1411  }
1412  }
1413  if (filtered_left_deep_joins.empty()) {
1414       hoist_filter_cond_to_cross_join(nodes_);
1415     }
1416  eliminate_dead_columns(nodes_);
1417  coalesce_nodes(nodes_, left_deep_joins);
1418  CHECK(nodes_.back().unique());
1419  create_left_deep_join(nodes_);
1420  return nodes_.back();
1421  }
1422 
1423  private:
1424  void dispatchNodes(const rapidjson::Value& rels) {
1425  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
1426  const auto& crt_node = *rels_it;
1427  const auto id = node_id(crt_node);
1428  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
1429  CHECK(crt_node.IsObject());
1430  std::shared_ptr<RelAlgNode> ra_node = nullptr;
1431  const auto rel_op = json_str(field(crt_node, "relOp"));
1432  if (rel_op == std::string("EnumerableTableScan")) {
1433  ra_node = dispatchTableScan(crt_node);
1434  } else if (rel_op == std::string("LogicalProject")) {
1435  ra_node = dispatchProject(crt_node);
1436  } else if (rel_op == std::string("LogicalFilter")) {
1437  ra_node = dispatchFilter(crt_node);
1438  } else if (rel_op == std::string("LogicalAggregate")) {
1439  ra_node = dispatchAggregate(crt_node);
1440  } else if (rel_op == std::string("LogicalJoin")) {
1441  ra_node = dispatchJoin(crt_node);
1442  } else if (rel_op == std::string("LogicalSort")) {
1443  ra_node = dispatchSort(crt_node);
1444  } else if (rel_op == std::string("LogicalValues")) {
1445  ra_node = dispatchLogicalValues(crt_node);
1446  } else if (rel_op == std::string("LogicalTableModify")) {
1447  ra_node = dispatchModify(crt_node);
1448  } else {
1449  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
1450  }
1451  nodes_.push_back(ra_node);
1452  }
1453  }
1454 
1455  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
1456  check_empty_inputs_field(scan_ra);
1457  CHECK(scan_ra.IsObject());
1458  const auto td = getTableFromScanNode(scan_ra);
1459  const auto field_names = getFieldNamesFromScanNode(scan_ra);
1460  return std::make_shared<RelScan>(td, field_names);
1461  }
1462 
1463  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra) {
1464  const auto inputs = getRelAlgInputs(proj_ra);
1465  CHECK_EQ(size_t(1), inputs.size());
1466  const auto& exprs_json = field(proj_ra, "exprs");
1467  CHECK(exprs_json.IsArray());
1468  std::vector<std::unique_ptr<const RexScalar>> exprs;
1469  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
1470  ++exprs_json_it) {
1471  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, cat_, ra_executor_));
1472  }
1473  const auto& fields = field(proj_ra, "fields");
1474  return std::make_shared<RelProject>(
1475  exprs, strings_from_json_array(fields), inputs.front());
1476  }
1477 
1478  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra) {
1479  const auto inputs = getRelAlgInputs(filter_ra);
1480  CHECK_EQ(size_t(1), inputs.size());
1481  const auto id = node_id(filter_ra);
1482  CHECK(id);
1483  auto condition = parse_scalar_expr(field(filter_ra, "condition"), cat_, ra_executor_);
1484  return std::make_shared<RelFilter>(condition, inputs.front());
1485  }
1486 
1487  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
1488  const auto inputs = getRelAlgInputs(agg_ra);
1489  CHECK_EQ(size_t(1), inputs.size());
1490  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
1491  const auto group = indices_from_json_array(field(agg_ra, "group"));
1492  for (size_t i = 0; i < group.size(); ++i) {
1493  CHECK_EQ(i, group[i]);
1494  }
1495  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
1496  throw QueryNotSupported("GROUP BY extensions not supported");
1497  }
1498  const auto& aggs_json_arr = field(agg_ra, "aggs");
1499  CHECK(aggs_json_arr.IsArray());
1500  std::vector<std::unique_ptr<const RexAgg>> aggs;
1501  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
1502  aggs_json_arr_it != aggs_json_arr.End();
1503  ++aggs_json_arr_it) {
1504  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
1505  }
1506  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
1507  }
1508 
1509  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra) {
1510  const auto inputs = getRelAlgInputs(join_ra);
1511  CHECK_EQ(size_t(2), inputs.size());
1512  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
1513  auto filter_rex = parse_scalar_expr(field(join_ra, "condition"), cat_, ra_executor_);
1514  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
1515  }
1516 
1517  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
1518  const auto inputs = getRelAlgInputs(sort_ra);
1519  CHECK_EQ(size_t(1), inputs.size());
1520  std::vector<SortField> collation;
1521  const auto& collation_arr = field(sort_ra, "collation");
1522  CHECK(collation_arr.IsArray());
1523  for (auto collation_arr_it = collation_arr.Begin();
1524  collation_arr_it != collation_arr.End();
1525  ++collation_arr_it) {
1526  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
1527  const auto sort_dir = parse_sort_direction(*collation_arr_it);
1528  const auto null_pos = parse_nulls_position(*collation_arr_it);
1529  collation.emplace_back(field_idx, sort_dir, null_pos);
1530  }
1531  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
1532  if (limit == 0) {
1533  throw QueryNotSupported("LIMIT 0 not supported");
1534  }
1535  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
1536  return std::make_shared<RelSort>(
1537  collation, limit > 0 ? limit : 0, offset, inputs.front());
1538  }
1539 
1540  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
1541  const auto inputs = getRelAlgInputs(logical_modify_ra);
1542  CHECK_EQ(size_t(1), inputs.size());
1543 
1544  const auto table_descriptor = getTableFromScanNode(logical_modify_ra);
1545  if (table_descriptor->isView) {
1546  throw std::runtime_error("UPDATE of a view is unsupported.");
1547  }
1548 
1549  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
1550  std::string op = json_str(field(logical_modify_ra, "operation"));
1551  RelModify::TargetColumnList target_column_list;
1552 
1553  if (op == "UPDATE") {
1554  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
1555  CHECK(update_columns.IsArray());
1556 
1557  for (auto column_arr_it = update_columns.Begin();
1558  column_arr_it != update_columns.End();
1559  ++column_arr_it) {
1560  target_column_list.push_back(column_arr_it->GetString());
1561  }
1562  }
1563 
1564  auto modify_node = std::make_shared<RelModify>(
1565  cat_, table_descriptor, flattened, op, target_column_list, inputs[0]);
1566  switch (modify_node->getOperation()) {
1567  case RelModify::ModifyOperation::Delete: {
1568  modify_node->applyDeleteModificationsToInputNode();
1569  break;
1570  }
1571  case RelModify::ModifyOperation::Update: {
1572  modify_node->applyUpdateModificationsToInputNode();
1573  }
1574  default:
1575  break;
1576  }
1577 
1578  return modify_node;
1579  }
1580 
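 // Parses a VALUES-style node. Only the tuple type is kept; a LogicalValues
 // carrying actual rows or inputs (as produced for literal VALUES lists) is not
 // supported yet.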
1581  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
1582  const rapidjson::Value& logical_values_ra) {
1583  const auto& tuple_type_arr = field(logical_values_ra, "type");
1584  CHECK(tuple_type_arr.IsArray());
1585  std::vector<TargetMetaInfo> tuple_type;
1586  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
1587  tuple_type_arr_it != tuple_type_arr.End();
1588  ++tuple_type_arr_it) {
1589  const auto component_type = parse_type(*tuple_type_arr_it);
1590  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
1591  tuple_type.emplace_back(component_name, component_type);
1592  }
1593  const auto& inputs_arr = field(logical_values_ra, "inputs");
1594  CHECK(inputs_arr.IsArray());
1595  const auto& tuples_arr = field(logical_values_ra, "tuples");
1596  CHECK(tuples_arr.IsArray());
1597 
1598  if (inputs_arr.Size() || tuples_arr.Size()) {
1599  throw QueryNotSupported("Non-empty LogicalValues not supported yet");
1600  }
1601  return std::make_shared<RelLogicalValues>(tuple_type);
1602  }
1603 
1604  const TableDescriptor* getTableFromScanNode(const rapidjson::Value& scan_ra) const {
1605  const auto& table_json = field(scan_ra, "table");
1606  CHECK(table_json.IsArray());
1607  CHECK_EQ(unsigned(2), table_json.Size());
1608  const auto td = cat_.getMetadataForTable(table_json[1].GetString());
1609  CHECK(td);
1610  return td;
1611  }
1612 
1613  std::vector<std::string> getFieldNamesFromScanNode(
1614  const rapidjson::Value& scan_ra) const {
1615  const auto& fields_json = field(scan_ra, "fieldNames");
1616  return strings_from_json_array(fields_json);
1617  }
1618 
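 // Resolves a node's children: either an explicit "inputs" array of ids of
 // previously dispatched nodes, or, when the field is absent, implicitly the
 // node dispatched immediately before this one (see prev() below).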
1619  std::vector<std::shared_ptr<const RelAlgNode>> getRelAlgInputs(
1620  const rapidjson::Value& node) {
1621  if (node.HasMember("inputs")) {
1622  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
1623  std::vector<std::shared_ptr<const RelAlgNode>> ra_inputs;
1624  for (const auto str_id : str_input_ids) {
1625  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
1626  }
1627  return ra_inputs;
1628  }
1629  return {prev(node)};
1630  }
1631 
1632  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
1633  const auto id = node_id(crt_node);
1634  CHECK(id);
1635  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
1636  return nodes_.back();
1637  }
1638 
1639  const rapidjson::Value& query_ast_;
1640  const Catalog_Namespace::Catalog& cat_;
1641  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
1642  RelAlgExecutor* ra_executor_;
1643  const RenderQueryOptions* render_opts_;
1644 };
1645 
1646 std::shared_ptr<const RelAlgNode> ra_interpret(const rapidjson::Value& query_ast,
1647  const Catalog_Namespace::Catalog& cat,
1648  RelAlgExecutor* ra_executor,
1649  const RenderQueryOptions* render_opts) {
1650  RelAlgAbstractInterpreter interp(query_ast, cat, ra_executor, render_opts);
1651  return interp.run();
1652 }
1653 
1654 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
1655  const Catalog_Namespace::Catalog& cat,
1656  RelAlgExecutor* ra_executor) {
1657  const auto& operands = field(expr, "operands");
1658  CHECK(operands.IsArray());
1659  CHECK_GE(operands.Size(), unsigned(0));
1660  const auto& subquery_ast = field(expr, "subquery");
1661 
1662  const auto ra = ra_interpret(subquery_ast, cat, ra_executor, nullptr);
1663  auto subquery = std::make_shared<RexSubQuery>(ra);
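 // Register the shared RexSubQuery with the executor so its result can be
 // filled in once the subquery executes; the caller gets an independent deep
 // copy that shares the same type/result slots.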
1664  ra_executor->registerSubquery(subquery);
1665  return subquery->deepCopy();
1666 }
1667 
1668 } // namespace
1669 
1670 // Driver for the query de-serialization and high level optimization.
1671 std::shared_ptr<const RelAlgNode> deserialize_ra_dag(
1672  const std::string& query_ra,
1673  const Catalog_Namespace::Catalog& cat,
1674  RelAlgExecutor* ra_executor,
1675  const RenderQueryOptions* render_opts) {
1676  rapidjson::Document query_ast;
1677  query_ast.Parse(query_ra.c_str());
1678  CHECK(!query_ast.HasParseError());
1679  CHECK(query_ast.IsObject());
1680  RelAlgNode::resetRelAlgFirstId();
1681  return ra_interpret(query_ast, cat, ra_executor, render_opts);
1682 }
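// A minimal usage sketch (illustrative; assumes `cat` is a valid catalog,
// `executor` a RelAlgExecutor, and `query_ra` Calcite's JSON plan string):
//   const auto root = deserialize_ra_dag(query_ra, cat, executor, nullptr);
//   std::cout << tree_string(root.get(), 0);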
1683 
1684 // Prints the relational algebra as a tree; useful for debugging.
1685 std::string tree_string(const RelAlgNode* ra, const size_t indent) {
1686  std::string result = std::string(indent, ' ') + ra->toString() + "\n";
1687  for (size_t i = 0; i < ra->inputCount(); ++i) {
1688  result += tree_string(ra->getInput(i), indent + 2);
1689  }
1690  return result;
1691 }