OmniSciDB  0bd2ec9cf4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgDagBuilder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgDagBuilder.h"
18 #include "../Shared/sqldefs.h"
20 #include "Catalog/Catalog.h"
22 #include "JsonAccessors.h"
23 #include "RelAlgOptimizer.h"
24 #include "RelLeftDeepInnerJoin.h"
26 #include "RexVisitor.h"
27 
28 #include <rapidjson/error/en.h>
29 #include <rapidjson/error/error.h>
30 #include <rapidjson/stringbuffer.h>
31 #include <rapidjson/writer.h>
32 
33 #include <string>
34 #include <unordered_set>
35 
36 namespace {
37 
38 const unsigned FIRST_RA_NODE_ID = 1;
39 
40 } // namespace
41 
42 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
43 
46 }
47 
49  const std::shared_ptr<const ExecutionResult> result) {
50  auto row_set = result->getRows();
51  CHECK(row_set);
52  CHECK_EQ(size_t(1), row_set->colCount());
53  *(type_.get()) = row_set->getColType(0);
54  (*(result_.get())) = result;
55 }
56 
57 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
58  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
59 }
60 
61 namespace {
62 
63 class RexRebindInputsVisitor : public RexVisitor<void*> {
64  public:
65  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
66  : old_input_(old_input), new_input_(new_input) {}
67 
68  void* visitInput(const RexInput* rex_input) const override {
69  const auto old_source = rex_input->getSourceNode();
70  if (old_source == old_input_) {
71  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
72  if (left_deep_join) {
73  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
74  return nullptr;
75  }
76  rex_input->setSourceNode(new_input_);
77  }
78  return nullptr;
79  };
80 
81  private:
82  const RelAlgNode* old_input_;
84 };
85 
86 // Creates an output with n columns.
87 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
88  std::vector<RexInput> outputs;
89  for (size_t i = 0; i < n; ++i) {
90  outputs.emplace_back(node, i);
91  }
92  return outputs;
93 }
94 
95 } // namespace
96 
97 void RelProject::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
98  std::shared_ptr<const RelAlgNode> input) {
99  RelAlgNode::replaceInput(old_input, input);
100  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
101  for (const auto& scalar_expr : scalar_exprs_) {
102  rebind_inputs.visit(scalar_expr.get());
103  }
104 }
105 
106 void RelProject::appendInput(std::string new_field_name,
107  std::unique_ptr<const RexScalar> new_input) {
108  fields_.emplace_back(std::move(new_field_name));
109  scalar_exprs_.emplace_back(std::move(new_input));
110 }
111 
113  RANodeOutput outputs;
114  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
115  if (scan_node) {
116  // Scan node has no inputs, output contains all columns in the table.
117  CHECK_EQ(size_t(0), scan_node->inputCount());
118  return n_outputs(scan_node, scan_node->size());
119  }
120  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
121  if (project_node) {
122  // Project output count doesn't depend on the input
123  CHECK_EQ(size_t(1), project_node->inputCount());
124  return n_outputs(project_node, project_node->size());
125  }
126  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
127  if (filter_node) {
128  // Filter preserves shape
129  CHECK_EQ(size_t(1), filter_node->inputCount());
130  const auto prev_out = get_node_output(filter_node->getInput(0));
131  return n_outputs(filter_node, prev_out.size());
132  }
133  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
134  if (aggregate_node) {
135  // Aggregate output count doesn't depend on the input
136  CHECK_EQ(size_t(1), aggregate_node->inputCount());
137  return n_outputs(aggregate_node, aggregate_node->size());
138  }
139  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
140  if (compound_node) {
141  // Compound output count doesn't depend on the input
142  CHECK_EQ(size_t(1), compound_node->inputCount());
143  return n_outputs(compound_node, compound_node->size());
144  }
145  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
146  if (join_node) {
147  // Join concatenates the outputs from the inputs and the output
148  // directly references the nodes in the input.
149  CHECK_EQ(size_t(2), join_node->inputCount());
150  auto lhs_out =
151  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
152  const auto rhs_out =
153  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
154  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
155  return lhs_out;
156  }
157  const auto table_func_node = dynamic_cast<const RelTableFunction*>(ra_node);
158  if (table_func_node) {
159  // Table Function output count doesn't depend on the input
160  CHECK_EQ(size_t(1), table_func_node->inputCount());
161  return n_outputs(table_func_node, table_func_node->size());
162  }
163  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
164  if (sort_node) {
165  // Sort preserves shape
166  CHECK_EQ(size_t(1), sort_node->inputCount());
167  const auto prev_out = get_node_output(sort_node->getInput(0));
168  return n_outputs(sort_node, prev_out.size());
169  }
170  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
171  if (logical_values_node) {
172  CHECK_EQ(size_t(0), logical_values_node->inputCount());
173  return n_outputs(logical_values_node, logical_values_node->size());
174  }
175  CHECK(false);
176  return outputs;
177 }
178 
180  if (!isSimple()) {
181  return false;
182  }
183  CHECK_EQ(size_t(1), inputCount());
184  const auto source = getInput(0);
185  if (dynamic_cast<const RelJoin*>(source)) {
186  return false;
187  }
188  const auto source_shape = get_node_output(source);
189  if (source_shape.size() != scalar_exprs_.size()) {
190  return false;
191  }
192  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
193  const auto& scalar_expr = scalar_exprs_[i];
194  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
195  CHECK(input);
196  CHECK_EQ(source, input->getSourceNode());
197  // We should add the additional check that input->getIndex() !=
198  // source_shape[i].getIndex(), but Calcite doesn't generate the right
199  // Sort-Project-Sort sequence when joins are involved.
200  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
201  return false;
202  }
203  }
204  return true;
205 }
206 
207 namespace {
208 
209 bool isRenamedInput(const RelAlgNode* node,
210  const size_t index,
211  const std::string& new_name) {
212  CHECK_LT(index, node->size());
213  if (auto join = dynamic_cast<const RelJoin*>(node)) {
214  CHECK_EQ(size_t(2), join->inputCount());
215  const auto lhs_size = join->getInput(0)->size();
216  if (index < lhs_size) {
217  return isRenamedInput(join->getInput(0), index, new_name);
218  }
219  CHECK_GE(index, lhs_size);
220  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
221  }
222 
223  if (auto scan = dynamic_cast<const RelScan*>(node)) {
224  return new_name != scan->getFieldName(index);
225  }
226 
227  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
228  return new_name != aggregate->getFieldName(index);
229  }
230 
231  if (auto project = dynamic_cast<const RelProject*>(node)) {
232  return new_name != project->getFieldName(index);
233  }
234 
235  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
236  return new_name != table_func->getFieldName(index);
237  }
238 
239  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
240  const auto& tuple_type = logical_values->getTupleType();
241  CHECK_LT(index, tuple_type.size());
242  return new_name != tuple_type[index].get_resname();
243  }
244 
245  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
246  return isRenamedInput(node->getInput(0), index, new_name);
247 }
248 
249 } // namespace
250 
252  if (!isSimple()) {
253  return false;
254  }
255  CHECK_EQ(scalar_exprs_.size(), fields_.size());
256  for (size_t i = 0; i < fields_.size(); ++i) {
257  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
258  CHECK(rex_in);
259  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
260  return true;
261  }
262  }
263  return false;
264 }
265 
266 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
267  std::shared_ptr<const RelAlgNode> input) {
268  RelAlgNode::replaceInput(old_input, input);
269  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
270  if (condition_) {
271  rebind_inputs.visit(condition_.get());
272  }
273 }
274 
275 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
276  std::shared_ptr<const RelAlgNode> input) {
277  RelAlgNode::replaceInput(old_input, input);
278  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
279  rebind_inputs.visit(filter_.get());
280 }
281 
282 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
283  std::shared_ptr<const RelAlgNode> input) {
284  RelAlgNode::replaceInput(old_input, input);
285  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
286  for (const auto& scalar_source : scalar_sources_) {
287  rebind_inputs.visit(scalar_source.get());
288  }
289  if (filter_expr_) {
290  rebind_inputs.visit(filter_expr_.get());
291  }
292 }
293 
294 std::shared_ptr<RelAlgNode> RelProject::deepCopy() const {
295  RexDeepCopyVisitor copier;
296  std::vector<std::unique_ptr<const RexScalar>> exprs_copy;
297  for (auto& expr : scalar_exprs_) {
298  exprs_copy.push_back(copier.visit(expr.get()));
299  }
300  return std::make_shared<RelProject>(exprs_copy, fields_, inputs_[0]);
301 }
302 
303 std::shared_ptr<RelAlgNode> RelFilter::deepCopy() const {
304  RexDeepCopyVisitor copier;
305  auto filter_copy = copier.visit(filter_.get());
306  return std::make_shared<RelFilter>(filter_copy, inputs_[0]);
307 }
308 
309 std::shared_ptr<RelAlgNode> RelAggregate::deepCopy() const {
310  std::vector<std::unique_ptr<const RexAgg>> aggs_copy;
311  for (auto& agg : agg_exprs_) {
312  auto copy = agg->deepCopy();
313  aggs_copy.push_back(std::move(copy));
314  }
315  return std::make_shared<RelAggregate>(groupby_count_, aggs_copy, fields_, inputs_[0]);
316 }
317 
318 std::shared_ptr<RelAlgNode> RelJoin::deepCopy() const {
319  RexDeepCopyVisitor copier;
320  auto condition_copy = copier.visit(condition_.get());
321  return std::make_shared<RelJoin>(inputs_[0], inputs_[1], condition_copy, join_type_);
322 }
323 
324 std::shared_ptr<RelAlgNode> RelCompound::deepCopy() const {
325  RexDeepCopyVisitor copier;
326  auto filter_copy = filter_expr_ ? copier.visit(filter_expr_.get()) : nullptr;
327  std::unordered_map<const Rex*, const Rex*> old_to_new_target;
328  std::vector<const RexAgg*> aggs_copy;
329  for (auto& agg : agg_exprs_) {
330  auto copy = agg->deepCopy();
331  old_to_new_target.insert(std::make_pair(agg.get(), copy.get()));
332  aggs_copy.push_back(copy.release());
333  }
334  std::vector<std::unique_ptr<const RexScalar>> sources_copy;
335  for (size_t i = 0; i < scalar_sources_.size(); ++i) {
336  auto copy = copier.visit(scalar_sources_[i].get());
337  old_to_new_target.insert(std::make_pair(scalar_sources_[i].get(), copy.get()));
338  sources_copy.push_back(std::move(copy));
339  }
340  std::vector<const Rex*> target_exprs_copy;
341  for (auto target : target_exprs_) {
342  auto target_it = old_to_new_target.find(target);
343  CHECK(target_it != old_to_new_target.end());
344  target_exprs_copy.push_back(target_it->second);
345  }
346  auto new_compound = std::make_shared<RelCompound>(filter_copy,
347  target_exprs_copy,
349  aggs_copy,
350  fields_,
351  sources_copy,
352  is_agg_);
353  new_compound->addManagedInput(inputs_[0]);
354  return new_compound;
355 }
356 
357 std::shared_ptr<RelAlgNode> RelSort::deepCopy() const {
358  auto ret = std::make_shared<RelSort>(collation_, limit_, offset_, inputs_[0]);
359  ret->setEmptyResult(isEmptyResult());
360  return ret;
361 }
362 
363 void RelTableFunction::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
364  std::shared_ptr<const RelAlgNode> input) {
365  RelAlgNode::replaceInput(old_input, input);
366  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
367  for (const auto& target_expr : target_exprs_) {
368  rebind_inputs.visit(target_expr.get());
369  }
370  for (const auto& func_input : table_func_inputs_) {
371  rebind_inputs.visit(func_input.get());
372  }
373 }
374 
375 std::shared_ptr<RelAlgNode> RelTableFunction::deepCopy() const {
376  RexDeepCopyVisitor copier;
377 
378  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
379 
380  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs_copy;
381  for (auto& expr : table_func_inputs_) {
382  table_func_inputs_copy.push_back(copier.visit(expr.get()));
383  old_to_new_input.insert(
384  std::make_pair(expr.get(), table_func_inputs_copy.back().get()));
385  }
386 
387  std::vector<const Rex*> col_inputs_copy;
388  for (auto target : col_inputs_) {
389  auto target_it = old_to_new_input.find(target);
390  CHECK(target_it != old_to_new_input.end());
391  col_inputs_copy.push_back(target_it->second);
392  }
393  auto fields_copy = fields_;
394 
395  std::vector<std::unique_ptr<const RexScalar>> target_exprs_copy;
396  for (auto& expr : target_exprs_) {
397  target_exprs_copy.push_back(copier.visit(expr.get()));
398  }
399 
400  return std::make_shared<RelTableFunction>(function_name_,
401  inputs_[0],
402  fields_copy,
403  col_inputs_copy,
404  table_func_inputs_copy,
405  target_exprs_copy);
406 }
407 
408 namespace std {
409 template <>
410 struct hash<std::pair<const RelAlgNode*, int>> {
411  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
412  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
413  return static_cast<int64_t>(*ptr_val) ^ input_col.second;
414  }
415 };
416 } // namespace std
417 
418 namespace {
419 
420 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
421  const size_t which_col) {
422  std::set<std::pair<const RelAlgNode*, int>> work_set;
423  auto walker = node;
424  auto curr_col = which_col;
425  while (true) {
426  work_set.insert(std::make_pair(walker, curr_col));
427  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
428  break;
429  }
430  CHECK_EQ(size_t(1), walker->inputCount());
431  auto only_source = walker->getInput(0);
432  if (auto project = dynamic_cast<const RelProject*>(walker)) {
433  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
434  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
435  if (join_source) {
436  CHECK_EQ(size_t(2), join_source->inputCount());
437  auto lhs = join_source->getInput(0);
438  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
439  join_source->getInput(1) == input->getSourceNode());
440  } else {
441  CHECK_EQ(input->getSourceNode(), only_source);
442  }
443  curr_col = input->getIndex();
444  } else {
445  break;
446  }
447  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
448  if (curr_col >= aggregate->getGroupByCount()) {
449  break;
450  }
451  }
452  walker = only_source;
453  }
454  return work_set;
455 }
456 
457 } // namespace
458 
459 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
460  if (collation_.size() != that.collation_.size()) {
461  return false;
462  }
463 
464  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
465  auto this_sort_key = collation_[i];
466  auto that_sort_key = that.collation_[i];
467  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
468  return false;
469  }
470  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
471  return false;
472  }
473  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
474  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
475  std::vector<std::pair<const RelAlgNode*, int>> intersect;
476  std::set_intersection(this_equiv_keys.begin(),
477  this_equiv_keys.end(),
478  that_equiv_keys.begin(),
479  that_equiv_keys.end(),
480  std::back_inserter(intersect));
481  if (intersect.empty()) {
482  return false;
483  }
484  }
485  return true;
486 }
487 
488 namespace {
489 
490 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
491  const auto& id = field(ra_node, "id");
492  return std::stoi(json_str(id));
493 }
494 
495 // The parse_* functions below de-serialize expressions as they come from Calcite.
496 // RelAlgDagBuilder will take care of making the representation easy to
497 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
498 
499 std::unique_ptr<RexAbstractInput> parse_abstract_input(
500  const rapidjson::Value& expr) noexcept {
501  const auto& input = field(expr, "input");
502  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
503 }
504 
505 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
506  CHECK(expr.IsObject());
507  const auto& literal = field(expr, "literal");
508  const auto type = to_sql_type(json_str(field(expr, "type")));
509  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
510  const auto scale = json_i64(field(expr, "scale"));
511  const auto precision = json_i64(field(expr, "precision"));
512  const auto type_scale = json_i64(field(expr, "type_scale"));
513  const auto type_precision = json_i64(field(expr, "type_precision"));
514  switch (type) {
515  case kDECIMAL:
516  case kINTERVAL_DAY_TIME:
518  case kTIME:
519  case kTIMESTAMP:
520  case kDATE:
521  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
522  type,
523  target_type,
524  scale,
525  precision,
526  type_scale,
527  type_precision));
528  case kDOUBLE: {
529  if (literal.IsDouble()) {
530  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
531  type,
532  target_type,
533  scale,
534  precision,
535  type_scale,
536  type_precision));
537  }
538  CHECK(literal.IsInt64());
539  return std::unique_ptr<RexLiteral>(
540  new RexLiteral(static_cast<double>(json_i64(literal)),
541  type,
542  target_type,
543  scale,
544  precision,
545  type_scale,
546  type_precision));
547  }
548  case kTEXT:
549  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
550  type,
551  target_type,
552  scale,
553  precision,
554  type_scale,
555  type_precision));
556  case kBOOLEAN:
557  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
558  type,
559  target_type,
560  scale,
561  precision,
562  type_scale,
563  type_precision));
564  case kNULLT:
565  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
566  default:
567  CHECK(false);
568  }
569  CHECK(false);
570  return nullptr;
571 }
572 
573 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
574  const Catalog_Namespace::Catalog& cat,
575  RelAlgDagBuilder& root_dag_builder);
576 
577 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
578  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2);
579  const auto type = to_sql_type(json_str(field(type_obj, "type")));
580  const auto nullable = json_bool(field(type_obj, "nullable"));
581  const auto precision_it = type_obj.FindMember("precision");
582  const int precision =
583  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
584  const auto scale_it = type_obj.FindMember("scale");
585  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
586  SQLTypeInfo ti(type, !nullable);
587  ti.set_precision(precision);
588  ti.set_scale(scale);
589  return ti;
590 }
591 
592 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
593  const rapidjson::Value& arr,
594  const Catalog_Namespace::Catalog& cat,
595  RelAlgDagBuilder& root_dag_builder) {
596  std::vector<std::unique_ptr<const RexScalar>> exprs;
597  for (auto it = arr.Begin(); it != arr.End(); ++it) {
598  exprs.emplace_back(parse_scalar_expr(*it, cat, root_dag_builder));
599  }
600  return exprs;
601 }
602 
604  if (name == "ROW_NUMBER") {
606  }
607  if (name == "RANK") {
609  }
610  if (name == "DENSE_RANK") {
612  }
613  if (name == "PERCENT_RANK") {
615  }
616  if (name == "CUME_DIST") {
618  }
619  if (name == "NTILE") {
621  }
622  if (name == "LAG") {
624  }
625  if (name == "LEAD") {
627  }
628  if (name == "FIRST_VALUE") {
630  }
631  if (name == "LAST_VALUE") {
633  }
634  if (name == "AVG") {
636  }
637  if (name == "MIN") {
639  }
640  if (name == "MAX") {
642  }
643  if (name == "SUM") {
645  }
646  if (name == "COUNT") {
648  }
649  if (name == "$SUM0") {
651  }
652  throw std::runtime_error("Unsupported window function: " + name);
653 }
654 
655 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
656  const rapidjson::Value& arr,
657  const Catalog_Namespace::Catalog& cat,
658  RelAlgDagBuilder& root_dag_builder) {
659  std::vector<std::unique_ptr<const RexScalar>> exprs;
660  for (auto it = arr.Begin(); it != arr.End(); ++it) {
661  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), cat, root_dag_builder));
662  }
663  return exprs;
664 }
665 
666 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
667  return json_str(field(collation, "direction")) == std::string("DESCENDING")
670 }
671 
672 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
673  return json_str(field(collation, "nulls")) == std::string("FIRST")
676 }
677 
678 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
679  const Catalog_Namespace::Catalog& cat,
680  RelAlgDagBuilder& root_dag_builder) {
681  std::vector<SortField> collation;
682  size_t field_idx = 0;
683  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
684  const auto sort_dir = parse_sort_direction(*it);
685  const auto null_pos = parse_nulls_position(*it);
686  collation.emplace_back(field_idx, sort_dir, null_pos);
687  }
688  return collation;
689 }
690 
692  const rapidjson::Value& window_bound_obj,
693  const Catalog_Namespace::Catalog& cat,
694  RelAlgDagBuilder& root_dag_builder) {
695  CHECK(window_bound_obj.IsObject());
697  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
698  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
699  window_bound.following = json_bool(field(window_bound_obj, "following"));
700  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
701  const auto& offset_field = field(window_bound_obj, "offset");
702  if (offset_field.IsObject()) {
703  window_bound.offset = parse_scalar_expr(offset_field, cat, root_dag_builder);
704  } else {
705  CHECK(offset_field.IsNull());
706  }
707  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
708  return window_bound;
709 }
710 
711 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
712  const Catalog_Namespace::Catalog& cat,
713  RelAlgDagBuilder& root_dag_builder) {
714  const auto& operands = field(expr, "operands");
715  CHECK(operands.IsArray());
716  CHECK_GE(operands.Size(), unsigned(0));
717  const auto& subquery_ast = field(expr, "subquery");
718 
719  RelAlgDagBuilder subquery_dag(root_dag_builder, subquery_ast, cat, nullptr);
720  auto subquery = std::make_shared<RexSubQuery>(subquery_dag.getRootNodeShPtr());
721  root_dag_builder.registerSubquery(subquery);
722  return subquery->deepCopy();
723 }
724 
725 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
726  const Catalog_Namespace::Catalog& cat,
727  RelAlgDagBuilder& root_dag_builder) {
728  const auto op_name = json_str(field(expr, "op"));
729  const bool is_quantifier =
730  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
731  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
732  const auto& operators_json_arr = field(expr, "operands");
733  CHECK(operators_json_arr.IsArray());
734  auto operands = parse_expr_array(operators_json_arr, cat, root_dag_builder);
735  const auto type_it = expr.FindMember("type");
736  CHECK(type_it != expr.MemberEnd());
737  auto ti = parse_type(type_it->value);
738  if (op == kIN && expr.HasMember("subquery")) {
739  auto subquery = parse_subquery(expr, cat, root_dag_builder);
740  operands.emplace_back(std::move(subquery));
741  }
742  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
743  const auto& partition_keys_arr = field(expr, "partition_keys");
744  auto partition_keys = parse_expr_array(partition_keys_arr, cat, root_dag_builder);
745  const auto& order_keys_arr = field(expr, "order_keys");
746  auto order_keys = parse_window_order_exprs(order_keys_arr, cat, root_dag_builder);
747  const auto collation =
748  parse_window_order_collation(order_keys_arr, cat, root_dag_builder);
749  const auto kind = parse_window_function_kind(op_name);
750  const auto lower_bound =
751  parse_window_bound(field(expr, "lower_bound"), cat, root_dag_builder);
752  const auto upper_bound =
753  parse_window_bound(field(expr, "upper_bound"), cat, root_dag_builder);
754  bool is_rows = json_bool(field(expr, "is_rows"));
755  ti.set_notnull(false);
756  return std::make_unique<RexWindowFunctionOperator>(kind,
757  operands,
758  partition_keys,
759  order_keys,
760  collation,
761  lower_bound,
762  upper_bound,
763  is_rows,
764  ti);
765  }
766  return std::unique_ptr<RexOperator>(op == kFUNCTION
767  ? new RexFunctionOperator(op_name, operands, ti)
768  : new RexOperator(op, operands, ti));
769 }
770 
771 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr,
772  const Catalog_Namespace::Catalog& cat,
773  RelAlgDagBuilder& root_dag_builder) {
774  const auto& operands = field(expr, "operands");
775  CHECK(operands.IsArray());
776  CHECK_GE(operands.Size(), unsigned(2));
777  std::unique_ptr<const RexScalar> else_expr;
778  std::vector<
779  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
780  expr_pair_list;
781  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
782  auto when_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
783  if (operands_it == operands.End()) {
784  else_expr = std::move(when_expr);
785  break;
786  }
787  auto then_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
788  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
789  }
790  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
791 }
792 
793 std::vector<std::string> strings_from_json_array(
794  const rapidjson::Value& json_str_arr) noexcept {
795  CHECK(json_str_arr.IsArray());
796  std::vector<std::string> fields;
797  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
798  ++json_str_arr_it) {
799  CHECK(json_str_arr_it->IsString());
800  fields.emplace_back(json_str_arr_it->GetString());
801  }
802  return fields;
803 }
804 
805 std::vector<size_t> indices_from_json_array(
806  const rapidjson::Value& json_idx_arr) noexcept {
807  CHECK(json_idx_arr.IsArray());
808  std::vector<size_t> indices;
809  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
810  ++json_idx_arr_it) {
811  CHECK(json_idx_arr_it->IsInt());
812  CHECK_GE(json_idx_arr_it->GetInt(), 0);
813  indices.emplace_back(json_idx_arr_it->GetInt());
814  }
815  return indices;
816 }
817 
818 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
819  rapidjson::StringBuffer buffer;
820  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
821  node.Accept(writer);
822  return buffer.GetString();
823 }
824 
825 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
826  const auto agg = to_agg_kind(json_str(field(expr, "agg")));
827  const auto distinct = json_bool(field(expr, "distinct"));
828  const auto agg_ti = parse_type(field(expr, "type"));
829  const auto operands = indices_from_json_array(field(expr, "operands"));
830  if (operands.size() > 1 && (operands.size() != 2 || agg != kAPPROX_COUNT_DISTINCT)) {
831  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
832  }
833  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
834 }
835 
836 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
837  const Catalog_Namespace::Catalog& cat,
838  RelAlgDagBuilder& root_dag_builder) {
839  CHECK(expr.IsObject());
840  if (expr.IsObject() && expr.HasMember("input")) {
841  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
842  }
843  if (expr.IsObject() && expr.HasMember("literal")) {
844  return std::unique_ptr<const RexScalar>(parse_literal(expr));
845  }
846  if (expr.IsObject() && expr.HasMember("op")) {
847  const auto op_str = json_str(field(expr, "op"));
848  if (op_str == std::string("CASE")) {
849  return std::unique_ptr<const RexScalar>(parse_case(expr, cat, root_dag_builder));
850  }
851  if (op_str == std::string("$SCALAR_QUERY")) {
852  return std::unique_ptr<const RexScalar>(
853  parse_subquery(expr, cat, root_dag_builder));
854  }
855  return std::unique_ptr<const RexScalar>(parse_operator(expr, cat, root_dag_builder));
856  }
857  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
858  " not supported");
859 }
860 
861 JoinType to_join_type(const std::string& join_type_name) {
862  if (join_type_name == "inner") {
863  return JoinType::INNER;
864  }
865  if (join_type_name == "left") {
866  return JoinType::LEFT;
867  }
868  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
869 }
870 
871 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
872 
873 std::unique_ptr<const RexOperator> disambiguate_operator(
874  const RexOperator* rex_operator,
875  const RANodeOutput& ra_output) noexcept {
876  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
877  for (size_t i = 0; i < rex_operator->size(); ++i) {
878  auto operand = rex_operator->getOperand(i);
879  if (dynamic_cast<const RexSubQuery*>(operand)) {
880  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
881  } else {
882  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
883  }
884  }
885  const auto rex_window_function_operator =
886  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
887  if (rex_window_function_operator) {
888  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
889  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
890  for (const auto& partition_key : partition_keys) {
891  disambiguated_partition_keys.emplace_back(
892  disambiguate_rex(partition_key.get(), ra_output));
893  }
894  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
895  const auto& order_keys = rex_window_function_operator->getOrderKeys();
896  for (const auto& order_key : order_keys) {
897  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
898  }
899  return rex_window_function_operator->disambiguatedOperands(
900  disambiguated_operands,
901  disambiguated_partition_keys,
902  disambiguated_order_keys,
903  rex_window_function_operator->getCollation());
904  }
905  return rex_operator->getDisambiguated(disambiguated_operands);
906 }
907 
908 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
909  const RANodeOutput& ra_output) {
910  std::vector<
911  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
912  disambiguated_expr_pair_list;
913  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
914  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
915  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
916  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
917  std::move(disambiguated_then));
918  }
919  std::unique_ptr<const RexScalar> disambiguated_else{
920  disambiguate_rex(rex_case->getElse(), ra_output)};
921  return std::unique_ptr<const RexCase>(
922  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
923 }
924 
925 // The inputs used by scalar expressions are given as indices in the serialized
926 // representation of the query. This is hard to navigate; make the relationship
927 // explicit by creating RexInput expressions which hold a pointer to the source
928 // relational algebra node and the index relative to the output of that node.
929 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
930  const RANodeOutput& ra_output) {
931  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
932  if (rex_abstract_input) {
933  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
934  return std::unique_ptr<const RexInput>(
935  new RexInput(ra_output[rex_abstract_input->getIndex()]));
936  }
937  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
938  if (rex_operator) {
939  return disambiguate_operator(rex_operator, ra_output);
940  }
941  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
942  if (rex_case) {
943  return disambiguate_case(rex_case, ra_output);
944  }
945  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar);
946  CHECK(rex_literal);
947  return std::unique_ptr<const RexLiteral>(new RexLiteral(*rex_literal));
948 }
949 
950 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
951  CHECK_EQ(size_t(1), project_node->inputCount());
952  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
953  for (size_t i = 0; i < project_node->size(); ++i) {
954  const auto projected_expr = project_node->getProjectAt(i);
955  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
956  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
957  } else {
958  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
959  }
960  }
961  project_node->setExpressions(disambiguated_exprs);
962 }
963 
965  const RANodeOutput& input) noexcept {
966  CHECK_EQ(size_t(1), table_func_node->inputCount());
967  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
968  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
969  const auto target_expr = table_func_node->getTableFuncInputAt(i);
970  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
971  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
972  } else {
973  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
974  }
975  }
976  table_func_node->setTableFuncInputs(disambiguated_exprs);
977 }
978 
979 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
980  for (auto ra_node : nodes) {
981  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
982  if (filter_node) {
983  CHECK_EQ(size_t(1), filter_node->inputCount());
984  auto disambiguated_condition = disambiguate_rex(
985  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
986  filter_node->setCondition(disambiguated_condition);
987  continue;
988  }
989  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
990  if (join_node) {
991  CHECK_EQ(size_t(2), join_node->inputCount());
992  auto disambiguated_condition =
993  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
994  join_node->setCondition(disambiguated_condition);
995  continue;
996  }
997  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
998  if (project_node) {
999  bind_project_to_input(project_node.get(),
1000  get_node_output(project_node->getInput(0)));
1001  continue;
1002  }
1003  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1004  if (table_func_node) {
1005  bind_table_func_to_input(table_func_node.get(),
1006  get_node_output(table_func_node->getInput(0)));
1007  }
1008  }
1009 }
1010 
1011 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1012  for (auto node : nodes) {
1013  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1014  if (!agg_node || agg_node->getAggExprsCount()) {
1015  continue;
1016  }
1017  CHECK_EQ(size_t(1), node->inputCount());
1018  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1019  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1020  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1021  agg_node->markAsNop();
1022  }
1023  }
1024 }
1025 
1026 namespace {
1027 
1028 std::vector<const Rex*> reproject_targets(
1029  const RelProject* simple_project,
1030  const std::vector<const Rex*>& target_exprs) noexcept {
1031  std::vector<const Rex*> result;
1032  for (size_t i = 0; i < simple_project->size(); ++i) {
1033  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
1034  CHECK(input_rex);
1035  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1036  result.push_back(target_exprs[input_rex->getIndex()]);
1037  }
1038  return result;
1039 }
1040 
1047  public:
1049  const RelAlgNode* node_to_keep,
1050  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1051  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
1052 
1053  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
1054  RetType visitInput(const RexInput* input) const final {
1055  if (input->getSourceNode() == node_to_keep_) {
1056  const auto index = input->getIndex();
1057  CHECK_LT(index, scalar_sources_.size());
1058  return visit(scalar_sources_[index].get());
1059  } else {
1060  return input->deepCopy();
1061  }
1062  }
1063 
1064  private:
1066  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
1067 };
1068 
1069 } // namespace
1070 
1071 void create_compound(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1072  const std::vector<size_t>& pattern) noexcept {
1073  CHECK_GE(pattern.size(), size_t(2));
1074  CHECK_LE(pattern.size(), size_t(4));
1075 
1076  std::unique_ptr<const RexScalar> filter_rex;
1077  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1078  size_t groupby_count{0};
1079  std::vector<std::string> fields;
1080  std::vector<const RexAgg*> agg_exprs;
1081  std::vector<const Rex*> target_exprs;
1082  bool first_project{true};
1083  bool is_agg{false};
1084  RelAlgNode* last_node{nullptr};
1085 
1086  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1087 
1088  for (const auto node_idx : pattern) {
1089  const auto ra_node = nodes[node_idx];
1090  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1091  if (ra_filter) {
1092  CHECK(!filter_rex);
1093  filter_rex.reset(ra_filter->getAndReleaseCondition());
1094  CHECK(filter_rex);
1095  last_node = ra_node.get();
1096  continue;
1097  }
1098  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1099  if (ra_project) {
1100  fields = ra_project->getFields();
1101  manipulation_target = ra_project;
1102 
1103  if (first_project) {
1104  CHECK_EQ(size_t(1), ra_project->inputCount());
1105  // Rebind the input of the project to the input of the filter itself
1106  // since we know that we'll evaluate the filter on the fly, with no
1107  // intermediate buffer.
1108  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1109  if (filter_input) {
1110  CHECK_EQ(size_t(1), filter_input->inputCount());
1111  bind_project_to_input(ra_project.get(),
1112  get_node_output(filter_input->getInput(0)));
1113  }
1114  scalar_sources = ra_project->getExpressionsAndRelease();
1115  for (const auto& scalar_expr : scalar_sources) {
1116  target_exprs.push_back(scalar_expr.get());
1117  }
1118  first_project = false;
1119  } else {
1120  if (ra_project->isSimple()) {
1121  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1122  } else {
1123  // TODO(adb): This is essentially a more general case of simple project, we
1124  // could likely merge the two
1125  std::vector<const Rex*> result;
1126  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1127  for (size_t i = 0; i < ra_project->size(); ++i) {
1128  const auto rex = ra_project->getProjectAt(i);
1129  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1130  const auto index = rex_input->getIndex();
1131  CHECK_LT(index, target_exprs.size());
1132  result.push_back(target_exprs[index]);
1133  } else {
1134  scalar_sources.push_back(visitor.visit(rex));
1135  result.push_back(scalar_sources.back().get());
1136  }
1137  }
1138  target_exprs = result;
1139  }
1140  }
1141  last_node = ra_node.get();
1142  continue;
1143  }
1144  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1145  if (ra_aggregate) {
1146  is_agg = true;
1147  fields = ra_aggregate->getFields();
1148  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1149  groupby_count = ra_aggregate->getGroupByCount();
1150  decltype(target_exprs){}.swap(target_exprs);
1151  CHECK_LE(groupby_count, scalar_sources.size());
1152  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1153  const auto rex_ref = new RexRef(group_idx + 1);
1154  target_exprs.push_back(rex_ref);
1155  scalar_sources.emplace_back(rex_ref);
1156  }
1157  for (const auto rex_agg : agg_exprs) {
1158  target_exprs.push_back(rex_agg);
1159  }
1160  last_node = ra_node.get();
1161  continue;
1162  }
1163  }
1164 
1165  auto compound_node =
1166  std::make_shared<RelCompound>(filter_rex,
1167  target_exprs,
1168  groupby_count,
1169  agg_exprs,
1170  fields,
1171  scalar_sources,
1172  is_agg,
1173  manipulation_target->isUpdateViaSelect(),
1174  manipulation_target->isDeleteViaSelect(),
1175  manipulation_target->isVarlenUpdateRequired(),
1176  manipulation_target->getModifiedTableDescriptor(),
1177  manipulation_target->getTargetColumns());
1178  auto old_node = nodes[pattern.back()];
1179  nodes[pattern.back()] = compound_node;
1180  auto first_node = nodes[pattern.front()];
1181  CHECK_EQ(size_t(1), first_node->inputCount());
1182  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1183  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1184  nodes[pattern[i]].reset();
1185  }
1186  for (auto node : nodes) {
1187  if (!node) {
1188  continue;
1189  }
1190  node->replaceInput(old_node, compound_node);
1191  }
1192 }
1193 
1194 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1195  using ElementType = std::shared_ptr<RelAlgNode>;
1196  using Super = std::vector<ElementType>::const_iterator;
1197  using Container = std::vector<ElementType>;
1198 
1199  public:
1200  enum class AdvancingMode { DUChain, InOrder };
1201 
1202  explicit RANodeIterator(const Container& nodes)
1203  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1204  size_t non_zero_count = 0;
1205  for (const auto& node : nodes) {
1206  if (node) {
1207  ++non_zero_count;
1208  }
1209  }
1211  }()) {}
1212 
1213  explicit operator size_t() {
1214  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1215  }
1216 
1217  RANodeIterator operator++() = delete;
1218 
1219  void advance(AdvancingMode mode) {
1220  Super& super = *this;
1221  switch (mode) {
1222  case AdvancingMode::DUChain: {
1223  size_t use_count = 0;
1224  Super only_use = owner_.end();
1225  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1226  if (!*nodeIt) {
1227  continue;
1228  }
1229  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1230  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1231  ++use_count;
1232  if (1 == use_count) {
1233  only_use = nodeIt;
1234  } else {
1235  super = owner_.end();
1236  return;
1237  }
1238  }
1239  }
1240  }
1241  super = only_use;
1242  break;
1243  }
1244  case AdvancingMode::InOrder:
1245  for (size_t i = 0; i != owner_.size(); ++i) {
1246  if (!visited_.count(i)) {
1247  super = owner_.begin();
1248  std::advance(super, i);
1249  return;
1250  }
1251  }
1252  super = owner_.end();
1253  break;
1254  default:
1255  CHECK(false);
1256  }
1257  }
1258 
1259  bool allVisited() { return visited_.size() == nodeCount_; }
1260 
1262  visited_.insert(size_t(*this));
1263  Super& super = *this;
1264  return *super;
1265  }
1266 
1267  const ElementType* operator->() { return &(operator*()); }
1268 
1269  private:
1271  const size_t nodeCount_;
1272  std::unordered_set<size_t> visited_;
1273 };
1274 
1275 namespace {
1276 
1277 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1278  const size_t index,
1279  const bool first_rex_is_input) {
1280  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1281  if (index == 0 && agg_node->getGroupByCount() > 0) {
1282  return true;
1283  } else {
1284  // Is an aggregated target, only allow the project to be elided if the aggregate
1285  // target is simply passed through (i.e. if the top level expression attached to
1286  // the project node is a RexInput expression)
1287  return first_rex_is_input;
1288  }
1289  }
1290  return first_rex_is_input;
1291 }
1292 
1299  public:
1300  bool visitInput(const RexInput* input) const final {
1301  // The top level expression node is checked before we apply the visitor. If we get
1302  // here, this input rex is a child of another rex node, and we handle the can be
1303  // coalesced check slightly differently
1304  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1305  }
1306 
1307  bool visitLiteral(const RexLiteral*) const final { return false; }
1308 
1309  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1310 
1311  bool visitRef(const RexRef*) const final { return false; }
1312 
1313  protected:
1314  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1315  return aggregate && next_result;
1316  }
1317 
1318  bool defaultResult() const final { return true; }
1319 };
1320 
1321 // Detect the window function SUM pattern: CASE WHEN COUNT() > 0 THEN SUM ELSE 0
1323  const auto case_operator = dynamic_cast<const RexCase*>(rex);
1324  if (case_operator && case_operator->branchCount() == 1) {
1325  const auto then_window =
1326  dynamic_cast<const RexWindowFunctionOperator*>(case_operator->getThen(0));
1327  if (then_window && then_window->getKind() == SqlWindowFunctionKind::SUM_INTERNAL) {
1328  return true;
1329  }
1330  }
1331  return false;
1332 }
1333 
1334 // Detect both window function operators and window function operators embedded in case
1335 // statements (for null handling)
1337  if (dynamic_cast<const RexWindowFunctionOperator*>(rex)) {
1338  return true;
1339  }
1340 
1341  // unwrap from casts, if they exist
1342  const auto rex_cast = dynamic_cast<const RexOperator*>(rex);
1343  if (rex_cast && rex_cast->getOperator() == kCAST) {
1344  CHECK_EQ(rex_cast->size(), size_t(1));
1345  return is_window_function_operator(rex_cast->getOperand(0));
1346  }
1347 
1348  if (is_window_function_sum(rex)) {
1349  return true;
1350  }
1351  // Check for Window Function AVG:
1352  // (CASE WHEN count > 0 THEN sum ELSE 0) / COUNT
1353  const RexOperator* divide_operator = dynamic_cast<const RexOperator*>(rex);
1354  if (divide_operator && divide_operator->getOperator() == kDIVIDE) {
1355  CHECK_EQ(divide_operator->size(), size_t(2));
1356  const auto case_operator =
1357  dynamic_cast<const RexCase*>(divide_operator->getOperand(0));
1358  const auto second_window =
1359  dynamic_cast<const RexWindowFunctionOperator*>(divide_operator->getOperand(1));
1360  if (case_operator && second_window &&
1361  second_window->getKind() == SqlWindowFunctionKind::COUNT) {
1362  if (is_window_function_sum(case_operator)) {
1363  return true;
1364  }
1365  }
1366  }
1367  return false;
1368 }
1369 
1370 inline bool project_has_window_function_input(const RelProject* ra_project) {
1371  for (size_t i = 0; i < ra_project->size(); i++) {
1372  if (is_window_function_operator(ra_project->getProjectAt(i))) {
1373  return true;
1374  }
1375  }
1376  return false;
1377 }
1378 
1379 } // namespace
1380 
1381 void coalesce_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1382  const std::vector<const RelAlgNode*>& left_deep_joins) {
1383  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
1384  std::vector<size_t> crt_pattern;
1385  CoalesceState crt_state{CoalesceState::Initial};
1386 
1387  auto reset_state = [&crt_pattern, &crt_state]() {
1388  crt_state = CoalesceState::Initial;
1389  decltype(crt_pattern)().swap(crt_pattern);
1390  };
1391 
1392  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
1393  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
1394  switch (crt_state) {
1395  case CoalesceState::Initial: {
1396  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
1397  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
1398  left_deep_joins.end()) {
1399  crt_pattern.push_back(size_t(nodeIt));
1400  crt_state = CoalesceState::Filter;
1401  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1402  } else if (std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1403  crt_pattern.push_back(size_t(nodeIt));
1404  crt_state = CoalesceState::FirstProject;
1405  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1406  } else {
1407  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1408  }
1409  break;
1410  }
1411  case CoalesceState::Filter: {
1412  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1413  if (project_has_window_function_input(project_node.get())) {
1414  reset_state();
1415  break;
1416  }
1417  crt_pattern.push_back(size_t(nodeIt));
1418  crt_state = CoalesceState::FirstProject;
1419  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1420  } else {
1421  reset_state();
1422  }
1423  break;
1424  }
1425  case CoalesceState::FirstProject: {
1426  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
1427  crt_pattern.push_back(size_t(nodeIt));
1428  crt_state = CoalesceState::Aggregate;
1429  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1430  } else {
1431  if (crt_pattern.size() >= 2) {
1432  create_compound(nodes, crt_pattern);
1433  }
1434  reset_state();
1435  }
1436  break;
1437  }
1438  case CoalesceState::Aggregate: {
1439  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1440  // TODO(adb): overloading the simple project terminology again here
1441  bool is_simple_project{true};
1442  for (size_t i = 0; i < project_node->size(); i++) {
1443  const auto scalar_rex = project_node->getProjectAt(i);
1444  // If the top level scalar rex is an input node, we can bypass the visitor
1445  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
1447  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
1448  is_simple_project = false;
1449  break;
1450  }
1451  continue;
1452  }
1453  CoalesceSecondaryProjectVisitor visitor;
1454  if (!visitor.visit(project_node->getProjectAt(i))) {
1455  is_simple_project = false;
1456  break;
1457  }
1458  }
1459  if (is_simple_project) {
1460  crt_pattern.push_back(size_t(nodeIt));
1461  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1462  }
1463  }
1464  CHECK_GE(crt_pattern.size(), size_t(2));
1465  create_compound(nodes, crt_pattern);
1466  reset_state();
1467  break;
1468  }
1469  default:
1470  CHECK(false);
1471  }
1472  }
1473  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
1474  if (crt_pattern.size() >= 2) {
1475  create_compound(nodes, crt_pattern);
1476  }
1477  CHECK(!crt_pattern.empty());
1478  }
1479 }
1480 
1488 class WindowFunctionDetectionVisitor : public RexVisitor<const RexScalar*> {
1489  protected:
1490  // Detect embedded window function expressions in operators
1491  const RexScalar* visitOperator(const RexOperator* rex_operator) const final {
1492  if (is_window_function_operator(rex_operator)) {
1493  return rex_operator;
1494  }
1495 
1496  const size_t operand_count = rex_operator->size();
1497  for (size_t i = 0; i < operand_count; ++i) {
1498  const auto operand = rex_operator->getOperand(i);
1499  if (is_window_function_operator(operand)) {
1500  // Handle both RexWindowFunctionOperators and window functions built up from
1501  // multiple RexScalar objects (e.g. AVG)
1502  return operand;
1503  }
1504  const auto operandResult = visit(operand);
1505  if (operandResult) {
1506  return operandResult;
1507  }
1508  }
1509 
1510  return defaultResult();
1511  }
1512 
1513  // Detect embedded window function expressions in case statements. Note that this may
1514  // manifest as a nested case statement inside a top level case statement, as some window
1515  // functions (sum, avg) are represented as a case statement. Use the
1516  // is_window_function_operator helper to detect complete window function expressions.
1517  const RexScalar* visitCase(const RexCase* rex_case) const final {
1518  if (is_window_function_operator(rex_case)) {
1519  return rex_case;
1520  }
1521 
1522  auto result = defaultResult();
1523  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1524  const auto when = rex_case->getWhen(i);
1525  result = is_window_function_operator(when) ? when : visit(when);
1526  if (result) {
1527  return result;
1528  }
1529  const auto then = rex_case->getThen(i);
1530  result = is_window_function_operator(then) ? then : visit(then);
1531  if (result) {
1532  return result;
1533  }
1534  }
1535  if (rex_case->getElse()) {
1536  auto else_expr = rex_case->getElse();
1537  result = is_window_function_operator(else_expr) ? else_expr : visit(else_expr);
1538  }
1539  return result;
1540  }
1541 
1542  const RexScalar* aggregateResult(const RexScalar* const& aggregate,
1543  const RexScalar* const& next_result) const final {
1544  // all methods calling aggregate result should be overriden
1545  UNREACHABLE();
1546  return nullptr;
1547  }
1548 
1549  const RexScalar* defaultResult() const final { return nullptr; }
1550 };
1551 
1561  public:
1562  RexWindowFuncReplacementVisitor(std::unique_ptr<const RexScalar> replacement_rex)
1563  : replacement_rex_(std::move(replacement_rex)) {}
1564 
1565  ~RexWindowFuncReplacementVisitor() { CHECK(replacement_rex_ == nullptr); }
1566 
1567  protected:
1568  RetType visitOperator(const RexOperator* rex_operator) const final {
1569  if (should_replace_operand(rex_operator)) {
1570  return std::move(replacement_rex_);
1571  }
1572 
1573  const auto rex_window_function_operator =
1574  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1575  if (rex_window_function_operator) {
1576  // Deep copy the embedded window function operator
1577  return visitWindowFunctionOperator(rex_window_function_operator);
1578  }
1579 
1580  const size_t operand_count = rex_operator->size();
1581  std::vector<RetType> new_opnds;
1582  for (size_t i = 0; i < operand_count; ++i) {
1583  const auto operand = rex_operator->getOperand(i);
1584  if (should_replace_operand(operand)) {
1585  new_opnds.push_back(std::move(replacement_rex_));
1586  } else {
1587  new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
1588  }
1589  }
1590  return rex_operator->getDisambiguated(new_opnds);
1591  }
1592 
1593  RetType visitCase(const RexCase* rex_case) const final {
1594  if (should_replace_operand(rex_case)) {
1595  return std::move(replacement_rex_);
1596  }
1597 
1598  std::vector<std::pair<RetType, RetType>> new_pair_list;
1599  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1600  auto when_operand = rex_case->getWhen(i);
1601  auto then_operand = rex_case->getThen(i);
1602  new_pair_list.emplace_back(
1603  should_replace_operand(when_operand) ? std::move(replacement_rex_)
1604  : visit(when_operand),
1605  should_replace_operand(then_operand) ? std::move(replacement_rex_)
1606  : visit(then_operand));
1607  }
1608  auto new_else = should_replace_operand(rex_case->getElse())
1609  ? std::move(replacement_rex_)
1610  : visit(rex_case->getElse());
1611  return std::make_unique<RexCase>(new_pair_list, new_else);
1612  }
1613 
1614  private:
1615  bool should_replace_operand(const RexScalar* rex) const {
1616  return replacement_rex_ && is_window_function_operator(rex);
1617  }
1618 
1619  mutable std::unique_ptr<const RexScalar> replacement_rex_;
1620 };
1621 
1632  public:
1633  RexInputBackpropagationVisitor(RelProject* node) : node_(node) { CHECK(node_); }
1634 
1635  protected:
1636  RetType visitInput(const RexInput* rex_input) const final {
1637  if (rex_input->getSourceNode() != node_) {
1638  const auto cur_index = rex_input->getIndex();
1639  auto cur_source_node = rex_input->getSourceNode();
1640  std::string field_name = "";
1641  if (auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
1642  field_name = cur_project_node->getFieldName(cur_index);
1643  }
1644  node_->appendInput(field_name, rex_input->deepCopy());
1645  return std::make_unique<RexInput>(node_, node_->size() - 1);
1646  } else {
1647  return rex_input->deepCopy();
1648  }
1649  }
1650 
1651  private:
1652  mutable RelProject* node_;
1653 };
1654 
1671  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
1672  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
1673 
1675  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
1676  const auto node = *node_itr;
1677  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
1678  if (!window_func_project_node) {
1679  continue;
1680  }
1681 
1682  // map scalar expression index in the project node to wiondow function ptr
1683  std::unordered_map<size_t, const RexScalar*> embedded_window_function_expressions;
1684 
1685  // Iterate the target exprs of the project node and check for window function
1686  // expressions. If an embedded expression exists, save it in the
1687  // embedded_window_function_expressions map and split the expression into a window
1688  // function expression and a parent expression in a subsequent project node
1689  for (size_t i = 0; i < window_func_project_node->size(); i++) {
1690  const auto scalar_rex = window_func_project_node->getProjectAt(i);
1691  if (is_window_function_operator(scalar_rex)) {
1692  // top level window function exprs are fine
1693  continue;
1694  }
1695 
1696  if (const auto window_func_rex = visitor.visit(scalar_rex)) {
1697  const auto ret = embedded_window_function_expressions.insert(
1698  std::make_pair(i, window_func_rex));
1699  CHECK(ret.second);
1700  }
1701  }
1702 
1703  if (!embedded_window_function_expressions.empty()) {
1704  std::vector<std::unique_ptr<const RexScalar>> new_scalar_exprs;
1705 
1706  auto window_func_scalar_exprs =
1707  window_func_project_node->getExpressionsAndRelease();
1708  for (size_t rex_idx = 0; rex_idx < window_func_scalar_exprs.size(); ++rex_idx) {
1709  const auto embedded_window_func_expr_pair =
1710  embedded_window_function_expressions.find(rex_idx);
1711  if (embedded_window_func_expr_pair ==
1712  embedded_window_function_expressions.end()) {
1713  new_scalar_exprs.emplace_back(
1714  std::make_unique<const RexInput>(window_func_project_node.get(), rex_idx));
1715  } else {
1716  const auto window_func_rex_idx = embedded_window_func_expr_pair->first;
1717  CHECK_LT(window_func_rex_idx, window_func_scalar_exprs.size());
1718 
1719  const auto& window_func_rex = embedded_window_func_expr_pair->second;
1720 
1721  RexDeepCopyVisitor copier;
1722  auto window_func_rex_copy = copier.visit(window_func_rex);
1723 
1724  auto window_func_parent_expr =
1725  window_func_scalar_exprs[window_func_rex_idx].get();
1726 
1727  // Replace window func rex with an input rex
1728  auto window_func_result_input = std::make_unique<const RexInput>(
1729  window_func_project_node.get(), window_func_rex_idx);
1730  RexWindowFuncReplacementVisitor replacer(std::move(window_func_result_input));
1731  auto new_parent_rex = replacer.visit(window_func_parent_expr);
1732 
1733  // Put the parent expr in the new scalar exprs
1734  new_scalar_exprs.emplace_back(std::move(new_parent_rex));
1735 
1736  // Put the window func expr in cur scalar exprs
1737  window_func_scalar_exprs[window_func_rex_idx] = std::move(window_func_rex_copy);
1738  }
1739  }
1740 
1741  CHECK_EQ(window_func_scalar_exprs.size(), new_scalar_exprs.size());
1742  window_func_project_node->setExpressions(window_func_scalar_exprs);
1743 
1744  // Ensure any inputs from the node containing the expression (the "new" node)
1745  // exist on the window function project node, e.g. if we had a binary operation
1746  // involving an aggregate value or column not included in the top level
1747  // projection list.
1748  RexInputBackpropagationVisitor input_visitor(window_func_project_node.get());
1749  for (size_t i = 0; i < new_scalar_exprs.size(); i++) {
1750  if (dynamic_cast<const RexInput*>(new_scalar_exprs[i].get())) {
1751  // ignore top level inputs, these were copied directly from the previous
1752  // node
1753  continue;
1754  }
1755  new_scalar_exprs[i] = input_visitor.visit(new_scalar_exprs[i].get());
1756  }
1757 
1758  // Build the new project node and insert it into the list after the project node
1759  // containing the window function
1760  auto new_project =
1761  std::make_shared<RelProject>(new_scalar_exprs,
1762  window_func_project_node->getFields(),
1763  window_func_project_node);
1764  node_list.insert(std::next(node_itr), new_project);
1765 
1766  // Rebind all the following inputs
1767  for (auto rebind_itr = std::next(node_itr, 2); rebind_itr != node_list.end();
1768  rebind_itr++) {
1769  (*rebind_itr)->replaceInput(window_func_project_node, new_project);
1770  }
1771  }
1772  }
1773  nodes.assign(node_list.begin(), node_list.end());
1774 }
1775 
1776 int64_t get_int_literal_field(const rapidjson::Value& obj,
1777  const char field[],
1778  const int64_t default_val) noexcept {
1779  const auto it = obj.FindMember(field);
1780  if (it == obj.MemberEnd()) {
1781  return default_val;
1782  }
1783  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
1784  CHECK_EQ(kDECIMAL, lit->getType());
1785  CHECK_EQ(unsigned(0), lit->getScale());
1786  CHECK_EQ(unsigned(0), lit->getTypeScale());
1787  return lit->getVal<int64_t>();
1788 }
1789 
1790 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
1791  const auto& inputs_json = field(node, "inputs");
1792  CHECK(inputs_json.IsArray() && !inputs_json.Size());
1793 }
1794 
1796  const rapidjson::Value& scan_ra) {
1797  const auto& table_json = field(scan_ra, "table");
1798  CHECK(table_json.IsArray());
1799  CHECK_EQ(unsigned(2), table_json.Size());
1800  const auto td = cat.getMetadataForTable(table_json[1].GetString());
1801  CHECK(td);
1802  return td;
1803 }
1804 
1805 std::vector<std::string> getFieldNamesFromScanNode(const rapidjson::Value& scan_ra) {
1806  const auto& fields_json = field(scan_ra, "fieldNames");
1807  return strings_from_json_array(fields_json);
1808 }
1809 
1810 } // namespace
1811 
1812 namespace details {
1813 
1815  public:
1817 
1818  std::vector<std::shared_ptr<RelAlgNode>> run(const rapidjson::Value& rels,
1819  RelAlgDagBuilder& root_dag_builder) {
1820  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
1821  const auto& crt_node = *rels_it;
1822  const auto id = node_id(crt_node);
1823  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
1824  CHECK(crt_node.IsObject());
1825  std::shared_ptr<RelAlgNode> ra_node = nullptr;
1826  const auto rel_op = json_str(field(crt_node, "relOp"));
1827  if (rel_op == std::string("EnumerableTableScan")) {
1828  ra_node = dispatchTableScan(crt_node);
1829  } else if (rel_op == std::string("LogicalProject")) {
1830  ra_node = dispatchProject(crt_node, root_dag_builder);
1831  } else if (rel_op == std::string("LogicalFilter")) {
1832  ra_node = dispatchFilter(crt_node, root_dag_builder);
1833  } else if (rel_op == std::string("LogicalAggregate")) {
1834  ra_node = dispatchAggregate(crt_node);
1835  } else if (rel_op == std::string("LogicalJoin")) {
1836  ra_node = dispatchJoin(crt_node, root_dag_builder);
1837  } else if (rel_op == std::string("LogicalSort")) {
1838  ra_node = dispatchSort(crt_node);
1839  } else if (rel_op == std::string("LogicalValues")) {
1840  ra_node = dispatchLogicalValues(crt_node);
1841  } else if (rel_op == std::string("LogicalTableModify")) {
1842  ra_node = dispatchModify(crt_node);
1843  } else if (rel_op == std::string("LogicalTableFunctionScan")) {
1844  ra_node = dispatchTableFunction(crt_node, root_dag_builder);
1845  } else {
1846  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
1847  }
1848  nodes_.push_back(ra_node);
1849  }
1850 
1851  return std::move(nodes_);
1852  }
1853 
1854  private:
1855  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
1856  check_empty_inputs_field(scan_ra);
1857  CHECK(scan_ra.IsObject());
1858  const auto td = getTableFromScanNode(cat_, scan_ra);
1859  const auto field_names = getFieldNamesFromScanNode(scan_ra);
1860  return std::make_shared<RelScan>(td, field_names);
1861  }
1862 
1863  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra,
1864  RelAlgDagBuilder& root_dag_builder) {
1865  const auto inputs = getRelAlgInputs(proj_ra);
1866  CHECK_EQ(size_t(1), inputs.size());
1867  const auto& exprs_json = field(proj_ra, "exprs");
1868  CHECK(exprs_json.IsArray());
1869  std::vector<std::unique_ptr<const RexScalar>> exprs;
1870  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
1871  ++exprs_json_it) {
1872  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
1873  }
1874  const auto& fields = field(proj_ra, "fields");
1875  return std::make_shared<RelProject>(
1876  exprs, strings_from_json_array(fields), inputs.front());
1877  }
1878 
1879  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra,
1880  RelAlgDagBuilder& root_dag_builder) {
1881  const auto inputs = getRelAlgInputs(filter_ra);
1882  CHECK_EQ(size_t(1), inputs.size());
1883  const auto id = node_id(filter_ra);
1884  CHECK(id);
1885  auto condition =
1886  parse_scalar_expr(field(filter_ra, "condition"), cat_, root_dag_builder);
1887  return std::make_shared<RelFilter>(condition, inputs.front());
1888  }
1889 
1890  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
1891  const auto inputs = getRelAlgInputs(agg_ra);
1892  CHECK_EQ(size_t(1), inputs.size());
1893  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
1894  const auto group = indices_from_json_array(field(agg_ra, "group"));
1895  for (size_t i = 0; i < group.size(); ++i) {
1896  CHECK_EQ(i, group[i]);
1897  }
1898  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
1899  throw QueryNotSupported("GROUP BY extensions not supported");
1900  }
1901  const auto& aggs_json_arr = field(agg_ra, "aggs");
1902  CHECK(aggs_json_arr.IsArray());
1903  std::vector<std::unique_ptr<const RexAgg>> aggs;
1904  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
1905  aggs_json_arr_it != aggs_json_arr.End();
1906  ++aggs_json_arr_it) {
1907  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
1908  }
1909  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
1910  }
1911 
1912  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra,
1913  RelAlgDagBuilder& root_dag_builder) {
1914  const auto inputs = getRelAlgInputs(join_ra);
1915  CHECK_EQ(size_t(2), inputs.size());
1916  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
1917  auto filter_rex =
1918  parse_scalar_expr(field(join_ra, "condition"), cat_, root_dag_builder);
1919  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
1920  }
1921 
1922  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
1923  const auto inputs = getRelAlgInputs(sort_ra);
1924  CHECK_EQ(size_t(1), inputs.size());
1925  std::vector<SortField> collation;
1926  const auto& collation_arr = field(sort_ra, "collation");
1927  CHECK(collation_arr.IsArray());
1928  for (auto collation_arr_it = collation_arr.Begin();
1929  collation_arr_it != collation_arr.End();
1930  ++collation_arr_it) {
1931  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
1932  const auto sort_dir = parse_sort_direction(*collation_arr_it);
1933  const auto null_pos = parse_nulls_position(*collation_arr_it);
1934  collation.emplace_back(field_idx, sort_dir, null_pos);
1935  }
1936  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
1937  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
1938  auto ret = std::make_shared<RelSort>(
1939  collation, limit > 0 ? limit : 0, offset, inputs.front());
1940  ret->setEmptyResult(limit == 0);
1941  return ret;
1942  }
1943 
1944  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
1945  const auto inputs = getRelAlgInputs(logical_modify_ra);
1946  CHECK_EQ(size_t(1), inputs.size());
1947 
1948  const auto table_descriptor = getTableFromScanNode(cat_, logical_modify_ra);
1949  if (table_descriptor->isView) {
1950  throw std::runtime_error("UPDATE of a view is unsupported.");
1951  }
1952 
1953  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
1954  std::string op = json_str(field(logical_modify_ra, "operation"));
1955  RelModify::TargetColumnList target_column_list;
1956 
1957  if (op == "UPDATE") {
1958  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
1959  CHECK(update_columns.IsArray());
1960 
1961  for (auto column_arr_it = update_columns.Begin();
1962  column_arr_it != update_columns.End();
1963  ++column_arr_it) {
1964  target_column_list.push_back(column_arr_it->GetString());
1965  }
1966  }
1967 
1968  auto modify_node = std::make_shared<RelModify>(
1969  cat_, table_descriptor, flattened, op, target_column_list, inputs[0]);
1970  switch (modify_node->getOperation()) {
1972  modify_node->applyDeleteModificationsToInputNode();
1973  break;
1974  }
1976  modify_node->applyUpdateModificationsToInputNode();
1977  }
1978  default:
1979  break;
1980  }
1981 
1982  return modify_node;
1983  }
1984 
1985  std::shared_ptr<RelTableFunction> dispatchTableFunction(
1986  const rapidjson::Value& table_func_ra,
1987  RelAlgDagBuilder& root_dag_builder) {
1988  const auto inputs = getRelAlgInputs(table_func_ra);
1989  CHECK_EQ(size_t(1), inputs.size());
1990 
1991  const auto& invocation = field(table_func_ra, "invocation");
1992  CHECK(invocation.IsObject());
1993 
1994  const auto& operands = field(invocation, "operands");
1995  CHECK(operands.IsArray());
1996  CHECK_GE(operands.Size(), unsigned(0));
1997 
1998  std::vector<const Rex*> col_inputs;
1999  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
2000  std::vector<std::string> fields;
2001 
2002  for (auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
2003  ++exprs_json_it) {
2004  const auto& expr_json = *exprs_json_it;
2005  CHECK(expr_json.IsObject());
2006 
2007  if (expr_json.HasMember("op")) {
2008  const auto op_str = json_str(field(expr_json, "op"));
2009  if (op_str == "CAST" && expr_json.HasMember("type")) {
2010  const auto& expr_type = field(expr_json, "type");
2011  CHECK(expr_type.IsObject());
2012  CHECK(expr_type.HasMember("type"));
2013  const auto& expr_type_name = json_str(field(expr_type, "type"));
2014  if (expr_type_name == "CURSOR") {
2015  CHECK(expr_json.HasMember("operands"));
2016  const auto& expr_operands = field(expr_json, "operands");
2017  CHECK(expr_operands.IsArray());
2018  if (expr_operands.Size() != 1) {
2019  throw std::runtime_error(
2020  "Table functions currently only support one ResultSet input");
2021  }
2022 
2023  CHECK(expr_json.HasMember("type"));
2024  const auto& expr_types = field(invocation, "type");
2025  CHECK(expr_types.IsArray());
2026 
2027  const auto prior_node = prev(table_func_ra);
2028  CHECK(prior_node);
2029  CHECK_EQ(prior_node->size(), expr_types.Size());
2030 
2031  // Forward the values from the prior node as RexInputs
2032  for (size_t i = 0; i < prior_node->size(); i++) {
2033  table_func_inputs.emplace_back(std::make_unique<RexAbstractInput>(i));
2034  col_inputs.emplace_back(table_func_inputs.back().get());
2035  }
2036  continue;
2037  }
2038  }
2039  }
2040  table_func_inputs.emplace_back(
2041  parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
2042  }
2043 
2044  const auto& op_name = field(invocation, "op");
2045  CHECK(op_name.IsString());
2046 
2047  std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
2048  const auto& row_types = field(table_func_ra, "rowType");
2049  CHECK(row_types.IsArray());
2050  CHECK_GE(row_types.Size(), unsigned(0));
2051  const auto& row_types_array = row_types.GetArray();
2052 
2053  for (size_t i = 0; i < row_types_array.Size(); i++) {
2054  // We don't care about the type information in rowType -- replace each output with a
2055  // reference to be resolved later in the translator
2056  table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
2057  fields.emplace_back("");
2058  }
2059 
2060  return std::make_shared<RelTableFunction>(op_name.GetString(),
2061  inputs[0],
2062  fields,
2063  col_inputs,
2064  table_func_inputs,
2065  table_function_projected_outputs);
2066  }
2067 
2068  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
2069  const rapidjson::Value& logical_values_ra) {
2070  const auto& tuple_type_arr = field(logical_values_ra, "type");
2071  CHECK(tuple_type_arr.IsArray());
2072  std::vector<TargetMetaInfo> tuple_type;
2073  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
2074  tuple_type_arr_it != tuple_type_arr.End();
2075  ++tuple_type_arr_it) {
2076  const auto component_type = parse_type(*tuple_type_arr_it);
2077  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
2078  tuple_type.emplace_back(component_name, component_type);
2079  }
2080  const auto& inputs_arr = field(logical_values_ra, "inputs");
2081  CHECK(inputs_arr.IsArray());
2082  const auto& tuples_arr = field(logical_values_ra, "tuples");
2083  CHECK(tuples_arr.IsArray());
2084 
2085  if (inputs_arr.Size() || tuples_arr.Size()) {
2086  throw QueryNotSupported("Non-empty LogicalValues not supported yet");
2087  }
2088  return std::make_shared<RelLogicalValues>(tuple_type);
2089  }
2090 
2091  std::vector<std::shared_ptr<const RelAlgNode>> getRelAlgInputs(
2092  const rapidjson::Value& node) {
2093  if (node.HasMember("inputs")) {
2094  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
2095  std::vector<std::shared_ptr<const RelAlgNode>> ra_inputs;
2096  for (const auto str_id : str_input_ids) {
2097  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
2098  }
2099  return ra_inputs;
2100  }
2101  return {prev(node)};
2102  }
2103 
2104  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
2105  const auto id = node_id(crt_node);
2106  CHECK(id);
2107  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2108  return nodes_.back();
2109  }
2110 
2112  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
2113 };
2114 
2115 } // namespace details
2116 
2117 RelAlgDagBuilder::RelAlgDagBuilder(const std::string& query_ra,
2118  const Catalog_Namespace::Catalog& cat,
2119  const RenderInfo* render_info)
2120  : cat_(cat), render_info_(render_info) {
2121  rapidjson::Document query_ast;
2122  query_ast.Parse(query_ra.c_str());
2123  if (query_ast.HasParseError()) {
2124  query_ast.GetParseError();
2125  LOG(ERROR) << "Failed to parse RA tree from Calcite (offset "
2126  << query_ast.GetErrorOffset() << "):\n"
2127  << rapidjson::GetParseError_En(query_ast.GetParseError());
2128  VLOG(1) << "Failed to parse query RA: " << query_ra;
2129  throw std::runtime_error(
2130  "Failed to parse relational algebra tree. Possible query syntax error.");
2131  }
2132  CHECK(query_ast.IsObject());
2134  build(query_ast, *this);
2135 }
2136 
2138  const rapidjson::Value& query_ast,
2139  const Catalog_Namespace::Catalog& cat,
2140  const RenderInfo* render_info)
2141  : cat_(cat), render_info_(render_info) {
2142  build(query_ast, root_dag_builder);
2143 }
2144 
2145 void RelAlgDagBuilder::build(const rapidjson::Value& query_ast,
2146  RelAlgDagBuilder& lead_dag_builder) {
2147  const auto& rels = field(query_ast, "rels");
2148  CHECK(rels.IsArray());
2149  try {
2150  nodes_ = details::RelAlgDispatcher(cat_).run(rels, lead_dag_builder);
2151  } catch (const QueryNotSupported&) {
2152  throw;
2153  }
2154  CHECK(!nodes_.empty());
2156 
2157  if (render_info_) {
2158  // Alter the RA for render. Do this before any flattening/optimizations are done to
2159  // the tree.
2161  }
2162 
2163  mark_nops(nodes_);
2168  std::vector<const RelAlgNode*> filtered_left_deep_joins;
2169  std::vector<const RelAlgNode*> left_deep_joins;
2170  for (const auto& node : nodes_) {
2171  const auto left_deep_join_root = get_left_deep_join_root(node);
2172  // The filter which starts a left-deep join pattern must not be coalesced
2173  // since it contains (part of) the join condition.
2174  if (left_deep_join_root) {
2175  left_deep_joins.push_back(left_deep_join_root.get());
2176  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
2177  filtered_left_deep_joins.push_back(left_deep_join_root.get());
2178  }
2179  }
2180  }
2181  if (filtered_left_deep_joins.empty()) {
2183  }
2184  eliminate_dead_columns(nodes_);
2186  coalesce_nodes(nodes_, left_deep_joins);
2187  CHECK(nodes_.back().unique());
2188  create_left_deep_join(nodes_);
2189 }
2190 
2192  for (auto& node : nodes_) {
2193  if (node) {
2194  node->resetQueryExecutionState();
2195  }
2196  }
2197 }
2198 
2199 // Prints the relational algebra as a tree; useful for debugging.
2200 std::string tree_string(const RelAlgNode* ra, const size_t indent) {
2201  std::string result = std::string(indent, ' ') + ra->toString() + "\n";
2202  for (size_t i = 0; i < ra->inputCount(); ++i) {
2203  result += tree_string(ra->getInput(i), indent + 2);
2204  }
2205  return result;
2206 }
SQLTypes to_sql_type(const std::string &type_name)
std::vector< std::shared_ptr< const RelAlgNode > > inputs_
std::shared_ptr< const RelAlgNode > getRootNodeShPtr() const
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
const RexScalar * getThen(const size_t idx) const
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
#define CHECK_EQ(x, y)
Definition: Logger.h:201
JoinType to_join_type(const std::string &join_type_name)
const Catalog_Namespace::Catalog & cat_
const size_t limit_
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexSubQuery > deepCopy() const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
JoinType
Definition: sqldefs.h:107
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
Definition: sqltypes.h:52
size_t size() const override
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const RexScalar * getElse() const
static thread_local unsigned crt_id_
std::string function_name_
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
SQLAgg to_agg_kind(const std::string &agg_name)
const JoinType join_type_
#define LOG(tag)
Definition: Logger.h:188
NullSortedPosition
std::vector< std::string > TargetColumnList
size_t size() const
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const RexScalar * getOperand(const size_t idx) const
std::vector< const Rex * > col_inputs_
bool hasEquivCollationOf(const RelSort &that) const
const std::vector< std::string > fields_
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
void build(const rapidjson::Value &query_ast, RelAlgDagBuilder &root_dag_builder)
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::string join(T const &container, std::string const &delim)
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
#define UNREACHABLE()
Definition: Logger.h:237
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
#define CHECK_GE(x, y)
Definition: Logger.h:206
std::vector< std::string > fields_
Definition: sqldefs.h:49
const RexScalar * getWhen(const size_t idx) const
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
RetType visitOperator(const RexOperator *rex_operator) const final
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
const Catalog_Namespace::Catalog & cat_
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void set_scale(int s)
Definition: sqltypes.h:421
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
const size_t groupby_count_
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
RexWindowFuncReplacementVisitor(std::unique_ptr< const RexScalar > replacement_rex)
const RenderInfo * render_info_
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const RexScalar * visitOperator(const RexOperator *rex_operator) const final
void set_precision(int d)
Definition: sqltypes.h:419
SQLOps getOperator() const
std::vector< std::shared_ptr< RelAlgNode > > nodes_
CHECK(cgen_state)
const TableDescriptor * getTableFromScanNode(const Catalog_Namespace::Catalog &cat, const rapidjson::Value &scan_ra)
SortDirection parse_sort_direction(const rapidjson::Value &collation)
std::shared_ptr< RelAlgNode > deepCopy() const override
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
const size_t offset_
bool isRenaming() const
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
SQLOps to_sql_op(const std::string &op_str)
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
void * visitInput(const RexInput *rex_input) const override
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:54
std::shared_ptr< RelAlgNode > deepCopy() const override
size_t branchCount() const
const RelAlgNode * getInput(const size_t idx) const
std::vector< std::shared_ptr< const RelAlgNode > > getRelAlgInputs(const rapidjson::Value &node)
std::shared_ptr< RelAlgNode > deepCopy() const override
std::vector< std::shared_ptr< RelAlgNode > > nodes_
std::unique_ptr< const RexScalar > filter_
bool isSimple() const
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
std::tuple< const rapidjson::Value *, SQLTypeInfo, SQLTypeInfo > parse_literal(const rapidjson::Value &expr)
const size_t groupby_count_
std::vector< std::string > fields_
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::shared_ptr< RelAlgNode > deepCopy() const override
bool isEmptyResult() const
SortDirection
const RexScalar * getProjectAt(const size_t idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:203
Definition: sqltypes.h:55
Definition: sqltypes.h:56
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
const ConstRexScalarPtrVector & getPartitionKeys() const
#define CHECK_LE(x, y)
Definition: Logger.h:204
const std::vector< const Rex * > target_exprs_
std::vector< ElementType >::const_iterator Super
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDagBuilder &root_dag_builder)
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::unique_ptr< const RexScalar > filter_expr_
void setSourceNode(const RelAlgNode *node) const
const RexScalar * aggregateResult(const RexScalar *const &aggregate, const RexScalar *const &next_result) const final
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
void setExecutionResult(const std::shared_ptr< const ExecutionResult > result)
std::string tree_string(const RelAlgNode *ra, const size_t indent)
RelAlgDagBuilder()=delete
SqlWindowFunctionKind
Definition: sqldefs.h:82
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
Definition: sqldefs.h:53
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
const std::vector< std::string > & getFields() const
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
virtual std::string toString() const =0
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
unsigned node_id(const rapidjson::Value &ra_node) noexcept
RANodeOutput get_node_output(const RelAlgNode *ra_node)
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern) noexcept
void alterRAForRender(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const RenderInfo &render_info)
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDagBuilder &root_dag_builder)
RelAlgDispatcher(const Catalog_Namespace::Catalog &cat)
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::vector< RexInput > RANodeOutput
const size_t inputCount() const
specifies the content in-memory of a row in the table metadata table
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
std::shared_ptr< RelAlgNode > deepCopy() const override
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
std::vector< std::string > fields_
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:139
std::shared_ptr< RelAlgNode > deepCopy() const override
#define VLOG(n)
Definition: Logger.h:283
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::shared_ptr< RelAlgNode > deepCopy() const override
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
bool isIdentity() const
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
const bool is_agg_
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins)
const RexScalar * visitCase(const RexCase *rex_case) const final
static void resetRelAlgFirstId() noexcept
std::string json_node_to_string(const rapidjson::Value &node) noexcept