OmniSciDB  21ac014ffc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgDagBuilder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgDagBuilder.h"
19 #include "Catalog/Catalog.h"
21 #include "JsonAccessors.h"
22 #include "RelAlgOptimizer.h"
23 #include "RelLeftDeepInnerJoin.h"
25 #include "RexVisitor.h"
26 #include "Shared/sqldefs.h"
27 
28 #include <rapidjson/error/en.h>
29 #include <rapidjson/error/error.h>
30 #include <rapidjson/stringbuffer.h>
31 #include <rapidjson/writer.h>
32 
33 #include <string>
34 #include <unordered_set>
35 
36 extern bool g_cluster;
37 extern bool g_enable_union;
38 
39 namespace {
40 
41 const unsigned FIRST_RA_NODE_ID = 1;
42 
43 } // namespace
44 
45 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
46 
49 }
50 
52  const std::shared_ptr<const ExecutionResult> result) {
53  auto row_set = result->getRows();
54  CHECK(row_set);
55  CHECK_EQ(size_t(1), row_set->colCount());
56  *(type_.get()) = row_set->getColType(0);
57  (*(result_.get())) = result;
58 }
59 
60 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
61  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
62 }
63 
64 unsigned RexSubQuery::getId() const {
65  return ra_->getId();
66 }
67 
68 namespace {
69 
70 class RexRebindInputsVisitor : public RexVisitor<void*> {
71  public:
72  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
73  : old_input_(old_input), new_input_(new_input) {}
74 
75  virtual ~RexRebindInputsVisitor() = default;
76 
77  void* visitInput(const RexInput* rex_input) const override {
78  const auto old_source = rex_input->getSourceNode();
79  if (old_source == old_input_) {
80  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
81  if (left_deep_join) {
82  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
83  return nullptr;
84  }
85  rex_input->setSourceNode(new_input_);
86  }
87  return nullptr;
88  };
89 
90  private:
91  const RelAlgNode* old_input_;
93 };
94 
95 // Creates an output with n columns.
96 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
97  std::vector<RexInput> outputs;
98  outputs.reserve(n);
99  for (size_t i = 0; i < n; ++i) {
100  outputs.emplace_back(node, i);
101  }
102  return outputs;
103 }
104 
106  public:
108  const RelAlgNode* old_input,
109  const RelAlgNode* new_input,
110  std::unordered_map<unsigned, unsigned> old_to_new_index_map)
111  : RexRebindInputsVisitor(old_input, new_input), mapping_(old_to_new_index_map) {}
112 
113  void* visitInput(const RexInput* rex_input) const override {
114  RexRebindInputsVisitor::visitInput(rex_input);
115  auto mapping_itr = mapping_.find(rex_input->getIndex());
116  CHECK(mapping_itr != mapping_.end());
117  rex_input->setIndex(mapping_itr->second);
118  return nullptr;
119  }
120 
121  private:
122  const std::unordered_map<unsigned, unsigned> mapping_;
123 };
124 
125 } // namespace
126 
128  std::shared_ptr<const RelAlgNode> old_input,
129  std::shared_ptr<const RelAlgNode> input,
130  std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map) {
131  RelAlgNode::replaceInput(old_input, input);
132  std::unique_ptr<RexRebindInputsVisitor> rebind_inputs;
133  if (old_to_new_index_map) {
134  rebind_inputs = std::make_unique<RexRebindReindexInputsVisitor>(
135  old_input.get(), input.get(), *old_to_new_index_map);
136  } else {
137  rebind_inputs =
138  std::make_unique<RexRebindInputsVisitor>(old_input.get(), input.get());
139  }
140  CHECK(rebind_inputs);
141  for (const auto& scalar_expr : scalar_exprs_) {
142  rebind_inputs->visit(scalar_expr.get());
143  }
144 }
145 
146 void RelProject::appendInput(std::string new_field_name,
147  std::unique_ptr<const RexScalar> new_input) {
148  fields_.emplace_back(std::move(new_field_name));
149  scalar_exprs_.emplace_back(std::move(new_input));
150 }
151 
153  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
154  if (scan_node) {
155  // Scan node has no inputs, output contains all columns in the table.
156  CHECK_EQ(size_t(0), scan_node->inputCount());
157  return n_outputs(scan_node, scan_node->size());
158  }
159  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
160  if (project_node) {
161  // Project output count doesn't depend on the input
162  CHECK_EQ(size_t(1), project_node->inputCount());
163  return n_outputs(project_node, project_node->size());
164  }
165  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
166  if (filter_node) {
167  // Filter preserves shape
168  CHECK_EQ(size_t(1), filter_node->inputCount());
169  const auto prev_out = get_node_output(filter_node->getInput(0));
170  return n_outputs(filter_node, prev_out.size());
171  }
172  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
173  if (aggregate_node) {
174  // Aggregate output count doesn't depend on the input
175  CHECK_EQ(size_t(1), aggregate_node->inputCount());
176  return n_outputs(aggregate_node, aggregate_node->size());
177  }
178  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
179  if (compound_node) {
180  // Compound output count doesn't depend on the input
181  CHECK_EQ(size_t(1), compound_node->inputCount());
182  return n_outputs(compound_node, compound_node->size());
183  }
184  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
185  if (join_node) {
186  // Join concatenates the outputs from the inputs and the output
187  // directly references the nodes in the input.
188  CHECK_EQ(size_t(2), join_node->inputCount());
189  auto lhs_out =
190  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
191  const auto rhs_out =
192  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
193  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
194  return lhs_out;
195  }
196  const auto table_func_node = dynamic_cast<const RelTableFunction*>(ra_node);
197  if (table_func_node) {
198  // Table Function output count doesn't depend on the input
199  return n_outputs(table_func_node, table_func_node->size());
200  }
201  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
202  if (sort_node) {
203  // Sort preserves shape
204  CHECK_EQ(size_t(1), sort_node->inputCount());
205  const auto prev_out = get_node_output(sort_node->getInput(0));
206  return n_outputs(sort_node, prev_out.size());
207  }
208  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
209  if (logical_values_node) {
210  CHECK_EQ(size_t(0), logical_values_node->inputCount());
211  return n_outputs(logical_values_node, logical_values_node->size());
212  }
213  const auto logical_union_node = dynamic_cast<const RelLogicalUnion*>(ra_node);
214  if (logical_union_node) {
215  return n_outputs(logical_union_node, logical_union_node->size());
216  }
217  LOG(FATAL) << "Unhandled ra_node type: " << ::toString(ra_node);
218  return {};
219 }
220 
222  if (!isSimple()) {
223  return false;
224  }
225  CHECK_EQ(size_t(1), inputCount());
226  const auto source = getInput(0);
227  if (dynamic_cast<const RelJoin*>(source)) {
228  return false;
229  }
230  const auto source_shape = get_node_output(source);
231  if (source_shape.size() != scalar_exprs_.size()) {
232  return false;
233  }
234  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
235  const auto& scalar_expr = scalar_exprs_[i];
236  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
237  CHECK(input);
238  CHECK_EQ(source, input->getSourceNode());
239  // We should add the additional check that input->getIndex() !=
240  // source_shape[i].getIndex(), but Calcite doesn't generate the right
241  // Sort-Project-Sort sequence when joins are involved.
242  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
243  return false;
244  }
245  }
246  return true;
247 }
248 
249 namespace {
250 
251 bool isRenamedInput(const RelAlgNode* node,
252  const size_t index,
253  const std::string& new_name) {
254  CHECK_LT(index, node->size());
255  if (auto join = dynamic_cast<const RelJoin*>(node)) {
256  CHECK_EQ(size_t(2), join->inputCount());
257  const auto lhs_size = join->getInput(0)->size();
258  if (index < lhs_size) {
259  return isRenamedInput(join->getInput(0), index, new_name);
260  }
261  CHECK_GE(index, lhs_size);
262  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
263  }
264 
265  if (auto scan = dynamic_cast<const RelScan*>(node)) {
266  return new_name != scan->getFieldName(index);
267  }
268 
269  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
270  return new_name != aggregate->getFieldName(index);
271  }
272 
273  if (auto project = dynamic_cast<const RelProject*>(node)) {
274  return new_name != project->getFieldName(index);
275  }
276 
277  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
278  return new_name != table_func->getFieldName(index);
279  }
280 
281  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
282  const auto& tuple_type = logical_values->getTupleType();
283  CHECK_LT(index, tuple_type.size());
284  return new_name != tuple_type[index].get_resname();
285  }
286 
287  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
288  dynamic_cast<const RelLogicalUnion*>(node));
289  return isRenamedInput(node->getInput(0), index, new_name);
290 }
291 
292 } // namespace
293 
295  if (!isSimple()) {
296  return false;
297  }
298  CHECK_EQ(scalar_exprs_.size(), fields_.size());
299  for (size_t i = 0; i < fields_.size(); ++i) {
300  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
301  CHECK(rex_in);
302  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
303  return true;
304  }
305  }
306  return false;
307 }
308 
309 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
310  std::shared_ptr<const RelAlgNode> input) {
311  RelAlgNode::replaceInput(old_input, input);
312  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
313  if (condition_) {
314  rebind_inputs.visit(condition_.get());
315  }
316 }
317 
318 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
319  std::shared_ptr<const RelAlgNode> input) {
320  RelAlgNode::replaceInput(old_input, input);
321  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
322  rebind_inputs.visit(filter_.get());
323 }
324 
325 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
326  std::shared_ptr<const RelAlgNode> input) {
327  RelAlgNode::replaceInput(old_input, input);
328  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
329  for (const auto& scalar_source : scalar_sources_) {
330  rebind_inputs.visit(scalar_source.get());
331  }
332  if (filter_expr_) {
333  rebind_inputs.visit(filter_expr_.get());
334  }
335 }
336 
338  : RelAlgNode(rhs)
340  , fields_(rhs.fields_)
341  , hint_applied_(false)
342  , hints_(std::make_unique<Hints>()) {
343  RexDeepCopyVisitor copier;
344  for (auto const& expr : rhs.scalar_exprs_) {
345  scalar_exprs_.push_back(copier.visit(expr.get()));
346  }
347  if (rhs.hint_applied_) {
348  for (auto const& kv : *rhs.hints_) {
349  addHint(kv.second);
350  }
351  }
352 }
353 
355  : RelAlgNode(rhs)
356  , tuple_type_(rhs.tuple_type_)
357  , values_(RexDeepCopyVisitor::copy(rhs.values_)) {}
358 
360  RexDeepCopyVisitor copier;
361  filter_ = copier.visit(rhs.filter_.get());
362 }
363 
365  : RelAlgNode(rhs)
366  , groupby_count_(rhs.groupby_count_)
367  , fields_(rhs.fields_)
368  , hint_applied_(false)
369  , hints_(std::make_unique<Hints>()) {
370  agg_exprs_.reserve(rhs.agg_exprs_.size());
371  for (auto const& agg : rhs.agg_exprs_) {
372  agg_exprs_.push_back(agg->deepCopy());
373  }
374  if (rhs.hint_applied_) {
375  for (auto const& kv : *rhs.hints_) {
376  addHint(kv.second);
377  }
378  }
379 }
380 
382  : RelAlgNode(rhs)
383  , join_type_(rhs.join_type_)
384  , hint_applied_(false)
385  , hints_(std::make_unique<Hints>()) {
386  RexDeepCopyVisitor copier;
387  condition_ = copier.visit(rhs.condition_.get());
388  if (rhs.hint_applied_) {
389  for (auto const& kv : *rhs.hints_) {
390  addHint(kv.second);
391  }
392  }
393 }
394 
395 namespace {
396 
397 std::vector<std::unique_ptr<const RexAgg>> copyAggExprs(
398  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs) {
399  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
400  agg_exprs_copy.reserve(agg_exprs.size());
401  for (auto const& agg_expr : agg_exprs) {
402  agg_exprs_copy.push_back(agg_expr->deepCopy());
403  }
404  return agg_exprs_copy;
405 }
406 
407 std::vector<std::unique_ptr<const RexScalar>> copyRexScalars(
408  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources) {
409  std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
410  scalar_sources_copy.reserve(scalar_sources.size());
411  RexDeepCopyVisitor copier;
412  for (auto const& scalar_source : scalar_sources) {
413  scalar_sources_copy.push_back(copier.visit(scalar_source.get()));
414  }
415  return scalar_sources_copy;
416 }
417 
418 std::vector<const Rex*> remapTargetPointers(
419  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_new,
420  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_new,
421  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_old,
422  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_old,
423  std::vector<const Rex*> const& target_exprs_old) {
424  std::vector<const Rex*> target_exprs(target_exprs_old);
425  std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
426  for (size_t i = 0; i < agg_exprs_new.size(); ++i) {
427  old_to_new_target.emplace(agg_exprs_old[i].get(), agg_exprs_new[i].get());
428  }
429  for (size_t i = 0; i < scalar_sources_new.size(); ++i) {
430  old_to_new_target.emplace(scalar_sources_old[i].get(), scalar_sources_new[i].get());
431  }
432  for (auto& target : target_exprs) {
433  auto target_it = old_to_new_target.find(target);
434  CHECK(target_it != old_to_new_target.end());
435  target = target_it->second;
436  }
437  return target_exprs;
438 }
439 
440 } // namespace
441 
443  : RelAlgNode(rhs)
445  , groupby_count_(rhs.groupby_count_)
446  , agg_exprs_(copyAggExprs(rhs.agg_exprs_))
447  , fields_(rhs.fields_)
448  , is_agg_(rhs.is_agg_)
449  , scalar_sources_(copyRexScalars(rhs.scalar_sources_))
450  , target_exprs_(remapTargetPointers(agg_exprs_,
451  scalar_sources_,
452  rhs.agg_exprs_,
453  rhs.scalar_sources_,
454  rhs.target_exprs_))
455  , hint_applied_(false)
456  , hints_(std::make_unique<Hints>()) {
457  RexDeepCopyVisitor copier;
458  filter_expr_ = rhs.filter_expr_ ? copier.visit(rhs.filter_expr_.get()) : nullptr;
459  if (rhs.hint_applied_) {
460  for (auto const& kv : *rhs.hints_) {
461  addHint(kv.second);
462  }
463  }
464 }
465 
466 void RelTableFunction::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
467  std::shared_ptr<const RelAlgNode> input) {
468  RelAlgNode::replaceInput(old_input, input);
469  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
470  for (const auto& target_expr : target_exprs_) {
471  rebind_inputs.visit(target_expr.get());
472  }
473  for (const auto& func_input : table_func_inputs_) {
474  rebind_inputs.visit(func_input.get());
475  }
476 }
477 
479  int32_t literal_args = 0;
480  for (const auto& arg : table_func_inputs_) {
481  const auto rex_literal = dynamic_cast<const RexLiteral*>(arg.get());
482  if (rex_literal) {
483  literal_args += 1;
484  }
485  }
486  return literal_args;
487 }
488 
490  : RelAlgNode(rhs)
491  , function_name_(rhs.function_name_)
492  , fields_(rhs.fields_)
493  , col_inputs_(rhs.col_inputs_)
494  , table_func_inputs_(copyRexScalars(rhs.table_func_inputs_))
495  , target_exprs_(copyRexScalars(rhs.target_exprs_)) {
496  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
497  for (size_t i = 0; i < table_func_inputs_.size(); ++i) {
498  old_to_new_input.emplace(rhs.table_func_inputs_[i].get(),
499  table_func_inputs_[i].get());
500  }
501  for (auto& target : col_inputs_) {
502  auto target_it = old_to_new_input.find(target);
503  CHECK(target_it != old_to_new_input.end());
504  target = target_it->second;
505  }
506 }
507 
508 namespace std {
509 template <>
510 struct hash<std::pair<const RelAlgNode*, int>> {
511  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
512  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
513  return static_cast<int64_t>(*ptr_val) ^ input_col.second;
514  }
515 };
516 } // namespace std
517 
518 namespace {
519 
520 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
521  const size_t which_col) {
522  std::set<std::pair<const RelAlgNode*, int>> work_set;
523  auto walker = node;
524  auto curr_col = which_col;
525  while (true) {
526  work_set.insert(std::make_pair(walker, curr_col));
527  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
528  break;
529  }
530  CHECK_EQ(size_t(1), walker->inputCount());
531  auto only_source = walker->getInput(0);
532  if (auto project = dynamic_cast<const RelProject*>(walker)) {
533  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
534  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
535  if (join_source) {
536  CHECK_EQ(size_t(2), join_source->inputCount());
537  auto lhs = join_source->getInput(0);
538  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
539  join_source->getInput(1) == input->getSourceNode());
540  } else {
541  CHECK_EQ(input->getSourceNode(), only_source);
542  }
543  curr_col = input->getIndex();
544  } else {
545  break;
546  }
547  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
548  if (curr_col >= aggregate->getGroupByCount()) {
549  break;
550  }
551  }
552  walker = only_source;
553  }
554  return work_set;
555 }
556 
557 } // namespace
558 
559 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
560  if (collation_.size() != that.collation_.size()) {
561  return false;
562  }
563 
564  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
565  auto this_sort_key = collation_[i];
566  auto that_sort_key = that.collation_[i];
567  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
568  return false;
569  }
570  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
571  return false;
572  }
573  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
574  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
575  std::vector<std::pair<const RelAlgNode*, int>> intersect;
576  std::set_intersection(this_equiv_keys.begin(),
577  this_equiv_keys.end(),
578  that_equiv_keys.begin(),
579  that_equiv_keys.end(),
580  std::back_inserter(intersect));
581  if (intersect.empty()) {
582  return false;
583  }
584  }
585  return true;
586 }
587 
588 // class RelLogicalUnion methods
589 
591  : RelAlgNode(std::move(inputs)), is_all_(is_all) {
592  if (!g_enable_union) {
593  throw QueryNotSupported(
594  "UNION is not supported yet. There is an experimental enable-union option "
595  "available to enable UNION ALL queries.");
596  }
597  CHECK_EQ(2u, inputs_.size());
598  if (!is_all_) {
599  throw QueryNotSupported("UNION without ALL is not supported yet.");
600  }
601 }
602 
603 size_t RelLogicalUnion::size() const {
604  return inputs_.front()->size();
605 }
606 
607 std::string RelLogicalUnion::toString() const {
608  return cat(::typeName(this), "(is_all(", is_all_, "))");
609 }
610 
611 size_t RelLogicalUnion::toHash() const {
612  if (!hash_) {
613  hash_ = typeid(RelLogicalUnion).hash_code();
614  boost::hash_combine(*hash_, is_all_);
615  }
616  return *hash_;
617 }
618 
619 std::string RelLogicalUnion::getFieldName(const size_t i) const {
620  if (auto const* input = dynamic_cast<RelCompound const*>(inputs_[0].get())) {
621  return input->getFieldName(i);
622  } else if (auto const* input = dynamic_cast<RelProject const*>(inputs_[0].get())) {
623  return input->getFieldName(i);
624  } else if (auto const* input = dynamic_cast<RelLogicalUnion const*>(inputs_[0].get())) {
625  return input->getFieldName(i);
626  } else if (auto const* input = dynamic_cast<RelAggregate const*>(inputs_[0].get())) {
627  return input->getFieldName(i);
628  } else if (auto const* input = dynamic_cast<RelScan const*>(inputs_[0].get())) {
629  return input->getFieldName(i);
630  } else if (auto const* input =
631  dynamic_cast<RelTableFunction const*>(inputs_[0].get())) {
632  return input->getFieldName(i);
633  }
634  UNREACHABLE() << "Unhandled input type: " << ::toString(inputs_.front());
635  return {};
636 }
637 
639  std::vector<TargetMetaInfo> const& tmis0 = inputs_[0]->getOutputMetainfo();
640  std::vector<TargetMetaInfo> const& tmis1 = inputs_[1]->getOutputMetainfo();
641  if (tmis0.size() != tmis1.size()) {
642  VLOG(2) << "tmis0.size() = " << tmis0.size() << " != " << tmis1.size()
643  << " = tmis1.size()";
644  throw std::runtime_error("Subqueries of a UNION must have matching data types.");
645  }
646  for (size_t i = 0; i < tmis0.size(); ++i) {
647  if (tmis0[i].get_type_info() != tmis1[i].get_type_info()) {
648  SQLTypeInfo const& ti0 = tmis0[i].get_type_info();
649  SQLTypeInfo const& ti1 = tmis1[i].get_type_info();
650  VLOG(2) << "Types do not match for UNION:\n tmis0[" << i
651  << "].get_type_info().to_string() = " << ti0.to_string() << "\n tmis1["
652  << i << "].get_type_info().to_string() = " << ti1.to_string();
653  if (ti0.is_dict_encoded_string() && ti1.is_dict_encoded_string() &&
654  ti0.get_comp_param() != ti1.get_comp_param()) {
655  throw std::runtime_error(
656  "Taking the UNION of different text-encoded dictionaries is not yet "
657  "supported. This may be resolved by using shared dictionaries. For example, "
658  "by making one a shared dictionary reference to the other.");
659  } else {
660  throw std::runtime_error(
661  "Subqueries of a UNION must have the exact same data types.");
662  }
663  }
664  }
665 }
666 
667 // Rest of code requires a raw pointer, but RexInput object needs to live somewhere.
669  size_t input_idx) const {
670  if (auto const* rex_input_ptr = dynamic_cast<RexInput const*>(rex_scalar)) {
671  RexInput rex_input(*rex_input_ptr);
672  rex_input.setSourceNode(getInput(input_idx));
673  scalar_exprs_.emplace_back(std::make_shared<RexInput const>(std::move(rex_input)));
674  return scalar_exprs_.back().get();
675  }
676  return rex_scalar;
677 }
678 
679 namespace {
680 
681 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
682  const auto& id = field(ra_node, "id");
683  return std::stoi(json_str(id));
684 }
685 
686 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
687  rapidjson::StringBuffer buffer;
688  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
689  node.Accept(writer);
690  return buffer.GetString();
691 }
692 
693 // The parse_* functions below de-serialize expressions as they come from Calcite.
694 // RelAlgDagBuilder will take care of making the representation easy to
695 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
696 
697 std::unique_ptr<RexAbstractInput> parse_abstract_input(
698  const rapidjson::Value& expr) noexcept {
699  const auto& input = field(expr, "input");
700  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
701 }
702 
703 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
704  CHECK(expr.IsObject());
705  const auto& literal = field(expr, "literal");
706  const auto type = to_sql_type(json_str(field(expr, "type")));
707  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
708  const auto scale = json_i64(field(expr, "scale"));
709  const auto precision = json_i64(field(expr, "precision"));
710  const auto type_scale = json_i64(field(expr, "type_scale"));
711  const auto type_precision = json_i64(field(expr, "type_precision"));
712  if (literal.IsNull()) {
713  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
714  }
715  switch (type) {
716  case kINT:
717  case kBIGINT:
718  case kDECIMAL:
719  case kINTERVAL_DAY_TIME:
721  case kTIME:
722  case kTIMESTAMP:
723  case kDATE:
724  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
725  type,
726  target_type,
727  scale,
728  precision,
729  type_scale,
730  type_precision));
731  case kDOUBLE: {
732  if (literal.IsDouble()) {
733  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
734  type,
735  target_type,
736  scale,
737  precision,
738  type_scale,
739  type_precision));
740  } else if (literal.IsInt64()) {
741  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetInt64()),
742  type,
743  target_type,
744  scale,
745  precision,
746  type_scale,
747  type_precision);
748 
749  } else if (literal.IsUint64()) {
750  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetUint64()),
751  type,
752  target_type,
753  scale,
754  precision,
755  type_scale,
756  type_precision);
757  }
758  UNREACHABLE() << "Unhandled type: " << literal.GetType();
759  }
760  case kTEXT:
761  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
762  type,
763  target_type,
764  scale,
765  precision,
766  type_scale,
767  type_precision));
768  case kBOOLEAN:
769  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
770  type,
771  target_type,
772  scale,
773  precision,
774  type_scale,
775  type_precision));
776  case kNULLT:
777  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
778  default:
779  CHECK(false);
780  }
781  CHECK(false);
782  return nullptr;
783 }
784 
785 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
787  RelAlgDagBuilder& root_dag_builder);
788 
789 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
790  if (type_obj.IsArray()) {
791  throw QueryNotSupported("Composite types are not currently supported.");
792  }
793  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
794  << json_node_to_string(type_obj);
795  const auto type = to_sql_type(json_str(field(type_obj, "type")));
796  const auto nullable = json_bool(field(type_obj, "nullable"));
797  const auto precision_it = type_obj.FindMember("precision");
798  const int precision =
799  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
800  const auto scale_it = type_obj.FindMember("scale");
801  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
802  SQLTypeInfo ti(type, !nullable);
803  ti.set_precision(precision);
804  ti.set_scale(scale);
805  return ti;
806 }
807 
808 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
809  const rapidjson::Value& arr,
811  RelAlgDagBuilder& root_dag_builder) {
812  std::vector<std::unique_ptr<const RexScalar>> exprs;
813  for (auto it = arr.Begin(); it != arr.End(); ++it) {
814  exprs.emplace_back(parse_scalar_expr(*it, cat, root_dag_builder));
815  }
816  return exprs;
817 }
818 
820  if (name == "ROW_NUMBER") {
822  }
823  if (name == "RANK") {
825  }
826  if (name == "DENSE_RANK") {
828  }
829  if (name == "PERCENT_RANK") {
831  }
832  if (name == "CUME_DIST") {
834  }
835  if (name == "NTILE") {
837  }
838  if (name == "LAG") {
840  }
841  if (name == "LEAD") {
843  }
844  if (name == "FIRST_VALUE") {
846  }
847  if (name == "LAST_VALUE") {
849  }
850  if (name == "AVG") {
852  }
853  if (name == "MIN") {
855  }
856  if (name == "MAX") {
858  }
859  if (name == "SUM") {
861  }
862  if (name == "COUNT") {
864  }
865  if (name == "$SUM0") {
867  }
868  throw std::runtime_error("Unsupported window function: " + name);
869 }
870 
871 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
872  const rapidjson::Value& arr,
874  RelAlgDagBuilder& root_dag_builder) {
875  std::vector<std::unique_ptr<const RexScalar>> exprs;
876  for (auto it = arr.Begin(); it != arr.End(); ++it) {
877  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), cat, root_dag_builder));
878  }
879  return exprs;
880 }
881 
882 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
883  return json_str(field(collation, "direction")) == std::string("DESCENDING")
886 }
887 
888 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
889  return json_str(field(collation, "nulls")) == std::string("FIRST")
892 }
893 
894 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
896  RelAlgDagBuilder& root_dag_builder) {
897  std::vector<SortField> collation;
898  size_t field_idx = 0;
899  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
900  const auto sort_dir = parse_sort_direction(*it);
901  const auto null_pos = parse_nulls_position(*it);
902  collation.emplace_back(field_idx, sort_dir, null_pos);
903  }
904  return collation;
905 }
906 
908  const rapidjson::Value& window_bound_obj,
910  RelAlgDagBuilder& root_dag_builder) {
911  CHECK(window_bound_obj.IsObject());
913  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
914  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
915  window_bound.following = json_bool(field(window_bound_obj, "following"));
916  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
917  const auto& offset_field = field(window_bound_obj, "offset");
918  if (offset_field.IsObject()) {
919  window_bound.offset = parse_scalar_expr(offset_field, cat, root_dag_builder);
920  } else {
921  CHECK(offset_field.IsNull());
922  }
923  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
924  return window_bound;
925 }
926 
927 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
929  RelAlgDagBuilder& root_dag_builder) {
930  const auto& operands = field(expr, "operands");
931  CHECK(operands.IsArray());
932  CHECK_GE(operands.Size(), unsigned(0));
933  const auto& subquery_ast = field(expr, "subquery");
934 
935  RelAlgDagBuilder subquery_dag(root_dag_builder, subquery_ast, cat, nullptr);
936  auto subquery = std::make_shared<RexSubQuery>(subquery_dag.getRootNodeShPtr());
937  root_dag_builder.registerSubquery(subquery);
938  return subquery->deepCopy();
939 }
940 
941 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
943  RelAlgDagBuilder& root_dag_builder) {
944  const auto op_name = json_str(field(expr, "op"));
945  const bool is_quantifier =
946  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
947  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
948  const auto& operators_json_arr = field(expr, "operands");
949  CHECK(operators_json_arr.IsArray());
950  auto operands = parse_expr_array(operators_json_arr, cat, root_dag_builder);
951  const auto type_it = expr.FindMember("type");
952  CHECK(type_it != expr.MemberEnd());
953  auto ti = parse_type(type_it->value);
954  if (op == kIN && expr.HasMember("subquery")) {
955  auto subquery = parse_subquery(expr, cat, root_dag_builder);
956  operands.emplace_back(std::move(subquery));
957  }
958  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
959  const auto& partition_keys_arr = field(expr, "partition_keys");
960  auto partition_keys = parse_expr_array(partition_keys_arr, cat, root_dag_builder);
961  const auto& order_keys_arr = field(expr, "order_keys");
962  auto order_keys = parse_window_order_exprs(order_keys_arr, cat, root_dag_builder);
963  const auto collation =
964  parse_window_order_collation(order_keys_arr, cat, root_dag_builder);
965  const auto kind = parse_window_function_kind(op_name);
966  const auto lower_bound =
967  parse_window_bound(field(expr, "lower_bound"), cat, root_dag_builder);
968  const auto upper_bound =
969  parse_window_bound(field(expr, "upper_bound"), cat, root_dag_builder);
970  bool is_rows = json_bool(field(expr, "is_rows"));
971  ti.set_notnull(false);
972  return std::make_unique<RexWindowFunctionOperator>(kind,
973  operands,
974  partition_keys,
975  order_keys,
976  collation,
977  lower_bound,
978  upper_bound,
979  is_rows,
980  ti);
981  }
982  return std::unique_ptr<RexOperator>(op == kFUNCTION
983  ? new RexFunctionOperator(op_name, operands, ti)
984  : new RexOperator(op, operands, ti));
985 }
986 
987 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr,
989  RelAlgDagBuilder& root_dag_builder) {
990  const auto& operands = field(expr, "operands");
991  CHECK(operands.IsArray());
992  CHECK_GE(operands.Size(), unsigned(2));
993  std::unique_ptr<const RexScalar> else_expr;
994  std::vector<
995  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
996  expr_pair_list;
997  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
998  auto when_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
999  if (operands_it == operands.End()) {
1000  else_expr = std::move(when_expr);
1001  break;
1002  }
1003  auto then_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
1004  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
1005  }
1006  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
1007 }
1008 
1009 std::vector<std::string> strings_from_json_array(
1010  const rapidjson::Value& json_str_arr) noexcept {
1011  CHECK(json_str_arr.IsArray());
1012  std::vector<std::string> fields;
1013  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
1014  ++json_str_arr_it) {
1015  CHECK(json_str_arr_it->IsString());
1016  fields.emplace_back(json_str_arr_it->GetString());
1017  }
1018  return fields;
1019 }
1020 
1021 std::vector<size_t> indices_from_json_array(
1022  const rapidjson::Value& json_idx_arr) noexcept {
1023  CHECK(json_idx_arr.IsArray());
1024  std::vector<size_t> indices;
1025  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
1026  ++json_idx_arr_it) {
1027  CHECK(json_idx_arr_it->IsInt());
1028  CHECK_GE(json_idx_arr_it->GetInt(), 0);
1029  indices.emplace_back(json_idx_arr_it->GetInt());
1030  }
1031  return indices;
1032 }
1033 
1034 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
1035  const auto agg = to_agg_kind(json_str(field(expr, "agg")));
1036  const auto distinct = json_bool(field(expr, "distinct"));
1037  const auto agg_ti = parse_type(field(expr, "type"));
1038  const auto operands = indices_from_json_array(field(expr, "operands"));
1039  if (operands.size() > 1 && (operands.size() != 2 || (agg != kAPPROX_COUNT_DISTINCT &&
1040  agg != kAPPROX_QUANTILE))) {
1041  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
1042  }
1043  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
1044 }
1045 
1046 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1048  RelAlgDagBuilder& root_dag_builder) {
1049  CHECK(expr.IsObject());
1050  if (expr.IsObject() && expr.HasMember("input")) {
1051  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
1052  }
1053  if (expr.IsObject() && expr.HasMember("literal")) {
1054  return std::unique_ptr<const RexScalar>(parse_literal(expr));
1055  }
1056  if (expr.IsObject() && expr.HasMember("op")) {
1057  const auto op_str = json_str(field(expr, "op"));
1058  if (op_str == std::string("CASE")) {
1059  return std::unique_ptr<const RexScalar>(parse_case(expr, cat, root_dag_builder));
1060  }
1061  if (op_str == std::string("$SCALAR_QUERY")) {
1062  return std::unique_ptr<const RexScalar>(
1063  parse_subquery(expr, cat, root_dag_builder));
1064  }
1065  return std::unique_ptr<const RexScalar>(parse_operator(expr, cat, root_dag_builder));
1066  }
1067  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
1068  " not supported");
1069 }
1070 
1071 JoinType to_join_type(const std::string& join_type_name) {
1072  if (join_type_name == "inner") {
1073  return JoinType::INNER;
1074  }
1075  if (join_type_name == "left") {
1076  return JoinType::LEFT;
1077  }
1078  if (join_type_name == "semi") {
1079  return JoinType::SEMI;
1080  }
1081  if (join_type_name == "anti") {
1082  return JoinType::ANTI;
1083  }
1084  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
1085 }
1086 
1087 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
1088 
1089 std::unique_ptr<const RexOperator> disambiguate_operator(
1090  const RexOperator* rex_operator,
1091  const RANodeOutput& ra_output) noexcept {
1092  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1093  for (size_t i = 0; i < rex_operator->size(); ++i) {
1094  auto operand = rex_operator->getOperand(i);
1095  if (dynamic_cast<const RexSubQuery*>(operand)) {
1096  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1097  } else {
1098  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
1099  }
1100  }
1101  const auto rex_window_function_operator =
1102  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1103  if (rex_window_function_operator) {
1104  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
1105  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1106  for (const auto& partition_key : partition_keys) {
1107  disambiguated_partition_keys.emplace_back(
1108  disambiguate_rex(partition_key.get(), ra_output));
1109  }
1110  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1111  const auto& order_keys = rex_window_function_operator->getOrderKeys();
1112  for (const auto& order_key : order_keys) {
1113  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
1114  }
1115  return rex_window_function_operator->disambiguatedOperands(
1116  disambiguated_operands,
1117  disambiguated_partition_keys,
1118  disambiguated_order_keys,
1119  rex_window_function_operator->getCollation());
1120  }
1121  return rex_operator->getDisambiguated(disambiguated_operands);
1122 }
1123 
1124 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
1125  const RANodeOutput& ra_output) {
1126  std::vector<
1127  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1128  disambiguated_expr_pair_list;
1129  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1130  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
1131  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
1132  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1133  std::move(disambiguated_then));
1134  }
1135  std::unique_ptr<const RexScalar> disambiguated_else{
1136  disambiguate_rex(rex_case->getElse(), ra_output)};
1137  return std::unique_ptr<const RexCase>(
1138  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1139 }
1140 
1141 // The inputs used by scalar expressions are given as indices in the serialized
1142 // representation of the query. This is hard to navigate; make the relationship
1143 // explicit by creating RexInput expressions which hold a pointer to the source
1144 // relational algebra node and the index relative to the output of that node.
1145 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
1146  const RANodeOutput& ra_output) {
1147  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
1148  if (rex_abstract_input) {
1149  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1150  return std::unique_ptr<const RexInput>(
1151  new RexInput(ra_output[rex_abstract_input->getIndex()]));
1152  }
1153  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
1154  if (rex_operator) {
1155  return disambiguate_operator(rex_operator, ra_output);
1156  }
1157  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
1158  if (rex_case) {
1159  return disambiguate_case(rex_case, ra_output);
1160  }
1161  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar);
1162  CHECK(rex_literal);
1163  return std::unique_ptr<const RexLiteral>(new RexLiteral(*rex_literal));
1164 }
1165 
1166 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
1167  CHECK_EQ(size_t(1), project_node->inputCount());
1168  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1169  for (size_t i = 0; i < project_node->size(); ++i) {
1170  const auto projected_expr = project_node->getProjectAt(i);
1171  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1172  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1173  } else {
1174  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
1175  }
1176  }
1177  project_node->setExpressions(disambiguated_exprs);
1178 }
1179 
1181  const RANodeOutput& input) noexcept {
1182  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1183  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1184  const auto target_expr = table_func_node->getTableFuncInputAt(i);
1185  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1186  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1187  } else {
1188  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
1189  }
1190  }
1191  table_func_node->setTableFuncInputs(disambiguated_exprs);
1192 }
1193 
1194 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1195  for (auto ra_node : nodes) {
1196  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
1197  if (filter_node) {
1198  CHECK_EQ(size_t(1), filter_node->inputCount());
1199  auto disambiguated_condition = disambiguate_rex(
1200  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
1201  filter_node->setCondition(disambiguated_condition);
1202  continue;
1203  }
1204  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
1205  if (join_node) {
1206  CHECK_EQ(size_t(2), join_node->inputCount());
1207  auto disambiguated_condition =
1208  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
1209  join_node->setCondition(disambiguated_condition);
1210  continue;
1211  }
1212  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
1213  if (project_node) {
1214  bind_project_to_input(project_node.get(),
1215  get_node_output(project_node->getInput(0)));
1216  continue;
1217  }
1218  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1219  if (table_func_node) {
1220  /*
1221  Collect all inputs from table function input (non-literal)
1222  arguments.
1223  */
1224  RANodeOutput input;
1225  input.reserve(table_func_node->inputCount());
1226  for (size_t i = 0; i < table_func_node->inputCount(); i++) {
1227  auto node_output = get_node_output(table_func_node->getInput(i));
1228  input.insert(input.end(), node_output.begin(), node_output.end());
1229  }
1230  bind_table_func_to_input(table_func_node.get(), input);
1231  }
1232  }
1233 }
1234 
1235 void handleQueryHint(const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1236  RelAlgDagBuilder* dag_builder) noexcept {
1237  Hints* hint_delivered = nullptr;
1238  for (auto node : nodes) {
1239  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1240  if (agg_node) {
1241  if (agg_node->hasDeliveredHint()) {
1242  hint_delivered = agg_node->getDeliveredHints();
1243  break;
1244  }
1245  }
1246  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
1247  if (project_node) {
1248  if (project_node->hasDeliveredHint()) {
1249  hint_delivered = project_node->getDeliveredHints();
1250  break;
1251  }
1252  }
1253  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
1254  if (scan_node) {
1255  if (scan_node->hasDeliveredHint()) {
1256  hint_delivered = scan_node->getDeliveredHints();
1257  break;
1258  }
1259  }
1260  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
1261  if (join_node) {
1262  if (join_node->hasDeliveredHint()) {
1263  hint_delivered = join_node->getDeliveredHints();
1264  break;
1265  }
1266  }
1267  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
1268  if (compound_node) {
1269  if (compound_node->hasDeliveredHint()) {
1270  hint_delivered = compound_node->getDeliveredHints();
1271  break;
1272  }
1273  }
1274  }
1275  if (hint_delivered && !hint_delivered->empty()) {
1276  dag_builder->registerQueryHints(hint_delivered);
1277  }
1278 }
1279 
1280 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1281  for (auto node : nodes) {
1282  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1283  if (!agg_node || agg_node->getAggExprsCount()) {
1284  continue;
1285  }
1286  CHECK_EQ(size_t(1), node->inputCount());
1287  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1288  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1289  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1290  agg_node->markAsNop();
1291  }
1292  }
1293 }
1294 
1295 namespace {
1296 
1297 std::vector<const Rex*> reproject_targets(
1298  const RelProject* simple_project,
1299  const std::vector<const Rex*>& target_exprs) noexcept {
1300  std::vector<const Rex*> result;
1301  for (size_t i = 0; i < simple_project->size(); ++i) {
1302  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
1303  CHECK(input_rex);
1304  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1305  result.push_back(target_exprs[input_rex->getIndex()]);
1306  }
1307  return result;
1308 }
1309 
1316  public:
1318  const RelAlgNode* node_to_keep,
1319  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1320  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
1321 
1322  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
1323  RetType visitInput(const RexInput* input) const final {
1324  if (input->getSourceNode() == node_to_keep_) {
1325  const auto index = input->getIndex();
1326  CHECK_LT(index, scalar_sources_.size());
1327  return visit(scalar_sources_[index].get());
1328  } else {
1329  return input->deepCopy();
1330  }
1331  }
1332 
1333  private:
1335  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
1336 };
1337 
1338 } // namespace
1339 
1340 void create_compound(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1341  const std::vector<size_t>& pattern) noexcept {
1342  CHECK_GE(pattern.size(), size_t(2));
1343  CHECK_LE(pattern.size(), size_t(4));
1344 
1345  std::unique_ptr<const RexScalar> filter_rex;
1346  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1347  size_t groupby_count{0};
1348  std::vector<std::string> fields;
1349  std::vector<const RexAgg*> agg_exprs;
1350  std::vector<const Rex*> target_exprs;
1351  bool first_project{true};
1352  bool is_agg{false};
1353  RelAlgNode* last_node{nullptr};
1354 
1355  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1356 
1357  for (const auto node_idx : pattern) {
1358  const auto ra_node = nodes[node_idx];
1359  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1360  if (ra_filter) {
1361  CHECK(!filter_rex);
1362  filter_rex.reset(ra_filter->getAndReleaseCondition());
1363  CHECK(filter_rex);
1364  last_node = ra_node.get();
1365  continue;
1366  }
1367  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1368  if (ra_project) {
1369  fields = ra_project->getFields();
1370  manipulation_target = ra_project;
1371 
1372  if (first_project) {
1373  CHECK_EQ(size_t(1), ra_project->inputCount());
1374  // Rebind the input of the project to the input of the filter itself
1375  // since we know that we'll evaluate the filter on the fly, with no
1376  // intermediate buffer.
1377  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1378  if (filter_input) {
1379  CHECK_EQ(size_t(1), filter_input->inputCount());
1380  bind_project_to_input(ra_project.get(),
1381  get_node_output(filter_input->getInput(0)));
1382  }
1383  scalar_sources = ra_project->getExpressionsAndRelease();
1384  for (const auto& scalar_expr : scalar_sources) {
1385  target_exprs.push_back(scalar_expr.get());
1386  }
1387  first_project = false;
1388  } else {
1389  if (ra_project->isSimple()) {
1390  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1391  } else {
1392  // TODO(adb): This is essentially a more general case of simple project, we
1393  // could likely merge the two
1394  std::vector<const Rex*> result;
1395  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1396  for (size_t i = 0; i < ra_project->size(); ++i) {
1397  const auto rex = ra_project->getProjectAt(i);
1398  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1399  const auto index = rex_input->getIndex();
1400  CHECK_LT(index, target_exprs.size());
1401  result.push_back(target_exprs[index]);
1402  } else {
1403  scalar_sources.push_back(visitor.visit(rex));
1404  result.push_back(scalar_sources.back().get());
1405  }
1406  }
1407  target_exprs = result;
1408  }
1409  }
1410  last_node = ra_node.get();
1411  continue;
1412  }
1413  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1414  if (ra_aggregate) {
1415  is_agg = true;
1416  fields = ra_aggregate->getFields();
1417  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1418  groupby_count = ra_aggregate->getGroupByCount();
1419  decltype(target_exprs){}.swap(target_exprs);
1420  CHECK_LE(groupby_count, scalar_sources.size());
1421  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1422  const auto rex_ref = new RexRef(group_idx + 1);
1423  target_exprs.push_back(rex_ref);
1424  scalar_sources.emplace_back(rex_ref);
1425  }
1426  for (const auto rex_agg : agg_exprs) {
1427  target_exprs.push_back(rex_agg);
1428  }
1429  last_node = ra_node.get();
1430  continue;
1431  }
1432  }
1433 
1434  auto compound_node =
1435  std::make_shared<RelCompound>(filter_rex,
1436  target_exprs,
1437  groupby_count,
1438  agg_exprs,
1439  fields,
1440  scalar_sources,
1441  is_agg,
1442  manipulation_target->isUpdateViaSelect(),
1443  manipulation_target->isDeleteViaSelect(),
1444  manipulation_target->isVarlenUpdateRequired(),
1445  manipulation_target->getModifiedTableDescriptor(),
1446  manipulation_target->getTargetColumns());
1447  auto old_node = nodes[pattern.back()];
1448  nodes[pattern.back()] = compound_node;
1449  auto first_node = nodes[pattern.front()];
1450  CHECK_EQ(size_t(1), first_node->inputCount());
1451  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1452  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1453  nodes[pattern[i]].reset();
1454  }
1455  for (auto node : nodes) {
1456  if (!node) {
1457  continue;
1458  }
1459  node->replaceInput(old_node, compound_node);
1460  }
1461 }
1462 
1463 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1464  using ElementType = std::shared_ptr<RelAlgNode>;
1465  using Super = std::vector<ElementType>::const_iterator;
1466  using Container = std::vector<ElementType>;
1467 
1468  public:
1469  enum class AdvancingMode { DUChain, InOrder };
1470 
1471  explicit RANodeIterator(const Container& nodes)
1472  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1473  size_t non_zero_count = 0;
1474  for (const auto& node : nodes) {
1475  if (node) {
1476  ++non_zero_count;
1477  }
1478  }
1480  }()) {}
1481 
1482  explicit operator size_t() {
1483  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1484  }
1485 
1486  RANodeIterator operator++() = delete;
1487 
1488  void advance(AdvancingMode mode) {
1489  Super& super = *this;
1490  switch (mode) {
1491  case AdvancingMode::DUChain: {
1492  size_t use_count = 0;
1493  Super only_use = owner_.end();
1494  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1495  if (!*nodeIt) {
1496  continue;
1497  }
1498  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1499  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1500  ++use_count;
1501  if (1 == use_count) {
1502  only_use = nodeIt;
1503  } else {
1504  super = owner_.end();
1505  return;
1506  }
1507  }
1508  }
1509  }
1510  super = only_use;
1511  break;
1512  }
1513  case AdvancingMode::InOrder:
1514  for (size_t i = 0; i != owner_.size(); ++i) {
1515  if (!visited_.count(i)) {
1516  super = owner_.begin();
1517  std::advance(super, i);
1518  return;
1519  }
1520  }
1521  super = owner_.end();
1522  break;
1523  default:
1524  CHECK(false);
1525  }
1526  }
1527 
1528  bool allVisited() { return visited_.size() == nodeCount_; }
1529 
1531  visited_.insert(size_t(*this));
1532  Super& super = *this;
1533  return *super;
1534  }
1535 
1536  const ElementType* operator->() { return &(operator*()); }
1537 
1538  private:
1540  const size_t nodeCount_;
1541  std::unordered_set<size_t> visited_;
1542 };
1543 
1544 namespace {
1545 
1546 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1547  const size_t index,
1548  const bool first_rex_is_input) {
1549  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1550  if (index == 0 && agg_node->getGroupByCount() > 0) {
1551  return true;
1552  } else {
1553  // Is an aggregated target, only allow the project to be elided if the aggregate
1554  // target is simply passed through (i.e. if the top level expression attached to
1555  // the project node is a RexInput expression)
1556  return first_rex_is_input;
1557  }
1558  }
1559  return first_rex_is_input;
1560 }
1561 
1568  public:
1569  bool visitInput(const RexInput* input) const final {
1570  // The top level expression node is checked before we apply the visitor. If we get
1571  // here, this input rex is a child of another rex node, and we handle the can be
1572  // coalesced check slightly differently
1573  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1574  }
1575 
1576  bool visitLiteral(const RexLiteral*) const final { return false; }
1577 
1578  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1579 
1580  bool visitRef(const RexRef*) const final { return false; }
1581 
1582  protected:
1583  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1584  return aggregate && next_result;
1585  }
1586 
1587  bool defaultResult() const final { return true; }
1588 };
1589 
1590 // Detect the window function SUM pattern: CASE WHEN COUNT() > 0 THEN SUM ELSE 0
1592  const auto case_operator = dynamic_cast<const RexCase*>(rex);
1593  if (case_operator && case_operator->branchCount() == 1) {
1594  const auto then_window =
1595  dynamic_cast<const RexWindowFunctionOperator*>(case_operator->getThen(0));
1596  if (then_window && then_window->getKind() == SqlWindowFunctionKind::SUM_INTERNAL) {
1597  return true;
1598  }
1599  }
1600  return false;
1601 }
1602 
1603 // Detect both window function operators and window function operators embedded in case
1604 // statements (for null handling)
1606  if (dynamic_cast<const RexWindowFunctionOperator*>(rex)) {
1607  return true;
1608  }
1609 
1610  // unwrap from casts, if they exist
1611  const auto rex_cast = dynamic_cast<const RexOperator*>(rex);
1612  if (rex_cast && rex_cast->getOperator() == kCAST) {
1613  CHECK_EQ(rex_cast->size(), size_t(1));
1614  return is_window_function_operator(rex_cast->getOperand(0));
1615  }
1616 
1617  if (is_window_function_sum(rex)) {
1618  return true;
1619  }
1620  // Check for Window Function AVG:
1621  // (CASE WHEN count > 0 THEN sum ELSE 0) / COUNT
1622  const RexOperator* divide_operator = dynamic_cast<const RexOperator*>(rex);
1623  if (divide_operator && divide_operator->getOperator() == kDIVIDE) {
1624  CHECK_EQ(divide_operator->size(), size_t(2));
1625  const auto case_operator =
1626  dynamic_cast<const RexCase*>(divide_operator->getOperand(0));
1627  const auto second_window =
1628  dynamic_cast<const RexWindowFunctionOperator*>(divide_operator->getOperand(1));
1629  if (case_operator && second_window &&
1630  second_window->getKind() == SqlWindowFunctionKind::COUNT) {
1631  if (is_window_function_sum(case_operator)) {
1632  return true;
1633  }
1634  }
1635  }
1636  return false;
1637 }
1638 
1639 } // namespace
1640 
1641 void coalesce_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1642  const std::vector<const RelAlgNode*>& left_deep_joins) {
1643  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
1644  std::vector<size_t> crt_pattern;
1645  CoalesceState crt_state{CoalesceState::Initial};
1646 
1647  auto reset_state = [&crt_pattern, &crt_state]() {
1648  crt_state = CoalesceState::Initial;
1649  std::vector<size_t>().swap(crt_pattern);
1650  };
1651 
1652  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
1653  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
1654  switch (crt_state) {
1655  case CoalesceState::Initial: {
1656  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
1657  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
1658  left_deep_joins.end()) {
1659  crt_pattern.push_back(size_t(nodeIt));
1660  crt_state = CoalesceState::Filter;
1661  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1662  } else if (std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1663  crt_pattern.push_back(size_t(nodeIt));
1664  crt_state = CoalesceState::FirstProject;
1665  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1666  } else {
1667  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1668  }
1669  break;
1670  }
1671  case CoalesceState::Filter: {
1672  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1673  if (project_node->hasWindowFunctionExpr()) {
1674  reset_state();
1675  break;
1676  }
1677  crt_pattern.push_back(size_t(nodeIt));
1678  crt_state = CoalesceState::FirstProject;
1679  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1680  } else {
1681  reset_state();
1682  }
1683  break;
1684  }
1685  case CoalesceState::FirstProject: {
1686  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
1687  crt_pattern.push_back(size_t(nodeIt));
1688  crt_state = CoalesceState::Aggregate;
1689  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1690  } else {
1691  if (crt_pattern.size() >= 2) {
1692  create_compound(nodes, crt_pattern);
1693  }
1694  reset_state();
1695  }
1696  break;
1697  }
1698  case CoalesceState::Aggregate: {
1699  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1700  // TODO(adb): overloading the simple project terminology again here
1701  bool is_simple_project{true};
1702  for (size_t i = 0; i < project_node->size(); i++) {
1703  const auto scalar_rex = project_node->getProjectAt(i);
1704  // If the top level scalar rex is an input node, we can bypass the visitor
1705  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
1707  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
1708  is_simple_project = false;
1709  break;
1710  }
1711  continue;
1712  }
1713  CoalesceSecondaryProjectVisitor visitor;
1714  if (!visitor.visit(project_node->getProjectAt(i))) {
1715  is_simple_project = false;
1716  break;
1717  }
1718  }
1719  if (is_simple_project) {
1720  crt_pattern.push_back(size_t(nodeIt));
1721  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1722  }
1723  }
1724  CHECK_GE(crt_pattern.size(), size_t(2));
1725  create_compound(nodes, crt_pattern);
1726  reset_state();
1727  break;
1728  }
1729  default:
1730  CHECK(false);
1731  }
1732  }
1733  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
1734  if (crt_pattern.size() >= 2) {
1735  create_compound(nodes, crt_pattern);
1736  }
1737  CHECK(!crt_pattern.empty());
1738  }
1739 }
1740 
1748 class WindowFunctionDetectionVisitor : public RexVisitor<const RexScalar*> {
1749  protected:
1750  // Detect embedded window function expressions in operators
1751  const RexScalar* visitOperator(const RexOperator* rex_operator) const final {
1752  if (is_window_function_operator(rex_operator)) {
1753  return rex_operator;
1754  }
1755 
1756  const size_t operand_count = rex_operator->size();
1757  for (size_t i = 0; i < operand_count; ++i) {
1758  const auto operand = rex_operator->getOperand(i);
1759  if (is_window_function_operator(operand)) {
1760  // Handle both RexWindowFunctionOperators and window functions built up from
1761  // multiple RexScalar objects (e.g. AVG)
1762  return operand;
1763  }
1764  const auto operandResult = visit(operand);
1765  if (operandResult) {
1766  return operandResult;
1767  }
1768  }
1769 
1770  return defaultResult();
1771  }
1772 
1773  // Detect embedded window function expressions in case statements. Note that this may
1774  // manifest as a nested case statement inside a top level case statement, as some
1775  // window functions (sum, avg) are represented as a case statement. Use the
1776  // is_window_function_operator helper to detect complete window function expressions.
1777  const RexScalar* visitCase(const RexCase* rex_case) const final {
1778  if (is_window_function_operator(rex_case)) {
1779  return rex_case;
1780  }
1781 
1782  auto result = defaultResult();
1783  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1784  const auto when = rex_case->getWhen(i);
1785  result = is_window_function_operator(when) ? when : visit(when);
1786  if (result) {
1787  return result;
1788  }
1789  const auto then = rex_case->getThen(i);
1790  result = is_window_function_operator(then) ? then : visit(then);
1791  if (result) {
1792  return result;
1793  }
1794  }
1795  if (rex_case->getElse()) {
1796  auto else_expr = rex_case->getElse();
1797  result = is_window_function_operator(else_expr) ? else_expr : visit(else_expr);
1798  }
1799  return result;
1800  }
1801 
1802  const RexScalar* aggregateResult(const RexScalar* const& aggregate,
1803  const RexScalar* const& next_result) const final {
1804  // all methods calling aggregate result should be overriden
1805  UNREACHABLE();
1806  return nullptr;
1807  }
1808 
1809  const RexScalar* defaultResult() const final { return nullptr; }
1810 };
1811 
1821  public:
1822  RexWindowFuncReplacementVisitor(std::unique_ptr<const RexScalar> replacement_rex)
1823  : replacement_rex_(std::move(replacement_rex)) {}
1824 
1825  ~RexWindowFuncReplacementVisitor() { CHECK(replacement_rex_ == nullptr); }
1826 
1827  protected:
1828  RetType visitOperator(const RexOperator* rex_operator) const final {
1829  if (should_replace_operand(rex_operator)) {
1830  return std::move(replacement_rex_);
1831  }
1832 
1833  const auto rex_window_function_operator =
1834  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1835  if (rex_window_function_operator) {
1836  // Deep copy the embedded window function operator
1837  return visitWindowFunctionOperator(rex_window_function_operator);
1838  }
1839 
1840  const size_t operand_count = rex_operator->size();
1841  std::vector<RetType> new_opnds;
1842  for (size_t i = 0; i < operand_count; ++i) {
1843  const auto operand = rex_operator->getOperand(i);
1844  if (should_replace_operand(operand)) {
1845  new_opnds.push_back(std::move(replacement_rex_));
1846  } else {
1847  new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
1848  }
1849  }
1850  return rex_operator->getDisambiguated(new_opnds);
1851  }
1852 
1853  RetType visitCase(const RexCase* rex_case) const final {
1854  if (should_replace_operand(rex_case)) {
1855  return std::move(replacement_rex_);
1856  }
1857 
1858  std::vector<std::pair<RetType, RetType>> new_pair_list;
1859  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1860  auto when_operand = rex_case->getWhen(i);
1861  auto then_operand = rex_case->getThen(i);
1862  new_pair_list.emplace_back(
1863  should_replace_operand(when_operand) ? std::move(replacement_rex_)
1864  : visit(when_operand),
1865  should_replace_operand(then_operand) ? std::move(replacement_rex_)
1866  : visit(then_operand));
1867  }
1868  auto new_else = should_replace_operand(rex_case->getElse())
1869  ? std::move(replacement_rex_)
1870  : visit(rex_case->getElse());
1871  return std::make_unique<RexCase>(new_pair_list, new_else);
1872  }
1873 
1874  private:
1875  bool should_replace_operand(const RexScalar* rex) const {
1876  return replacement_rex_ && is_window_function_operator(rex);
1877  }
1878 
1879  mutable std::unique_ptr<const RexScalar> replacement_rex_;
1880 };
1881 
1892  public:
1893  RexInputBackpropagationVisitor(RelProject* node) : node_(node) { CHECK(node_); }
1894 
1895  protected:
1896  RetType visitInput(const RexInput* rex_input) const final {
1897  if (rex_input->getSourceNode() != node_) {
1898  const auto cur_index = rex_input->getIndex();
1899  auto cur_source_node = rex_input->getSourceNode();
1900  std::string field_name = "";
1901  if (auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
1902  field_name = cur_project_node->getFieldName(cur_index);
1903  }
1904  node_->appendInput(field_name, rex_input->deepCopy());
1905  return std::make_unique<RexInput>(node_, node_->size() - 1);
1906  } else {
1907  return rex_input->deepCopy();
1908  }
1909  }
1910 
1911  private:
1912  mutable RelProject* node_;
1913 };
1914 
1931  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
1932  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
1933 
1935  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
1936  const auto node = *node_itr;
1937  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
1938  if (!window_func_project_node) {
1939  continue;
1940  }
1941 
1942  // map scalar expression index in the project node to wiondow function ptr
1943  std::unordered_map<size_t, const RexScalar*> embedded_window_function_expressions;
1944 
1945  // Iterate the target exprs of the project node and check for window function
1946  // expressions. If an embedded expression exists, save it in the
1947  // embedded_window_function_expressions map and split the expression into a window
1948  // function expression and a parent expression in a subsequent project node
1949  for (size_t i = 0; i < window_func_project_node->size(); i++) {
1950  const auto scalar_rex = window_func_project_node->getProjectAt(i);
1951  if (is_window_function_operator(scalar_rex)) {
1952  // top level window function exprs are fine
1953  continue;
1954  }
1955 
1956  if (const auto window_func_rex = visitor.visit(scalar_rex)) {
1957  const auto ret = embedded_window_function_expressions.insert(
1958  std::make_pair(i, window_func_rex));
1959  CHECK(ret.second);
1960  }
1961  }
1962 
1963  if (!embedded_window_function_expressions.empty()) {
1964  std::vector<std::unique_ptr<const RexScalar>> new_scalar_exprs;
1965 
1966  auto window_func_scalar_exprs =
1967  window_func_project_node->getExpressionsAndRelease();
1968  for (size_t rex_idx = 0; rex_idx < window_func_scalar_exprs.size(); ++rex_idx) {
1969  const auto embedded_window_func_expr_pair =
1970  embedded_window_function_expressions.find(rex_idx);
1971  if (embedded_window_func_expr_pair ==
1972  embedded_window_function_expressions.end()) {
1973  new_scalar_exprs.emplace_back(
1974  std::make_unique<const RexInput>(window_func_project_node.get(), rex_idx));
1975  } else {
1976  const auto window_func_rex_idx = embedded_window_func_expr_pair->first;
1977  CHECK_LT(window_func_rex_idx, window_func_scalar_exprs.size());
1978 
1979  const auto& window_func_rex = embedded_window_func_expr_pair->second;
1980 
1981  RexDeepCopyVisitor copier;
1982  auto window_func_rex_copy = copier.visit(window_func_rex);
1983 
1984  auto window_func_parent_expr =
1985  window_func_scalar_exprs[window_func_rex_idx].get();
1986 
1987  // Replace window func rex with an input rex
1988  auto window_func_result_input = std::make_unique<const RexInput>(
1989  window_func_project_node.get(), window_func_rex_idx);
1990  RexWindowFuncReplacementVisitor replacer(std::move(window_func_result_input));
1991  auto new_parent_rex = replacer.visit(window_func_parent_expr);
1992 
1993  // Put the parent expr in the new scalar exprs
1994  new_scalar_exprs.emplace_back(std::move(new_parent_rex));
1995 
1996  // Put the window func expr in cur scalar exprs
1997  window_func_scalar_exprs[window_func_rex_idx] = std::move(window_func_rex_copy);
1998  }
1999  }
2000 
2001  CHECK_EQ(window_func_scalar_exprs.size(), new_scalar_exprs.size());
2002  window_func_project_node->setExpressions(window_func_scalar_exprs);
2003 
2004  // Ensure any inputs from the node containing the expression (the "new" node)
2005  // exist on the window function project node, e.g. if we had a binary operation
2006  // involving an aggregate value or column not included in the top level
2007  // projection list.
2008  RexInputBackpropagationVisitor input_visitor(window_func_project_node.get());
2009  for (size_t i = 0; i < new_scalar_exprs.size(); i++) {
2010  if (dynamic_cast<const RexInput*>(new_scalar_exprs[i].get())) {
2011  // ignore top level inputs, these were copied directly from the previous
2012  // node
2013  continue;
2014  }
2015  new_scalar_exprs[i] = input_visitor.visit(new_scalar_exprs[i].get());
2016  }
2017 
2018  // Build the new project node and insert it into the list after the project node
2019  // containing the window function
2020  auto new_project =
2021  std::make_shared<RelProject>(new_scalar_exprs,
2022  window_func_project_node->getFields(),
2023  window_func_project_node);
2024  node_list.insert(std::next(node_itr), new_project);
2025 
2026  // Rebind all the following inputs
2027  for (auto rebind_itr = std::next(node_itr, 2); rebind_itr != node_list.end();
2028  rebind_itr++) {
2029  (*rebind_itr)->replaceInput(window_func_project_node, new_project);
2030  }
2031  }
2032  }
2033  nodes.assign(node_list.begin(), node_list.end());
2034 }
2035 
2036 using RexInputSet = std::unordered_set<RexInput>;
2037 
2038 class RexInputCollector : public RexVisitor<RexInputSet> {
2039  public:
2040  RexInputSet visitInput(const RexInput* input) const override {
2041  return RexInputSet{*input};
2042  }
2043 
2044  protected:
2046  const RexInputSet& next_result) const override {
2047  auto result = aggregate;
2048  result.insert(next_result.begin(), next_result.end());
2049  return result;
2050  }
2051 };
2052 
2060 void add_window_function_pre_project(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
2061  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2062 
2063  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2064  const auto node = *node_itr;
2065  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2066  if (!window_func_project_node) {
2067  continue;
2068  }
2069  if (!window_func_project_node->hasWindowFunctionExpr()) {
2070  // the first projection node in the query plan does not have a window function
2071  // expression -- this step is not requierd.
2072  return;
2073  }
2074 
2075  const auto prev_node_itr = std::prev(node_itr);
2076  const auto prev_node = *prev_node_itr;
2077  CHECK(prev_node);
2078 
2079  RexInputSet inputs;
2080  RexInputCollector input_collector;
2081  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2082  auto new_inputs = input_collector.visit(window_func_project_node->getProjectAt(i));
2083  inputs.insert(new_inputs.begin(), new_inputs.end());
2084  }
2085 
2086  // Note: Technically not required since we are mapping old inputs to new input
2087  // indices, but makes the re-mapping of inputs easier to follow.
2088  std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2089  std::sort(sorted_inputs.begin(),
2090  sorted_inputs.end(),
2091  [](const auto& a, const auto& b) { return a.getIndex() < b.getIndex(); });
2092 
2093  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2094  std::vector<std::string> fields;
2095  std::unordered_map<unsigned, unsigned> old_index_to_new_index;
2096  for (auto& input : sorted_inputs) {
2097  CHECK_EQ(input.getSourceNode(), prev_node.get());
2098  CHECK(old_index_to_new_index
2099  .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2100  .second);
2101  scalar_exprs.emplace_back(input.deepCopy());
2102  fields.emplace_back("");
2103  }
2104 
2105  auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2106  node_list.insert(node_itr, new_project);
2107  window_func_project_node->replaceInput(
2108  prev_node, new_project, old_index_to_new_index);
2109 
2110  break;
2111  }
2112 
2113  nodes.assign(node_list.begin(), node_list.end());
2114 }
2115 
2116 int64_t get_int_literal_field(const rapidjson::Value& obj,
2117  const char field[],
2118  const int64_t default_val) noexcept {
2119  const auto it = obj.FindMember(field);
2120  if (it == obj.MemberEnd()) {
2121  return default_val;
2122  }
2123  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
2124  CHECK_EQ(kDECIMAL, lit->getType());
2125  CHECK_EQ(unsigned(0), lit->getScale());
2126  CHECK_EQ(unsigned(0), lit->getTypeScale());
2127  return lit->getVal<int64_t>();
2128 }
2129 
2130 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
2131  const auto& inputs_json = field(node, "inputs");
2132  CHECK(inputs_json.IsArray() && !inputs_json.Size());
2133 }
2134 
2136  const rapidjson::Value& scan_ra) {
2137  const auto& table_json = field(scan_ra, "table");
2138  CHECK(table_json.IsArray());
2139  CHECK_EQ(unsigned(2), table_json.Size());
2140  const auto td = cat.getMetadataForTable(table_json[1].GetString());
2141  CHECK(td);
2142  return td;
2143 }
2144 
2145 std::vector<std::string> getFieldNamesFromScanNode(const rapidjson::Value& scan_ra) {
2146  const auto& fields_json = field(scan_ra, "fieldNames");
2147  return strings_from_json_array(fields_json);
2148 }
2149 
2150 } // namespace
2151 
2153  for (const auto& expr : scalar_exprs_) {
2154  if (is_window_function_operator(expr.get())) {
2155  return true;
2156  }
2157  }
2158  return false;
2159 }
2160 namespace details {
2161 
2163  public:
2165 
2166  std::vector<std::shared_ptr<RelAlgNode>> run(const rapidjson::Value& rels,
2167  RelAlgDagBuilder& root_dag_builder) {
2168  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
2169  const auto& crt_node = *rels_it;
2170  const auto id = node_id(crt_node);
2171  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2172  CHECK(crt_node.IsObject());
2173  std::shared_ptr<RelAlgNode> ra_node = nullptr;
2174  const auto rel_op = json_str(field(crt_node, "relOp"));
2175  if (rel_op == std::string("EnumerableTableScan") ||
2176  rel_op == std::string("LogicalTableScan")) {
2177  ra_node = dispatchTableScan(crt_node);
2178  } else if (rel_op == std::string("LogicalProject")) {
2179  ra_node = dispatchProject(crt_node, root_dag_builder);
2180  } else if (rel_op == std::string("LogicalFilter")) {
2181  ra_node = dispatchFilter(crt_node, root_dag_builder);
2182  } else if (rel_op == std::string("LogicalAggregate")) {
2183  ra_node = dispatchAggregate(crt_node);
2184  } else if (rel_op == std::string("LogicalJoin")) {
2185  ra_node = dispatchJoin(crt_node, root_dag_builder);
2186  } else if (rel_op == std::string("LogicalSort")) {
2187  ra_node = dispatchSort(crt_node);
2188  } else if (rel_op == std::string("LogicalValues")) {
2189  ra_node = dispatchLogicalValues(crt_node);
2190  } else if (rel_op == std::string("LogicalTableModify")) {
2191  ra_node = dispatchModify(crt_node);
2192  } else if (rel_op == std::string("LogicalTableFunctionScan")) {
2193  ra_node = dispatchTableFunction(crt_node, root_dag_builder);
2194  } else if (rel_op == std::string("LogicalUnion")) {
2195  ra_node = dispatchUnion(crt_node);
2196  } else {
2197  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
2198  }
2199  nodes_.push_back(ra_node);
2200  }
2201 
2202  return std::move(nodes_);
2203  }
2204 
2205  private:
2206  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
2207  check_empty_inputs_field(scan_ra);
2208  CHECK(scan_ra.IsObject());
2209  const auto td = getTableFromScanNode(cat_, scan_ra);
2210  const auto field_names = getFieldNamesFromScanNode(scan_ra);
2211  if (scan_ra.HasMember("hints")) {
2212  auto scan_node = std::make_shared<RelScan>(td, field_names);
2213  getRelAlgHints(scan_ra, scan_node);
2214  return scan_node;
2215  }
2216  return std::make_shared<RelScan>(td, field_names);
2217  }
2218 
2219  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra,
2220  RelAlgDagBuilder& root_dag_builder) {
2221  const auto inputs = getRelAlgInputs(proj_ra);
2222  CHECK_EQ(size_t(1), inputs.size());
2223  const auto& exprs_json = field(proj_ra, "exprs");
2224  CHECK(exprs_json.IsArray());
2225  std::vector<std::unique_ptr<const RexScalar>> exprs;
2226  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
2227  ++exprs_json_it) {
2228  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
2229  }
2230  const auto& fields = field(proj_ra, "fields");
2231  if (proj_ra.HasMember("hints")) {
2232  auto project_node = std::make_shared<RelProject>(
2233  exprs, strings_from_json_array(fields), inputs.front());
2234  getRelAlgHints(proj_ra, project_node);
2235  return project_node;
2236  }
2237  return std::make_shared<RelProject>(
2238  exprs, strings_from_json_array(fields), inputs.front());
2239  }
2240 
2241  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra,
2242  RelAlgDagBuilder& root_dag_builder) {
2243  const auto inputs = getRelAlgInputs(filter_ra);
2244  CHECK_EQ(size_t(1), inputs.size());
2245  const auto id = node_id(filter_ra);
2246  CHECK(id);
2247  auto condition =
2248  parse_scalar_expr(field(filter_ra, "condition"), cat_, root_dag_builder);
2249  return std::make_shared<RelFilter>(condition, inputs.front());
2250  }
2251 
2252  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
2253  const auto inputs = getRelAlgInputs(agg_ra);
2254  CHECK_EQ(size_t(1), inputs.size());
2255  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
2256  const auto group = indices_from_json_array(field(agg_ra, "group"));
2257  for (size_t i = 0; i < group.size(); ++i) {
2258  CHECK_EQ(i, group[i]);
2259  }
2260  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
2261  throw QueryNotSupported("GROUP BY extensions not supported");
2262  }
2263  const auto& aggs_json_arr = field(agg_ra, "aggs");
2264  CHECK(aggs_json_arr.IsArray());
2265  std::vector<std::unique_ptr<const RexAgg>> aggs;
2266  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
2267  aggs_json_arr_it != aggs_json_arr.End();
2268  ++aggs_json_arr_it) {
2269  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
2270  }
2271  if (agg_ra.HasMember("hints")) {
2272  auto agg_node =
2273  std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2274  getRelAlgHints(agg_ra, agg_node);
2275  return agg_node;
2276  }
2277  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2278  }
2279 
2280  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra,
2281  RelAlgDagBuilder& root_dag_builder) {
2282  const auto inputs = getRelAlgInputs(join_ra);
2283  CHECK_EQ(size_t(2), inputs.size());
2284  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
2285  auto filter_rex =
2286  parse_scalar_expr(field(join_ra, "condition"), cat_, root_dag_builder);
2287  if (join_ra.HasMember("hints")) {
2288  auto join_node =
2289  std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2290  getRelAlgHints(join_ra, join_node);
2291  return join_node;
2292  }
2293  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2294  }
2295 
2296  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
2297  const auto inputs = getRelAlgInputs(sort_ra);
2298  CHECK_EQ(size_t(1), inputs.size());
2299  std::vector<SortField> collation;
2300  const auto& collation_arr = field(sort_ra, "collation");
2301  CHECK(collation_arr.IsArray());
2302  for (auto collation_arr_it = collation_arr.Begin();
2303  collation_arr_it != collation_arr.End();
2304  ++collation_arr_it) {
2305  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
2306  const auto sort_dir = parse_sort_direction(*collation_arr_it);
2307  const auto null_pos = parse_nulls_position(*collation_arr_it);
2308  collation.emplace_back(field_idx, sort_dir, null_pos);
2309  }
2310  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
2311  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
2312  auto ret = std::make_shared<RelSort>(
2313  collation, limit > 0 ? limit : 0, offset, inputs.front());
2314  ret->setEmptyResult(limit == 0);
2315  return ret;
2316  }
2317 
2318  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
2319  const auto inputs = getRelAlgInputs(logical_modify_ra);
2320  CHECK_EQ(size_t(1), inputs.size());
2321 
2322  const auto table_descriptor = getTableFromScanNode(cat_, logical_modify_ra);
2323  if (table_descriptor->isView) {
2324  throw std::runtime_error("UPDATE of a view is unsupported.");
2325  }
2326 
2327  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
2328  std::string op = json_str(field(logical_modify_ra, "operation"));
2329  RelModify::TargetColumnList target_column_list;
2330 
2331  if (op == "UPDATE") {
2332  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
2333  CHECK(update_columns.IsArray());
2334 
2335  for (auto column_arr_it = update_columns.Begin();
2336  column_arr_it != update_columns.End();
2337  ++column_arr_it) {
2338  target_column_list.push_back(column_arr_it->GetString());
2339  }
2340  }
2341 
2342  auto modify_node = std::make_shared<RelModify>(
2343  cat_, table_descriptor, flattened, op, target_column_list, inputs[0]);
2344  switch (modify_node->getOperation()) {
2346  modify_node->applyDeleteModificationsToInputNode();
2347  break;
2348  }
2350  modify_node->applyUpdateModificationsToInputNode();
2351  break;
2352  }
2353  default:
2354  throw std::runtime_error("Unsupported RelModify operation: " +
2355  json_node_to_string(logical_modify_ra));
2356  }
2357 
2358  return modify_node;
2359  }
2360 
2361  std::shared_ptr<RelTableFunction> dispatchTableFunction(
2362  const rapidjson::Value& table_func_ra,
2363  RelAlgDagBuilder& root_dag_builder) {
2364  const auto inputs = getRelAlgInputs(table_func_ra);
2365  const auto& invocation = field(table_func_ra, "invocation");
2366  CHECK(invocation.IsObject());
2367 
2368  const auto& operands = field(invocation, "operands");
2369  CHECK(operands.IsArray());
2370  CHECK_GE(operands.Size(), unsigned(0));
2371 
2372  std::vector<const Rex*> col_inputs;
2373  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
2374  std::vector<std::string> fields;
2375 
2376  for (auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
2377  ++exprs_json_it) {
2378  const auto& expr_json = *exprs_json_it;
2379  CHECK(expr_json.IsObject());
2380  if (expr_json.HasMember("op")) {
2381  const auto op_str = json_str(field(expr_json, "op"));
2382  if (op_str == "CAST" && expr_json.HasMember("type")) {
2383  const auto& expr_type = field(expr_json, "type");
2384  CHECK(expr_type.IsObject());
2385  CHECK(expr_type.HasMember("type"));
2386  const auto& expr_type_name = json_str(field(expr_type, "type"));
2387  if (expr_type_name == "CURSOR") {
2388  CHECK(expr_json.HasMember("operands"));
2389  const auto& expr_operands = field(expr_json, "operands");
2390  CHECK(expr_operands.IsArray());
2391  if (expr_operands.Size() != 1) {
2392  throw std::runtime_error(
2393  "Table functions currently only support one ResultSet input");
2394  }
2395  auto pos = field(expr_operands[0], "input").GetInt();
2396  CHECK_LT(pos, inputs.size());
2397  for (size_t i = inputs[pos]->size(); i > 0; i--) {
2398  table_func_inputs.emplace_back(
2399  std::make_unique<RexAbstractInput>(col_inputs.size()));
2400  col_inputs.emplace_back(table_func_inputs.back().get());
2401  }
2402  continue;
2403  }
2404  }
2405  }
2406  table_func_inputs.emplace_back(
2407  parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
2408  }
2409 
2410  const auto& op_name = field(invocation, "op");
2411  CHECK(op_name.IsString());
2412 
2413  std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
2414  const auto& row_types = field(table_func_ra, "rowType");
2415  CHECK(row_types.IsArray());
2416  CHECK_GE(row_types.Size(), unsigned(0));
2417  const auto& row_types_array = row_types.GetArray();
2418  for (size_t i = 0; i < row_types_array.Size(); i++) {
2419  // We don't care about the type information in rowType -- replace each output with
2420  // a reference to be resolved later in the translator
2421  table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
2422  fields.emplace_back("");
2423  }
2424  return std::make_shared<RelTableFunction>(op_name.GetString(),
2425  inputs,
2426  fields,
2427  col_inputs,
2428  table_func_inputs,
2429  table_function_projected_outputs);
2430  }
2431 
2432  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
2433  const rapidjson::Value& logical_values_ra) {
2434  const auto& tuple_type_arr = field(logical_values_ra, "type");
2435  CHECK(tuple_type_arr.IsArray());
2436  std::vector<TargetMetaInfo> tuple_type;
2437  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
2438  tuple_type_arr_it != tuple_type_arr.End();
2439  ++tuple_type_arr_it) {
2440  const auto component_type = parse_type(*tuple_type_arr_it);
2441  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
2442  tuple_type.emplace_back(component_name, component_type);
2443  }
2444  const auto& inputs_arr = field(logical_values_ra, "inputs");
2445  CHECK(inputs_arr.IsArray());
2446  const auto& tuples_arr = field(logical_values_ra, "tuples");
2447  CHECK(tuples_arr.IsArray());
2448 
2449  if (inputs_arr.Size()) {
2450  throw QueryNotSupported("Inputs not supported in logical values yet.");
2451  }
2452 
2453  std::vector<RelLogicalValues::RowValues> values;
2454  if (tuples_arr.Size()) {
2455  for (const auto& row : tuples_arr.GetArray()) {
2456  CHECK(row.IsArray());
2457  const auto values_json = row.GetArray();
2458  if (!values.empty()) {
2459  CHECK_EQ(values[0].size(), values_json.Size());
2460  }
2461  values.emplace_back(RelLogicalValues::RowValues{});
2462  for (const auto& value : values_json) {
2463  CHECK(value.IsObject());
2464  CHECK(value.HasMember("literal"));
2465  values.back().emplace_back(parse_literal(value));
2466  }
2467  }
2468  }
2469 
2470  return std::make_shared<RelLogicalValues>(tuple_type, values);
2471  }
2472 
2473  std::shared_ptr<RelLogicalUnion> dispatchUnion(
2474  const rapidjson::Value& logical_union_ra) {
2475  auto inputs = getRelAlgInputs(logical_union_ra);
2476  auto const& all_type_bool = field(logical_union_ra, "all");
2477  CHECK(all_type_bool.IsBool());
2478  return std::make_shared<RelLogicalUnion>(std::move(inputs), all_type_bool.GetBool());
2479  }
2480 
2481  RelAlgInputs getRelAlgInputs(const rapidjson::Value& node) {
2482  if (node.HasMember("inputs")) {
2483  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
2484  RelAlgInputs ra_inputs;
2485  for (const auto& str_id : str_input_ids) {
2486  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
2487  }
2488  return ra_inputs;
2489  }
2490  return {prev(node)};
2491  }
2492 
2493  std::pair<std::string, std::string> getKVOptionPair(std::string& str, size_t& pos) {
2494  auto option = str.substr(0, pos);
2495  std::string delim = "=";
2496  size_t delim_pos = option.find(delim);
2497  auto key = option.substr(0, delim_pos);
2498  auto val = option.substr(delim_pos + 1, option.length());
2499  str.erase(0, pos + delim.length() + 1);
2500  return {key, val};
2501  }
2502 
2503  ExplainedQueryHint parseHintString(std::string& hint_string) {
2504  std::string white_space_delim = " ";
2505  int l = hint_string.length();
2506  hint_string = hint_string.erase(0, 1).substr(0, l - 2);
2507  size_t pos = 0;
2508  if ((pos = hint_string.find("options:")) != std::string::npos) {
2509  // need to parse hint options
2510  std::vector<std::string> tokens;
2511  std::string hint_name = hint_string.substr(0, hint_string.find(white_space_delim));
2512  auto hint_type = RegisteredQueryHint::translateQueryHint(hint_name);
2513  bool kv_list_op = false;
2514  std::string raw_options = hint_string.substr(pos + 8, hint_string.length() - 2);
2515  if (raw_options.find('{') != std::string::npos) {
2516  kv_list_op = true;
2517  } else {
2518  CHECK(raw_options.find('[') != std::string::npos);
2519  }
2520  auto t1 = raw_options.erase(0, 1);
2521  raw_options = t1.substr(0, t1.length() - 1);
2522  std::string op_delim = ", ";
2523  if (kv_list_op) {
2524  // kv options
2525  std::unordered_map<std::string, std::string> kv_options;
2526  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
2527  auto kv_pair = getKVOptionPair(raw_options, pos);
2528  kv_options.emplace(kv_pair.first, kv_pair.second);
2529  }
2530  // handle the last kv pair
2531  auto kv_pair = getKVOptionPair(raw_options, pos);
2532  kv_options.emplace(kv_pair.first, kv_pair.second);
2533  return {hint_type, true, false, true, kv_options};
2534  } else {
2535  std::vector<std::string> list_options;
2536  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
2537  list_options.emplace_back(raw_options.substr(0, pos));
2538  raw_options.erase(0, pos + white_space_delim.length() + 1);
2539  }
2540  // handle the last option
2541  list_options.emplace_back(raw_options.substr(0, pos));
2542  return {hint_type, true, false, false, list_options};
2543  }
2544  } else {
2545  // marker hint: no extra option for this hint
2546  std::string hint_name = hint_string.substr(0, hint_string.find(white_space_delim));
2547  auto hint_type = RegisteredQueryHint::translateQueryHint(hint_name);
2548  return {hint_type, true, true, false};
2549  }
2550  }
2551 
2552  void getRelAlgHints(const rapidjson::Value& json_node,
2553  std::shared_ptr<RelAlgNode> node) {
2554  std::string hint_explained = json_str(field(json_node, "hints"));
2555  size_t pos = 0;
2556  std::string delim = "|";
2557  std::vector<std::string> hint_list;
2558  while ((pos = hint_explained.find(delim)) != std::string::npos) {
2559  hint_list.emplace_back(hint_explained.substr(0, pos));
2560  hint_explained.erase(0, pos + delim.length());
2561  }
2562  // handling the last one
2563  hint_list.emplace_back(hint_explained.substr(0, pos));
2564 
2565  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
2566  if (agg_node) {
2567  for (std::string& hint : hint_list) {
2568  auto parsed_hint = parseHintString(hint);
2569  agg_node->addHint(parsed_hint);
2570  }
2571  }
2572  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
2573  if (project_node) {
2574  for (std::string& hint : hint_list) {
2575  auto parsed_hint = parseHintString(hint);
2576  project_node->addHint(parsed_hint);
2577  }
2578  }
2579  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
2580  if (scan_node) {
2581  for (std::string& hint : hint_list) {
2582  auto parsed_hint = parseHintString(hint);
2583  scan_node->addHint(parsed_hint);
2584  }
2585  }
2586  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
2587  if (join_node) {
2588  for (std::string& hint : hint_list) {
2589  auto parsed_hint = parseHintString(hint);
2590  join_node->addHint(parsed_hint);
2591  }
2592  }
2593 
2594  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
2595  if (compound_node) {
2596  for (std::string& hint : hint_list) {
2597  auto parsed_hint = parseHintString(hint);
2598  compound_node->addHint(parsed_hint);
2599  }
2600  }
2601  }
2602 
2603  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
2604  const auto id = node_id(crt_node);
2605  CHECK(id);
2606  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2607  return nodes_.back();
2608  }
2609 
2611  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
2612 };
2613 
2614 } // namespace details
2615 
2616 RelAlgDagBuilder::RelAlgDagBuilder(const std::string& query_ra,
2618  const RenderInfo* render_info)
2619  : cat_(cat), render_info_(render_info), query_hint_(RegisteredQueryHint::defaults()) {
2620  rapidjson::Document query_ast;
2621  query_ast.Parse(query_ra.c_str());
2622  VLOG(2) << "Parsing query RA JSON: " << query_ra;
2623  if (query_ast.HasParseError()) {
2624  query_ast.GetParseError();
2625  LOG(ERROR) << "Failed to parse RA tree from Calcite (offset "
2626  << query_ast.GetErrorOffset() << "):\n"
2627  << rapidjson::GetParseError_En(query_ast.GetParseError());
2628  VLOG(1) << "Failed to parse query RA: " << query_ra;
2629  throw std::runtime_error(
2630  "Failed to parse relational algebra tree. Possible query syntax error.");
2631  }
2632  CHECK(query_ast.IsObject());
2634  build(query_ast, *this);
2635 }
2636 
2638  const rapidjson::Value& query_ast,
2640  const RenderInfo* render_info)
2641  : cat_(cat), render_info_(render_info), query_hint_(RegisteredQueryHint::defaults()) {
2642  build(query_ast, root_dag_builder);
2643 }
2644 
2645 void RelAlgDagBuilder::build(const rapidjson::Value& query_ast,
2646  RelAlgDagBuilder& lead_dag_builder) {
2647  const auto& rels = field(query_ast, "rels");
2648  CHECK(rels.IsArray());
2649  try {
2650  nodes_ = details::RelAlgDispatcher(cat_).run(rels, lead_dag_builder);
2651  } catch (const QueryNotSupported&) {
2652  throw;
2653  }
2654  CHECK(!nodes_.empty());
2656 
2657  if (render_info_) {
2658  // Alter the RA for render. Do this before any flattening/optimizations are done to
2659  // the tree.
2661  }
2662 
2663  handleQueryHint(nodes_, this);
2664  mark_nops(nodes_);
2669  std::vector<const RelAlgNode*> filtered_left_deep_joins;
2670  std::vector<const RelAlgNode*> left_deep_joins;
2671  for (const auto& node : nodes_) {
2672  const auto left_deep_join_root = get_left_deep_join_root(node);
2673  // The filter which starts a left-deep join pattern must not be coalesced
2674  // since it contains (part of) the join condition.
2675  if (left_deep_join_root) {
2676  left_deep_joins.push_back(left_deep_join_root.get());
2677  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
2678  filtered_left_deep_joins.push_back(left_deep_join_root.get());
2679  }
2680  }
2681  }
2682  if (filtered_left_deep_joins.empty()) {
2684  }
2685  eliminate_dead_columns(nodes_);
2686  eliminate_dead_subqueries(subqueries_, nodes_.back().get());
2688  if (g_cluster) {
2690  }
2691  coalesce_nodes(nodes_, left_deep_joins);
2692  CHECK(nodes_.back().use_count() == 1);
2693  create_left_deep_join(nodes_);
2694 }
2695 
2697  std::function<void(RelAlgNode const*)> const& callback) const {
2698  for (auto const& node : nodes_) {
2699  if (node) {
2700  callback(node.get());
2701  }
2702  }
2703 }
2704 
2706  for (auto& node : nodes_) {
2707  if (node) {
2708  node->resetQueryExecutionState();
2709  }
2710  }
2711 }
2712 
2713 // Return tree with depth represented by indentations.
2714 std::string tree_string(const RelAlgNode* ra, const size_t depth) {
2715  std::string result = std::string(2 * depth, ' ') + ::toString(ra) + '\n';
2716  for (size_t i = 0; i < ra->inputCount(); ++i) {
2717  result += tree_string(ra->getInput(i), depth + 1);
2718  }
2719  return result;
2720 }
2721 
2722 std::string RexSubQuery::toString() const {
2723  return cat(::typeName(this), "(", ::toString(ra_.get()), ")");
2724 }
2725 
2726 size_t RexSubQuery::toHash() const {
2727  if (!hash_) {
2728  hash_ = typeid(RexSubQuery).hash_code();
2729  boost::hash_combine(*hash_, ra_->toHash());
2730  }
2731  return *hash_;
2732 }
2733 
2734 std::string RexInput::toString() const {
2735  const auto scan_node = dynamic_cast<const RelScan*>(node_);
2736  if (scan_node) {
2737  auto field_name = scan_node->getFieldName(getIndex());
2738  auto table_name = scan_node->getTableDescriptor()->tableName;
2739  return ::typeName(this) + "(" + table_name + "." + field_name + ")";
2740  }
2741  return cat(::typeName(this),
2742  "(node=",
2743  ::toString(node_),
2744  ", in_index=",
2746  ")");
2747 }
2748 
2749 size_t RexInput::toHash() const {
2750  if (!hash_) {
2751  hash_ = typeid(RexInput).hash_code();
2752  boost::hash_combine(*hash_, node_->toHash());
2753  boost::hash_combine(*hash_, getIndex());
2754  }
2755  return *hash_;
2756 }
2757 
2758 std::string RelCompound::toString() const {
2759  return cat(::typeName(this),
2760  "(",
2761  (filter_expr_ ? filter_expr_->toString() : "null"),
2762  ", target_exprs=",
2764  ", ",
2766  ", agg_exps=",
2768  ", fields=",
2770  ", scalar_sources=",
2772  ", is_agg=",
2774  ")");
2775 }
2776 
2777 size_t RelCompound::toHash() const {
2778  if (!hash_) {
2779  hash_ = typeid(RelCompound).hash_code();
2780  boost::hash_combine(*hash_,
2781  filter_expr_ ? filter_expr_->toHash() : boost::hash_value("n"));
2782  boost::hash_combine(*hash_, is_agg_);
2783  for (auto& target_expr : target_exprs_) {
2784  if (auto rex_scalar = dynamic_cast<const RexScalar*>(target_expr)) {
2785  boost::hash_combine(*hash_, rex_scalar->toHash());
2786  }
2787  }
2788  for (auto& agg_expr : agg_exprs_) {
2789  boost::hash_combine(*hash_, agg_expr->toHash());
2790  }
2791  for (auto& scalar_source : scalar_sources_) {
2792  boost::hash_combine(*hash_, scalar_source->toHash());
2793  }
2794  boost::hash_combine(*hash_, groupby_count_);
2795  boost::hash_combine(*hash_, ::toString(fields_));
2796  }
2797  return *hash_;
2798 }
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
SQLTypes to_sql_type(const std::string &type_name)
void handleQueryHint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDagBuilder *dag_builder) noexcept
std::shared_ptr< const RelAlgNode > getRootNodeShPtr() const
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
const RexScalar * getThen(const size_t idx) const
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
#define CHECK_EQ(x, y)
Definition: Logger.h:214
JoinType to_join_type(const std::string &join_type_name)
const Catalog_Namespace::Catalog & cat_
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexSubQuery > deepCopy() const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
JoinType
Definition: sqldefs.h:108
std::vector< const Rex * > remapTargetPointers(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
std::string cat(Ts &&...args)
std::string toString(const ExtArgumentType &sig_type)
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:102
Definition: sqltypes.h:48
Hints * getDeliveredHints()
void addHint(const ExplainedQueryHint &hint_explained)
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
size_t toHash() const override
const RexScalar * getElse() const
RelCompound(std::unique_ptr< const RexScalar > &filter_expr, const std::vector< const Rex * > &target_exprs, const size_t groupby_count, const std::vector< const RexAgg * > &agg_exprs, const std::vector< std::string > &fields, std::vector< std::unique_ptr< const RexScalar >> &scalar_sources, const bool is_agg, bool update_disguised_as_select=false, bool delete_disguised_as_select=false, bool varlen_update_required=false, TableDescriptor const *manipulation_target_table=nullptr, ColumnNameList target_columns=ColumnNameList())
static thread_local unsigned crt_id_
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
RexScalar const * copyAndRedirectSource(RexScalar const *, size_t input_idx) const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
#define const
SQLAgg to_agg_kind(const std::string &agg_name)
std::shared_ptr< RelLogicalUnion > dispatchUnion(const rapidjson::Value &logical_union_ra)
#define LOG(tag)
Definition: Logger.h:200
NullSortedPosition
std::vector< std::string > TargetColumnList
size_t size() const
Hints * getDeliveredHints()
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const RexScalar * getOperand(const size_t idx) const
std::vector< const Rex * > col_inputs_
bool hasEquivCollationOf(const RelSort &that) const
const std::vector< std::string > fields_
std::string toString() const override
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
void build(const rapidjson::Value &query_ast, RelAlgDagBuilder &root_dag_builder)
string name
Definition: setup.in.py:72
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::string join(T const &container, std::string const &delim)
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
#define UNREACHABLE()
Definition: Logger.h:250
bool hint_applied_
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
#define CHECK_GE(x, y)
Definition: Logger.h:219
std::vector< std::string > fields_
RexInput(const RelAlgNode *node, const unsigned in_index)
void addHint(const ExplainedQueryHint &hint_explained)
Definition: sqldefs.h:49
const RexScalar * getWhen(const size_t idx) const
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
RetType visitOperator(const RexOperator *rex_operator) const final
void addHint(const ExplainedQueryHint &hint_explained)
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
const Catalog_Namespace::Catalog & cat_
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void checkForMatchingMetaInfoTypes() const
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
std::string to_string(char const *&&v)
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
const std::string getFieldName(const size_t i) const
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
RexInputSet visitInput(const RexInput *input) const override
std::vector< std::shared_ptr< RexSubQuery > > subqueries_
RexWindowFuncReplacementVisitor(std::unique_ptr< const RexScalar > replacement_rex)
const RenderInfo * render_info_
std::string to_string() const
Definition: sqltypes.h:466
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const RexScalar * visitOperator(const RexOperator *rex_operator) const final
unsigned getIndex() const
SQLOps getOperator() const
std::vector< std::shared_ptr< RelAlgNode > > nodes_
const TableDescriptor * getTableFromScanNode(const Catalog_Namespace::Catalog &cat, const rapidjson::Value &scan_ra)
SortDirection parse_sort_direction(const rapidjson::Value &collation)
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
static QueryHint translateQueryHint(const std::string &hint_name)
Definition: QueryHint.h:178
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
bool isRenaming() const
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void setIndex(const unsigned in_index) const
Hints * getDeliveredHints()
size_t toHash() const override
SQLOps to_sql_op(const std::string &op_str)
void add_window_function_pre_project(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::unique_ptr< Hints > hints_
void set_scale(int s)
Definition: sqltypes.h:418
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
void * visitInput(const RexInput *rex_input) const override
std::unique_ptr< Hints > hints_
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:54
void addHint(const ExplainedQueryHint &hint_explained)
size_t branchCount() const
std::unordered_set< RexInput > RexInputSet
const RelAlgNode * getInput(const size_t idx) const
RelFilter(std::unique_ptr< const RexScalar > &filter, std::shared_ptr< const RelAlgNode > input)
std::vector< std::shared_ptr< RelAlgNode > > nodes_
std::string toString() const override
RelAggregate(const size_t groupby_count, std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
std::unique_ptr< const RexScalar > filter_
bool isSimple() const
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
std::optional< size_t > hash_
const size_t groupby_count_
RexRebindReindexInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input, std::unordered_map< unsigned, unsigned > old_to_new_index_map)
std::optional< size_t > hash_
unsigned getId() const
const RelAlgNode * node_
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
void eachNode(std::function< void(RelAlgNode const *)> const &) const
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
Hints * getDeliveredHints()
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
SortDirection
const RexScalar * getProjectAt(const size_t idx) const
std::vector< std::unique_ptr< const RexAgg > > copyAggExprs(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
std::vector< std::shared_ptr< const RelAlgNode >> RelAlgInputs
#define CHECK_LT(x, y)
Definition: Logger.h:216
Definition: sqltypes.h:51
Definition: sqltypes.h:52
int32_t countRexLiteralArgs() const
std::unique_ptr< Hints > hints_
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
const ConstRexScalarPtrVector & getPartitionKeys() const
std::string tree_string(const RelAlgNode *ra, const size_t depth)
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:217
std::unique_ptr< Hints > hints_
const std::vector< const Rex * > target_exprs_
std::vector< ElementType >::const_iterator Super
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
RelLogicalUnion(RelAlgInputs, bool is_all)
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDagBuilder &root_dag_builder)
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::unique_ptr< const RexScalar > filter_expr_
void setSourceNode(const RelAlgNode *node) const
const RexScalar * aggregateResult(const RexScalar *const &aggregate, const RexScalar *const &next_result) const final
bool hasWindowFunctionExpr() const
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
std::unordered_map< QueryHint, ExplainedQueryHint > Hints
Definition: QueryHint.h:213
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
void setExecutionResult(const std::shared_ptr< const ExecutionResult > result)
RelLogicalValues(const std::vector< TargetMetaInfo > &tuple_type, std::vector< RowValues > &values)
size_t toHash() const override
std::string typeName(const T *v)
Definition: toString.h:85
ExplainedQueryHint parseHintString(std::string &hint_string)
RelAlgDagBuilder()=delete
SqlWindowFunctionKind
Definition: sqldefs.h:83
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:332
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexInputSet aggregateResult(const RexInputSet &aggregate, const RexInputSet &next_result) const override
Definition: sqldefs.h:53
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
RelTableFunction(const std::string &function_name, RelAlgInputs inputs, std::vector< std::string > &fields, std::vector< const Rex * > col_inputs, std::vector< std::unique_ptr< const RexScalar >> &table_func_inputs, std::vector< std::unique_ptr< const RexScalar >> &target_exprs)
const std::vector< std::string > & getFields() const
std::string getFieldName(const size_t i) const
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
bool g_enable_watchdog false
Definition: Execute.cpp:75
#define CHECK(condition)
Definition: Logger.h:206
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
RelProject(std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
unsigned node_id(const rapidjson::Value &ra_node) noexcept
RANodeOutput get_node_output(const RelAlgNode *ra_node)
bool g_enable_union
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern) noexcept
bool g_cluster
void alterRAForRender(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const RenderInfo &render_info)
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDagBuilder &root_dag_builder)
void getRelAlgHints(const rapidjson::Value &json_node, std::shared_ptr< RelAlgNode > node)
virtual size_t toHash() const =0
RelAlgDispatcher(const Catalog_Namespace::Catalog &cat)
Common Enum definitions for SQL processing.
bool is_dict_encoded_string() const
Definition: sqltypes.h:535
Definition: sqltypes.h:44
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::vector< RexInput > RANodeOutput
std::vector< std::unique_ptr< const RexScalar >> RowValues
const size_t inputCount() const
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
size_t size() const override
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
RelAlgInputs getRelAlgInputs(const rapidjson::Value &node)
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDagBuilder &root_dag_builder)
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:139
std::vector< std::unique_ptr< const RexScalar > > copyRexScalars(std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
size_t toHash() const override
#define VLOG(n)
Definition: Logger.h:300
RelJoin(std::shared_ptr< const RelAlgNode > lhs, std::shared_ptr< const RelAlgNode > rhs, std::unique_ptr< const RexScalar > &condition, const JoinType join_type)
RelAlgInputs inputs_
void set_precision(int d)
Definition: sqltypes.h:416
std::string toString() const override
std::pair< std::string, std::string > getKVOptionPair(std::string &str, size_t &pos)
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
bool isIdentity() const
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
Hints * getDeliveredHints()
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
const bool is_agg_
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins)
const RexScalar * visitCase(const RexCase *rex_case) const final
std::string toString() const override
static void resetRelAlgFirstId() noexcept
std::string json_node_to_string(const rapidjson::Value &node) noexcept