OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgDagBuilder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgDagBuilder.h"
19 #include "Catalog/Catalog.h"
21 #include "JsonAccessors.h"
22 #include "RelAlgOptimizer.h"
23 #include "RelLeftDeepInnerJoin.h"
25 #include "RexVisitor.h"
26 #include "Shared/sqldefs.h"
27 
28 #include <rapidjson/error/en.h>
29 #include <rapidjson/error/error.h>
30 #include <rapidjson/stringbuffer.h>
31 #include <rapidjson/writer.h>
32 
33 #include <string>
34 #include <unordered_set>
35 
36 extern bool g_cluster;
37 extern bool g_enable_union;
38 
39 namespace {
40 
41 const unsigned FIRST_RA_NODE_ID = 1;
42 
43 } // namespace
44 
45 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
46 
49 }
50 
52  const std::shared_ptr<const ExecutionResult> result) {
53  auto row_set = result->getRows();
54  CHECK(row_set);
55  CHECK_EQ(size_t(1), row_set->colCount());
56  *(type_.get()) = row_set->getColType(0);
57  (*(result_.get())) = result;
58 }
59 
60 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
61  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
62 }
63 
64 unsigned RexSubQuery::getId() const {
65  return ra_->getId();
66 }
67 
68 namespace {
69 
70 class RexRebindInputsVisitor : public RexVisitor<void*> {
71  public:
72  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
73  : old_input_(old_input), new_input_(new_input) {}
74 
75  virtual ~RexRebindInputsVisitor() = default;
76 
77  void* visitInput(const RexInput* rex_input) const override {
78  const auto old_source = rex_input->getSourceNode();
79  if (old_source == old_input_) {
80  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
81  if (left_deep_join) {
82  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
83  return nullptr;
84  }
85  rex_input->setSourceNode(new_input_);
86  }
87  return nullptr;
88  };
89 
90  private:
91  const RelAlgNode* old_input_;
93 };
94 
95 // Creates an output with n columns.
96 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
97  std::vector<RexInput> outputs;
98  outputs.reserve(n);
99  for (size_t i = 0; i < n; ++i) {
100  outputs.emplace_back(node, i);
101  }
102  return outputs;
103 }
104 
106  public:
108  const RelAlgNode* old_input,
109  const RelAlgNode* new_input,
110  std::unordered_map<unsigned, unsigned> old_to_new_index_map)
111  : RexRebindInputsVisitor(old_input, new_input), mapping_(old_to_new_index_map) {}
112 
113  void* visitInput(const RexInput* rex_input) const override {
114  RexRebindInputsVisitor::visitInput(rex_input);
115  auto mapping_itr = mapping_.find(rex_input->getIndex());
116  CHECK(mapping_itr != mapping_.end());
117  rex_input->setIndex(mapping_itr->second);
118  return nullptr;
119  }
120 
121  private:
122  const std::unordered_map<unsigned, unsigned> mapping_;
123 };
124 
125 } // namespace
126 
128  std::shared_ptr<const RelAlgNode> old_input,
129  std::shared_ptr<const RelAlgNode> input,
130  std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map) {
131  RelAlgNode::replaceInput(old_input, input);
132  std::unique_ptr<RexRebindInputsVisitor> rebind_inputs;
133  if (old_to_new_index_map) {
134  rebind_inputs = std::make_unique<RexRebindReindexInputsVisitor>(
135  old_input.get(), input.get(), *old_to_new_index_map);
136  } else {
137  rebind_inputs =
138  std::make_unique<RexRebindInputsVisitor>(old_input.get(), input.get());
139  }
140  CHECK(rebind_inputs);
141  for (const auto& scalar_expr : scalar_exprs_) {
142  rebind_inputs->visit(scalar_expr.get());
143  }
144 }
145 
146 void RelProject::appendInput(std::string new_field_name,
147  std::unique_ptr<const RexScalar> new_input) {
148  fields_.emplace_back(std::move(new_field_name));
149  scalar_exprs_.emplace_back(std::move(new_input));
150 }
151 
153  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
154  if (scan_node) {
155  // Scan node has no inputs, output contains all columns in the table.
156  CHECK_EQ(size_t(0), scan_node->inputCount());
157  return n_outputs(scan_node, scan_node->size());
158  }
159  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
160  if (project_node) {
161  // Project output count doesn't depend on the input
162  CHECK_EQ(size_t(1), project_node->inputCount());
163  return n_outputs(project_node, project_node->size());
164  }
165  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
166  if (filter_node) {
167  // Filter preserves shape
168  CHECK_EQ(size_t(1), filter_node->inputCount());
169  const auto prev_out = get_node_output(filter_node->getInput(0));
170  return n_outputs(filter_node, prev_out.size());
171  }
172  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
173  if (aggregate_node) {
174  // Aggregate output count doesn't depend on the input
175  CHECK_EQ(size_t(1), aggregate_node->inputCount());
176  return n_outputs(aggregate_node, aggregate_node->size());
177  }
178  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
179  if (compound_node) {
180  // Compound output count doesn't depend on the input
181  CHECK_EQ(size_t(1), compound_node->inputCount());
182  return n_outputs(compound_node, compound_node->size());
183  }
184  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
185  if (join_node) {
186  // Join concatenates the outputs from the inputs and the output
187  // directly references the nodes in the input.
188  CHECK_EQ(size_t(2), join_node->inputCount());
189  auto lhs_out =
190  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
191  const auto rhs_out =
192  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
193  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
194  return lhs_out;
195  }
196  const auto table_func_node = dynamic_cast<const RelTableFunction*>(ra_node);
197  if (table_func_node) {
198  // Table Function output count doesn't depend on the input
199  return n_outputs(table_func_node, table_func_node->size());
200  }
201  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
202  if (sort_node) {
203  // Sort preserves shape
204  CHECK_EQ(size_t(1), sort_node->inputCount());
205  const auto prev_out = get_node_output(sort_node->getInput(0));
206  return n_outputs(sort_node, prev_out.size());
207  }
208  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
209  if (logical_values_node) {
210  CHECK_EQ(size_t(0), logical_values_node->inputCount());
211  return n_outputs(logical_values_node, logical_values_node->size());
212  }
213  const auto logical_union_node = dynamic_cast<const RelLogicalUnion*>(ra_node);
214  if (logical_union_node) {
215  return n_outputs(logical_union_node, logical_union_node->size());
216  }
217  LOG(FATAL) << "Unhandled ra_node type: " << ra_node->toString();
218  return {};
219 }
220 
222  if (!isSimple()) {
223  return false;
224  }
225  CHECK_EQ(size_t(1), inputCount());
226  const auto source = getInput(0);
227  if (dynamic_cast<const RelJoin*>(source)) {
228  return false;
229  }
230  const auto source_shape = get_node_output(source);
231  if (source_shape.size() != scalar_exprs_.size()) {
232  return false;
233  }
234  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
235  const auto& scalar_expr = scalar_exprs_[i];
236  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
237  CHECK(input);
238  CHECK_EQ(source, input->getSourceNode());
239  // We should add the additional check that input->getIndex() !=
240  // source_shape[i].getIndex(), but Calcite doesn't generate the right
241  // Sort-Project-Sort sequence when joins are involved.
242  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
243  return false;
244  }
245  }
246  return true;
247 }
248 
249 namespace {
250 
251 bool isRenamedInput(const RelAlgNode* node,
252  const size_t index,
253  const std::string& new_name) {
254  CHECK_LT(index, node->size());
255  if (auto join = dynamic_cast<const RelJoin*>(node)) {
256  CHECK_EQ(size_t(2), join->inputCount());
257  const auto lhs_size = join->getInput(0)->size();
258  if (index < lhs_size) {
259  return isRenamedInput(join->getInput(0), index, new_name);
260  }
261  CHECK_GE(index, lhs_size);
262  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
263  }
264 
265  if (auto scan = dynamic_cast<const RelScan*>(node)) {
266  return new_name != scan->getFieldName(index);
267  }
268 
269  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
270  return new_name != aggregate->getFieldName(index);
271  }
272 
273  if (auto project = dynamic_cast<const RelProject*>(node)) {
274  return new_name != project->getFieldName(index);
275  }
276 
277  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
278  return new_name != table_func->getFieldName(index);
279  }
280 
281  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
282  const auto& tuple_type = logical_values->getTupleType();
283  CHECK_LT(index, tuple_type.size());
284  return new_name != tuple_type[index].get_resname();
285  }
286 
287  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
288  dynamic_cast<const RelLogicalUnion*>(node));
289  return isRenamedInput(node->getInput(0), index, new_name);
290 }
291 
292 } // namespace
293 
295  if (!isSimple()) {
296  return false;
297  }
298  CHECK_EQ(scalar_exprs_.size(), fields_.size());
299  for (size_t i = 0; i < fields_.size(); ++i) {
300  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
301  CHECK(rex_in);
302  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
303  return true;
304  }
305  }
306  return false;
307 }
308 
309 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
310  std::shared_ptr<const RelAlgNode> input) {
311  RelAlgNode::replaceInput(old_input, input);
312  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
313  if (condition_) {
314  rebind_inputs.visit(condition_.get());
315  }
316 }
317 
318 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
319  std::shared_ptr<const RelAlgNode> input) {
320  RelAlgNode::replaceInput(old_input, input);
321  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
322  rebind_inputs.visit(filter_.get());
323 }
324 
325 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
326  std::shared_ptr<const RelAlgNode> input) {
327  RelAlgNode::replaceInput(old_input, input);
328  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
329  for (const auto& scalar_source : scalar_sources_) {
330  rebind_inputs.visit(scalar_source.get());
331  }
332  if (filter_expr_) {
333  rebind_inputs.visit(filter_expr_.get());
334  }
335 }
336 
338  : RelAlgNode(rhs)
340  , fields_(rhs.fields_)
341  , hint_applied_(false)
342  , hints_(std::make_unique<Hints>()) {
343  RexDeepCopyVisitor copier;
344  for (auto const& expr : rhs.scalar_exprs_) {
345  scalar_exprs_.push_back(copier.visit(expr.get()));
346  }
347  if (rhs.hint_applied_) {
348  for (auto const& kv : *rhs.hints_) {
349  addHint(kv.second);
350  }
351  }
352 }
353 
355  : RelAlgNode(rhs)
356  , tuple_type_(rhs.tuple_type_)
357  , values_(RexDeepCopyVisitor::copy(rhs.values_)) {}
358 
360  RexDeepCopyVisitor copier;
361  filter_ = copier.visit(rhs.filter_.get());
362 }
363 
365  : RelAlgNode(rhs)
366  , groupby_count_(rhs.groupby_count_)
367  , fields_(rhs.fields_)
368  , hint_applied_(false)
369  , hints_(std::make_unique<Hints>()) {
370  agg_exprs_.reserve(rhs.agg_exprs_.size());
371  for (auto const& agg : rhs.agg_exprs_) {
372  agg_exprs_.push_back(agg->deepCopy());
373  }
374  if (rhs.hint_applied_) {
375  for (auto const& kv : *rhs.hints_) {
376  addHint(kv.second);
377  }
378  }
379 }
380 
382  : RelAlgNode(rhs)
383  , join_type_(rhs.join_type_)
384  , hint_applied_(false)
385  , hints_(std::make_unique<Hints>()) {
386  RexDeepCopyVisitor copier;
387  condition_ = copier.visit(rhs.condition_.get());
388  if (rhs.hint_applied_) {
389  for (auto const& kv : *rhs.hints_) {
390  addHint(kv.second);
391  }
392  }
393 }
394 
395 namespace {
396 
397 std::vector<std::unique_ptr<const RexAgg>> copyAggExprs(
398  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs) {
399  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
400  agg_exprs_copy.reserve(agg_exprs.size());
401  for (auto const& agg_expr : agg_exprs) {
402  agg_exprs_copy.push_back(agg_expr->deepCopy());
403  }
404  return agg_exprs_copy;
405 }
406 
407 std::vector<std::unique_ptr<const RexScalar>> copyRexScalars(
408  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources) {
409  std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
410  scalar_sources_copy.reserve(scalar_sources.size());
411  RexDeepCopyVisitor copier;
412  for (auto const& scalar_source : scalar_sources) {
413  scalar_sources_copy.push_back(copier.visit(scalar_source.get()));
414  }
415  return scalar_sources_copy;
416 }
417 
418 std::vector<const Rex*> remapTargetPointers(
419  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_new,
420  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_new,
421  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_old,
422  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_old,
423  std::vector<const Rex*> const& target_exprs_old) {
424  std::vector<const Rex*> target_exprs(target_exprs_old);
425  std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
426  for (size_t i = 0; i < agg_exprs_new.size(); ++i) {
427  old_to_new_target.emplace(agg_exprs_old[i].get(), agg_exprs_new[i].get());
428  }
429  for (size_t i = 0; i < scalar_sources_new.size(); ++i) {
430  old_to_new_target.emplace(scalar_sources_old[i].get(), scalar_sources_new[i].get());
431  }
432  for (auto& target : target_exprs) {
433  auto target_it = old_to_new_target.find(target);
434  CHECK(target_it != old_to_new_target.end());
435  target = target_it->second;
436  }
437  return target_exprs;
438 }
439 
440 } // namespace
441 
443  : RelAlgNode(rhs)
445  , groupby_count_(rhs.groupby_count_)
446  , agg_exprs_(copyAggExprs(rhs.agg_exprs_))
447  , fields_(rhs.fields_)
448  , is_agg_(rhs.is_agg_)
449  , scalar_sources_(copyRexScalars(rhs.scalar_sources_))
450  , target_exprs_(remapTargetPointers(agg_exprs_,
451  scalar_sources_,
452  rhs.agg_exprs_,
453  rhs.scalar_sources_,
454  rhs.target_exprs_))
455  , hint_applied_(false)
456  , hints_(std::make_unique<Hints>()) {
457  RexDeepCopyVisitor copier;
458  filter_expr_ = rhs.filter_expr_ ? copier.visit(rhs.filter_expr_.get()) : nullptr;
459  if (rhs.hint_applied_) {
460  for (auto const& kv : *rhs.hints_) {
461  addHint(kv.second);
462  }
463  }
464 }
465 
466 void RelTableFunction::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
467  std::shared_ptr<const RelAlgNode> input) {
468  RelAlgNode::replaceInput(old_input, input);
469  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
470  for (const auto& target_expr : target_exprs_) {
471  rebind_inputs.visit(target_expr.get());
472  }
473  for (const auto& func_input : table_func_inputs_) {
474  rebind_inputs.visit(func_input.get());
475  }
476 }
477 
479  int32_t literal_args = 0;
480  for (const auto& arg : table_func_inputs_) {
481  const auto rex_literal = dynamic_cast<const RexLiteral*>(arg.get());
482  if (rex_literal) {
483  literal_args += 1;
484  }
485  }
486  return literal_args;
487 }
488 
490  : RelAlgNode(rhs)
491  , function_name_(rhs.function_name_)
492  , fields_(rhs.fields_)
493  , col_inputs_(rhs.col_inputs_)
494  , table_func_inputs_(copyRexScalars(rhs.table_func_inputs_))
495  , target_exprs_(copyRexScalars(rhs.target_exprs_)) {
496  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
497  for (size_t i = 0; i < table_func_inputs_.size(); ++i) {
498  old_to_new_input.emplace(rhs.table_func_inputs_[i].get(),
499  table_func_inputs_[i].get());
500  }
501  for (auto& target : col_inputs_) {
502  auto target_it = old_to_new_input.find(target);
503  CHECK(target_it != old_to_new_input.end());
504  target = target_it->second;
505  }
506 }
507 
508 namespace std {
509 template <>
510 struct hash<std::pair<const RelAlgNode*, int>> {
511  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
512  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
513  return static_cast<int64_t>(*ptr_val) ^ input_col.second;
514  }
515 };
516 } // namespace std
517 
518 namespace {
519 
520 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
521  const size_t which_col) {
522  std::set<std::pair<const RelAlgNode*, int>> work_set;
523  auto walker = node;
524  auto curr_col = which_col;
525  while (true) {
526  work_set.insert(std::make_pair(walker, curr_col));
527  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
528  break;
529  }
530  CHECK_EQ(size_t(1), walker->inputCount());
531  auto only_source = walker->getInput(0);
532  if (auto project = dynamic_cast<const RelProject*>(walker)) {
533  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
534  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
535  if (join_source) {
536  CHECK_EQ(size_t(2), join_source->inputCount());
537  auto lhs = join_source->getInput(0);
538  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
539  join_source->getInput(1) == input->getSourceNode());
540  } else {
541  CHECK_EQ(input->getSourceNode(), only_source);
542  }
543  curr_col = input->getIndex();
544  } else {
545  break;
546  }
547  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
548  if (curr_col >= aggregate->getGroupByCount()) {
549  break;
550  }
551  }
552  walker = only_source;
553  }
554  return work_set;
555 }
556 
557 } // namespace
558 
559 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
560  if (collation_.size() != that.collation_.size()) {
561  return false;
562  }
563 
564  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
565  auto this_sort_key = collation_[i];
566  auto that_sort_key = that.collation_[i];
567  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
568  return false;
569  }
570  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
571  return false;
572  }
573  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
574  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
575  std::vector<std::pair<const RelAlgNode*, int>> intersect;
576  std::set_intersection(this_equiv_keys.begin(),
577  this_equiv_keys.end(),
578  that_equiv_keys.begin(),
579  that_equiv_keys.end(),
580  std::back_inserter(intersect));
581  if (intersect.empty()) {
582  return false;
583  }
584  }
585  return true;
586 }
587 
588 // class RelLogicalUnion methods
589 
591  : RelAlgNode(std::move(inputs)), is_all_(is_all) {
592  if (!g_enable_union) {
593  throw QueryNotSupported(
594  "UNION is not supported yet. There is an experimental enable-union option "
595  "available to enable UNION ALL queries.");
596  }
597  CHECK_EQ(2u, inputs_.size());
598  if (!is_all_) {
599  throw QueryNotSupported("UNION without ALL is not supported yet.");
600  }
601 }
602 
603 size_t RelLogicalUnion::size() const {
604  return inputs_.front()->size();
605 }
606 
607 std::string RelLogicalUnion::toString() const {
608  return cat(::typeName(this), "(is_all(", is_all_, "))");
609 }
610 
611 std::string RelLogicalUnion::getFieldName(const size_t i) const {
612  if (auto const* input = dynamic_cast<RelCompound const*>(inputs_[0].get())) {
613  return input->getFieldName(i);
614  } else if (auto const* input = dynamic_cast<RelProject const*>(inputs_[0].get())) {
615  return input->getFieldName(i);
616  } else if (auto const* input = dynamic_cast<RelLogicalUnion const*>(inputs_[0].get())) {
617  return input->getFieldName(i);
618  } else if (auto const* input = dynamic_cast<RelAggregate const*>(inputs_[0].get())) {
619  return input->getFieldName(i);
620  } else if (auto const* input = dynamic_cast<RelScan const*>(inputs_[0].get())) {
621  return input->getFieldName(i);
622  } else if (auto const* input =
623  dynamic_cast<RelTableFunction const*>(inputs_[0].get())) {
624  return input->getFieldName(i);
625  }
626  UNREACHABLE() << "Unhandled input type: " << inputs_.front()->toString();
627  return {};
628 }
629 
631  std::vector<TargetMetaInfo> const& tmis0 = inputs_[0]->getOutputMetainfo();
632  std::vector<TargetMetaInfo> const& tmis1 = inputs_[1]->getOutputMetainfo();
633  if (tmis0.size() != tmis1.size()) {
634  VLOG(2) << "tmis0.size() = " << tmis0.size() << " != " << tmis1.size()
635  << " = tmis1.size()";
636  throw std::runtime_error("Subqueries of a UNION must have matching data types.");
637  }
638  for (size_t i = 0; i < tmis0.size(); ++i) {
639  if (tmis0[i].get_type_info() != tmis1[i].get_type_info()) {
640  SQLTypeInfo const& ti0 = tmis0[i].get_type_info();
641  SQLTypeInfo const& ti1 = tmis1[i].get_type_info();
642  VLOG(2) << "Types do not match for UNION:\n tmis0[" << i
643  << "].get_type_info().to_string() = " << ti0.to_string() << "\n tmis1["
644  << i << "].get_type_info().to_string() = " << ti1.to_string();
645  if (ti0.is_dict_encoded_string() && ti1.is_dict_encoded_string() &&
646  ti0.get_comp_param() != ti1.get_comp_param()) {
647  throw std::runtime_error(
648  "Taking the UNION of different text-encoded dictionaries is not yet "
649  "supported. This may be resolved by using shared dictionaries. For example, "
650  "by making one a shared dictionary reference to the other.");
651  } else {
652  throw std::runtime_error(
653  "Subqueries of a UNION must have the exact same data types.");
654  }
655  }
656  }
657 }
658 
659 // Rest of code requires a raw pointer, but RexInput object needs to live somewhere.
661  size_t input_idx) const {
662  if (auto const* rex_input_ptr = dynamic_cast<RexInput const*>(rex_scalar)) {
663  RexInput rex_input(*rex_input_ptr);
664  rex_input.setSourceNode(getInput(input_idx));
665  scalar_exprs_.emplace_back(std::make_shared<RexInput const>(std::move(rex_input)));
666  return scalar_exprs_.back().get();
667  }
668  return rex_scalar;
669 }
670 
671 namespace {
672 
673 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
674  const auto& id = field(ra_node, "id");
675  return std::stoi(json_str(id));
676 }
677 
678 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
679  rapidjson::StringBuffer buffer;
680  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
681  node.Accept(writer);
682  return buffer.GetString();
683 }
684 
685 // The parse_* functions below de-serialize expressions as they come from Calcite.
686 // RelAlgDagBuilder will take care of making the representation easy to
687 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
688 
689 std::unique_ptr<RexAbstractInput> parse_abstract_input(
690  const rapidjson::Value& expr) noexcept {
691  const auto& input = field(expr, "input");
692  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
693 }
694 
695 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
696  CHECK(expr.IsObject());
697  const auto& literal = field(expr, "literal");
698  const auto type = to_sql_type(json_str(field(expr, "type")));
699  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
700  const auto scale = json_i64(field(expr, "scale"));
701  const auto precision = json_i64(field(expr, "precision"));
702  const auto type_scale = json_i64(field(expr, "type_scale"));
703  const auto type_precision = json_i64(field(expr, "type_precision"));
704  if (literal.IsNull()) {
705  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
706  }
707  switch (type) {
708  case kINT:
709  case kBIGINT:
710  case kDECIMAL:
711  case kINTERVAL_DAY_TIME:
713  case kTIME:
714  case kTIMESTAMP:
715  case kDATE:
716  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
717  type,
718  target_type,
719  scale,
720  precision,
721  type_scale,
722  type_precision));
723  case kDOUBLE: {
724  if (literal.IsDouble()) {
725  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
726  type,
727  target_type,
728  scale,
729  precision,
730  type_scale,
731  type_precision));
732  } else if (literal.IsInt64()) {
733  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetInt64()),
734  type,
735  target_type,
736  scale,
737  precision,
738  type_scale,
739  type_precision);
740 
741  } else if (literal.IsUint64()) {
742  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetUint64()),
743  type,
744  target_type,
745  scale,
746  precision,
747  type_scale,
748  type_precision);
749  }
750  UNREACHABLE() << "Unhandled type: " << literal.GetType();
751  }
752  case kTEXT:
753  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
754  type,
755  target_type,
756  scale,
757  precision,
758  type_scale,
759  type_precision));
760  case kBOOLEAN:
761  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
762  type,
763  target_type,
764  scale,
765  precision,
766  type_scale,
767  type_precision));
768  case kNULLT:
769  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
770  default:
771  CHECK(false);
772  }
773  CHECK(false);
774  return nullptr;
775 }
776 
777 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
779  RelAlgDagBuilder& root_dag_builder);
780 
781 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
782  if (type_obj.IsArray()) {
783  throw QueryNotSupported("Composite types are not currently supported.");
784  }
785  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
786  << json_node_to_string(type_obj);
787  const auto type = to_sql_type(json_str(field(type_obj, "type")));
788  const auto nullable = json_bool(field(type_obj, "nullable"));
789  const auto precision_it = type_obj.FindMember("precision");
790  const int precision =
791  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
792  const auto scale_it = type_obj.FindMember("scale");
793  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
794  SQLTypeInfo ti(type, !nullable);
795  ti.set_precision(precision);
796  ti.set_scale(scale);
797  return ti;
798 }
799 
800 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
801  const rapidjson::Value& arr,
803  RelAlgDagBuilder& root_dag_builder) {
804  std::vector<std::unique_ptr<const RexScalar>> exprs;
805  for (auto it = arr.Begin(); it != arr.End(); ++it) {
806  exprs.emplace_back(parse_scalar_expr(*it, cat, root_dag_builder));
807  }
808  return exprs;
809 }
810 
812  if (name == "ROW_NUMBER") {
814  }
815  if (name == "RANK") {
817  }
818  if (name == "DENSE_RANK") {
820  }
821  if (name == "PERCENT_RANK") {
823  }
824  if (name == "CUME_DIST") {
826  }
827  if (name == "NTILE") {
829  }
830  if (name == "LAG") {
832  }
833  if (name == "LEAD") {
835  }
836  if (name == "FIRST_VALUE") {
838  }
839  if (name == "LAST_VALUE") {
841  }
842  if (name == "AVG") {
844  }
845  if (name == "MIN") {
847  }
848  if (name == "MAX") {
850  }
851  if (name == "SUM") {
853  }
854  if (name == "COUNT") {
856  }
857  if (name == "$SUM0") {
859  }
860  throw std::runtime_error("Unsupported window function: " + name);
861 }
862 
863 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
864  const rapidjson::Value& arr,
866  RelAlgDagBuilder& root_dag_builder) {
867  std::vector<std::unique_ptr<const RexScalar>> exprs;
868  for (auto it = arr.Begin(); it != arr.End(); ++it) {
869  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), cat, root_dag_builder));
870  }
871  return exprs;
872 }
873 
874 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
875  return json_str(field(collation, "direction")) == std::string("DESCENDING")
878 }
879 
880 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
881  return json_str(field(collation, "nulls")) == std::string("FIRST")
884 }
885 
886 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
888  RelAlgDagBuilder& root_dag_builder) {
889  std::vector<SortField> collation;
890  size_t field_idx = 0;
891  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
892  const auto sort_dir = parse_sort_direction(*it);
893  const auto null_pos = parse_nulls_position(*it);
894  collation.emplace_back(field_idx, sort_dir, null_pos);
895  }
896  return collation;
897 }
898 
900  const rapidjson::Value& window_bound_obj,
902  RelAlgDagBuilder& root_dag_builder) {
903  CHECK(window_bound_obj.IsObject());
905  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
906  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
907  window_bound.following = json_bool(field(window_bound_obj, "following"));
908  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
909  const auto& offset_field = field(window_bound_obj, "offset");
910  if (offset_field.IsObject()) {
911  window_bound.offset = parse_scalar_expr(offset_field, cat, root_dag_builder);
912  } else {
913  CHECK(offset_field.IsNull());
914  }
915  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
916  return window_bound;
917 }
918 
919 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
921  RelAlgDagBuilder& root_dag_builder) {
922  const auto& operands = field(expr, "operands");
923  CHECK(operands.IsArray());
924  CHECK_GE(operands.Size(), unsigned(0));
925  const auto& subquery_ast = field(expr, "subquery");
926 
927  RelAlgDagBuilder subquery_dag(root_dag_builder, subquery_ast, cat, nullptr);
928  auto subquery = std::make_shared<RexSubQuery>(subquery_dag.getRootNodeShPtr());
929  root_dag_builder.registerSubquery(subquery);
930  return subquery->deepCopy();
931 }
932 
933 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
935  RelAlgDagBuilder& root_dag_builder) {
936  const auto op_name = json_str(field(expr, "op"));
937  const bool is_quantifier =
938  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
939  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
940  const auto& operators_json_arr = field(expr, "operands");
941  CHECK(operators_json_arr.IsArray());
942  auto operands = parse_expr_array(operators_json_arr, cat, root_dag_builder);
943  const auto type_it = expr.FindMember("type");
944  CHECK(type_it != expr.MemberEnd());
945  auto ti = parse_type(type_it->value);
946  if (op == kIN && expr.HasMember("subquery")) {
947  auto subquery = parse_subquery(expr, cat, root_dag_builder);
948  operands.emplace_back(std::move(subquery));
949  }
950  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
951  const auto& partition_keys_arr = field(expr, "partition_keys");
952  auto partition_keys = parse_expr_array(partition_keys_arr, cat, root_dag_builder);
953  const auto& order_keys_arr = field(expr, "order_keys");
954  auto order_keys = parse_window_order_exprs(order_keys_arr, cat, root_dag_builder);
955  const auto collation =
956  parse_window_order_collation(order_keys_arr, cat, root_dag_builder);
957  const auto kind = parse_window_function_kind(op_name);
958  const auto lower_bound =
959  parse_window_bound(field(expr, "lower_bound"), cat, root_dag_builder);
960  const auto upper_bound =
961  parse_window_bound(field(expr, "upper_bound"), cat, root_dag_builder);
962  bool is_rows = json_bool(field(expr, "is_rows"));
963  ti.set_notnull(false);
964  return std::make_unique<RexWindowFunctionOperator>(kind,
965  operands,
966  partition_keys,
967  order_keys,
968  collation,
969  lower_bound,
970  upper_bound,
971  is_rows,
972  ti);
973  }
974  return std::unique_ptr<RexOperator>(op == kFUNCTION
975  ? new RexFunctionOperator(op_name, operands, ti)
976  : new RexOperator(op, operands, ti));
977 }
978 
979 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr,
981  RelAlgDagBuilder& root_dag_builder) {
982  const auto& operands = field(expr, "operands");
983  CHECK(operands.IsArray());
984  CHECK_GE(operands.Size(), unsigned(2));
985  std::unique_ptr<const RexScalar> else_expr;
986  std::vector<
987  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
988  expr_pair_list;
989  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
990  auto when_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
991  if (operands_it == operands.End()) {
992  else_expr = std::move(when_expr);
993  break;
994  }
995  auto then_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
996  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
997  }
998  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
999 }
1000 
1001 std::vector<std::string> strings_from_json_array(
1002  const rapidjson::Value& json_str_arr) noexcept {
1003  CHECK(json_str_arr.IsArray());
1004  std::vector<std::string> fields;
1005  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
1006  ++json_str_arr_it) {
1007  CHECK(json_str_arr_it->IsString());
1008  fields.emplace_back(json_str_arr_it->GetString());
1009  }
1010  return fields;
1011 }
1012 
1013 std::vector<size_t> indices_from_json_array(
1014  const rapidjson::Value& json_idx_arr) noexcept {
1015  CHECK(json_idx_arr.IsArray());
1016  std::vector<size_t> indices;
1017  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
1018  ++json_idx_arr_it) {
1019  CHECK(json_idx_arr_it->IsInt());
1020  CHECK_GE(json_idx_arr_it->GetInt(), 0);
1021  indices.emplace_back(json_idx_arr_it->GetInt());
1022  }
1023  return indices;
1024 }
1025 
1026 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
1027  const auto agg = to_agg_kind(json_str(field(expr, "agg")));
1028  const auto distinct = json_bool(field(expr, "distinct"));
1029  const auto agg_ti = parse_type(field(expr, "type"));
1030  const auto operands = indices_from_json_array(field(expr, "operands"));
1031  if (operands.size() > 1 && (operands.size() != 2 || agg != kAPPROX_COUNT_DISTINCT)) {
1032  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
1033  }
1034  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
1035 }
1036 
1037 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1039  RelAlgDagBuilder& root_dag_builder) {
1040  CHECK(expr.IsObject());
1041  if (expr.IsObject() && expr.HasMember("input")) {
1042  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
1043  }
1044  if (expr.IsObject() && expr.HasMember("literal")) {
1045  return std::unique_ptr<const RexScalar>(parse_literal(expr));
1046  }
1047  if (expr.IsObject() && expr.HasMember("op")) {
1048  const auto op_str = json_str(field(expr, "op"));
1049  if (op_str == std::string("CASE")) {
1050  return std::unique_ptr<const RexScalar>(parse_case(expr, cat, root_dag_builder));
1051  }
1052  if (op_str == std::string("$SCALAR_QUERY")) {
1053  return std::unique_ptr<const RexScalar>(
1054  parse_subquery(expr, cat, root_dag_builder));
1055  }
1056  return std::unique_ptr<const RexScalar>(parse_operator(expr, cat, root_dag_builder));
1057  }
1058  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
1059  " not supported");
1060 }
1061 
1062 JoinType to_join_type(const std::string& join_type_name) {
1063  if (join_type_name == "inner") {
1064  return JoinType::INNER;
1065  }
1066  if (join_type_name == "left") {
1067  return JoinType::LEFT;
1068  }
1069  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
1070 }
1071 
1072 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
1073 
1074 std::unique_ptr<const RexOperator> disambiguate_operator(
1075  const RexOperator* rex_operator,
1076  const RANodeOutput& ra_output) noexcept {
1077  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1078  for (size_t i = 0; i < rex_operator->size(); ++i) {
1079  auto operand = rex_operator->getOperand(i);
1080  if (dynamic_cast<const RexSubQuery*>(operand)) {
1081  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1082  } else {
1083  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
1084  }
1085  }
1086  const auto rex_window_function_operator =
1087  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1088  if (rex_window_function_operator) {
1089  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
1090  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1091  for (const auto& partition_key : partition_keys) {
1092  disambiguated_partition_keys.emplace_back(
1093  disambiguate_rex(partition_key.get(), ra_output));
1094  }
1095  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1096  const auto& order_keys = rex_window_function_operator->getOrderKeys();
1097  for (const auto& order_key : order_keys) {
1098  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
1099  }
1100  return rex_window_function_operator->disambiguatedOperands(
1101  disambiguated_operands,
1102  disambiguated_partition_keys,
1103  disambiguated_order_keys,
1104  rex_window_function_operator->getCollation());
1105  }
1106  return rex_operator->getDisambiguated(disambiguated_operands);
1107 }
1108 
1109 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
1110  const RANodeOutput& ra_output) {
1111  std::vector<
1112  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1113  disambiguated_expr_pair_list;
1114  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1115  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
1116  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
1117  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1118  std::move(disambiguated_then));
1119  }
1120  std::unique_ptr<const RexScalar> disambiguated_else{
1121  disambiguate_rex(rex_case->getElse(), ra_output)};
1122  return std::unique_ptr<const RexCase>(
1123  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1124 }
1125 
1126 // The inputs used by scalar expressions are given as indices in the serialized
1127 // representation of the query. This is hard to navigate; make the relationship
1128 // explicit by creating RexInput expressions which hold a pointer to the source
1129 // relational algebra node and the index relative to the output of that node.
1130 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
1131  const RANodeOutput& ra_output) {
1132  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
1133  if (rex_abstract_input) {
1134  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1135  return std::unique_ptr<const RexInput>(
1136  new RexInput(ra_output[rex_abstract_input->getIndex()]));
1137  }
1138  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
1139  if (rex_operator) {
1140  return disambiguate_operator(rex_operator, ra_output);
1141  }
1142  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
1143  if (rex_case) {
1144  return disambiguate_case(rex_case, ra_output);
1145  }
1146  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar);
1147  CHECK(rex_literal);
1148  return std::unique_ptr<const RexLiteral>(new RexLiteral(*rex_literal));
1149 }
1150 
1151 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
1152  CHECK_EQ(size_t(1), project_node->inputCount());
1153  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1154  for (size_t i = 0; i < project_node->size(); ++i) {
1155  const auto projected_expr = project_node->getProjectAt(i);
1156  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1157  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1158  } else {
1159  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
1160  }
1161  }
1162  project_node->setExpressions(disambiguated_exprs);
1163 }
1164 
1166  const RANodeOutput& input) noexcept {
1167  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1168  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1169  const auto target_expr = table_func_node->getTableFuncInputAt(i);
1170  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1171  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1172  } else {
1173  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
1174  }
1175  }
1176  table_func_node->setTableFuncInputs(disambiguated_exprs);
1177 }
1178 
1179 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1180  for (auto ra_node : nodes) {
1181  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
1182  if (filter_node) {
1183  CHECK_EQ(size_t(1), filter_node->inputCount());
1184  auto disambiguated_condition = disambiguate_rex(
1185  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
1186  filter_node->setCondition(disambiguated_condition);
1187  continue;
1188  }
1189  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
1190  if (join_node) {
1191  CHECK_EQ(size_t(2), join_node->inputCount());
1192  auto disambiguated_condition =
1193  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
1194  join_node->setCondition(disambiguated_condition);
1195  continue;
1196  }
1197  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
1198  if (project_node) {
1199  bind_project_to_input(project_node.get(),
1200  get_node_output(project_node->getInput(0)));
1201  continue;
1202  }
1203  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1204  if (table_func_node) {
1205  /*
1206  Collect all inputs from table function input (non-literal)
1207  arguments.
1208  */
1209  RANodeOutput input;
1210  input.reserve(table_func_node->inputCount());
1211  for (size_t i = 0; i < table_func_node->inputCount(); i++) {
1212  auto node_output = get_node_output(table_func_node->getInput(i));
1213  input.insert(input.end(), node_output.begin(), node_output.end());
1214  }
1215  bind_table_func_to_input(table_func_node.get(), input);
1216  }
1217  }
1218 }
1219 
1220 void handleQueryHint(const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1221  RelAlgDagBuilder* dag_builder) noexcept {
1222  Hints* hint_delivered = nullptr;
1223  for (auto node : nodes) {
1224  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1225  if (agg_node) {
1226  if (agg_node->hasDeliveredHint()) {
1227  hint_delivered = agg_node->getDeliveredHints();
1228  break;
1229  }
1230  }
1231  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
1232  if (project_node) {
1233  if (project_node->hasDeliveredHint()) {
1234  hint_delivered = project_node->getDeliveredHints();
1235  break;
1236  }
1237  }
1238  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
1239  if (scan_node) {
1240  if (scan_node->hasDeliveredHint()) {
1241  hint_delivered = scan_node->getDeliveredHints();
1242  break;
1243  }
1244  }
1245  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
1246  if (join_node) {
1247  if (join_node->hasDeliveredHint()) {
1248  hint_delivered = join_node->getDeliveredHints();
1249  break;
1250  }
1251  }
1252  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
1253  if (compound_node) {
1254  if (compound_node->hasDeliveredHint()) {
1255  hint_delivered = compound_node->getDeliveredHints();
1256  break;
1257  }
1258  }
1259  }
1260  if (hint_delivered && !hint_delivered->empty()) {
1261  dag_builder->registerQueryHints(hint_delivered);
1262  }
1263 }
1264 
1265 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1266  for (auto node : nodes) {
1267  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1268  if (!agg_node || agg_node->getAggExprsCount()) {
1269  continue;
1270  }
1271  CHECK_EQ(size_t(1), node->inputCount());
1272  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1273  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1274  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1275  agg_node->markAsNop();
1276  }
1277  }
1278 }
1279 
1280 namespace {
1281 
1282 std::vector<const Rex*> reproject_targets(
1283  const RelProject* simple_project,
1284  const std::vector<const Rex*>& target_exprs) noexcept {
1285  std::vector<const Rex*> result;
1286  for (size_t i = 0; i < simple_project->size(); ++i) {
1287  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
1288  CHECK(input_rex);
1289  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1290  result.push_back(target_exprs[input_rex->getIndex()]);
1291  }
1292  return result;
1293 }
1294 
1301  public:
1303  const RelAlgNode* node_to_keep,
1304  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1305  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
1306 
1307  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
1308  RetType visitInput(const RexInput* input) const final {
1309  if (input->getSourceNode() == node_to_keep_) {
1310  const auto index = input->getIndex();
1311  CHECK_LT(index, scalar_sources_.size());
1312  return visit(scalar_sources_[index].get());
1313  } else {
1314  return input->deepCopy();
1315  }
1316  }
1317 
1318  private:
1320  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
1321 };
1322 
1323 } // namespace
1324 
1325 void create_compound(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1326  const std::vector<size_t>& pattern) noexcept {
1327  CHECK_GE(pattern.size(), size_t(2));
1328  CHECK_LE(pattern.size(), size_t(4));
1329 
1330  std::unique_ptr<const RexScalar> filter_rex;
1331  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1332  size_t groupby_count{0};
1333  std::vector<std::string> fields;
1334  std::vector<const RexAgg*> agg_exprs;
1335  std::vector<const Rex*> target_exprs;
1336  bool first_project{true};
1337  bool is_agg{false};
1338  RelAlgNode* last_node{nullptr};
1339 
1340  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1341 
1342  for (const auto node_idx : pattern) {
1343  const auto ra_node = nodes[node_idx];
1344  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1345  if (ra_filter) {
1346  CHECK(!filter_rex);
1347  filter_rex.reset(ra_filter->getAndReleaseCondition());
1348  CHECK(filter_rex);
1349  last_node = ra_node.get();
1350  continue;
1351  }
1352  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1353  if (ra_project) {
1354  fields = ra_project->getFields();
1355  manipulation_target = ra_project;
1356 
1357  if (first_project) {
1358  CHECK_EQ(size_t(1), ra_project->inputCount());
1359  // Rebind the input of the project to the input of the filter itself
1360  // since we know that we'll evaluate the filter on the fly, with no
1361  // intermediate buffer.
1362  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1363  if (filter_input) {
1364  CHECK_EQ(size_t(1), filter_input->inputCount());
1365  bind_project_to_input(ra_project.get(),
1366  get_node_output(filter_input->getInput(0)));
1367  }
1368  scalar_sources = ra_project->getExpressionsAndRelease();
1369  for (const auto& scalar_expr : scalar_sources) {
1370  target_exprs.push_back(scalar_expr.get());
1371  }
1372  first_project = false;
1373  } else {
1374  if (ra_project->isSimple()) {
1375  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1376  } else {
1377  // TODO(adb): This is essentially a more general case of simple project, we
1378  // could likely merge the two
1379  std::vector<const Rex*> result;
1380  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1381  for (size_t i = 0; i < ra_project->size(); ++i) {
1382  const auto rex = ra_project->getProjectAt(i);
1383  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1384  const auto index = rex_input->getIndex();
1385  CHECK_LT(index, target_exprs.size());
1386  result.push_back(target_exprs[index]);
1387  } else {
1388  scalar_sources.push_back(visitor.visit(rex));
1389  result.push_back(scalar_sources.back().get());
1390  }
1391  }
1392  target_exprs = result;
1393  }
1394  }
1395  last_node = ra_node.get();
1396  continue;
1397  }
1398  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1399  if (ra_aggregate) {
1400  is_agg = true;
1401  fields = ra_aggregate->getFields();
1402  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1403  groupby_count = ra_aggregate->getGroupByCount();
1404  decltype(target_exprs){}.swap(target_exprs);
1405  CHECK_LE(groupby_count, scalar_sources.size());
1406  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1407  const auto rex_ref = new RexRef(group_idx + 1);
1408  target_exprs.push_back(rex_ref);
1409  scalar_sources.emplace_back(rex_ref);
1410  }
1411  for (const auto rex_agg : agg_exprs) {
1412  target_exprs.push_back(rex_agg);
1413  }
1414  last_node = ra_node.get();
1415  continue;
1416  }
1417  }
1418 
1419  auto compound_node =
1420  std::make_shared<RelCompound>(filter_rex,
1421  target_exprs,
1422  groupby_count,
1423  agg_exprs,
1424  fields,
1425  scalar_sources,
1426  is_agg,
1427  manipulation_target->isUpdateViaSelect(),
1428  manipulation_target->isDeleteViaSelect(),
1429  manipulation_target->isVarlenUpdateRequired(),
1430  manipulation_target->getModifiedTableDescriptor(),
1431  manipulation_target->getTargetColumns());
1432  auto old_node = nodes[pattern.back()];
1433  nodes[pattern.back()] = compound_node;
1434  auto first_node = nodes[pattern.front()];
1435  CHECK_EQ(size_t(1), first_node->inputCount());
1436  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1437  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1438  nodes[pattern[i]].reset();
1439  }
1440  for (auto node : nodes) {
1441  if (!node) {
1442  continue;
1443  }
1444  node->replaceInput(old_node, compound_node);
1445  }
1446 }
1447 
1448 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1449  using ElementType = std::shared_ptr<RelAlgNode>;
1450  using Super = std::vector<ElementType>::const_iterator;
1451  using Container = std::vector<ElementType>;
1452 
1453  public:
1454  enum class AdvancingMode { DUChain, InOrder };
1455 
1456  explicit RANodeIterator(const Container& nodes)
1457  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1458  size_t non_zero_count = 0;
1459  for (const auto& node : nodes) {
1460  if (node) {
1461  ++non_zero_count;
1462  }
1463  }
1465  }()) {}
1466 
1467  explicit operator size_t() {
1468  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1469  }
1470 
1471  RANodeIterator operator++() = delete;
1472 
1473  void advance(AdvancingMode mode) {
1474  Super& super = *this;
1475  switch (mode) {
1476  case AdvancingMode::DUChain: {
1477  size_t use_count = 0;
1478  Super only_use = owner_.end();
1479  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1480  if (!*nodeIt) {
1481  continue;
1482  }
1483  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1484  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1485  ++use_count;
1486  if (1 == use_count) {
1487  only_use = nodeIt;
1488  } else {
1489  super = owner_.end();
1490  return;
1491  }
1492  }
1493  }
1494  }
1495  super = only_use;
1496  break;
1497  }
1498  case AdvancingMode::InOrder:
1499  for (size_t i = 0; i != owner_.size(); ++i) {
1500  if (!visited_.count(i)) {
1501  super = owner_.begin();
1502  std::advance(super, i);
1503  return;
1504  }
1505  }
1506  super = owner_.end();
1507  break;
1508  default:
1509  CHECK(false);
1510  }
1511  }
1512 
1513  bool allVisited() { return visited_.size() == nodeCount_; }
1514 
1516  visited_.insert(size_t(*this));
1517  Super& super = *this;
1518  return *super;
1519  }
1520 
1521  const ElementType* operator->() { return &(operator*()); }
1522 
1523  private:
1525  const size_t nodeCount_;
1526  std::unordered_set<size_t> visited_;
1527 };
1528 
1529 namespace {
1530 
1531 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1532  const size_t index,
1533  const bool first_rex_is_input) {
1534  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1535  if (index == 0 && agg_node->getGroupByCount() > 0) {
1536  return true;
1537  } else {
1538  // Is an aggregated target, only allow the project to be elided if the aggregate
1539  // target is simply passed through (i.e. if the top level expression attached to
1540  // the project node is a RexInput expression)
1541  return first_rex_is_input;
1542  }
1543  }
1544  return first_rex_is_input;
1545 }
1546 
1553  public:
1554  bool visitInput(const RexInput* input) const final {
1555  // The top level expression node is checked before we apply the visitor. If we get
1556  // here, this input rex is a child of another rex node, and we handle the can be
1557  // coalesced check slightly differently
1558  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1559  }
1560 
1561  bool visitLiteral(const RexLiteral*) const final { return false; }
1562 
1563  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1564 
1565  bool visitRef(const RexRef*) const final { return false; }
1566 
1567  protected:
1568  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1569  return aggregate && next_result;
1570  }
1571 
1572  bool defaultResult() const final { return true; }
1573 };
1574 
1575 // Detect the window function SUM pattern: CASE WHEN COUNT() > 0 THEN SUM ELSE 0
1577  const auto case_operator = dynamic_cast<const RexCase*>(rex);
1578  if (case_operator && case_operator->branchCount() == 1) {
1579  const auto then_window =
1580  dynamic_cast<const RexWindowFunctionOperator*>(case_operator->getThen(0));
1581  if (then_window && then_window->getKind() == SqlWindowFunctionKind::SUM_INTERNAL) {
1582  return true;
1583  }
1584  }
1585  return false;
1586 }
1587 
1588 // Detect both window function operators and window function operators embedded in case
1589 // statements (for null handling)
1591  if (dynamic_cast<const RexWindowFunctionOperator*>(rex)) {
1592  return true;
1593  }
1594 
1595  // unwrap from casts, if they exist
1596  const auto rex_cast = dynamic_cast<const RexOperator*>(rex);
1597  if (rex_cast && rex_cast->getOperator() == kCAST) {
1598  CHECK_EQ(rex_cast->size(), size_t(1));
1599  return is_window_function_operator(rex_cast->getOperand(0));
1600  }
1601 
1602  if (is_window_function_sum(rex)) {
1603  return true;
1604  }
1605  // Check for Window Function AVG:
1606  // (CASE WHEN count > 0 THEN sum ELSE 0) / COUNT
1607  const RexOperator* divide_operator = dynamic_cast<const RexOperator*>(rex);
1608  if (divide_operator && divide_operator->getOperator() == kDIVIDE) {
1609  CHECK_EQ(divide_operator->size(), size_t(2));
1610  const auto case_operator =
1611  dynamic_cast<const RexCase*>(divide_operator->getOperand(0));
1612  const auto second_window =
1613  dynamic_cast<const RexWindowFunctionOperator*>(divide_operator->getOperand(1));
1614  if (case_operator && second_window &&
1615  second_window->getKind() == SqlWindowFunctionKind::COUNT) {
1616  if (is_window_function_sum(case_operator)) {
1617  return true;
1618  }
1619  }
1620  }
1621  return false;
1622 }
1623 
1624 } // namespace
1625 
1626 void coalesce_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1627  const std::vector<const RelAlgNode*>& left_deep_joins) {
1628  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
1629  std::vector<size_t> crt_pattern;
1630  CoalesceState crt_state{CoalesceState::Initial};
1631 
1632  auto reset_state = [&crt_pattern, &crt_state]() {
1633  crt_state = CoalesceState::Initial;
1634  std::vector<size_t>().swap(crt_pattern);
1635  };
1636 
1637  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
1638  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
1639  switch (crt_state) {
1640  case CoalesceState::Initial: {
1641  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
1642  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
1643  left_deep_joins.end()) {
1644  crt_pattern.push_back(size_t(nodeIt));
1645  crt_state = CoalesceState::Filter;
1646  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1647  } else if (std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1648  crt_pattern.push_back(size_t(nodeIt));
1649  crt_state = CoalesceState::FirstProject;
1650  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1651  } else {
1652  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1653  }
1654  break;
1655  }
1656  case CoalesceState::Filter: {
1657  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1658  if (project_node->hasWindowFunctionExpr()) {
1659  reset_state();
1660  break;
1661  }
1662  crt_pattern.push_back(size_t(nodeIt));
1663  crt_state = CoalesceState::FirstProject;
1664  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1665  } else {
1666  reset_state();
1667  }
1668  break;
1669  }
1670  case CoalesceState::FirstProject: {
1671  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
1672  crt_pattern.push_back(size_t(nodeIt));
1673  crt_state = CoalesceState::Aggregate;
1674  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1675  } else {
1676  if (crt_pattern.size() >= 2) {
1677  create_compound(nodes, crt_pattern);
1678  }
1679  reset_state();
1680  }
1681  break;
1682  }
1683  case CoalesceState::Aggregate: {
1684  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1685  // TODO(adb): overloading the simple project terminology again here
1686  bool is_simple_project{true};
1687  for (size_t i = 0; i < project_node->size(); i++) {
1688  const auto scalar_rex = project_node->getProjectAt(i);
1689  // If the top level scalar rex is an input node, we can bypass the visitor
1690  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
1692  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
1693  is_simple_project = false;
1694  break;
1695  }
1696  continue;
1697  }
1698  CoalesceSecondaryProjectVisitor visitor;
1699  if (!visitor.visit(project_node->getProjectAt(i))) {
1700  is_simple_project = false;
1701  break;
1702  }
1703  }
1704  if (is_simple_project) {
1705  crt_pattern.push_back(size_t(nodeIt));
1706  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1707  }
1708  }
1709  CHECK_GE(crt_pattern.size(), size_t(2));
1710  create_compound(nodes, crt_pattern);
1711  reset_state();
1712  break;
1713  }
1714  default:
1715  CHECK(false);
1716  }
1717  }
1718  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
1719  if (crt_pattern.size() >= 2) {
1720  create_compound(nodes, crt_pattern);
1721  }
1722  CHECK(!crt_pattern.empty());
1723  }
1724 }
1725 
1733 class WindowFunctionDetectionVisitor : public RexVisitor<const RexScalar*> {
1734  protected:
1735  // Detect embedded window function expressions in operators
1736  const RexScalar* visitOperator(const RexOperator* rex_operator) const final {
1737  if (is_window_function_operator(rex_operator)) {
1738  return rex_operator;
1739  }
1740 
1741  const size_t operand_count = rex_operator->size();
1742  for (size_t i = 0; i < operand_count; ++i) {
1743  const auto operand = rex_operator->getOperand(i);
1744  if (is_window_function_operator(operand)) {
1745  // Handle both RexWindowFunctionOperators and window functions built up from
1746  // multiple RexScalar objects (e.g. AVG)
1747  return operand;
1748  }
1749  const auto operandResult = visit(operand);
1750  if (operandResult) {
1751  return operandResult;
1752  }
1753  }
1754 
1755  return defaultResult();
1756  }
1757 
1758  // Detect embedded window function expressions in case statements. Note that this may
1759  // manifest as a nested case statement inside a top level case statement, as some
1760  // window functions (sum, avg) are represented as a case statement. Use the
1761  // is_window_function_operator helper to detect complete window function expressions.
1762  const RexScalar* visitCase(const RexCase* rex_case) const final {
1763  if (is_window_function_operator(rex_case)) {
1764  return rex_case;
1765  }
1766 
1767  auto result = defaultResult();
1768  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1769  const auto when = rex_case->getWhen(i);
1770  result = is_window_function_operator(when) ? when : visit(when);
1771  if (result) {
1772  return result;
1773  }
1774  const auto then = rex_case->getThen(i);
1775  result = is_window_function_operator(then) ? then : visit(then);
1776  if (result) {
1777  return result;
1778  }
1779  }
1780  if (rex_case->getElse()) {
1781  auto else_expr = rex_case->getElse();
1782  result = is_window_function_operator(else_expr) ? else_expr : visit(else_expr);
1783  }
1784  return result;
1785  }
1786 
1787  const RexScalar* aggregateResult(const RexScalar* const& aggregate,
1788  const RexScalar* const& next_result) const final {
1789  // all methods calling aggregate result should be overriden
1790  UNREACHABLE();
1791  return nullptr;
1792  }
1793 
1794  const RexScalar* defaultResult() const final { return nullptr; }
1795 };
1796 
1806  public:
1807  RexWindowFuncReplacementVisitor(std::unique_ptr<const RexScalar> replacement_rex)
1808  : replacement_rex_(std::move(replacement_rex)) {}
1809 
1810  ~RexWindowFuncReplacementVisitor() { CHECK(replacement_rex_ == nullptr); }
1811 
1812  protected:
1813  RetType visitOperator(const RexOperator* rex_operator) const final {
1814  if (should_replace_operand(rex_operator)) {
1815  return std::move(replacement_rex_);
1816  }
1817 
1818  const auto rex_window_function_operator =
1819  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1820  if (rex_window_function_operator) {
1821  // Deep copy the embedded window function operator
1822  return visitWindowFunctionOperator(rex_window_function_operator);
1823  }
1824 
1825  const size_t operand_count = rex_operator->size();
1826  std::vector<RetType> new_opnds;
1827  for (size_t i = 0; i < operand_count; ++i) {
1828  const auto operand = rex_operator->getOperand(i);
1829  if (should_replace_operand(operand)) {
1830  new_opnds.push_back(std::move(replacement_rex_));
1831  } else {
1832  new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
1833  }
1834  }
1835  return rex_operator->getDisambiguated(new_opnds);
1836  }
1837 
1838  RetType visitCase(const RexCase* rex_case) const final {
1839  if (should_replace_operand(rex_case)) {
1840  return std::move(replacement_rex_);
1841  }
1842 
1843  std::vector<std::pair<RetType, RetType>> new_pair_list;
1844  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1845  auto when_operand = rex_case->getWhen(i);
1846  auto then_operand = rex_case->getThen(i);
1847  new_pair_list.emplace_back(
1848  should_replace_operand(when_operand) ? std::move(replacement_rex_)
1849  : visit(when_operand),
1850  should_replace_operand(then_operand) ? std::move(replacement_rex_)
1851  : visit(then_operand));
1852  }
1853  auto new_else = should_replace_operand(rex_case->getElse())
1854  ? std::move(replacement_rex_)
1855  : visit(rex_case->getElse());
1856  return std::make_unique<RexCase>(new_pair_list, new_else);
1857  }
1858 
1859  private:
1860  bool should_replace_operand(const RexScalar* rex) const {
1861  return replacement_rex_ && is_window_function_operator(rex);
1862  }
1863 
1864  mutable std::unique_ptr<const RexScalar> replacement_rex_;
1865 };
1866 
1877  public:
1878  RexInputBackpropagationVisitor(RelProject* node) : node_(node) { CHECK(node_); }
1879 
1880  protected:
1881  RetType visitInput(const RexInput* rex_input) const final {
1882  if (rex_input->getSourceNode() != node_) {
1883  const auto cur_index = rex_input->getIndex();
1884  auto cur_source_node = rex_input->getSourceNode();
1885  std::string field_name = "";
1886  if (auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
1887  field_name = cur_project_node->getFieldName(cur_index);
1888  }
1889  node_->appendInput(field_name, rex_input->deepCopy());
1890  return std::make_unique<RexInput>(node_, node_->size() - 1);
1891  } else {
1892  return rex_input->deepCopy();
1893  }
1894  }
1895 
1896  private:
1897  mutable RelProject* node_;
1898 };
1899 
1916  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
1917  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
1918 
1920  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
1921  const auto node = *node_itr;
1922  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
1923  if (!window_func_project_node) {
1924  continue;
1925  }
1926 
1927  // map scalar expression index in the project node to wiondow function ptr
1928  std::unordered_map<size_t, const RexScalar*> embedded_window_function_expressions;
1929 
1930  // Iterate the target exprs of the project node and check for window function
1931  // expressions. If an embedded expression exists, save it in the
1932  // embedded_window_function_expressions map and split the expression into a window
1933  // function expression and a parent expression in a subsequent project node
1934  for (size_t i = 0; i < window_func_project_node->size(); i++) {
1935  const auto scalar_rex = window_func_project_node->getProjectAt(i);
1936  if (is_window_function_operator(scalar_rex)) {
1937  // top level window function exprs are fine
1938  continue;
1939  }
1940 
1941  if (const auto window_func_rex = visitor.visit(scalar_rex)) {
1942  const auto ret = embedded_window_function_expressions.insert(
1943  std::make_pair(i, window_func_rex));
1944  CHECK(ret.second);
1945  }
1946  }
1947 
1948  if (!embedded_window_function_expressions.empty()) {
1949  std::vector<std::unique_ptr<const RexScalar>> new_scalar_exprs;
1950 
1951  auto window_func_scalar_exprs =
1952  window_func_project_node->getExpressionsAndRelease();
1953  for (size_t rex_idx = 0; rex_idx < window_func_scalar_exprs.size(); ++rex_idx) {
1954  const auto embedded_window_func_expr_pair =
1955  embedded_window_function_expressions.find(rex_idx);
1956  if (embedded_window_func_expr_pair ==
1957  embedded_window_function_expressions.end()) {
1958  new_scalar_exprs.emplace_back(
1959  std::make_unique<const RexInput>(window_func_project_node.get(), rex_idx));
1960  } else {
1961  const auto window_func_rex_idx = embedded_window_func_expr_pair->first;
1962  CHECK_LT(window_func_rex_idx, window_func_scalar_exprs.size());
1963 
1964  const auto& window_func_rex = embedded_window_func_expr_pair->second;
1965 
1966  RexDeepCopyVisitor copier;
1967  auto window_func_rex_copy = copier.visit(window_func_rex);
1968 
1969  auto window_func_parent_expr =
1970  window_func_scalar_exprs[window_func_rex_idx].get();
1971 
1972  // Replace window func rex with an input rex
1973  auto window_func_result_input = std::make_unique<const RexInput>(
1974  window_func_project_node.get(), window_func_rex_idx);
1975  RexWindowFuncReplacementVisitor replacer(std::move(window_func_result_input));
1976  auto new_parent_rex = replacer.visit(window_func_parent_expr);
1977 
1978  // Put the parent expr in the new scalar exprs
1979  new_scalar_exprs.emplace_back(std::move(new_parent_rex));
1980 
1981  // Put the window func expr in cur scalar exprs
1982  window_func_scalar_exprs[window_func_rex_idx] = std::move(window_func_rex_copy);
1983  }
1984  }
1985 
1986  CHECK_EQ(window_func_scalar_exprs.size(), new_scalar_exprs.size());
1987  window_func_project_node->setExpressions(window_func_scalar_exprs);
1988 
1989  // Ensure any inputs from the node containing the expression (the "new" node)
1990  // exist on the window function project node, e.g. if we had a binary operation
1991  // involving an aggregate value or column not included in the top level
1992  // projection list.
1993  RexInputBackpropagationVisitor input_visitor(window_func_project_node.get());
1994  for (size_t i = 0; i < new_scalar_exprs.size(); i++) {
1995  if (dynamic_cast<const RexInput*>(new_scalar_exprs[i].get())) {
1996  // ignore top level inputs, these were copied directly from the previous
1997  // node
1998  continue;
1999  }
2000  new_scalar_exprs[i] = input_visitor.visit(new_scalar_exprs[i].get());
2001  }
2002 
2003  // Build the new project node and insert it into the list after the project node
2004  // containing the window function
2005  auto new_project =
2006  std::make_shared<RelProject>(new_scalar_exprs,
2007  window_func_project_node->getFields(),
2008  window_func_project_node);
2009  node_list.insert(std::next(node_itr), new_project);
2010 
2011  // Rebind all the following inputs
2012  for (auto rebind_itr = std::next(node_itr, 2); rebind_itr != node_list.end();
2013  rebind_itr++) {
2014  (*rebind_itr)->replaceInput(window_func_project_node, new_project);
2015  }
2016  }
2017  }
2018  nodes.assign(node_list.begin(), node_list.end());
2019 }
2020 
2021 using RexInputSet = std::unordered_set<RexInput>;
2022 
2023 class RexInputCollector : public RexVisitor<RexInputSet> {
2024  public:
2025  RexInputSet visitInput(const RexInput* input) const override {
2026  return RexInputSet{*input};
2027  }
2028 
2029  protected:
2031  const RexInputSet& next_result) const override {
2032  auto result = aggregate;
2033  result.insert(next_result.begin(), next_result.end());
2034  return result;
2035  }
2036 };
2037 
2045 void add_window_function_pre_project(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
2046  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2047 
2048  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2049  const auto node = *node_itr;
2050  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2051  if (!window_func_project_node) {
2052  continue;
2053  }
2054  if (!window_func_project_node->hasWindowFunctionExpr()) {
2055  // the first projection node in the query plan does not have a window function
2056  // expression -- this step is not requierd.
2057  return;
2058  }
2059 
2060  const auto prev_node_itr = std::prev(node_itr);
2061  const auto prev_node = *prev_node_itr;
2062  CHECK(prev_node);
2063 
2064  RexInputSet inputs;
2065  RexInputCollector input_collector;
2066  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2067  auto new_inputs = input_collector.visit(window_func_project_node->getProjectAt(i));
2068  inputs.insert(new_inputs.begin(), new_inputs.end());
2069  }
2070 
2071  // Note: Technically not required since we are mapping old inputs to new input
2072  // indices, but makes the re-mapping of inputs easier to follow.
2073  std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2074  std::sort(sorted_inputs.begin(),
2075  sorted_inputs.end(),
2076  [](const auto& a, const auto& b) { return a.getIndex() < b.getIndex(); });
2077 
2078  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2079  std::vector<std::string> fields;
2080  std::unordered_map<unsigned, unsigned> old_index_to_new_index;
2081  for (auto& input : sorted_inputs) {
2082  CHECK_EQ(input.getSourceNode(), prev_node.get());
2083  CHECK(old_index_to_new_index
2084  .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2085  .second);
2086  scalar_exprs.emplace_back(input.deepCopy());
2087  fields.emplace_back("");
2088  }
2089 
2090  auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2091  node_list.insert(node_itr, new_project);
2092  window_func_project_node->replaceInput(
2093  prev_node, new_project, old_index_to_new_index);
2094 
2095  break;
2096  }
2097 
2098  nodes.assign(node_list.begin(), node_list.end());
2099 }
2100 
2101 int64_t get_int_literal_field(const rapidjson::Value& obj,
2102  const char field[],
2103  const int64_t default_val) noexcept {
2104  const auto it = obj.FindMember(field);
2105  if (it == obj.MemberEnd()) {
2106  return default_val;
2107  }
2108  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
2109  CHECK_EQ(kDECIMAL, lit->getType());
2110  CHECK_EQ(unsigned(0), lit->getScale());
2111  CHECK_EQ(unsigned(0), lit->getTypeScale());
2112  return lit->getVal<int64_t>();
2113 }
2114 
2115 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
2116  const auto& inputs_json = field(node, "inputs");
2117  CHECK(inputs_json.IsArray() && !inputs_json.Size());
2118 }
2119 
2121  const rapidjson::Value& scan_ra) {
2122  const auto& table_json = field(scan_ra, "table");
2123  CHECK(table_json.IsArray());
2124  CHECK_EQ(unsigned(2), table_json.Size());
2125  const auto td = cat.getMetadataForTable(table_json[1].GetString());
2126  CHECK(td);
2127  return td;
2128 }
2129 
2130 std::vector<std::string> getFieldNamesFromScanNode(const rapidjson::Value& scan_ra) {
2131  const auto& fields_json = field(scan_ra, "fieldNames");
2132  return strings_from_json_array(fields_json);
2133 }
2134 
2135 } // namespace
2136 
2138  for (const auto& expr : scalar_exprs_) {
2139  if (is_window_function_operator(expr.get())) {
2140  return true;
2141  }
2142  }
2143  return false;
2144 }
2145 namespace details {
2146 
2148  public:
2150 
2151  std::vector<std::shared_ptr<RelAlgNode>> run(const rapidjson::Value& rels,
2152  RelAlgDagBuilder& root_dag_builder) {
2153  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
2154  const auto& crt_node = *rels_it;
2155  const auto id = node_id(crt_node);
2156  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2157  CHECK(crt_node.IsObject());
2158  std::shared_ptr<RelAlgNode> ra_node = nullptr;
2159  const auto rel_op = json_str(field(crt_node, "relOp"));
2160  if (rel_op == std::string("EnumerableTableScan") ||
2161  rel_op == std::string("LogicalTableScan")) {
2162  ra_node = dispatchTableScan(crt_node);
2163  } else if (rel_op == std::string("LogicalProject")) {
2164  ra_node = dispatchProject(crt_node, root_dag_builder);
2165  } else if (rel_op == std::string("LogicalFilter")) {
2166  ra_node = dispatchFilter(crt_node, root_dag_builder);
2167  } else if (rel_op == std::string("LogicalAggregate")) {
2168  ra_node = dispatchAggregate(crt_node);
2169  } else if (rel_op == std::string("LogicalJoin")) {
2170  ra_node = dispatchJoin(crt_node, root_dag_builder);
2171  } else if (rel_op == std::string("LogicalSort")) {
2172  ra_node = dispatchSort(crt_node);
2173  } else if (rel_op == std::string("LogicalValues")) {
2174  ra_node = dispatchLogicalValues(crt_node);
2175  } else if (rel_op == std::string("LogicalTableModify")) {
2176  ra_node = dispatchModify(crt_node);
2177  } else if (rel_op == std::string("LogicalTableFunctionScan")) {
2178  ra_node = dispatchTableFunction(crt_node, root_dag_builder);
2179  } else if (rel_op == std::string("LogicalUnion")) {
2180  ra_node = dispatchUnion(crt_node);
2181  } else {
2182  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
2183  }
2184  nodes_.push_back(ra_node);
2185  }
2186 
2187  return std::move(nodes_);
2188  }
2189 
2190  private:
2191  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
2192  check_empty_inputs_field(scan_ra);
2193  CHECK(scan_ra.IsObject());
2194  const auto td = getTableFromScanNode(cat_, scan_ra);
2195  const auto field_names = getFieldNamesFromScanNode(scan_ra);
2196  if (scan_ra.HasMember("hints")) {
2197  auto scan_node = std::make_shared<RelScan>(td, field_names);
2198  getRelAlgHints(scan_ra, scan_node);
2199  return scan_node;
2200  }
2201  return std::make_shared<RelScan>(td, field_names);
2202  }
2203 
2204  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra,
2205  RelAlgDagBuilder& root_dag_builder) {
2206  const auto inputs = getRelAlgInputs(proj_ra);
2207  CHECK_EQ(size_t(1), inputs.size());
2208  const auto& exprs_json = field(proj_ra, "exprs");
2209  CHECK(exprs_json.IsArray());
2210  std::vector<std::unique_ptr<const RexScalar>> exprs;
2211  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
2212  ++exprs_json_it) {
2213  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
2214  }
2215  const auto& fields = field(proj_ra, "fields");
2216  if (proj_ra.HasMember("hints")) {
2217  auto project_node = std::make_shared<RelProject>(
2218  exprs, strings_from_json_array(fields), inputs.front());
2219  getRelAlgHints(proj_ra, project_node);
2220  return project_node;
2221  }
2222  return std::make_shared<RelProject>(
2223  exprs, strings_from_json_array(fields), inputs.front());
2224  }
2225 
2226  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra,
2227  RelAlgDagBuilder& root_dag_builder) {
2228  const auto inputs = getRelAlgInputs(filter_ra);
2229  CHECK_EQ(size_t(1), inputs.size());
2230  const auto id = node_id(filter_ra);
2231  CHECK(id);
2232  auto condition =
2233  parse_scalar_expr(field(filter_ra, "condition"), cat_, root_dag_builder);
2234  return std::make_shared<RelFilter>(condition, inputs.front());
2235  }
2236 
2237  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
2238  const auto inputs = getRelAlgInputs(agg_ra);
2239  CHECK_EQ(size_t(1), inputs.size());
2240  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
2241  const auto group = indices_from_json_array(field(agg_ra, "group"));
2242  for (size_t i = 0; i < group.size(); ++i) {
2243  CHECK_EQ(i, group[i]);
2244  }
2245  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
2246  throw QueryNotSupported("GROUP BY extensions not supported");
2247  }
2248  const auto& aggs_json_arr = field(agg_ra, "aggs");
2249  CHECK(aggs_json_arr.IsArray());
2250  std::vector<std::unique_ptr<const RexAgg>> aggs;
2251  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
2252  aggs_json_arr_it != aggs_json_arr.End();
2253  ++aggs_json_arr_it) {
2254  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
2255  }
2256  if (agg_ra.HasMember("hints")) {
2257  auto agg_node =
2258  std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2259  getRelAlgHints(agg_ra, agg_node);
2260  return agg_node;
2261  }
2262  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2263  }
2264 
2265  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra,
2266  RelAlgDagBuilder& root_dag_builder) {
2267  const auto inputs = getRelAlgInputs(join_ra);
2268  CHECK_EQ(size_t(2), inputs.size());
2269  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
2270  auto filter_rex =
2271  parse_scalar_expr(field(join_ra, "condition"), cat_, root_dag_builder);
2272  if (join_ra.HasMember("hints")) {
2273  auto join_node =
2274  std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2275  getRelAlgHints(join_ra, join_node);
2276  return join_node;
2277  }
2278  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2279  }
2280 
2281  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
2282  const auto inputs = getRelAlgInputs(sort_ra);
2283  CHECK_EQ(size_t(1), inputs.size());
2284  std::vector<SortField> collation;
2285  const auto& collation_arr = field(sort_ra, "collation");
2286  CHECK(collation_arr.IsArray());
2287  for (auto collation_arr_it = collation_arr.Begin();
2288  collation_arr_it != collation_arr.End();
2289  ++collation_arr_it) {
2290  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
2291  const auto sort_dir = parse_sort_direction(*collation_arr_it);
2292  const auto null_pos = parse_nulls_position(*collation_arr_it);
2293  collation.emplace_back(field_idx, sort_dir, null_pos);
2294  }
2295  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
2296  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
2297  auto ret = std::make_shared<RelSort>(
2298  collation, limit > 0 ? limit : 0, offset, inputs.front());
2299  ret->setEmptyResult(limit == 0);
2300  return ret;
2301  }
2302 
2303  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
2304  const auto inputs = getRelAlgInputs(logical_modify_ra);
2305  CHECK_EQ(size_t(1), inputs.size());
2306 
2307  const auto table_descriptor = getTableFromScanNode(cat_, logical_modify_ra);
2308  if (table_descriptor->isView) {
2309  throw std::runtime_error("UPDATE of a view is unsupported.");
2310  }
2311 
2312  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
2313  std::string op = json_str(field(logical_modify_ra, "operation"));
2314  RelModify::TargetColumnList target_column_list;
2315 
2316  if (op == "UPDATE") {
2317  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
2318  CHECK(update_columns.IsArray());
2319 
2320  for (auto column_arr_it = update_columns.Begin();
2321  column_arr_it != update_columns.End();
2322  ++column_arr_it) {
2323  target_column_list.push_back(column_arr_it->GetString());
2324  }
2325  }
2326 
2327  auto modify_node = std::make_shared<RelModify>(
2328  cat_, table_descriptor, flattened, op, target_column_list, inputs[0]);
2329  switch (modify_node->getOperation()) {
2331  modify_node->applyDeleteModificationsToInputNode();
2332  break;
2333  }
2335  modify_node->applyUpdateModificationsToInputNode();
2336  break;
2337  }
2338  default:
2339  throw std::runtime_error("Unsupported RelModify operation: " +
2340  json_node_to_string(logical_modify_ra));
2341  }
2342 
2343  return modify_node;
2344  }
2345 
2346  std::shared_ptr<RelTableFunction> dispatchTableFunction(
2347  const rapidjson::Value& table_func_ra,
2348  RelAlgDagBuilder& root_dag_builder) {
2349  const auto inputs = getRelAlgInputs(table_func_ra);
2350  const auto& invocation = field(table_func_ra, "invocation");
2351  CHECK(invocation.IsObject());
2352 
2353  const auto& operands = field(invocation, "operands");
2354  CHECK(operands.IsArray());
2355  CHECK_GE(operands.Size(), unsigned(0));
2356 
2357  std::vector<const Rex*> col_inputs;
2358  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
2359  std::vector<std::string> fields;
2360 
2361  for (auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
2362  ++exprs_json_it) {
2363  const auto& expr_json = *exprs_json_it;
2364  CHECK(expr_json.IsObject());
2365  if (expr_json.HasMember("op")) {
2366  const auto op_str = json_str(field(expr_json, "op"));
2367  if (op_str == "CAST" && expr_json.HasMember("type")) {
2368  const auto& expr_type = field(expr_json, "type");
2369  CHECK(expr_type.IsObject());
2370  CHECK(expr_type.HasMember("type"));
2371  const auto& expr_type_name = json_str(field(expr_type, "type"));
2372  if (expr_type_name == "CURSOR") {
2373  CHECK(expr_json.HasMember("operands"));
2374  const auto& expr_operands = field(expr_json, "operands");
2375  CHECK(expr_operands.IsArray());
2376  if (expr_operands.Size() != 1) {
2377  throw std::runtime_error(
2378  "Table functions currently only support one ResultSet input");
2379  }
2380  auto pos = field(expr_operands[0], "input").GetInt();
2381  CHECK_LT(pos, inputs.size());
2382  for (size_t i = inputs[pos]->size(); i > 0; i--) {
2383  table_func_inputs.emplace_back(
2384  std::make_unique<RexAbstractInput>(col_inputs.size()));
2385  col_inputs.emplace_back(table_func_inputs.back().get());
2386  }
2387  continue;
2388  }
2389  }
2390  }
2391  table_func_inputs.emplace_back(
2392  parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
2393  }
2394 
2395  const auto& op_name = field(invocation, "op");
2396  CHECK(op_name.IsString());
2397 
2398  std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
2399  const auto& row_types = field(table_func_ra, "rowType");
2400  CHECK(row_types.IsArray());
2401  CHECK_GE(row_types.Size(), unsigned(0));
2402  const auto& row_types_array = row_types.GetArray();
2403  for (size_t i = 0; i < row_types_array.Size(); i++) {
2404  // We don't care about the type information in rowType -- replace each output with
2405  // a reference to be resolved later in the translator
2406  table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
2407  fields.emplace_back("");
2408  }
2409  return std::make_shared<RelTableFunction>(op_name.GetString(),
2410  inputs,
2411  fields,
2412  col_inputs,
2413  table_func_inputs,
2414  table_function_projected_outputs);
2415  }
2416 
2417  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
2418  const rapidjson::Value& logical_values_ra) {
2419  const auto& tuple_type_arr = field(logical_values_ra, "type");
2420  CHECK(tuple_type_arr.IsArray());
2421  std::vector<TargetMetaInfo> tuple_type;
2422  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
2423  tuple_type_arr_it != tuple_type_arr.End();
2424  ++tuple_type_arr_it) {
2425  const auto component_type = parse_type(*tuple_type_arr_it);
2426  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
2427  tuple_type.emplace_back(component_name, component_type);
2428  }
2429  const auto& inputs_arr = field(logical_values_ra, "inputs");
2430  CHECK(inputs_arr.IsArray());
2431  const auto& tuples_arr = field(logical_values_ra, "tuples");
2432  CHECK(tuples_arr.IsArray());
2433 
2434  if (inputs_arr.Size()) {
2435  throw QueryNotSupported("Inputs not supported in logical values yet.");
2436  }
2437 
2438  std::vector<RelLogicalValues::RowValues> values;
2439  if (tuples_arr.Size()) {
2440  for (const auto& row : tuples_arr.GetArray()) {
2441  CHECK(row.IsArray());
2442  const auto values_json = row.GetArray();
2443  if (!values.empty()) {
2444  CHECK_EQ(values[0].size(), values_json.Size());
2445  }
2446  values.emplace_back(RelLogicalValues::RowValues{});
2447  for (const auto& value : values_json) {
2448  CHECK(value.IsObject());
2449  CHECK(value.HasMember("literal"));
2450  values.back().emplace_back(parse_literal(value));
2451  }
2452  }
2453  }
2454 
2455  return std::make_shared<RelLogicalValues>(tuple_type, values);
2456  }
2457 
2458  std::shared_ptr<RelLogicalUnion> dispatchUnion(
2459  const rapidjson::Value& logical_union_ra) {
2460  auto inputs = getRelAlgInputs(logical_union_ra);
2461  auto const& all_type_bool = field(logical_union_ra, "all");
2462  CHECK(all_type_bool.IsBool());
2463  return std::make_shared<RelLogicalUnion>(std::move(inputs), all_type_bool.GetBool());
2464  }
2465 
2466  RelAlgInputs getRelAlgInputs(const rapidjson::Value& node) {
2467  if (node.HasMember("inputs")) {
2468  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
2469  RelAlgInputs ra_inputs;
2470  for (const auto& str_id : str_input_ids) {
2471  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
2472  }
2473  return ra_inputs;
2474  }
2475  return {prev(node)};
2476  }
2477 
2478  std::pair<std::string, std::string> getKVOptionPair(std::string& str, size_t& pos) {
2479  auto option = str.substr(0, pos);
2480  std::string delim = "=";
2481  size_t delim_pos = option.find(delim);
2482  auto key = option.substr(0, delim_pos);
2483  auto val = option.substr(delim_pos + 1, option.length());
2484  str.erase(0, pos + delim.length() + 1);
2485  return {key, val};
2486  }
2487 
2488  ExplainedQueryHint parseHintString(std::string& hint_string) {
2489  std::string white_space_delim = " ";
2490  int l = hint_string.length();
2491  hint_string = hint_string.erase(0, 1).substr(0, l - 2);
2492  size_t pos = 0;
2493  if ((pos = hint_string.find("options:")) != std::string::npos) {
2494  // need to parse hint options
2495  std::vector<std::string> tokens;
2496  std::string hint_name = hint_string.substr(0, hint_string.find(white_space_delim));
2497  auto hint_type = RegisteredQueryHint::translateQueryHint(hint_name);
2498  bool kv_list_op = false;
2499  std::string raw_options = hint_string.substr(pos + 8, hint_string.length() - 2);
2500  if (raw_options.find('{') != std::string::npos) {
2501  kv_list_op = true;
2502  } else {
2503  CHECK(raw_options.find('[') != std::string::npos);
2504  }
2505  auto t1 = raw_options.erase(0, 1);
2506  raw_options = t1.substr(0, t1.length() - 1);
2507  std::string op_delim = ", ";
2508  if (kv_list_op) {
2509  // kv options
2510  std::unordered_map<std::string, std::string> kv_options;
2511  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
2512  auto kv_pair = getKVOptionPair(raw_options, pos);
2513  kv_options.emplace(kv_pair.first, kv_pair.second);
2514  }
2515  // handle the last kv pair
2516  auto kv_pair = getKVOptionPair(raw_options, pos);
2517  kv_options.emplace(kv_pair.first, kv_pair.second);
2518  return {hint_type, true, false, true, kv_options};
2519  } else {
2520  std::vector<std::string> list_options;
2521  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
2522  list_options.emplace_back(raw_options.substr(0, pos));
2523  raw_options.erase(0, pos + white_space_delim.length() + 1);
2524  }
2525  // handle the last option
2526  list_options.emplace_back(raw_options.substr(0, pos));
2527  return {hint_type, true, false, false, list_options};
2528  }
2529  } else {
2530  // marker hint: no extra option for this hint
2531  std::string hint_name = hint_string.substr(0, hint_string.find(white_space_delim));
2532  auto hint_type = RegisteredQueryHint::translateQueryHint(hint_name);
2533  return {hint_type, true, true, false};
2534  }
2535  }
2536 
2537  void getRelAlgHints(const rapidjson::Value& json_node,
2538  std::shared_ptr<RelAlgNode> node) {
2539  std::string hint_explained = json_str(field(json_node, "hints"));
2540  size_t pos = 0;
2541  std::string delim = "|";
2542  std::vector<std::string> hint_list;
2543  while ((pos = hint_explained.find(delim)) != std::string::npos) {
2544  hint_list.emplace_back(hint_explained.substr(0, pos));
2545  hint_explained.erase(0, pos + delim.length());
2546  }
2547  // handling the last one
2548  hint_list.emplace_back(hint_explained.substr(0, pos));
2549 
2550  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
2551  if (agg_node) {
2552  for (std::string& hint : hint_list) {
2553  auto parsed_hint = parseHintString(hint);
2554  agg_node->addHint(parsed_hint);
2555  }
2556  }
2557  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
2558  if (project_node) {
2559  for (std::string& hint : hint_list) {
2560  auto parsed_hint = parseHintString(hint);
2561  project_node->addHint(parsed_hint);
2562  }
2563  }
2564  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
2565  if (scan_node) {
2566  for (std::string& hint : hint_list) {
2567  auto parsed_hint = parseHintString(hint);
2568  scan_node->addHint(parsed_hint);
2569  }
2570  }
2571  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
2572  if (join_node) {
2573  for (std::string& hint : hint_list) {
2574  auto parsed_hint = parseHintString(hint);
2575  join_node->addHint(parsed_hint);
2576  }
2577  }
2578 
2579  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
2580  if (compound_node) {
2581  for (std::string& hint : hint_list) {
2582  auto parsed_hint = parseHintString(hint);
2583  compound_node->addHint(parsed_hint);
2584  }
2585  }
2586  }
2587 
2588  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
2589  const auto id = node_id(crt_node);
2590  CHECK(id);
2591  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2592  return nodes_.back();
2593  }
2594 
2596  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
2597 };
2598 
2599 } // namespace details
2600 
2601 RelAlgDagBuilder::RelAlgDagBuilder(const std::string& query_ra,
2603  const RenderInfo* render_info)
2604  : cat_(cat), render_info_(render_info), query_hint_(RegisteredQueryHint::defaults()) {
2605  rapidjson::Document query_ast;
2606  query_ast.Parse(query_ra.c_str());
2607  VLOG(2) << "Parsing query RA JSON: " << query_ra;
2608  if (query_ast.HasParseError()) {
2609  query_ast.GetParseError();
2610  LOG(ERROR) << "Failed to parse RA tree from Calcite (offset "
2611  << query_ast.GetErrorOffset() << "):\n"
2612  << rapidjson::GetParseError_En(query_ast.GetParseError());
2613  VLOG(1) << "Failed to parse query RA: " << query_ra;
2614  throw std::runtime_error(
2615  "Failed to parse relational algebra tree. Possible query syntax error.");
2616  }
2617  CHECK(query_ast.IsObject());
2619  build(query_ast, *this);
2620 }
2621 
2623  const rapidjson::Value& query_ast,
2625  const RenderInfo* render_info)
2626  : cat_(cat), render_info_(render_info), query_hint_(RegisteredQueryHint::defaults()) {
2627  build(query_ast, root_dag_builder);
2628 }
2629 
2630 void RelAlgDagBuilder::build(const rapidjson::Value& query_ast,
2631  RelAlgDagBuilder& lead_dag_builder) {
2632  const auto& rels = field(query_ast, "rels");
2633  CHECK(rels.IsArray());
2634  try {
2635  nodes_ = details::RelAlgDispatcher(cat_).run(rels, lead_dag_builder);
2636  } catch (const QueryNotSupported&) {
2637  throw;
2638  }
2639  CHECK(!nodes_.empty());
2641 
2642  if (render_info_) {
2643  // Alter the RA for render. Do this before any flattening/optimizations are done to
2644  // the tree.
2646  }
2647 
2648  handleQueryHint(nodes_, this);
2649  mark_nops(nodes_);
2654  std::vector<const RelAlgNode*> filtered_left_deep_joins;
2655  std::vector<const RelAlgNode*> left_deep_joins;
2656  for (const auto& node : nodes_) {
2657  const auto left_deep_join_root = get_left_deep_join_root(node);
2658  // The filter which starts a left-deep join pattern must not be coalesced
2659  // since it contains (part of) the join condition.
2660  if (left_deep_join_root) {
2661  left_deep_joins.push_back(left_deep_join_root.get());
2662  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
2663  filtered_left_deep_joins.push_back(left_deep_join_root.get());
2664  }
2665  }
2666  }
2667  if (filtered_left_deep_joins.empty()) {
2669  }
2670  eliminate_dead_columns(nodes_);
2671  eliminate_dead_subqueries(subqueries_, nodes_.back().get());
2673  if (g_cluster) {
2675  }
2676  coalesce_nodes(nodes_, left_deep_joins);
2677  CHECK(nodes_.back().use_count() == 1);
2678  create_left_deep_join(nodes_);
2679 }
2680 
2682  std::function<void(RelAlgNode const*)> const& callback) const {
2683  for (auto const& node : nodes_) {
2684  if (node) {
2685  callback(node.get());
2686  }
2687  }
2688 }
2689 
2691  for (auto& node : nodes_) {
2692  if (node) {
2693  node->resetQueryExecutionState();
2694  }
2695  }
2696 }
2697 
2698 // Return tree with depth represented by indentations.
2699 std::string tree_string(const RelAlgNode* ra, const size_t depth) {
2700  std::string result = std::string(2 * depth, ' ') + ra->toString() + '\n';
2701  for (size_t i = 0; i < ra->inputCount(); ++i) {
2702  result += tree_string(ra->getInput(i), depth + 1);
2703  }
2704  return result;
2705 }
2706 
2707 std::string RexSubQuery::toString() const {
2708  return cat(::typeName(this), "(", ::toString(ra_.get()), ")");
2709 }
2710 
2711 std::string RexInput::toString() const {
2712  const auto scan_node = dynamic_cast<const RelScan*>(node_);
2713  if (scan_node) {
2714  auto field_name = scan_node->getFieldName(getIndex());
2715  auto table_name = scan_node->getTableDescriptor()->tableName;
2716  return ::typeName(this) + "(" + table_name + "." + field_name + ")";
2717  }
2718  return cat(::typeName(this),
2719  "(node=",
2720  ::toString(node_),
2721  ", in_index=",
2723  ")");
2724 }
2725 
2726 std::string RelCompound::toString() const {
2727  return cat(::typeName(this),
2728  "(",
2729  (filter_expr_ ? filter_expr_->toString() : "null"),
2730  ", target_exprs=",
2732  ", ",
2734  ", agg_exps=",
2736  ", fields=",
2738  ", scalar_sources=",
2740  ", is_agg=",
2742  ")");
2743 }
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
SQLTypes to_sql_type(const std::string &type_name)
void handleQueryHint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDagBuilder *dag_builder) noexcept
std::shared_ptr< const RelAlgNode > getRootNodeShPtr() const
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
const RexScalar * getThen(const size_t idx) const
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
#define CHECK_EQ(x, y)
Definition: Logger.h:211
JoinType to_join_type(const std::string &join_type_name)
const Catalog_Namespace::Catalog & cat_
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexSubQuery > deepCopy() const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
JoinType
Definition: sqldefs.h:108
std::vector< const Rex * > remapTargetPointers(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
std::string cat(Ts &&...args)
std::string toString(const ExtArgumentType &sig_type)
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:102
Definition: sqltypes.h:48
Hints * getDeliveredHints()
void addHint(const ExplainedQueryHint &hint_explained)
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const RexScalar * getElse() const
RelCompound(std::unique_ptr< const RexScalar > &filter_expr, const std::vector< const Rex * > &target_exprs, const size_t groupby_count, const std::vector< const RexAgg * > &agg_exprs, const std::vector< std::string > &fields, std::vector< std::unique_ptr< const RexScalar >> &scalar_sources, const bool is_agg, bool update_disguised_as_select=false, bool delete_disguised_as_select=false, bool varlen_update_required=false, TableDescriptor const *manipulation_target_table=nullptr, ColumnNameList target_columns=ColumnNameList())
static thread_local unsigned crt_id_
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
RexScalar const * copyAndRedirectSource(RexScalar const *, size_t input_idx) const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
#define const
SQLAgg to_agg_kind(const std::string &agg_name)
std::shared_ptr< RelLogicalUnion > dispatchUnion(const rapidjson::Value &logical_union_ra)
#define LOG(tag)
Definition: Logger.h:194
NullSortedPosition
std::vector< std::string > TargetColumnList
size_t size() const
Hints * getDeliveredHints()
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const RexScalar * getOperand(const size_t idx) const
std::vector< const Rex * > col_inputs_
bool hasEquivCollationOf(const RelSort &that) const
const std::vector< std::string > fields_
std::string toString() const override
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
void build(const rapidjson::Value &query_ast, RelAlgDagBuilder &root_dag_builder)
string name
Definition: setup.in.py:72
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::string join(T const &container, std::string const &delim)
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
#define UNREACHABLE()
Definition: Logger.h:247
bool hint_applied_
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
#define CHECK_GE(x, y)
Definition: Logger.h:216
std::vector< std::string > fields_
void addHint(const ExplainedQueryHint &hint_explained)
Definition: sqldefs.h:49
const RexScalar * getWhen(const size_t idx) const
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
RetType visitOperator(const RexOperator *rex_operator) const final
void addHint(const ExplainedQueryHint &hint_explained)
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
const Catalog_Namespace::Catalog & cat_
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void checkForMatchingMetaInfoTypes() const
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
std::string to_string(char const *&&v)
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
const std::string getFieldName(const size_t i) const
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
RexInputSet visitInput(const RexInput *input) const override
std::vector< std::shared_ptr< RexSubQuery > > subqueries_
RexWindowFuncReplacementVisitor(std::unique_ptr< const RexScalar > replacement_rex)
const RenderInfo * render_info_
std::string to_string() const
Definition: sqltypes.h:457
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const RexScalar * visitOperator(const RexOperator *rex_operator) const final
unsigned getIndex() const
SQLOps getOperator() const
std::vector< std::shared_ptr< RelAlgNode > > nodes_
const TableDescriptor * getTableFromScanNode(const Catalog_Namespace::Catalog &cat, const rapidjson::Value &scan_ra)
SortDirection parse_sort_direction(const rapidjson::Value &collation)
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
static QueryHint translateQueryHint(const std::string &hint_name)
Definition: QueryHint.h:178
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
bool isRenaming() const
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void setIndex(const unsigned in_index) const
Hints * getDeliveredHints()
SQLOps to_sql_op(const std::string &op_str)
void add_window_function_pre_project(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::unique_ptr< Hints > hints_
void set_scale(int s)
Definition: sqltypes.h:409
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
void * visitInput(const RexInput *rex_input) const override
std::unique_ptr< Hints > hints_
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:54
void addHint(const ExplainedQueryHint &hint_explained)
size_t branchCount() const
std::unordered_set< RexInput > RexInputSet
const RelAlgNode * getInput(const size_t idx) const
RelFilter(std::unique_ptr< const RexScalar > &filter, std::shared_ptr< const RelAlgNode > input)
std::vector< std::shared_ptr< RelAlgNode > > nodes_
std::string toString() const override
RelAggregate(const size_t groupby_count, std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
std::unique_ptr< const RexScalar > filter_
bool isSimple() const
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
const size_t groupby_count_
RexRebindReindexInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input, std::unordered_map< unsigned, unsigned > old_to_new_index_map)
unsigned getId() const
const RelAlgNode * node_
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
void eachNode(std::function< void(RelAlgNode const *)> const &) const
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
Hints * getDeliveredHints()
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
SortDirection
const RexScalar * getProjectAt(const size_t idx) const
std::vector< std::unique_ptr< const RexAgg > > copyAggExprs(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
std::vector< std::shared_ptr< const RelAlgNode >> RelAlgInputs
#define CHECK_LT(x, y)
Definition: Logger.h:213
Definition: sqltypes.h:51
Definition: sqltypes.h:52
int32_t countRexLiteralArgs() const
std::unique_ptr< Hints > hints_
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
const ConstRexScalarPtrVector & getPartitionKeys() const
std::string tree_string(const RelAlgNode *ra, const size_t depth)
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:214
std::unique_ptr< Hints > hints_
const std::vector< const Rex * > target_exprs_
std::vector< ElementType >::const_iterator Super
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
RelLogicalUnion(RelAlgInputs, bool is_all)
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDagBuilder &root_dag_builder)
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::unique_ptr< const RexScalar > filter_expr_
void setSourceNode(const RelAlgNode *node) const
const RexScalar * aggregateResult(const RexScalar *const &aggregate, const RexScalar *const &next_result) const final
bool hasWindowFunctionExpr() const
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
std::unordered_map< QueryHint, ExplainedQueryHint > Hints
Definition: QueryHint.h:213
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
void setExecutionResult(const std::shared_ptr< const ExecutionResult > result)
RelLogicalValues(const std::vector< TargetMetaInfo > &tuple_type, std::vector< RowValues > &values)
std::string typeName(const T *v)
Definition: toString.h:82
ExplainedQueryHint parseHintString(std::string &hint_string)
RelAlgDagBuilder()=delete
SqlWindowFunctionKind
Definition: sqldefs.h:83
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:323
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexInputSet aggregateResult(const RexInputSet &aggregate, const RexInputSet &next_result) const override
Definition: sqldefs.h:53
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
RelTableFunction(const std::string &function_name, RelAlgInputs inputs, std::vector< std::string > &fields, std::vector< const Rex * > col_inputs, std::vector< std::unique_ptr< const RexScalar >> &table_func_inputs, std::vector< std::unique_ptr< const RexScalar >> &target_exprs)
const std::vector< std::string > & getFields() const
std::string getFieldName(const size_t i) const
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
bool g_enable_watchdog false
Definition: Execute.cpp:76
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:203
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
RelProject(std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
unsigned node_id(const rapidjson::Value &ra_node) noexcept
RANodeOutput get_node_output(const RelAlgNode *ra_node)
bool g_enable_union
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern) noexcept
bool g_cluster
void alterRAForRender(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const RenderInfo &render_info)
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDagBuilder &root_dag_builder)
void getRelAlgHints(const rapidjson::Value &json_node, std::shared_ptr< RelAlgNode > node)
RelAlgDispatcher(const Catalog_Namespace::Catalog &cat)
Common Enum definitions for SQL processing.
bool is_dict_encoded_string() const
Definition: sqltypes.h:526
Definition: sqltypes.h:44
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::vector< RexInput > RANodeOutput
std::vector< std::unique_ptr< const RexScalar >> RowValues
const size_t inputCount() const
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
size_t size() const override
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
RelAlgInputs getRelAlgInputs(const rapidjson::Value &node)
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDagBuilder &root_dag_builder)
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:139
std::vector< std::unique_ptr< const RexScalar > > copyRexScalars(std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
#define VLOG(n)
Definition: Logger.h:297
RelJoin(std::shared_ptr< const RelAlgNode > lhs, std::shared_ptr< const RelAlgNode > rhs, std::unique_ptr< const RexScalar > &condition, const JoinType join_type)
RelAlgInputs inputs_
void set_precision(int d)
Definition: sqltypes.h:407
std::string toString() const override
std::pair< std::string, std::string > getKVOptionPair(std::string &str, size_t &pos)
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
bool isIdentity() const
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
Hints * getDeliveredHints()
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
const bool is_agg_
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins)
const RexScalar * visitCase(const RexCase *rex_case) const final
std::string toString() const override
static void resetRelAlgFirstId() noexcept
std::string json_node_to_string(const rapidjson::Value &node) noexcept