OmniSciDB  94e8789169
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgDagBuilder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgDagBuilder.h"
19 #include "Catalog/Catalog.h"
21 #include "JsonAccessors.h"
22 #include "RelAlgOptimizer.h"
23 #include "RelLeftDeepInnerJoin.h"
25 #include "RexVisitor.h"
26 #include "Shared/sqldefs.h"
27 
28 #include <rapidjson/error/en.h>
29 #include <rapidjson/error/error.h>
30 #include <rapidjson/stringbuffer.h>
31 #include <rapidjson/writer.h>
32 
33 #include <string>
34 #include <unordered_set>
35 
36 extern bool g_cluster;
37 extern bool g_enable_union;
38 
namespace {

// Initial value of the thread-local node id counter RelAlgNode::crt_id_.
constexpr unsigned FIRST_RA_NODE_ID{1};

}  // namespace
44 
45 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
46 
49 }
50 
52  const std::shared_ptr<const ExecutionResult> result) {
53  auto row_set = result->getRows();
54  CHECK(row_set);
55  CHECK_EQ(size_t(1), row_set->colCount());
56  *(type_.get()) = row_set->getColType(0);
57  (*(result_.get())) = result;
58 }
59 
60 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
61  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
62 }
63 
64 unsigned RexSubQuery::getId() const {
65  return ra_->getId();
66 }
67 
68 namespace {
69 
70 class RexRebindInputsVisitor : public RexVisitor<void*> {
71  public:
72  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
73  : old_input_(old_input), new_input_(new_input) {}
74 
75  virtual ~RexRebindInputsVisitor() = default;
76 
77  void* visitInput(const RexInput* rex_input) const override {
78  const auto old_source = rex_input->getSourceNode();
79  if (old_source == old_input_) {
80  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
81  if (left_deep_join) {
82  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
83  return nullptr;
84  }
85  rex_input->setSourceNode(new_input_);
86  }
87  return nullptr;
88  };
89 
90  private:
91  const RelAlgNode* old_input_;
93 };
94 
95 // Creates an output with n columns.
96 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
97  std::vector<RexInput> outputs;
98  outputs.reserve(n);
99  for (size_t i = 0; i < n; ++i) {
100  outputs.emplace_back(node, i);
101  }
102  return outputs;
103 }
104 
106  public:
108  const RelAlgNode* old_input,
109  const RelAlgNode* new_input,
110  std::unordered_map<unsigned, unsigned> old_to_new_index_map)
111  : RexRebindInputsVisitor(old_input, new_input), mapping_(old_to_new_index_map) {}
112 
113  void* visitInput(const RexInput* rex_input) const override {
114  RexRebindInputsVisitor::visitInput(rex_input);
115  auto mapping_itr = mapping_.find(rex_input->getIndex());
116  CHECK(mapping_itr != mapping_.end());
117  rex_input->setIndex(mapping_itr->second);
118  return nullptr;
119  }
120 
121  private:
122  const std::unordered_map<unsigned, unsigned> mapping_;
123 };
124 
125 } // namespace
126 
128  std::shared_ptr<const RelAlgNode> old_input,
129  std::shared_ptr<const RelAlgNode> input,
130  std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map) {
131  RelAlgNode::replaceInput(old_input, input);
132  std::unique_ptr<RexRebindInputsVisitor> rebind_inputs;
133  if (old_to_new_index_map) {
134  rebind_inputs = std::make_unique<RexRebindReindexInputsVisitor>(
135  old_input.get(), input.get(), *old_to_new_index_map);
136  } else {
137  rebind_inputs =
138  std::make_unique<RexRebindInputsVisitor>(old_input.get(), input.get());
139  }
140  CHECK(rebind_inputs);
141  for (const auto& scalar_expr : scalar_exprs_) {
142  rebind_inputs->visit(scalar_expr.get());
143  }
144 }
145 
146 void RelProject::appendInput(std::string new_field_name,
147  std::unique_ptr<const RexScalar> new_input) {
148  fields_.emplace_back(std::move(new_field_name));
149  scalar_exprs_.emplace_back(std::move(new_input));
150 }
151 
153  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
154  if (scan_node) {
155  // Scan node has no inputs, output contains all columns in the table.
156  CHECK_EQ(size_t(0), scan_node->inputCount());
157  return n_outputs(scan_node, scan_node->size());
158  }
159  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
160  if (project_node) {
161  // Project output count doesn't depend on the input
162  CHECK_EQ(size_t(1), project_node->inputCount());
163  return n_outputs(project_node, project_node->size());
164  }
165  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
166  if (filter_node) {
167  // Filter preserves shape
168  CHECK_EQ(size_t(1), filter_node->inputCount());
169  const auto prev_out = get_node_output(filter_node->getInput(0));
170  return n_outputs(filter_node, prev_out.size());
171  }
172  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
173  if (aggregate_node) {
174  // Aggregate output count doesn't depend on the input
175  CHECK_EQ(size_t(1), aggregate_node->inputCount());
176  return n_outputs(aggregate_node, aggregate_node->size());
177  }
178  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
179  if (compound_node) {
180  // Compound output count doesn't depend on the input
181  CHECK_EQ(size_t(1), compound_node->inputCount());
182  return n_outputs(compound_node, compound_node->size());
183  }
184  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
185  if (join_node) {
186  // Join concatenates the outputs from the inputs and the output
187  // directly references the nodes in the input.
188  CHECK_EQ(size_t(2), join_node->inputCount());
189  auto lhs_out =
190  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
191  const auto rhs_out =
192  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
193  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
194  return lhs_out;
195  }
196  const auto table_func_node = dynamic_cast<const RelTableFunction*>(ra_node);
197  if (table_func_node) {
198  // Table Function output count doesn't depend on the input
199  return n_outputs(table_func_node, table_func_node->size());
200  }
201  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
202  if (sort_node) {
203  // Sort preserves shape
204  CHECK_EQ(size_t(1), sort_node->inputCount());
205  const auto prev_out = get_node_output(sort_node->getInput(0));
206  return n_outputs(sort_node, prev_out.size());
207  }
208  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
209  if (logical_values_node) {
210  CHECK_EQ(size_t(0), logical_values_node->inputCount());
211  return n_outputs(logical_values_node, logical_values_node->size());
212  }
213  const auto logical_union_node = dynamic_cast<const RelLogicalUnion*>(ra_node);
214  if (logical_union_node) {
215  return n_outputs(logical_union_node, logical_union_node->size());
216  }
217  LOG(FATAL) << "Unhandled ra_node type: " << ra_node->toString();
218  return {};
219 }
220 
222  if (!isSimple()) {
223  return false;
224  }
225  CHECK_EQ(size_t(1), inputCount());
226  const auto source = getInput(0);
227  if (dynamic_cast<const RelJoin*>(source)) {
228  return false;
229  }
230  const auto source_shape = get_node_output(source);
231  if (source_shape.size() != scalar_exprs_.size()) {
232  return false;
233  }
234  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
235  const auto& scalar_expr = scalar_exprs_[i];
236  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
237  CHECK(input);
238  CHECK_EQ(source, input->getSourceNode());
239  // We should add the additional check that input->getIndex() !=
240  // source_shape[i].getIndex(), but Calcite doesn't generate the right
241  // Sort-Project-Sort sequence when joins are involved.
242  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
243  return false;
244  }
245  }
246  return true;
247 }
248 
249 namespace {
250 
251 bool isRenamedInput(const RelAlgNode* node,
252  const size_t index,
253  const std::string& new_name) {
254  CHECK_LT(index, node->size());
255  if (auto join = dynamic_cast<const RelJoin*>(node)) {
256  CHECK_EQ(size_t(2), join->inputCount());
257  const auto lhs_size = join->getInput(0)->size();
258  if (index < lhs_size) {
259  return isRenamedInput(join->getInput(0), index, new_name);
260  }
261  CHECK_GE(index, lhs_size);
262  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
263  }
264 
265  if (auto scan = dynamic_cast<const RelScan*>(node)) {
266  return new_name != scan->getFieldName(index);
267  }
268 
269  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
270  return new_name != aggregate->getFieldName(index);
271  }
272 
273  if (auto project = dynamic_cast<const RelProject*>(node)) {
274  return new_name != project->getFieldName(index);
275  }
276 
277  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
278  return new_name != table_func->getFieldName(index);
279  }
280 
281  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
282  const auto& tuple_type = logical_values->getTupleType();
283  CHECK_LT(index, tuple_type.size());
284  return new_name != tuple_type[index].get_resname();
285  }
286 
287  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
288  dynamic_cast<const RelLogicalUnion*>(node));
289  return isRenamedInput(node->getInput(0), index, new_name);
290 }
291 
292 } // namespace
293 
295  if (!isSimple()) {
296  return false;
297  }
298  CHECK_EQ(scalar_exprs_.size(), fields_.size());
299  for (size_t i = 0; i < fields_.size(); ++i) {
300  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
301  CHECK(rex_in);
302  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
303  return true;
304  }
305  }
306  return false;
307 }
308 
309 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
310  std::shared_ptr<const RelAlgNode> input) {
311  RelAlgNode::replaceInput(old_input, input);
312  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
313  if (condition_) {
314  rebind_inputs.visit(condition_.get());
315  }
316 }
317 
318 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
319  std::shared_ptr<const RelAlgNode> input) {
320  RelAlgNode::replaceInput(old_input, input);
321  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
322  rebind_inputs.visit(filter_.get());
323 }
324 
325 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
326  std::shared_ptr<const RelAlgNode> input) {
327  RelAlgNode::replaceInput(old_input, input);
328  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
329  for (const auto& scalar_source : scalar_sources_) {
330  rebind_inputs.visit(scalar_source.get());
331  }
332  if (filter_expr_) {
333  rebind_inputs.visit(filter_expr_.get());
334  }
335 }
336 
338  : RelAlgNode(rhs)
340  , fields_(rhs.fields_)
341  , hint_applied_(false)
342  , hints_(std::make_unique<Hints>()) {
343  RexDeepCopyVisitor copier;
344  for (auto const& expr : rhs.scalar_exprs_) {
345  scalar_exprs_.push_back(copier.visit(expr.get()));
346  }
347  if (rhs.hint_applied_) {
348  for (auto const& kv : *rhs.hints_) {
349  addHint(kv.second);
350  }
351  }
352 }
353 
355  : RelAlgNode(rhs)
356  , tuple_type_(rhs.tuple_type_)
357  , values_(RexDeepCopyVisitor::copy(rhs.values_)) {}
358 
360  RexDeepCopyVisitor copier;
361  filter_ = copier.visit(rhs.filter_.get());
362 }
363 
365  : RelAlgNode(rhs)
366  , groupby_count_(rhs.groupby_count_)
367  , fields_(rhs.fields_)
368  , hint_applied_(false)
369  , hints_(std::make_unique<Hints>()) {
370  agg_exprs_.reserve(rhs.agg_exprs_.size());
371  for (auto const& agg : rhs.agg_exprs_) {
372  agg_exprs_.push_back(agg->deepCopy());
373  }
374  if (rhs.hint_applied_) {
375  for (auto const& kv : *rhs.hints_) {
376  addHint(kv.second);
377  }
378  }
379 }
380 
382  : RelAlgNode(rhs)
383  , join_type_(rhs.join_type_)
384  , hint_applied_(false)
385  , hints_(std::make_unique<Hints>()) {
386  RexDeepCopyVisitor copier;
387  condition_ = copier.visit(rhs.condition_.get());
388  if (rhs.hint_applied_) {
389  for (auto const& kv : *rhs.hints_) {
390  addHint(kv.second);
391  }
392  }
393 }
394 
395 namespace {
396 
397 std::vector<std::unique_ptr<const RexAgg>> copyAggExprs(
398  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs) {
399  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
400  agg_exprs_copy.reserve(agg_exprs.size());
401  for (auto const& agg_expr : agg_exprs) {
402  agg_exprs_copy.push_back(agg_expr->deepCopy());
403  }
404  return agg_exprs_copy;
405 }
406 
407 std::vector<std::unique_ptr<const RexScalar>> copyRexScalars(
408  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources) {
409  std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
410  scalar_sources_copy.reserve(scalar_sources.size());
411  RexDeepCopyVisitor copier;
412  for (auto const& scalar_source : scalar_sources) {
413  scalar_sources_copy.push_back(copier.visit(scalar_source.get()));
414  }
415  return scalar_sources_copy;
416 }
417 
418 std::vector<const Rex*> remapTargetPointers(
419  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_new,
420  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_new,
421  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_old,
422  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_old,
423  std::vector<const Rex*> const& target_exprs_old) {
424  std::vector<const Rex*> target_exprs(target_exprs_old);
425  std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
426  for (size_t i = 0; i < agg_exprs_new.size(); ++i) {
427  old_to_new_target.emplace(agg_exprs_old[i].get(), agg_exprs_new[i].get());
428  }
429  for (size_t i = 0; i < scalar_sources_new.size(); ++i) {
430  old_to_new_target.emplace(scalar_sources_old[i].get(), scalar_sources_new[i].get());
431  }
432  for (auto& target : target_exprs) {
433  auto target_it = old_to_new_target.find(target);
434  CHECK(target_it != old_to_new_target.end());
435  target = target_it->second;
436  }
437  return target_exprs;
438 }
439 
440 } // namespace
441 
443  : RelAlgNode(rhs)
445  , groupby_count_(rhs.groupby_count_)
446  , agg_exprs_(copyAggExprs(rhs.agg_exprs_))
447  , fields_(rhs.fields_)
448  , is_agg_(rhs.is_agg_)
449  , scalar_sources_(copyRexScalars(rhs.scalar_sources_))
450  , target_exprs_(remapTargetPointers(agg_exprs_,
451  scalar_sources_,
452  rhs.agg_exprs_,
453  rhs.scalar_sources_,
454  rhs.target_exprs_))
455  , hint_applied_(false)
456  , hints_(std::make_unique<Hints>()) {
457  RexDeepCopyVisitor copier;
458  filter_expr_ = rhs.filter_expr_ ? copier.visit(rhs.filter_expr_.get()) : nullptr;
459  if (rhs.hint_applied_) {
460  for (auto const& kv : *rhs.hints_) {
461  addHint(kv.second);
462  }
463  }
464 }
465 
466 void RelTableFunction::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
467  std::shared_ptr<const RelAlgNode> input) {
468  RelAlgNode::replaceInput(old_input, input);
469  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
470  for (const auto& target_expr : target_exprs_) {
471  rebind_inputs.visit(target_expr.get());
472  }
473  for (const auto& func_input : table_func_inputs_) {
474  rebind_inputs.visit(func_input.get());
475  }
476 }
477 
479  : RelAlgNode(rhs)
480  , function_name_(rhs.function_name_)
481  , fields_(rhs.fields_)
482  , col_inputs_(rhs.col_inputs_)
483  , table_func_inputs_(copyRexScalars(rhs.table_func_inputs_))
484  , target_exprs_(copyRexScalars(rhs.target_exprs_)) {
485  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
486  for (size_t i = 0; i < table_func_inputs_.size(); ++i) {
487  old_to_new_input.emplace(rhs.table_func_inputs_[i].get(),
488  table_func_inputs_[i].get());
489  }
490  for (auto& target : col_inputs_) {
491  auto target_it = old_to_new_input.find(target);
492  CHECK(target_it != old_to_new_input.end());
493  target = target_it->second;
494  }
495 }
496 
497 namespace std {
498 template <>
499 struct hash<std::pair<const RelAlgNode*, int>> {
500  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
501  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
502  return static_cast<int64_t>(*ptr_val) ^ input_col.second;
503  }
504 };
505 } // namespace std
506 
507 namespace {
508 
509 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
510  const size_t which_col) {
511  std::set<std::pair<const RelAlgNode*, int>> work_set;
512  auto walker = node;
513  auto curr_col = which_col;
514  while (true) {
515  work_set.insert(std::make_pair(walker, curr_col));
516  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
517  break;
518  }
519  CHECK_EQ(size_t(1), walker->inputCount());
520  auto only_source = walker->getInput(0);
521  if (auto project = dynamic_cast<const RelProject*>(walker)) {
522  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
523  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
524  if (join_source) {
525  CHECK_EQ(size_t(2), join_source->inputCount());
526  auto lhs = join_source->getInput(0);
527  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
528  join_source->getInput(1) == input->getSourceNode());
529  } else {
530  CHECK_EQ(input->getSourceNode(), only_source);
531  }
532  curr_col = input->getIndex();
533  } else {
534  break;
535  }
536  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
537  if (curr_col >= aggregate->getGroupByCount()) {
538  break;
539  }
540  }
541  walker = only_source;
542  }
543  return work_set;
544 }
545 
546 } // namespace
547 
548 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
549  if (collation_.size() != that.collation_.size()) {
550  return false;
551  }
552 
553  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
554  auto this_sort_key = collation_[i];
555  auto that_sort_key = that.collation_[i];
556  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
557  return false;
558  }
559  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
560  return false;
561  }
562  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
563  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
564  std::vector<std::pair<const RelAlgNode*, int>> intersect;
565  std::set_intersection(this_equiv_keys.begin(),
566  this_equiv_keys.end(),
567  that_equiv_keys.begin(),
568  that_equiv_keys.end(),
569  std::back_inserter(intersect));
570  if (intersect.empty()) {
571  return false;
572  }
573  }
574  return true;
575 }
576 
577 // class RelLogicalUnion methods
578 
580  : RelAlgNode(std::move(inputs)), is_all_(is_all) {
581  if (!g_enable_union) {
582  throw QueryNotSupported(
583  "UNION is not supported yet. There is an experimental enable-union option "
584  "available to enable UNION ALL queries.");
585  }
586  CHECK_EQ(2u, inputs_.size());
587  if (!is_all_) {
588  throw QueryNotSupported("UNION without ALL is not supported yet.");
589  }
590 }
591 
592 size_t RelLogicalUnion::size() const {
593  return inputs_.front()->size();
594 }
595 
596 std::string RelLogicalUnion::toString() const {
597  return cat(::typeName(this), "(is_all(", is_all_, "))");
598 }
599 
600 std::string RelLogicalUnion::getFieldName(const size_t i) const {
601  if (auto const* input = dynamic_cast<RelCompound const*>(inputs_[0].get())) {
602  return input->getFieldName(i);
603  } else if (auto const* input = dynamic_cast<RelProject const*>(inputs_[0].get())) {
604  return input->getFieldName(i);
605  } else if (auto const* input = dynamic_cast<RelLogicalUnion const*>(inputs_[0].get())) {
606  return input->getFieldName(i);
607  } else if (auto const* input = dynamic_cast<RelAggregate const*>(inputs_[0].get())) {
608  return input->getFieldName(i);
609  } else if (auto const* input = dynamic_cast<RelScan const*>(inputs_[0].get())) {
610  return input->getFieldName(i);
611  } else if (auto const* input =
612  dynamic_cast<RelTableFunction const*>(inputs_[0].get())) {
613  return input->getFieldName(i);
614  }
615  UNREACHABLE() << "Unhandled input type: " << inputs_.front()->toString();
616  return {};
617 }
618 
620  std::vector<TargetMetaInfo> const& tmis0 = inputs_[0]->getOutputMetainfo();
621  std::vector<TargetMetaInfo> const& tmis1 = inputs_[1]->getOutputMetainfo();
622  if (tmis0.size() != tmis1.size()) {
623  VLOG(2) << "tmis0.size() = " << tmis0.size() << " != " << tmis1.size()
624  << " = tmis1.size()";
625  throw std::runtime_error("Subqueries of a UNION must have matching data types.");
626  }
627  for (size_t i = 0; i < tmis0.size(); ++i) {
628  if (tmis0[i].get_type_info() != tmis1[i].get_type_info()) {
629  SQLTypeInfo const& ti0 = tmis0[i].get_type_info();
630  SQLTypeInfo const& ti1 = tmis1[i].get_type_info();
631  VLOG(2) << "Types do not match for UNION:\n tmis0[" << i
632  << "].get_type_info().to_string() = " << ti0.to_string() << "\n tmis1["
633  << i << "].get_type_info().to_string() = " << ti1.to_string();
634  if (ti0.is_dict_encoded_string() && ti1.is_dict_encoded_string() &&
635  ti0.get_comp_param() != ti1.get_comp_param()) {
636  throw std::runtime_error(
637  "Taking the UNION of different text-encoded dictionaries is not yet "
638  "supported. This may be resolved by using shared dictionaries. For example, "
639  "by making one a shared dictionary reference to the other.");
640  } else {
641  throw std::runtime_error(
642  "Subqueries of a UNION must have the exact same data types.");
643  }
644  }
645  }
646 }
647 
648 // Rest of code requires a raw pointer, but RexInput object needs to live somewhere.
650  size_t input_idx) const {
651  if (auto const* rex_input_ptr = dynamic_cast<RexInput const*>(rex_scalar)) {
652  RexInput rex_input(*rex_input_ptr);
653  rex_input.setSourceNode(getInput(input_idx));
654  scalar_exprs_.emplace_back(std::make_shared<RexInput const>(std::move(rex_input)));
655  return scalar_exprs_.back().get();
656  }
657  return rex_scalar;
658 }
659 
660 namespace {
661 
662 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
663  const auto& id = field(ra_node, "id");
664  return std::stoi(json_str(id));
665 }
666 
667 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
668  rapidjson::StringBuffer buffer;
669  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
670  node.Accept(writer);
671  return buffer.GetString();
672 }
673 
674 // The parse_* functions below de-serialize expressions as they come from Calcite.
675 // RelAlgDagBuilder will take care of making the representation easy to
676 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
677 
678 std::unique_ptr<RexAbstractInput> parse_abstract_input(
679  const rapidjson::Value& expr) noexcept {
680  const auto& input = field(expr, "input");
681  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
682 }
683 
684 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
685  CHECK(expr.IsObject());
686  const auto& literal = field(expr, "literal");
687  const auto type = to_sql_type(json_str(field(expr, "type")));
688  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
689  const auto scale = json_i64(field(expr, "scale"));
690  const auto precision = json_i64(field(expr, "precision"));
691  const auto type_scale = json_i64(field(expr, "type_scale"));
692  const auto type_precision = json_i64(field(expr, "type_precision"));
693  if (literal.IsNull()) {
694  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
695  }
696  switch (type) {
697  case kINT:
698  case kBIGINT:
699  case kDECIMAL:
700  case kINTERVAL_DAY_TIME:
702  case kTIME:
703  case kTIMESTAMP:
704  case kDATE:
705  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
706  type,
707  target_type,
708  scale,
709  precision,
710  type_scale,
711  type_precision));
712  case kDOUBLE: {
713  if (literal.IsDouble()) {
714  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
715  type,
716  target_type,
717  scale,
718  precision,
719  type_scale,
720  type_precision));
721  }
722  CHECK(literal.IsInt64());
723  return std::unique_ptr<RexLiteral>(
724  new RexLiteral(static_cast<double>(json_i64(literal)),
725  type,
726  target_type,
727  scale,
728  precision,
729  type_scale,
730  type_precision));
731  }
732  case kTEXT:
733  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
734  type,
735  target_type,
736  scale,
737  precision,
738  type_scale,
739  type_precision));
740  case kBOOLEAN:
741  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
742  type,
743  target_type,
744  scale,
745  precision,
746  type_scale,
747  type_precision));
748  case kNULLT:
749  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
750  default:
751  CHECK(false);
752  }
753  CHECK(false);
754  return nullptr;
755 }
756 
757 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
759  RelAlgDagBuilder& root_dag_builder);
760 
761 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
762  if (type_obj.IsArray()) {
763  throw QueryNotSupported("Composite types are not currently supported.");
764  }
765  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
766  << json_node_to_string(type_obj);
767  const auto type = to_sql_type(json_str(field(type_obj, "type")));
768  const auto nullable = json_bool(field(type_obj, "nullable"));
769  const auto precision_it = type_obj.FindMember("precision");
770  const int precision =
771  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
772  const auto scale_it = type_obj.FindMember("scale");
773  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
774  SQLTypeInfo ti(type, !nullable);
775  ti.set_precision(precision);
776  ti.set_scale(scale);
777  return ti;
778 }
779 
780 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
781  const rapidjson::Value& arr,
783  RelAlgDagBuilder& root_dag_builder) {
784  std::vector<std::unique_ptr<const RexScalar>> exprs;
785  for (auto it = arr.Begin(); it != arr.End(); ++it) {
786  exprs.emplace_back(parse_scalar_expr(*it, cat, root_dag_builder));
787  }
788  return exprs;
789 }
790 
792  if (name == "ROW_NUMBER") {
794  }
795  if (name == "RANK") {
797  }
798  if (name == "DENSE_RANK") {
800  }
801  if (name == "PERCENT_RANK") {
803  }
804  if (name == "CUME_DIST") {
806  }
807  if (name == "NTILE") {
809  }
810  if (name == "LAG") {
812  }
813  if (name == "LEAD") {
815  }
816  if (name == "FIRST_VALUE") {
818  }
819  if (name == "LAST_VALUE") {
821  }
822  if (name == "AVG") {
824  }
825  if (name == "MIN") {
827  }
828  if (name == "MAX") {
830  }
831  if (name == "SUM") {
833  }
834  if (name == "COUNT") {
836  }
837  if (name == "$SUM0") {
839  }
840  throw std::runtime_error("Unsupported window function: " + name);
841 }
842 
843 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
844  const rapidjson::Value& arr,
846  RelAlgDagBuilder& root_dag_builder) {
847  std::vector<std::unique_ptr<const RexScalar>> exprs;
848  for (auto it = arr.Begin(); it != arr.End(); ++it) {
849  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), cat, root_dag_builder));
850  }
851  return exprs;
852 }
853 
854 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
855  return json_str(field(collation, "direction")) == std::string("DESCENDING")
858 }
859 
860 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
861  return json_str(field(collation, "nulls")) == std::string("FIRST")
864 }
865 
866 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
868  RelAlgDagBuilder& root_dag_builder) {
869  std::vector<SortField> collation;
870  size_t field_idx = 0;
871  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
872  const auto sort_dir = parse_sort_direction(*it);
873  const auto null_pos = parse_nulls_position(*it);
874  collation.emplace_back(field_idx, sort_dir, null_pos);
875  }
876  return collation;
877 }
878 
880  const rapidjson::Value& window_bound_obj,
882  RelAlgDagBuilder& root_dag_builder) {
883  CHECK(window_bound_obj.IsObject());
885  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
886  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
887  window_bound.following = json_bool(field(window_bound_obj, "following"));
888  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
889  const auto& offset_field = field(window_bound_obj, "offset");
890  if (offset_field.IsObject()) {
891  window_bound.offset = parse_scalar_expr(offset_field, cat, root_dag_builder);
892  } else {
893  CHECK(offset_field.IsNull());
894  }
895  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
896  return window_bound;
897 }
898 
899 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
901  RelAlgDagBuilder& root_dag_builder) {
902  const auto& operands = field(expr, "operands");
903  CHECK(operands.IsArray());
904  CHECK_GE(operands.Size(), unsigned(0));
905  const auto& subquery_ast = field(expr, "subquery");
906 
907  RelAlgDagBuilder subquery_dag(root_dag_builder, subquery_ast, cat, nullptr);
908  auto subquery = std::make_shared<RexSubQuery>(subquery_dag.getRootNodeShPtr());
909  root_dag_builder.registerSubquery(subquery);
910  return subquery->deepCopy();
911 }
912 
913 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
915  RelAlgDagBuilder& root_dag_builder) {
916  const auto op_name = json_str(field(expr, "op"));
917  const bool is_quantifier =
918  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
919  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
920  const auto& operators_json_arr = field(expr, "operands");
921  CHECK(operators_json_arr.IsArray());
922  auto operands = parse_expr_array(operators_json_arr, cat, root_dag_builder);
923  const auto type_it = expr.FindMember("type");
924  CHECK(type_it != expr.MemberEnd());
925  auto ti = parse_type(type_it->value);
926  if (op == kIN && expr.HasMember("subquery")) {
927  auto subquery = parse_subquery(expr, cat, root_dag_builder);
928  operands.emplace_back(std::move(subquery));
929  }
930  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
931  const auto& partition_keys_arr = field(expr, "partition_keys");
932  auto partition_keys = parse_expr_array(partition_keys_arr, cat, root_dag_builder);
933  const auto& order_keys_arr = field(expr, "order_keys");
934  auto order_keys = parse_window_order_exprs(order_keys_arr, cat, root_dag_builder);
935  const auto collation =
936  parse_window_order_collation(order_keys_arr, cat, root_dag_builder);
937  const auto kind = parse_window_function_kind(op_name);
938  const auto lower_bound =
939  parse_window_bound(field(expr, "lower_bound"), cat, root_dag_builder);
940  const auto upper_bound =
941  parse_window_bound(field(expr, "upper_bound"), cat, root_dag_builder);
942  bool is_rows = json_bool(field(expr, "is_rows"));
943  ti.set_notnull(false);
944  return std::make_unique<RexWindowFunctionOperator>(kind,
945  operands,
946  partition_keys,
947  order_keys,
948  collation,
949  lower_bound,
950  upper_bound,
951  is_rows,
952  ti);
953  }
954  return std::unique_ptr<RexOperator>(op == kFUNCTION
955  ? new RexFunctionOperator(op_name, operands, ti)
956  : new RexOperator(op, operands, ti));
957 }
958 
959 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr,
961  RelAlgDagBuilder& root_dag_builder) {
962  const auto& operands = field(expr, "operands");
963  CHECK(operands.IsArray());
964  CHECK_GE(operands.Size(), unsigned(2));
965  std::unique_ptr<const RexScalar> else_expr;
966  std::vector<
967  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
968  expr_pair_list;
969  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
970  auto when_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
971  if (operands_it == operands.End()) {
972  else_expr = std::move(when_expr);
973  break;
974  }
975  auto then_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
976  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
977  }
978  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
979 }
980 
981 std::vector<std::string> strings_from_json_array(
982  const rapidjson::Value& json_str_arr) noexcept {
983  CHECK(json_str_arr.IsArray());
984  std::vector<std::string> fields;
985  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
986  ++json_str_arr_it) {
987  CHECK(json_str_arr_it->IsString());
988  fields.emplace_back(json_str_arr_it->GetString());
989  }
990  return fields;
991 }
992 
993 std::vector<size_t> indices_from_json_array(
994  const rapidjson::Value& json_idx_arr) noexcept {
995  CHECK(json_idx_arr.IsArray());
996  std::vector<size_t> indices;
997  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
998  ++json_idx_arr_it) {
999  CHECK(json_idx_arr_it->IsInt());
1000  CHECK_GE(json_idx_arr_it->GetInt(), 0);
1001  indices.emplace_back(json_idx_arr_it->GetInt());
1002  }
1003  return indices;
1004 }
1005 
1006 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
1007  const auto agg = to_agg_kind(json_str(field(expr, "agg")));
1008  const auto distinct = json_bool(field(expr, "distinct"));
1009  const auto agg_ti = parse_type(field(expr, "type"));
1010  const auto operands = indices_from_json_array(field(expr, "operands"));
1011  if (operands.size() > 1 && (operands.size() != 2 || agg != kAPPROX_COUNT_DISTINCT)) {
1012  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
1013  }
1014  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
1015 }
1016 
1017 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1019  RelAlgDagBuilder& root_dag_builder) {
1020  CHECK(expr.IsObject());
1021  if (expr.IsObject() && expr.HasMember("input")) {
1022  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
1023  }
1024  if (expr.IsObject() && expr.HasMember("literal")) {
1025  return std::unique_ptr<const RexScalar>(parse_literal(expr));
1026  }
1027  if (expr.IsObject() && expr.HasMember("op")) {
1028  const auto op_str = json_str(field(expr, "op"));
1029  if (op_str == std::string("CASE")) {
1030  return std::unique_ptr<const RexScalar>(parse_case(expr, cat, root_dag_builder));
1031  }
1032  if (op_str == std::string("$SCALAR_QUERY")) {
1033  return std::unique_ptr<const RexScalar>(
1034  parse_subquery(expr, cat, root_dag_builder));
1035  }
1036  return std::unique_ptr<const RexScalar>(parse_operator(expr, cat, root_dag_builder));
1037  }
1038  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
1039  " not supported");
1040 }
1041 
1042 JoinType to_join_type(const std::string& join_type_name) {
1043  if (join_type_name == "inner") {
1044  return JoinType::INNER;
1045  }
1046  if (join_type_name == "left") {
1047  return JoinType::LEFT;
1048  }
1049  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
1050 }
1051 
1052 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
1053 
1054 std::unique_ptr<const RexOperator> disambiguate_operator(
1055  const RexOperator* rex_operator,
1056  const RANodeOutput& ra_output) noexcept {
1057  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1058  for (size_t i = 0; i < rex_operator->size(); ++i) {
1059  auto operand = rex_operator->getOperand(i);
1060  if (dynamic_cast<const RexSubQuery*>(operand)) {
1061  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1062  } else {
1063  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
1064  }
1065  }
1066  const auto rex_window_function_operator =
1067  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1068  if (rex_window_function_operator) {
1069  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
1070  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1071  for (const auto& partition_key : partition_keys) {
1072  disambiguated_partition_keys.emplace_back(
1073  disambiguate_rex(partition_key.get(), ra_output));
1074  }
1075  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1076  const auto& order_keys = rex_window_function_operator->getOrderKeys();
1077  for (const auto& order_key : order_keys) {
1078  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
1079  }
1080  return rex_window_function_operator->disambiguatedOperands(
1081  disambiguated_operands,
1082  disambiguated_partition_keys,
1083  disambiguated_order_keys,
1084  rex_window_function_operator->getCollation());
1085  }
1086  return rex_operator->getDisambiguated(disambiguated_operands);
1087 }
1088 
1089 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
1090  const RANodeOutput& ra_output) {
1091  std::vector<
1092  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1093  disambiguated_expr_pair_list;
1094  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1095  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
1096  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
1097  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1098  std::move(disambiguated_then));
1099  }
1100  std::unique_ptr<const RexScalar> disambiguated_else{
1101  disambiguate_rex(rex_case->getElse(), ra_output)};
1102  return std::unique_ptr<const RexCase>(
1103  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1104 }
1105 
1106 // The inputs used by scalar expressions are given as indices in the serialized
1107 // representation of the query. This is hard to navigate; make the relationship
1108 // explicit by creating RexInput expressions which hold a pointer to the source
1109 // relational algebra node and the index relative to the output of that node.
1110 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
1111  const RANodeOutput& ra_output) {
1112  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
1113  if (rex_abstract_input) {
1114  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1115  return std::unique_ptr<const RexInput>(
1116  new RexInput(ra_output[rex_abstract_input->getIndex()]));
1117  }
1118  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
1119  if (rex_operator) {
1120  return disambiguate_operator(rex_operator, ra_output);
1121  }
1122  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
1123  if (rex_case) {
1124  return disambiguate_case(rex_case, ra_output);
1125  }
1126  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar);
1127  CHECK(rex_literal);
1128  return std::unique_ptr<const RexLiteral>(new RexLiteral(*rex_literal));
1129 }
1130 
1131 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
1132  CHECK_EQ(size_t(1), project_node->inputCount());
1133  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1134  for (size_t i = 0; i < project_node->size(); ++i) {
1135  const auto projected_expr = project_node->getProjectAt(i);
1136  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1137  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1138  } else {
1139  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
1140  }
1141  }
1142  project_node->setExpressions(disambiguated_exprs);
1143 }
1144 
1146  const RANodeOutput& input) noexcept {
1147  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1148  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1149  const auto target_expr = table_func_node->getTableFuncInputAt(i);
1150  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1151  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1152  } else {
1153  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
1154  }
1155  }
1156  table_func_node->setTableFuncInputs(disambiguated_exprs);
1157 }
1158 
1159 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1160  for (auto ra_node : nodes) {
1161  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
1162  if (filter_node) {
1163  CHECK_EQ(size_t(1), filter_node->inputCount());
1164  auto disambiguated_condition = disambiguate_rex(
1165  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
1166  filter_node->setCondition(disambiguated_condition);
1167  continue;
1168  }
1169  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
1170  if (join_node) {
1171  CHECK_EQ(size_t(2), join_node->inputCount());
1172  auto disambiguated_condition =
1173  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
1174  join_node->setCondition(disambiguated_condition);
1175  continue;
1176  }
1177  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
1178  if (project_node) {
1179  bind_project_to_input(project_node.get(),
1180  get_node_output(project_node->getInput(0)));
1181  continue;
1182  }
1183  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1184  if (table_func_node) {
1185  /*
1186  Collect all inputs from table function input (non-literal)
1187  arguments.
1188  */
1189  RANodeOutput input;
1190  input.reserve(table_func_node->inputCount());
1191  for (size_t i = 0; i < table_func_node->inputCount(); i++) {
1192  auto node_output = get_node_output(table_func_node->getInput(i));
1193  input.insert(input.end(), node_output.begin(), node_output.end());
1194  }
1195  bind_table_func_to_input(table_func_node.get(), input);
1196  }
1197  }
1198 }
1199 
1200 void handleQueryHint(const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1201  RelAlgDagBuilder* dag_builder) noexcept {
1202  Hints* hint_delivered = nullptr;
1203  for (auto node : nodes) {
1204  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1205  if (agg_node) {
1206  if (agg_node->hasDeliveredHint()) {
1207  hint_delivered = agg_node->getDeliveredHints();
1208  break;
1209  }
1210  }
1211  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
1212  if (project_node) {
1213  if (project_node->hasDeliveredHint()) {
1214  hint_delivered = project_node->getDeliveredHints();
1215  break;
1216  }
1217  }
1218  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
1219  if (scan_node) {
1220  if (scan_node->hasDeliveredHint()) {
1221  hint_delivered = scan_node->getDeliveredHints();
1222  break;
1223  }
1224  }
1225  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
1226  if (join_node) {
1227  if (join_node->hasDeliveredHint()) {
1228  hint_delivered = join_node->getDeliveredHints();
1229  break;
1230  }
1231  }
1232  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
1233  if (compound_node) {
1234  if (compound_node->hasDeliveredHint()) {
1235  hint_delivered = compound_node->getDeliveredHints();
1236  break;
1237  }
1238  }
1239  }
1240  if (hint_delivered && !hint_delivered->empty()) {
1241  dag_builder->registerQueryHints(hint_delivered);
1242  }
1243 }
1244 
1245 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1246  for (auto node : nodes) {
1247  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1248  if (!agg_node || agg_node->getAggExprsCount()) {
1249  continue;
1250  }
1251  CHECK_EQ(size_t(1), node->inputCount());
1252  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1253  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1254  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1255  agg_node->markAsNop();
1256  }
1257  }
1258 }
1259 
1260 namespace {
1261 
1262 std::vector<const Rex*> reproject_targets(
1263  const RelProject* simple_project,
1264  const std::vector<const Rex*>& target_exprs) noexcept {
1265  std::vector<const Rex*> result;
1266  for (size_t i = 0; i < simple_project->size(); ++i) {
1267  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
1268  CHECK(input_rex);
1269  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1270  result.push_back(target_exprs[input_rex->getIndex()]);
1271  }
1272  return result;
1273 }
1274 
1281  public:
1283  const RelAlgNode* node_to_keep,
1284  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1285  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
1286 
1287  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
1288  RetType visitInput(const RexInput* input) const final {
1289  if (input->getSourceNode() == node_to_keep_) {
1290  const auto index = input->getIndex();
1291  CHECK_LT(index, scalar_sources_.size());
1292  return visit(scalar_sources_[index].get());
1293  } else {
1294  return input->deepCopy();
1295  }
1296  }
1297 
1298  private:
1300  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
1301 };
1302 
1303 } // namespace
1304 
1305 void create_compound(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1306  const std::vector<size_t>& pattern) noexcept {
1307  CHECK_GE(pattern.size(), size_t(2));
1308  CHECK_LE(pattern.size(), size_t(4));
1309 
1310  std::unique_ptr<const RexScalar> filter_rex;
1311  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1312  size_t groupby_count{0};
1313  std::vector<std::string> fields;
1314  std::vector<const RexAgg*> agg_exprs;
1315  std::vector<const Rex*> target_exprs;
1316  bool first_project{true};
1317  bool is_agg{false};
1318  RelAlgNode* last_node{nullptr};
1319 
1320  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1321 
1322  for (const auto node_idx : pattern) {
1323  const auto ra_node = nodes[node_idx];
1324  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1325  if (ra_filter) {
1326  CHECK(!filter_rex);
1327  filter_rex.reset(ra_filter->getAndReleaseCondition());
1328  CHECK(filter_rex);
1329  last_node = ra_node.get();
1330  continue;
1331  }
1332  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1333  if (ra_project) {
1334  fields = ra_project->getFields();
1335  manipulation_target = ra_project;
1336 
1337  if (first_project) {
1338  CHECK_EQ(size_t(1), ra_project->inputCount());
1339  // Rebind the input of the project to the input of the filter itself
1340  // since we know that we'll evaluate the filter on the fly, with no
1341  // intermediate buffer.
1342  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1343  if (filter_input) {
1344  CHECK_EQ(size_t(1), filter_input->inputCount());
1345  bind_project_to_input(ra_project.get(),
1346  get_node_output(filter_input->getInput(0)));
1347  }
1348  scalar_sources = ra_project->getExpressionsAndRelease();
1349  for (const auto& scalar_expr : scalar_sources) {
1350  target_exprs.push_back(scalar_expr.get());
1351  }
1352  first_project = false;
1353  } else {
1354  if (ra_project->isSimple()) {
1355  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1356  } else {
1357  // TODO(adb): This is essentially a more general case of simple project, we
1358  // could likely merge the two
1359  std::vector<const Rex*> result;
1360  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1361  for (size_t i = 0; i < ra_project->size(); ++i) {
1362  const auto rex = ra_project->getProjectAt(i);
1363  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1364  const auto index = rex_input->getIndex();
1365  CHECK_LT(index, target_exprs.size());
1366  result.push_back(target_exprs[index]);
1367  } else {
1368  scalar_sources.push_back(visitor.visit(rex));
1369  result.push_back(scalar_sources.back().get());
1370  }
1371  }
1372  target_exprs = result;
1373  }
1374  }
1375  last_node = ra_node.get();
1376  continue;
1377  }
1378  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1379  if (ra_aggregate) {
1380  is_agg = true;
1381  fields = ra_aggregate->getFields();
1382  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1383  groupby_count = ra_aggregate->getGroupByCount();
1384  decltype(target_exprs){}.swap(target_exprs);
1385  CHECK_LE(groupby_count, scalar_sources.size());
1386  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1387  const auto rex_ref = new RexRef(group_idx + 1);
1388  target_exprs.push_back(rex_ref);
1389  scalar_sources.emplace_back(rex_ref);
1390  }
1391  for (const auto rex_agg : agg_exprs) {
1392  target_exprs.push_back(rex_agg);
1393  }
1394  last_node = ra_node.get();
1395  continue;
1396  }
1397  }
1398 
1399  auto compound_node =
1400  std::make_shared<RelCompound>(filter_rex,
1401  target_exprs,
1402  groupby_count,
1403  agg_exprs,
1404  fields,
1405  scalar_sources,
1406  is_agg,
1407  manipulation_target->isUpdateViaSelect(),
1408  manipulation_target->isDeleteViaSelect(),
1409  manipulation_target->isVarlenUpdateRequired(),
1410  manipulation_target->getModifiedTableDescriptor(),
1411  manipulation_target->getTargetColumns());
1412  auto old_node = nodes[pattern.back()];
1413  nodes[pattern.back()] = compound_node;
1414  auto first_node = nodes[pattern.front()];
1415  CHECK_EQ(size_t(1), first_node->inputCount());
1416  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1417  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1418  nodes[pattern[i]].reset();
1419  }
1420  for (auto node : nodes) {
1421  if (!node) {
1422  continue;
1423  }
1424  node->replaceInput(old_node, compound_node);
1425  }
1426 }
1427 
1428 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1429  using ElementType = std::shared_ptr<RelAlgNode>;
1430  using Super = std::vector<ElementType>::const_iterator;
1431  using Container = std::vector<ElementType>;
1432 
1433  public:
1434  enum class AdvancingMode { DUChain, InOrder };
1435 
1436  explicit RANodeIterator(const Container& nodes)
1437  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1438  size_t non_zero_count = 0;
1439  for (const auto& node : nodes) {
1440  if (node) {
1441  ++non_zero_count;
1442  }
1443  }
1445  }()) {}
1446 
1447  explicit operator size_t() {
1448  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1449  }
1450 
1451  RANodeIterator operator++() = delete;
1452 
1453  void advance(AdvancingMode mode) {
1454  Super& super = *this;
1455  switch (mode) {
1456  case AdvancingMode::DUChain: {
1457  size_t use_count = 0;
1458  Super only_use = owner_.end();
1459  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1460  if (!*nodeIt) {
1461  continue;
1462  }
1463  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1464  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1465  ++use_count;
1466  if (1 == use_count) {
1467  only_use = nodeIt;
1468  } else {
1469  super = owner_.end();
1470  return;
1471  }
1472  }
1473  }
1474  }
1475  super = only_use;
1476  break;
1477  }
1478  case AdvancingMode::InOrder:
1479  for (size_t i = 0; i != owner_.size(); ++i) {
1480  if (!visited_.count(i)) {
1481  super = owner_.begin();
1482  std::advance(super, i);
1483  return;
1484  }
1485  }
1486  super = owner_.end();
1487  break;
1488  default:
1489  CHECK(false);
1490  }
1491  }
1492 
1493  bool allVisited() { return visited_.size() == nodeCount_; }
1494 
1496  visited_.insert(size_t(*this));
1497  Super& super = *this;
1498  return *super;
1499  }
1500 
1501  const ElementType* operator->() { return &(operator*()); }
1502 
1503  private:
1505  const size_t nodeCount_;
1506  std::unordered_set<size_t> visited_;
1507 };
1508 
1509 namespace {
1510 
1511 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1512  const size_t index,
1513  const bool first_rex_is_input) {
1514  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1515  if (index == 0 && agg_node->getGroupByCount() > 0) {
1516  return true;
1517  } else {
1518  // Is an aggregated target, only allow the project to be elided if the aggregate
1519  // target is simply passed through (i.e. if the top level expression attached to
1520  // the project node is a RexInput expression)
1521  return first_rex_is_input;
1522  }
1523  }
1524  return first_rex_is_input;
1525 }
1526 
1533  public:
1534  bool visitInput(const RexInput* input) const final {
1535  // The top level expression node is checked before we apply the visitor. If we get
1536  // here, this input rex is a child of another rex node, and we handle the can be
1537  // coalesced check slightly differently
1538  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1539  }
1540 
1541  bool visitLiteral(const RexLiteral*) const final { return false; }
1542 
1543  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1544 
1545  bool visitRef(const RexRef*) const final { return false; }
1546 
1547  protected:
1548  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1549  return aggregate && next_result;
1550  }
1551 
1552  bool defaultResult() const final { return true; }
1553 };
1554 
1555 // Detect the window function SUM pattern: CASE WHEN COUNT() > 0 THEN SUM ELSE 0
1557  const auto case_operator = dynamic_cast<const RexCase*>(rex);
1558  if (case_operator && case_operator->branchCount() == 1) {
1559  const auto then_window =
1560  dynamic_cast<const RexWindowFunctionOperator*>(case_operator->getThen(0));
1561  if (then_window && then_window->getKind() == SqlWindowFunctionKind::SUM_INTERNAL) {
1562  return true;
1563  }
1564  }
1565  return false;
1566 }
1567 
1568 // Detect both window function operators and window function operators embedded in case
1569 // statements (for null handling)
1571  if (dynamic_cast<const RexWindowFunctionOperator*>(rex)) {
1572  return true;
1573  }
1574 
1575  // unwrap from casts, if they exist
1576  const auto rex_cast = dynamic_cast<const RexOperator*>(rex);
1577  if (rex_cast && rex_cast->getOperator() == kCAST) {
1578  CHECK_EQ(rex_cast->size(), size_t(1));
1579  return is_window_function_operator(rex_cast->getOperand(0));
1580  }
1581 
1582  if (is_window_function_sum(rex)) {
1583  return true;
1584  }
1585  // Check for Window Function AVG:
1586  // (CASE WHEN count > 0 THEN sum ELSE 0) / COUNT
1587  const RexOperator* divide_operator = dynamic_cast<const RexOperator*>(rex);
1588  if (divide_operator && divide_operator->getOperator() == kDIVIDE) {
1589  CHECK_EQ(divide_operator->size(), size_t(2));
1590  const auto case_operator =
1591  dynamic_cast<const RexCase*>(divide_operator->getOperand(0));
1592  const auto second_window =
1593  dynamic_cast<const RexWindowFunctionOperator*>(divide_operator->getOperand(1));
1594  if (case_operator && second_window &&
1595  second_window->getKind() == SqlWindowFunctionKind::COUNT) {
1596  if (is_window_function_sum(case_operator)) {
1597  return true;
1598  }
1599  }
1600  }
1601  return false;
1602 }
1603 
1604 } // namespace
1605 
1606 void coalesce_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1607  const std::vector<const RelAlgNode*>& left_deep_joins) {
1608  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
1609  std::vector<size_t> crt_pattern;
1610  CoalesceState crt_state{CoalesceState::Initial};
1611 
1612  auto reset_state = [&crt_pattern, &crt_state]() {
1613  crt_state = CoalesceState::Initial;
1614  decltype(crt_pattern)().swap(crt_pattern);
1615  };
1616 
1617  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
1618  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
1619  switch (crt_state) {
1620  case CoalesceState::Initial: {
1621  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
1622  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
1623  left_deep_joins.end()) {
1624  crt_pattern.push_back(size_t(nodeIt));
1625  crt_state = CoalesceState::Filter;
1626  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1627  } else if (std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1628  crt_pattern.push_back(size_t(nodeIt));
1629  crt_state = CoalesceState::FirstProject;
1630  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1631  } else {
1632  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1633  }
1634  break;
1635  }
1636  case CoalesceState::Filter: {
1637  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1638  if (project_node->hasWindowFunctionExpr()) {
1639  reset_state();
1640  break;
1641  }
1642  crt_pattern.push_back(size_t(nodeIt));
1643  crt_state = CoalesceState::FirstProject;
1644  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1645  } else {
1646  reset_state();
1647  }
1648  break;
1649  }
1650  case CoalesceState::FirstProject: {
1651  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
1652  crt_pattern.push_back(size_t(nodeIt));
1653  crt_state = CoalesceState::Aggregate;
1654  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1655  } else {
1656  if (crt_pattern.size() >= 2) {
1657  create_compound(nodes, crt_pattern);
1658  }
1659  reset_state();
1660  }
1661  break;
1662  }
1663  case CoalesceState::Aggregate: {
1664  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1665  // TODO(adb): overloading the simple project terminology again here
1666  bool is_simple_project{true};
1667  for (size_t i = 0; i < project_node->size(); i++) {
1668  const auto scalar_rex = project_node->getProjectAt(i);
1669  // If the top level scalar rex is an input node, we can bypass the visitor
1670  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
1672  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
1673  is_simple_project = false;
1674  break;
1675  }
1676  continue;
1677  }
1678  CoalesceSecondaryProjectVisitor visitor;
1679  if (!visitor.visit(project_node->getProjectAt(i))) {
1680  is_simple_project = false;
1681  break;
1682  }
1683  }
1684  if (is_simple_project) {
1685  crt_pattern.push_back(size_t(nodeIt));
1686  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1687  }
1688  }
1689  CHECK_GE(crt_pattern.size(), size_t(2));
1690  create_compound(nodes, crt_pattern);
1691  reset_state();
1692  break;
1693  }
1694  default:
1695  CHECK(false);
1696  }
1697  }
1698  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
1699  if (crt_pattern.size() >= 2) {
1700  create_compound(nodes, crt_pattern);
1701  }
1702  CHECK(!crt_pattern.empty());
1703  }
1704 }
1705 
1713 class WindowFunctionDetectionVisitor : public RexVisitor<const RexScalar*> {
1714  protected:
1715  // Detect embedded window function expressions in operators
1716  const RexScalar* visitOperator(const RexOperator* rex_operator) const final {
1717  if (is_window_function_operator(rex_operator)) {
1718  return rex_operator;
1719  }
1720 
1721  const size_t operand_count = rex_operator->size();
1722  for (size_t i = 0; i < operand_count; ++i) {
1723  const auto operand = rex_operator->getOperand(i);
1724  if (is_window_function_operator(operand)) {
1725  // Handle both RexWindowFunctionOperators and window functions built up from
1726  // multiple RexScalar objects (e.g. AVG)
1727  return operand;
1728  }
1729  const auto operandResult = visit(operand);
1730  if (operandResult) {
1731  return operandResult;
1732  }
1733  }
1734 
1735  return defaultResult();
1736  }
1737 
1738  // Detect embedded window function expressions in case statements. Note that this may
1739  // manifest as a nested case statement inside a top level case statement, as some
1740  // window functions (sum, avg) are represented as a case statement. Use the
1741  // is_window_function_operator helper to detect complete window function expressions.
1742  const RexScalar* visitCase(const RexCase* rex_case) const final {
1743  if (is_window_function_operator(rex_case)) {
1744  return rex_case;
1745  }
1746 
1747  auto result = defaultResult();
1748  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1749  const auto when = rex_case->getWhen(i);
1750  result = is_window_function_operator(when) ? when : visit(when);
1751  if (result) {
1752  return result;
1753  }
1754  const auto then = rex_case->getThen(i);
1755  result = is_window_function_operator(then) ? then : visit(then);
1756  if (result) {
1757  return result;
1758  }
1759  }
1760  if (rex_case->getElse()) {
1761  auto else_expr = rex_case->getElse();
1762  result = is_window_function_operator(else_expr) ? else_expr : visit(else_expr);
1763  }
1764  return result;
1765  }
1766 
1767  const RexScalar* aggregateResult(const RexScalar* const& aggregate,
1768  const RexScalar* const& next_result) const final {
1769  // all methods calling aggregate result should be overriden
1770  UNREACHABLE();
1771  return nullptr;
1772  }
1773 
1774  const RexScalar* defaultResult() const final { return nullptr; }
1775 };
1776 
1786  public:
1787  RexWindowFuncReplacementVisitor(std::unique_ptr<const RexScalar> replacement_rex)
1788  : replacement_rex_(std::move(replacement_rex)) {}
1789 
1790  ~RexWindowFuncReplacementVisitor() { CHECK(replacement_rex_ == nullptr); }
1791 
1792  protected:
1793  RetType visitOperator(const RexOperator* rex_operator) const final {
1794  if (should_replace_operand(rex_operator)) {
1795  return std::move(replacement_rex_);
1796  }
1797 
1798  const auto rex_window_function_operator =
1799  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1800  if (rex_window_function_operator) {
1801  // Deep copy the embedded window function operator
1802  return visitWindowFunctionOperator(rex_window_function_operator);
1803  }
1804 
1805  const size_t operand_count = rex_operator->size();
1806  std::vector<RetType> new_opnds;
1807  for (size_t i = 0; i < operand_count; ++i) {
1808  const auto operand = rex_operator->getOperand(i);
1809  if (should_replace_operand(operand)) {
1810  new_opnds.push_back(std::move(replacement_rex_));
1811  } else {
1812  new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
1813  }
1814  }
1815  return rex_operator->getDisambiguated(new_opnds);
1816  }
1817 
1818  RetType visitCase(const RexCase* rex_case) const final {
1819  if (should_replace_operand(rex_case)) {
1820  return std::move(replacement_rex_);
1821  }
1822 
1823  std::vector<std::pair<RetType, RetType>> new_pair_list;
1824  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1825  auto when_operand = rex_case->getWhen(i);
1826  auto then_operand = rex_case->getThen(i);
1827  new_pair_list.emplace_back(
1828  should_replace_operand(when_operand) ? std::move(replacement_rex_)
1829  : visit(when_operand),
1830  should_replace_operand(then_operand) ? std::move(replacement_rex_)
1831  : visit(then_operand));
1832  }
1833  auto new_else = should_replace_operand(rex_case->getElse())
1834  ? std::move(replacement_rex_)
1835  : visit(rex_case->getElse());
1836  return std::make_unique<RexCase>(new_pair_list, new_else);
1837  }
1838 
1839  private:
1840  bool should_replace_operand(const RexScalar* rex) const {
1841  return replacement_rex_ && is_window_function_operator(rex);
1842  }
1843 
1844  mutable std::unique_ptr<const RexScalar> replacement_rex_;
1845 };
1846 
1857  public:
1858  RexInputBackpropagationVisitor(RelProject* node) : node_(node) { CHECK(node_); }
1859 
1860  protected:
1861  RetType visitInput(const RexInput* rex_input) const final {
1862  if (rex_input->getSourceNode() != node_) {
1863  const auto cur_index = rex_input->getIndex();
1864  auto cur_source_node = rex_input->getSourceNode();
1865  std::string field_name = "";
1866  if (auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
1867  field_name = cur_project_node->getFieldName(cur_index);
1868  }
1869  node_->appendInput(field_name, rex_input->deepCopy());
1870  return std::make_unique<RexInput>(node_, node_->size() - 1);
1871  } else {
1872  return rex_input->deepCopy();
1873  }
1874  }
1875 
1876  private:
1877  mutable RelProject* node_;
1878 };
1879 
// NOTE(review): the opening line of this function's signature (return type and name) and
// the declaration of `visitor` used below are not present in this listing.
//
// For every RelProject in `nodes` that holds a window function nested inside a larger
// expression, splits the project in two: the original project is left computing the
// bare window function in that slot, and a new project inserted right after it computes
// the enclosing expression on top of the window function's output column. All nodes
// after the new project are rebound to read from it. `nodes` is rewritten in place.
1896  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
// Work on a list so a node can be inserted mid-iteration without invalidating iterators.
1897  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
1898 
1900  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
1901  const auto node = *node_itr;
1902  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
1903  if (!window_func_project_node) {
1904  continue;
1905  }
1906 
1907  // map scalar expression index in the project node to window function ptr
1908  std::unordered_map<size_t, const RexScalar*> embedded_window_function_expressions;
1909 
1910  // Iterate the target exprs of the project node and check for window function
1911  // expressions. If an embedded expression exists, save it in the
1912  // embedded_window_function_expressions map and split the expression into a window
1913  // function expression and a parent expression in a subsequent project node
1914  for (size_t i = 0; i < window_func_project_node->size(); i++) {
1915  const auto scalar_rex = window_func_project_node->getProjectAt(i);
1916  if (is_window_function_operator(scalar_rex)) {
1917  // top level window function exprs are fine
1918  continue;
1919  }
1920 
1921  if (const auto window_func_rex = visitor.visit(scalar_rex)) {
1922  const auto ret = embedded_window_function_expressions.insert(
1923  std::make_pair(i, window_func_rex));
1924  CHECK(ret.second);
1925  }
1926  }
1927 
1928  if (!embedded_window_function_expressions.empty()) {
// Expressions for the new (downstream) project, one per original expression.
1929  std::vector<std::unique_ptr<const RexScalar>> new_scalar_exprs;
1930 
// Take the expressions out of the project; they are set back via setExpressions() below.
1931  auto window_func_scalar_exprs =
1932  window_func_project_node->getExpressionsAndRelease();
1933  for (size_t rex_idx = 0; rex_idx < window_func_scalar_exprs.size(); ++rex_idx) {
1934  const auto embedded_window_func_expr_pair =
1935  embedded_window_function_expressions.find(rex_idx);
1936  if (embedded_window_func_expr_pair ==
1937  embedded_window_function_expressions.end()) {
// No embedded window function in this slot: the new project simply forwards the column.
1938  new_scalar_exprs.emplace_back(
1939  std::make_unique<const RexInput>(window_func_project_node.get(), rex_idx));
1940  } else {
1941  const auto window_func_rex_idx = embedded_window_func_expr_pair->first;
1942  CHECK_LT(window_func_rex_idx, window_func_scalar_exprs.size());
1943 
1944  const auto& window_func_rex = embedded_window_func_expr_pair->second;
1945 
// Copy the window function before its parent expression is overwritten further down
// (window_func_rex points into that parent expression).
1946  RexDeepCopyVisitor copier;
1947  auto window_func_rex_copy = copier.visit(window_func_rex);
1948 
1949  auto window_func_parent_expr =
1950  window_func_scalar_exprs[window_func_rex_idx].get();
1951 
1952  // Replace window func rex with an input rex
1953  auto window_func_result_input = std::make_unique<const RexInput>(
1954  window_func_project_node.get(), window_func_rex_idx);
1955  RexWindowFuncReplacementVisitor replacer(std::move(window_func_result_input));
1956  auto new_parent_rex = replacer.visit(window_func_parent_expr);
1957 
1958  // Put the parent expr in the new scalar exprs
1959  new_scalar_exprs.emplace_back(std::move(new_parent_rex));
1960 
1961  // Put the window func expr in cur scalar exprs
1962  window_func_scalar_exprs[window_func_rex_idx] = std::move(window_func_rex_copy);
1963  }
1964  }
1965 
1966  CHECK_EQ(window_func_scalar_exprs.size(), new_scalar_exprs.size());
1967  window_func_project_node->setExpressions(window_func_scalar_exprs);
1968 
1969  // Ensure any inputs from the node containing the expression (the "new" node)
1970  // exist on the window function project node, e.g. if we had a binary operation
1971  // involving an aggregate value or column not included in the top level
1972  // projection list.
1973  RexInputBackpropagationVisitor input_visitor(window_func_project_node.get());
1974  for (size_t i = 0; i < new_scalar_exprs.size(); i++) {
1975  if (dynamic_cast<const RexInput*>(new_scalar_exprs[i].get())) {
1976  // ignore top level inputs, these were copied directly from the previous
1977  // node
1978  continue;
1979  }
1980  new_scalar_exprs[i] = input_visitor.visit(new_scalar_exprs[i].get());
1981  }
1982 
1983  // Build the new project node and insert it into the list after the project node
1984  // containing the window function
1985  auto new_project =
1986  std::make_shared<RelProject>(new_scalar_exprs,
1987  window_func_project_node->getFields(),
1988  window_func_project_node);
1989  node_list.insert(std::next(node_itr), new_project);
1990 
1991  // Rebind all the following inputs
// Start two positions ahead so the freshly inserted project itself is not rebound.
1992  for (auto rebind_itr = std::next(node_itr, 2); rebind_itr != node_list.end();
1993  rebind_itr++) {
1994  (*rebind_itr)->replaceInput(window_func_project_node, new_project);
1995  }
1996  }
1997  }
1998  nodes.assign(node_list.begin(), node_list.end());
1999 }
2000 
2001 using RexInputSet = std::unordered_set<RexInput>;
2002 
// Visitor producing the set of RexInput values referenced by an expression.
// NOTE(review): traversal of non-input nodes comes from RexVisitor, which is not shown
// in this listing.
2003 class RexInputCollector : public RexVisitor<RexInputSet> {
2004  public:
// A single input yields a one-element set holding a copy of that input.
2005  RexInputSet visitInput(const RexInput* input) const override {
2006  return RexInputSet{*input};
2007  }
2008 
2009  protected:
// NOTE(review): the first line of this method's signature (which declares `aggregate`)
// is missing from this listing. The method returns the union of the two partial sets.
2011  const RexInputSet& next_result) const override {
2012  auto result = aggregate;
2013  result.insert(next_result.begin(), next_result.end());
2014  return result;
2015  }
2016 };
2017 
2025 void add_window_function_pre_project(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
2026  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2027 
2028  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2029  const auto node = *node_itr;
2030  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2031  if (!window_func_project_node) {
2032  continue;
2033  }
2034  if (!window_func_project_node->hasWindowFunctionExpr()) {
2035  // the first projection node in the query plan does not have a window function
2036  // expression -- this step is not requierd.
2037  return;
2038  }
2039 
2040  const auto prev_node_itr = std::prev(node_itr);
2041  const auto prev_node = *prev_node_itr;
2042  CHECK(prev_node);
2043 
2044  RexInputSet inputs;
2045  RexInputCollector input_collector;
2046  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2047  auto new_inputs = input_collector.visit(window_func_project_node->getProjectAt(i));
2048  inputs.insert(new_inputs.begin(), new_inputs.end());
2049  }
2050 
2051  // Note: Technically not required since we are mapping old inputs to new input
2052  // indices, but makes the re-mapping of inputs easier to follow.
2053  std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2054  std::sort(sorted_inputs.begin(),
2055  sorted_inputs.end(),
2056  [](const auto& a, const auto& b) { return a.getIndex() < b.getIndex(); });
2057 
2058  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2059  std::vector<std::string> fields;
2060  std::unordered_map<unsigned, unsigned> old_index_to_new_index;
2061  for (auto& input : sorted_inputs) {
2062  CHECK_EQ(input.getSourceNode(), prev_node.get());
2063  CHECK(old_index_to_new_index
2064  .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2065  .second);
2066  scalar_exprs.emplace_back(input.deepCopy());
2067  fields.emplace_back("");
2068  }
2069 
2070  auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2071  node_list.insert(node_itr, new_project);
2072  window_func_project_node->replaceInput(
2073  prev_node, new_project, old_index_to_new_index);
2074 
2075  break;
2076  }
2077 
2078  nodes.assign(node_list.begin(), node_list.end());
2079 }
2080 
2081 int64_t get_int_literal_field(const rapidjson::Value& obj,
2082  const char field[],
2083  const int64_t default_val) noexcept {
2084  const auto it = obj.FindMember(field);
2085  if (it == obj.MemberEnd()) {
2086  return default_val;
2087  }
2088  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
2089  CHECK_EQ(kDECIMAL, lit->getType());
2090  CHECK_EQ(unsigned(0), lit->getScale());
2091  CHECK_EQ(unsigned(0), lit->getTypeScale());
2092  return lit->getVal<int64_t>();
2093 }
2094 
2095 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
2096  const auto& inputs_json = field(node, "inputs");
2097  CHECK(inputs_json.IsArray() && !inputs_json.Size());
2098 }
2099 
// NOTE(review): the first line of this function's signature (return type, name and the
// `cat` parameter) is missing from this listing.
//
// Resolves the table referenced by a scan node: the node's "table" member must be a
// two-element array, and the second element is the name looked up in `cat`. The lookup
// result is CHECKed to be non-null before it is returned.
2101  const rapidjson::Value& scan_ra) {
2102  const auto& table_json = field(scan_ra, "table");
2103  CHECK(table_json.IsArray());
2104  CHECK_EQ(unsigned(2), table_json.Size());
2105  const auto td = cat.getMetadataForTable(table_json[1].GetString());
2106  CHECK(td);
2107  return td;
2108 }
2109 
2110 std::vector<std::string> getFieldNamesFromScanNode(const rapidjson::Value& scan_ra) {
2111  const auto& fields_json = field(scan_ra, "fieldNames");
2112  return strings_from_json_array(fields_json);
2113 }
2114 
2115 } // namespace
2116 
// NOTE(review): the signature line of this function is missing from this listing.
//
// Returns true as soon as one entry of scalar_exprs_ is reported as a window function
// operator by is_window_function_operator(); false if none is. Only the stored
// expressions themselves are tested here, not their sub-expressions.
2118  for (const auto& expr : scalar_exprs_) {
2119  if (is_window_function_operator(expr.get())) {
2120  return true;
2121  }
2122  }
2123  return false;
2124 }
2125 namespace details {
2126 
2128  public:
2130 
2131  std::vector<std::shared_ptr<RelAlgNode>> run(const rapidjson::Value& rels,
2132  RelAlgDagBuilder& root_dag_builder) {
2133  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
2134  const auto& crt_node = *rels_it;
2135  const auto id = node_id(crt_node);
2136  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2137  CHECK(crt_node.IsObject());
2138  std::shared_ptr<RelAlgNode> ra_node = nullptr;
2139  const auto rel_op = json_str(field(crt_node, "relOp"));
2140  if (rel_op == std::string("EnumerableTableScan") ||
2141  rel_op == std::string("LogicalTableScan")) {
2142  ra_node = dispatchTableScan(crt_node);
2143  } else if (rel_op == std::string("LogicalProject")) {
2144  ra_node = dispatchProject(crt_node, root_dag_builder);
2145  } else if (rel_op == std::string("LogicalFilter")) {
2146  ra_node = dispatchFilter(crt_node, root_dag_builder);
2147  } else if (rel_op == std::string("LogicalAggregate")) {
2148  ra_node = dispatchAggregate(crt_node);
2149  } else if (rel_op == std::string("LogicalJoin")) {
2150  ra_node = dispatchJoin(crt_node, root_dag_builder);
2151  } else if (rel_op == std::string("LogicalSort")) {
2152  ra_node = dispatchSort(crt_node);
2153  } else if (rel_op == std::string("LogicalValues")) {
2154  ra_node = dispatchLogicalValues(crt_node);
2155  } else if (rel_op == std::string("LogicalTableModify")) {
2156  ra_node = dispatchModify(crt_node);
2157  } else if (rel_op == std::string("LogicalTableFunctionScan")) {
2158  ra_node = dispatchTableFunction(crt_node, root_dag_builder);
2159  } else if (rel_op == std::string("LogicalUnion")) {
2160  ra_node = dispatchUnion(crt_node);
2161  } else {
2162  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
2163  }
2164  nodes_.push_back(ra_node);
2165  }
2166 
2167  return std::move(nodes_);
2168  }
2169 
2170  private:
2171  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
2172  check_empty_inputs_field(scan_ra);
2173  CHECK(scan_ra.IsObject());
2174  const auto td = getTableFromScanNode(cat_, scan_ra);
2175  const auto field_names = getFieldNamesFromScanNode(scan_ra);
2176  if (scan_ra.HasMember("hints")) {
2177  auto scan_node = std::make_shared<RelScan>(td, field_names);
2178  getRelAlgHints(scan_ra, scan_node);
2179  return scan_node;
2180  }
2181  return std::make_shared<RelScan>(td, field_names);
2182  }
2183 
2184  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra,
2185  RelAlgDagBuilder& root_dag_builder) {
2186  const auto inputs = getRelAlgInputs(proj_ra);
2187  CHECK_EQ(size_t(1), inputs.size());
2188  const auto& exprs_json = field(proj_ra, "exprs");
2189  CHECK(exprs_json.IsArray());
2190  std::vector<std::unique_ptr<const RexScalar>> exprs;
2191  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
2192  ++exprs_json_it) {
2193  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
2194  }
2195  const auto& fields = field(proj_ra, "fields");
2196  if (proj_ra.HasMember("hints")) {
2197  auto project_node = std::make_shared<RelProject>(
2198  exprs, strings_from_json_array(fields), inputs.front());
2199  getRelAlgHints(proj_ra, project_node);
2200  return project_node;
2201  }
2202  return std::make_shared<RelProject>(
2203  exprs, strings_from_json_array(fields), inputs.front());
2204  }
2205 
2206  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra,
2207  RelAlgDagBuilder& root_dag_builder) {
2208  const auto inputs = getRelAlgInputs(filter_ra);
2209  CHECK_EQ(size_t(1), inputs.size());
2210  const auto id = node_id(filter_ra);
2211  CHECK(id);
2212  auto condition =
2213  parse_scalar_expr(field(filter_ra, "condition"), cat_, root_dag_builder);
2214  return std::make_shared<RelFilter>(condition, inputs.front());
2215  }
2216 
2217  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
2218  const auto inputs = getRelAlgInputs(agg_ra);
2219  CHECK_EQ(size_t(1), inputs.size());
2220  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
2221  const auto group = indices_from_json_array(field(agg_ra, "group"));
2222  for (size_t i = 0; i < group.size(); ++i) {
2223  CHECK_EQ(i, group[i]);
2224  }
2225  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
2226  throw QueryNotSupported("GROUP BY extensions not supported");
2227  }
2228  const auto& aggs_json_arr = field(agg_ra, "aggs");
2229  CHECK(aggs_json_arr.IsArray());
2230  std::vector<std::unique_ptr<const RexAgg>> aggs;
2231  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
2232  aggs_json_arr_it != aggs_json_arr.End();
2233  ++aggs_json_arr_it) {
2234  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
2235  }
2236  if (agg_ra.HasMember("hints")) {
2237  auto agg_node =
2238  std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2239  getRelAlgHints(agg_ra, agg_node);
2240  return agg_node;
2241  }
2242  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2243  }
2244 
2245  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra,
2246  RelAlgDagBuilder& root_dag_builder) {
2247  const auto inputs = getRelAlgInputs(join_ra);
2248  CHECK_EQ(size_t(2), inputs.size());
2249  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
2250  auto filter_rex =
2251  parse_scalar_expr(field(join_ra, "condition"), cat_, root_dag_builder);
2252  if (join_ra.HasMember("hints")) {
2253  auto join_node =
2254  std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2255  getRelAlgHints(join_ra, join_node);
2256  return join_node;
2257  }
2258  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2259  }
2260 
2261  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
2262  const auto inputs = getRelAlgInputs(sort_ra);
2263  CHECK_EQ(size_t(1), inputs.size());
2264  std::vector<SortField> collation;
2265  const auto& collation_arr = field(sort_ra, "collation");
2266  CHECK(collation_arr.IsArray());
2267  for (auto collation_arr_it = collation_arr.Begin();
2268  collation_arr_it != collation_arr.End();
2269  ++collation_arr_it) {
2270  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
2271  const auto sort_dir = parse_sort_direction(*collation_arr_it);
2272  const auto null_pos = parse_nulls_position(*collation_arr_it);
2273  collation.emplace_back(field_idx, sort_dir, null_pos);
2274  }
2275  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
2276  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
2277  auto ret = std::make_shared<RelSort>(
2278  collation, limit > 0 ? limit : 0, offset, inputs.front());
2279  ret->setEmptyResult(limit == 0);
2280  return ret;
2281  }
2282 
// Builds a RelModify from a table-modify JSON node with exactly one input. The target
// table is resolved through getTableFromScanNode(); views are rejected. For an "UPDATE"
// operation the target columns come from "updateColumnList". After construction the
// node applies its modifications to its input node; any operation not handled by the
// switch below throws std::runtime_error.
// NOTE(review): the two `case` label lines of the switch are missing from this listing.
// NOTE(review): the view rejection message says "UPDATE" although the check runs for
// every operation — confirm whether that wording is intended.
2283  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
2284  const auto inputs = getRelAlgInputs(logical_modify_ra);
2285  CHECK_EQ(size_t(1), inputs.size());
2286 
2287  const auto table_descriptor = getTableFromScanNode(cat_, logical_modify_ra);
2288  if (table_descriptor->isView) {
2289  throw std::runtime_error("UPDATE of a view is unsupported.");
2290  }
2291 
2292  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
2293  std::string op = json_str(field(logical_modify_ra, "operation"));
2294  RelModify::TargetColumnList target_column_list;
2295 
2296  if (op == "UPDATE") {
2297  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
2298  CHECK(update_columns.IsArray());
2299 
2300  for (auto column_arr_it = update_columns.Begin();
2301  column_arr_it != update_columns.End();
2302  ++column_arr_it) {
2303  target_column_list.push_back(column_arr_it->GetString());
2304  }
2305  }
2306 
2307  auto modify_node = std::make_shared<RelModify>(
2308  cat_, table_descriptor, flattened, op, target_column_list, inputs[0]);
2309  switch (modify_node->getOperation()) {
2311  modify_node->applyDeleteModificationsToInputNode();
2312  break;
2313  }
2315  modify_node->applyUpdateModificationsToInputNode();
2316  break;
2317  }
2318  default:
2319  throw std::runtime_error("Unsupported RelModify operation: " +
2320  json_node_to_string(logical_modify_ra));
2321  }
2322 
2323  return modify_node;
2324  }
2325 
2326  std::shared_ptr<RelTableFunction> dispatchTableFunction(
2327  const rapidjson::Value& table_func_ra,
2328  RelAlgDagBuilder& root_dag_builder) {
2329  const auto inputs = getRelAlgInputs(table_func_ra);
2330  const auto& invocation = field(table_func_ra, "invocation");
2331  CHECK(invocation.IsObject());
2332 
2333  const auto& operands = field(invocation, "operands");
2334  CHECK(operands.IsArray());
2335  CHECK_GE(operands.Size(), unsigned(0));
2336 
2337  std::vector<const Rex*> col_inputs;
2338  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
2339  std::vector<std::string> fields;
2340 
2341  for (auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
2342  ++exprs_json_it) {
2343  const auto& expr_json = *exprs_json_it;
2344  CHECK(expr_json.IsObject());
2345  if (expr_json.HasMember("op")) {
2346  const auto op_str = json_str(field(expr_json, "op"));
2347  if (op_str == "CAST" && expr_json.HasMember("type")) {
2348  const auto& expr_type = field(expr_json, "type");
2349  CHECK(expr_type.IsObject());
2350  CHECK(expr_type.HasMember("type"));
2351  const auto& expr_type_name = json_str(field(expr_type, "type"));
2352  if (expr_type_name == "CURSOR") {
2353  CHECK(expr_json.HasMember("operands"));
2354  const auto& expr_operands = field(expr_json, "operands");
2355  CHECK(expr_operands.IsArray());
2356  if (expr_operands.Size() != 1) {
2357  throw std::runtime_error(
2358  "Table functions currently only support one ResultSet input");
2359  }
2360  auto pos = field(expr_operands[0], "input").GetInt();
2361  CHECK_LT(pos, inputs.size());
2362  for (size_t i = inputs[pos]->size(); i > 0; i--) {
2363  table_func_inputs.emplace_back(
2364  std::make_unique<RexAbstractInput>(col_inputs.size()));
2365  col_inputs.emplace_back(table_func_inputs.back().get());
2366  }
2367  continue;
2368  }
2369  }
2370  }
2371  table_func_inputs.emplace_back(
2372  parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
2373  }
2374 
2375  const auto& op_name = field(invocation, "op");
2376  CHECK(op_name.IsString());
2377 
2378  std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
2379  const auto& row_types = field(table_func_ra, "rowType");
2380  CHECK(row_types.IsArray());
2381  CHECK_GE(row_types.Size(), unsigned(0));
2382  const auto& row_types_array = row_types.GetArray();
2383  for (size_t i = 0; i < row_types_array.Size(); i++) {
2384  // We don't care about the type information in rowType -- replace each output with
2385  // a reference to be resolved later in the translator
2386  table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
2387  fields.emplace_back("");
2388  }
2389  return std::make_shared<RelTableFunction>(op_name.GetString(),
2390  inputs,
2391  fields,
2392  col_inputs,
2393  table_func_inputs,
2394  table_function_projected_outputs);
2395  }
2396 
2397  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
2398  const rapidjson::Value& logical_values_ra) {
2399  const auto& tuple_type_arr = field(logical_values_ra, "type");
2400  CHECK(tuple_type_arr.IsArray());
2401  std::vector<TargetMetaInfo> tuple_type;
2402  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
2403  tuple_type_arr_it != tuple_type_arr.End();
2404  ++tuple_type_arr_it) {
2405  const auto component_type = parse_type(*tuple_type_arr_it);
2406  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
2407  tuple_type.emplace_back(component_name, component_type);
2408  }
2409  const auto& inputs_arr = field(logical_values_ra, "inputs");
2410  CHECK(inputs_arr.IsArray());
2411  const auto& tuples_arr = field(logical_values_ra, "tuples");
2412  CHECK(tuples_arr.IsArray());
2413 
2414  if (inputs_arr.Size()) {
2415  throw QueryNotSupported("Inputs not supported in logical values yet.");
2416  }
2417 
2418  std::vector<RelLogicalValues::RowValues> values;
2419  if (tuples_arr.Size()) {
2420  for (const auto& row : tuples_arr.GetArray()) {
2421  CHECK(row.IsArray());
2422  const auto values_json = row.GetArray();
2423  if (!values.empty()) {
2424  CHECK_EQ(values[0].size(), values_json.Size());
2425  }
2426  values.emplace_back(RelLogicalValues::RowValues{});
2427  for (const auto& value : values_json) {
2428  CHECK(value.IsObject());
2429  CHECK(value.HasMember("literal"));
2430  values.back().emplace_back(parse_literal(value));
2431  }
2432  }
2433  }
2434 
2435  return std::make_shared<RelLogicalValues>(tuple_type, values);
2436  }
2437 
2438  std::shared_ptr<RelLogicalUnion> dispatchUnion(
2439  const rapidjson::Value& logical_union_ra) {
2440  auto inputs = getRelAlgInputs(logical_union_ra);
2441  auto const& all_type_bool = field(logical_union_ra, "all");
2442  CHECK(all_type_bool.IsBool());
2443  return std::make_shared<RelLogicalUnion>(std::move(inputs), all_type_bool.GetBool());
2444  }
2445 
2446  RelAlgInputs getRelAlgInputs(const rapidjson::Value& node) {
2447  if (node.HasMember("inputs")) {
2448  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
2449  RelAlgInputs ra_inputs;
2450  for (const auto& str_id : str_input_ids) {
2451  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
2452  }
2453  return ra_inputs;
2454  }
2455  return {prev(node)};
2456  }
2457 
2458  std::pair<std::string, std::string> getKVOptionPair(std::string& str, size_t& pos) {
2459  auto option = str.substr(0, pos);
2460  std::string delim = "=";
2461  size_t delim_pos = option.find(delim);
2462  auto key = option.substr(0, delim_pos);
2463  auto val = option.substr(delim_pos + 1, option.length());
2464  str.erase(0, pos + delim.length() + 1);
2465  return {key, val};
2466  }
2467 
2468  HintExplained parseHintString(std::string& hint_string) {
2469  std::string white_space_delim = " ";
2470  int l = hint_string.length();
2471  hint_string = hint_string.erase(0, 1).substr(0, l - 2);
2472  size_t pos = 0;
2473  if ((pos = hint_string.find("options:")) != std::string::npos) {
2474  // need to parse hint options
2475  std::vector<std::string> tokens;
2476  std::string hint_name = hint_string.substr(0, hint_string.find(white_space_delim));
2477  bool kv_list_op = false;
2478  std::string raw_options = hint_string.substr(pos + 8, hint_string.length() - 2);
2479  if (raw_options.find('{') != std::string::npos) {
2480  kv_list_op = true;
2481  } else {
2482  CHECK(raw_options.find('[') != std::string::npos);
2483  }
2484  auto t1 = raw_options.erase(0, 1);
2485  raw_options = t1.substr(0, t1.length() - 1);
2486  std::string op_delim = ", ";
2487  if (kv_list_op) {
2488  // kv options
2489  std::unordered_map<std::string, std::string> kv_options;
2490  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
2491  auto kv_pair = getKVOptionPair(raw_options, pos);
2492  kv_options.emplace(kv_pair.first, kv_pair.second);
2493  }
2494  // handle the last kv pair
2495  auto kv_pair = getKVOptionPair(raw_options, pos);
2496  kv_options.emplace(kv_pair.first, kv_pair.second);
2497  return {hint_name, true, false, true, kv_options};
2498  } else {
2499  std::vector<std::string> list_options;
2500  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
2501  list_options.emplace_back(raw_options.substr(0, pos));
2502  raw_options.erase(0, pos + white_space_delim.length() + 1);
2503  }
2504  // handle the last option
2505  list_options.emplace_back(raw_options.substr(0, pos));
2506  return {hint_name, true, false, false, list_options};
2507  }
2508  } else {
2509  // marker hint: no extra option for this hint
2510  std::string hint_name = hint_string.substr(0, hint_string.find(white_space_delim));
2511  return {hint_name, true, true, false};
2512  }
2513  }
2514 
2515  void getRelAlgHints(const rapidjson::Value& json_node,
2516  std::shared_ptr<RelAlgNode> node) {
2517  std::string hint_explained = json_str(field(json_node, "hints"));
2518  size_t pos = 0;
2519  std::string delim = "|";
2520  std::vector<std::string> hint_list;
2521  while ((pos = hint_explained.find(delim)) != std::string::npos) {
2522  hint_list.emplace_back(hint_explained.substr(0, pos));
2523  hint_explained.erase(0, pos + delim.length());
2524  }
2525  // handling the last one
2526  hint_list.emplace_back(hint_explained.substr(0, pos));
2527 
2528  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
2529  if (agg_node) {
2530  for (std::string& hint : hint_list) {
2531  auto parsed_hint = parseHintString(hint);
2532  agg_node->addHint(parsed_hint);
2533  }
2534  }
2535  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
2536  if (project_node) {
2537  for (std::string& hint : hint_list) {
2538  auto parsed_hint = parseHintString(hint);
2539  project_node->addHint(parsed_hint);
2540  }
2541  }
2542  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
2543  if (scan_node) {
2544  for (std::string& hint : hint_list) {
2545  auto parsed_hint = parseHintString(hint);
2546  scan_node->addHint(parsed_hint);
2547  }
2548  }
2549  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
2550  if (join_node) {
2551  for (std::string& hint : hint_list) {
2552  auto parsed_hint = parseHintString(hint);
2553  join_node->addHint(parsed_hint);
2554  }
2555  }
2556 
2557  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
2558  if (compound_node) {
2559  for (std::string& hint : hint_list) {
2560  auto parsed_hint = parseHintString(hint);
2561  compound_node->addHint(parsed_hint);
2562  }
2563  }
2564  }
2565 
2566  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
2567  const auto id = node_id(crt_node);
2568  CHECK(id);
2569  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2570  return nodes_.back();
2571  }
2572 
2574  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
2575 };
2576 
2577 } // namespace details
2578 
// Constructs the builder from the serialized relational algebra JSON `query_ra`.
// The text is parsed with rapidjson; on a parse error the offset and message are
// logged and std::runtime_error is thrown. The parsed document must be a JSON object
// and is then handed to build() with this builder as the root builder.
// NOTE(review): two lines of this constructor (the parameter that initializes cat_ and
// a statement just before the build() call) are missing from this listing.
2579 RelAlgDagBuilder::RelAlgDagBuilder(const std::string& query_ra,
2581  const RenderInfo* render_info)
2582  : cat_(cat), render_info_(render_info), query_hint_(QueryHint::defaults()) {
2583  rapidjson::Document query_ast;
2584  query_ast.Parse(query_ra.c_str());
2585  VLOG(2) << "Parsing query RA JSON: " << query_ra;
2586  if (query_ast.HasParseError()) {
// NOTE(review): the result of this call is discarded; the statement has no visible effect.
2587  query_ast.GetParseError();
2588  LOG(ERROR) << "Failed to parse RA tree from Calcite (offset "
2589  << query_ast.GetErrorOffset() << "):\n"
2590  << rapidjson::GetParseError_En(query_ast.GetParseError());
2591  VLOG(1) << "Failed to parse query RA: " << query_ra;
2592  throw std::runtime_error(
2593  "Failed to parse relational algebra tree. Possible query syntax error.");
2594  }
2595  CHECK(query_ast.IsObject());
2597  build(query_ast, *this);
2598 }
2599 
// Constructs a builder from an already parsed JSON value and builds its nodes with
// `root_dag_builder` passed on as the lead builder.
// NOTE(review): the first line of this signature and one parameter line are missing
// from this listing, so the declarations of `root_dag_builder` and `cat` are not visible.
2601  const rapidjson::Value& query_ast,
2603  const RenderInfo* render_info)
2604  : cat_(cat), render_info_(render_info), query_hint_(QueryHint::defaults()) {
2605  build(query_ast, root_dag_builder);
2606 }
2607 
// Builds nodes_ from the "rels" array of `query_ast` and then runs the visible
// post-processing passes over it: query hint handling, nop marking, dead column and
// dead subquery elimination, node coalescing and left-deep join creation.
// NOTE(review): several statements of this function are missing from this listing
// (gaps in the embedded line numbers), including the bodies of the `render_info_`,
// `filtered_left_deep_joins.empty()` and `g_cluster` branches.
2608 void RelAlgDagBuilder::build(const rapidjson::Value& query_ast,
2609  RelAlgDagBuilder& lead_dag_builder) {
2610  const auto& rels = field(query_ast, "rels");
2611  CHECK(rels.IsArray());
// NOTE(review): this handler only rethrows, so the try/catch does not change behavior.
2612  try {
2613  nodes_ = details::RelAlgDispatcher(cat_).run(rels, lead_dag_builder);
2614  } catch (const QueryNotSupported&) {
2615  throw;
2616  }
2617  CHECK(!nodes_.empty());
2619 
2620  if (render_info_) {
2621  // Alter the RA for render. Do this before any flattening/optimizations are done to
2622  // the tree.
2624  }
2625 
2626  handleQueryHint(nodes_, this);
2627  mark_nops(nodes_);
// Collect the roots of left-deep join patterns; those rooted at a filter are tracked
// separately.
2632  std::vector<const RelAlgNode*> filtered_left_deep_joins;
2633  std::vector<const RelAlgNode*> left_deep_joins;
2634  for (const auto& node : nodes_) {
2635  const auto left_deep_join_root = get_left_deep_join_root(node);
2636  // The filter which starts a left-deep join pattern must not be coalesced
2637  // since it contains (part of) the join condition.
2638  if (left_deep_join_root) {
2639  left_deep_joins.push_back(left_deep_join_root.get());
2640  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
2641  filtered_left_deep_joins.push_back(left_deep_join_root.get());
2642  }
2643  }
2644  }
2645  if (filtered_left_deep_joins.empty()) {
2647  }
2648  eliminate_dead_columns(nodes_);
2649  eliminate_dead_subqueries(subqueries_, nodes_.back().get());
2651  if (g_cluster) {
2653  }
2654  coalesce_nodes(nodes_, left_deep_joins);
// The last (root) node must be owned solely by nodes_ at this point.
2655  CHECK(nodes_.back().use_count() == 1);
2656  create_left_deep_join(nodes_);
2657 }
2658 
// NOTE(review): the first line of this signature (return type and name) is missing
// from this listing.
//
// Invokes `callback` once for every non-null entry of nodes_, in order.
2660  std::function<void(RelAlgNode const*)> const& callback) const {
2661  for (auto const& node : nodes_) {
2662  if (node) {
2663  callback(node.get());
2664  }
2665  }
2666 }
2667 
// NOTE(review): the signature line of this function is missing from this listing.
//
// Calls resetQueryExecutionState() on every non-null entry of nodes_.
2669  for (auto& node : nodes_) {
2670  if (node) {
2671  node->resetQueryExecutionState();
2672  }
2673  }
2674 }
2675 
2676 // Return tree with depth represented by indentations.
2677 std::string tree_string(const RelAlgNode* ra, const size_t depth) {
2678  std::string result = std::string(2 * depth, ' ') + ra->toString() + '\n';
2679  for (size_t i = 0; i < ra->inputCount(); ++i) {
2680  result += tree_string(ra->getInput(i), depth + 1);
2681  }
2682  return result;
2683 }
2684 
2685 std::string RexSubQuery::toString() const {
2686  return cat(::typeName(this), "(", ::toString(ra_.get()), ")");
2687 }
2688 
// String form of an input reference. When the source node is a scan, the result is
// "<type name>(<table name>.<field name>)"; otherwise it names the source node and the
// input index.
// NOTE(review): the argument line following ", in_index=" is missing from this listing.
2689 std::string RexInput::toString() const {
2690  const auto scan_node = dynamic_cast<const RelScan*>(node_);
2691  if (scan_node) {
2692  auto field_name = scan_node->getFieldName(getIndex());
2693  auto table_name = scan_node->getTableDescriptor()->tableName;
2694  return ::typeName(this) + "(" + table_name + "." + field_name + ")";
2695  }
2696  return cat(::typeName(this),
2697  "(node=",
2698  ::toString(node_),
2699  ", in_index=",
2701  ")");
2702 }
2703 
// String form of a compound node: the type name followed by the filter expression
// ("null" when there is none) and a series of labelled fields.
// NOTE(review): the value arguments that follow each label below (target_exprs,
// agg_exps, fields, scalar_sources, is_agg) are missing from this listing.
2704 std::string RelCompound::toString() const {
2705  return cat(::typeName(this),
2706  "(",
2707  (filter_expr_ ? filter_expr_->toString() : "null"),
2708  ", target_exprs=",
2710  ", ",
2712  ", agg_exps=",
2714  ", fields=",
2716  ", scalar_sources=",
2718  ", is_agg=",
2720  ")");
2721 }
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
SQLTypes to_sql_type(const std::string &type_name)
void handleQueryHint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDagBuilder *dag_builder) noexcept
std::shared_ptr< const RelAlgNode > getRootNodeShPtr() const
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
const RexScalar * getThen(const size_t idx) const
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
#define CHECK_EQ(x, y)
Definition: Logger.h:205
JoinType to_join_type(const std::string &join_type_name)
const Catalog_Namespace::Catalog & cat_
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexSubQuery > deepCopy() const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
JoinType
Definition: sqldefs.h:108
std::vector< const Rex * > remapTargetPointers(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
std::string cat(Ts &&...args)
std::string toString(const ExtArgumentType &sig_type)
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:101
Definition: sqltypes.h:48
Hints * getDeliveredHints()
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const RexScalar * getElse() const
RelCompound(std::unique_ptr< const RexScalar > &filter_expr, const std::vector< const Rex * > &target_exprs, const size_t groupby_count, const std::vector< const RexAgg * > &agg_exprs, const std::vector< std::string > &fields, std::vector< std::unique_ptr< const RexScalar >> &scalar_sources, const bool is_agg, bool update_disguised_as_select=false, bool delete_disguised_as_select=false, bool varlen_update_required=false, TableDescriptor const *manipulation_target_table=nullptr, ColumnNameList target_columns=ColumnNameList())
static thread_local unsigned crt_id_
void addHint(const HintExplained &hint_explained)
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
RexScalar const * copyAndRedirectSource(RexScalar const *, size_t input_idx) const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
SQLAgg to_agg_kind(const std::string &agg_name)
std::shared_ptr< RelLogicalUnion > dispatchUnion(const rapidjson::Value &logical_union_ra)
#define LOG(tag)
Definition: Logger.h:188
NullSortedPosition
std::vector< std::string > TargetColumnList
bool g_enable_union
size_t size() const
Hints * getDeliveredHints()
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const RexScalar * getOperand(const size_t idx) const
std::vector< const Rex * > col_inputs_
bool hasEquivCollationOf(const RelSort &that) const
const std::vector< std::string > fields_
std::string toString() const override
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
void build(const rapidjson::Value &query_ast, RelAlgDagBuilder &root_dag_builder)
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::string join(T const &container, std::string const &delim)
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
#define UNREACHABLE()
Definition: Logger.h:241
bool hint_applied_
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
#define CHECK_GE(x, y)
Definition: Logger.h:210
std::vector< std::string > fields_
Definition: sqldefs.h:49
const RexScalar * getWhen(const size_t idx) const
void addHint(const HintExplained &hint_explained)
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
RetType visitOperator(const RexOperator *rex_operator) const final
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
const Catalog_Namespace::Catalog & cat_
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void checkForMatchingMetaInfoTypes() const
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
void addHint(const HintExplained &hint_explained)
std::string to_string(char const *&&v)
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
const std::string getFieldName(const size_t i) const
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
void addHint(const HintExplained &hint_explained)
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
RexInputSet visitInput(const RexInput *input) const override
std::vector< std::shared_ptr< RexSubQuery > > subqueries_
RexWindowFuncReplacementVisitor(std::unique_ptr< const RexScalar > replacement_rex)
const RenderInfo * render_info_
std::string to_string() const
Definition: sqltypes.h:446
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const RexScalar * visitOperator(const RexOperator *rex_operator) const final
unsigned getIndex() const
SQLOps getOperator() const
std::vector< std::shared_ptr< RelAlgNode > > nodes_
const TableDescriptor * getTableFromScanNode(const Catalog_Namespace::Catalog &cat, const rapidjson::Value &scan_ra)
SortDirection parse_sort_direction(const rapidjson::Value &collation)
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
bool isRenaming() const
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void setIndex(const unsigned in_index) const
Hints * getDeliveredHints()
SQLOps to_sql_op(const std::string &op_str)
void add_window_function_pre_project(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::unique_ptr< Hints > hints_
void set_scale(int s)
Definition: sqltypes.h:406
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
void * visitInput(const RexInput *rex_input) const override
std::unique_ptr< Hints > hints_
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:54
size_t branchCount() const
std::unordered_set< RexInput > RexInputSet
const RelAlgNode * getInput(const size_t idx) const
RelFilter(std::unique_ptr< const RexScalar > &filter, std::shared_ptr< const RelAlgNode > input)
std::vector< std::shared_ptr< RelAlgNode > > nodes_
std::string toString() const override
RelAggregate(const size_t groupby_count, std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
std::unique_ptr< const RexScalar > filter_
bool isSimple() const
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
const size_t groupby_count_
RexRebindReindexInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input, std::unordered_map< unsigned, unsigned > old_to_new_index_map)
unsigned getId() const
const RelAlgNode * node_
HintExplained parseHintString(std::string &hint_string)
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
void eachNode(std::function< void(RelAlgNode const *)> const &) const
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
Hints * getDeliveredHints()
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
std::unordered_map< std::string, HintExplained > Hints
SortDirection
const RexScalar * getProjectAt(const size_t idx) const
std::vector< std::unique_ptr< const RexAgg > > copyAggExprs(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
std::vector< std::shared_ptr< const RelAlgNode >> RelAlgInputs
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:51
Definition: sqltypes.h:52
std::unique_ptr< Hints > hints_
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
const ConstRexScalarPtrVector & getPartitionKeys() const
std::string tree_string(const RelAlgNode *ra, const size_t depth)
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:208
std::unique_ptr< Hints > hints_
const std::vector< const Rex * > target_exprs_
std::vector< ElementType >::const_iterator Super
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
RelLogicalUnion(RelAlgInputs, bool is_all)
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDagBuilder &root_dag_builder)
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::unique_ptr< const RexScalar > filter_expr_
void setSourceNode(const RelAlgNode *node) const
const RexScalar * aggregateResult(const RexScalar *const &aggregate, const RexScalar *const &next_result) const final
bool hasWindowFunctionExpr() const
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
void setExecutionResult(const std::shared_ptr< const ExecutionResult > result)
RelLogicalValues(const std::vector< TargetMetaInfo > &tuple_type, std::vector< RowValues > &values)
RelAlgDagBuilder()=delete
SqlWindowFunctionKind
Definition: sqldefs.h:83
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:320
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexInputSet aggregateResult(const RexInputSet &aggregate, const RexInputSet &next_result) const override
Definition: sqldefs.h:53
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
RelTableFunction(const std::string &function_name, RelAlgInputs inputs, std::vector< std::string > &fields, std::vector< const Rex * > col_inputs, std::vector< std::unique_ptr< const RexScalar >> &table_func_inputs, std::vector< std::unique_ptr< const RexScalar >> &target_exprs)
const std::vector< std::string > & getFields() const
std::string getFieldName(const size_t i) const
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
bool g_enable_watchdog false
Definition: Execute.cpp:76
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:197
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
RelProject(std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
unsigned node_id(const rapidjson::Value &ra_node) noexcept
RANodeOutput get_node_output(const RelAlgNode *ra_node)
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern) noexcept
bool g_cluster
void alterRAForRender(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const RenderInfo &render_info)
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDagBuilder &root_dag_builder)
void getRelAlgHints(const rapidjson::Value &json_node, std::shared_ptr< RelAlgNode > node)
RelAlgDispatcher(const Catalog_Namespace::Catalog &cat)
Common Enum definitions for SQL processing.
bool is_dict_encoded_string() const
Definition: sqltypes.h:512
Definition: sqltypes.h:44
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::vector< RexInput > RANodeOutput
std::vector< std::unique_ptr< const RexScalar >> RowValues
const size_t inputCount() const
string name
Definition: setup.py:44
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
size_t size() const override
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
RelAlgInputs getRelAlgInputs(const rapidjson::Value &node)
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDagBuilder &root_dag_builder)
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:139
std::vector< std::unique_ptr< const RexScalar > > copyRexScalars(std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
#define VLOG(n)
Definition: Logger.h:291
RelJoin(std::shared_ptr< const RelAlgNode > lhs, std::shared_ptr< const RelAlgNode > rhs, std::unique_ptr< const RexScalar > &condition, const JoinType join_type)
RelAlgInputs inputs_
void set_precision(int d)
Definition: sqltypes.h:404
std::string toString() const override
std::pair< std::string, std::string > getKVOptionPair(std::string &str, size_t &pos)
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
bool isIdentity() const
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
Hints * getDeliveredHints()
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
const bool is_agg_
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins)
const RexScalar * visitCase(const RexCase *rex_case) const final
std::string toString() const override
static void resetRelAlgFirstId() noexcept
std::string json_node_to_string(const rapidjson::Value &node) noexcept