OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgDagBuilder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgDagBuilder.h"
19 #include "Catalog/Catalog.h"
21 #include "JsonAccessors.h"
22 #include "RelAlgOptimizer.h"
23 #include "RelLeftDeepInnerJoin.h"
25 #include "RexVisitor.h"
26 #include "Shared/sqldefs.h"
27 
28 #include <rapidjson/error/en.h>
29 #include <rapidjson/error/error.h>
30 #include <rapidjson/stringbuffer.h>
31 #include <rapidjson/writer.h>
32 
33 #include <string>
34 #include <unordered_set>
35 
36 extern bool g_cluster;
37 extern bool g_enable_union;
38 
39 namespace {
40 
41 const unsigned FIRST_RA_NODE_ID = 1;
42 
43 } // namespace
44 
45 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
46 
49 }
50 
52  const std::shared_ptr<const ExecutionResult> result) {
53  auto row_set = result->getRows();
54  CHECK(row_set);
55  CHECK_EQ(size_t(1), row_set->colCount());
56  *(type_.get()) = row_set->getColType(0);
57  (*(result_.get())) = result;
58 }
59 
60 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
61  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
62 }
63 
64 unsigned RexSubQuery::getId() const {
65  return ra_->getId();
66 }
67 
68 namespace {
69 
70 class RexRebindInputsVisitor : public RexVisitor<void*> {
71  public:
72  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
73  : old_input_(old_input), new_input_(new_input) {}
74 
75  virtual ~RexRebindInputsVisitor() = default;
76 
77  void* visitInput(const RexInput* rex_input) const override {
78  const auto old_source = rex_input->getSourceNode();
79  if (old_source == old_input_) {
80  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
81  if (left_deep_join) {
82  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
83  return nullptr;
84  }
85  rex_input->setSourceNode(new_input_);
86  }
87  return nullptr;
88  };
89 
90  private:
91  const RelAlgNode* old_input_;
93 };
94 
95 // Creates an output with n columns.
96 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
97  std::vector<RexInput> outputs;
98  outputs.reserve(n);
99  for (size_t i = 0; i < n; ++i) {
100  outputs.emplace_back(node, i);
101  }
102  return outputs;
103 }
104 
106  public:
108  const RelAlgNode* old_input,
109  const RelAlgNode* new_input,
110  std::unordered_map<unsigned, unsigned> old_to_new_index_map)
111  : RexRebindInputsVisitor(old_input, new_input), mapping_(old_to_new_index_map) {}
112 
113  void* visitInput(const RexInput* rex_input) const override {
114  RexRebindInputsVisitor::visitInput(rex_input);
115  auto mapping_itr = mapping_.find(rex_input->getIndex());
116  CHECK(mapping_itr != mapping_.end());
117  rex_input->setIndex(mapping_itr->second);
118  return nullptr;
119  }
120 
121  private:
122  const std::unordered_map<unsigned, unsigned> mapping_;
123 };
124 
125 } // namespace
126 
128  std::shared_ptr<const RelAlgNode> old_input,
129  std::shared_ptr<const RelAlgNode> input,
130  std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map) {
131  RelAlgNode::replaceInput(old_input, input);
132  std::unique_ptr<RexRebindInputsVisitor> rebind_inputs;
133  if (old_to_new_index_map) {
134  rebind_inputs = std::make_unique<RexRebindReindexInputsVisitor>(
135  old_input.get(), input.get(), *old_to_new_index_map);
136  } else {
137  rebind_inputs =
138  std::make_unique<RexRebindInputsVisitor>(old_input.get(), input.get());
139  }
140  CHECK(rebind_inputs);
141  for (const auto& scalar_expr : scalar_exprs_) {
142  rebind_inputs->visit(scalar_expr.get());
143  }
144 }
145 
146 void RelProject::appendInput(std::string new_field_name,
147  std::unique_ptr<const RexScalar> new_input) {
148  fields_.emplace_back(std::move(new_field_name));
149  scalar_exprs_.emplace_back(std::move(new_input));
150 }
151 
153  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
154  if (scan_node) {
155  // Scan node has no inputs, output contains all columns in the table.
156  CHECK_EQ(size_t(0), scan_node->inputCount());
157  return n_outputs(scan_node, scan_node->size());
158  }
159  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
160  if (project_node) {
161  // Project output count doesn't depend on the input
162  CHECK_EQ(size_t(1), project_node->inputCount());
163  return n_outputs(project_node, project_node->size());
164  }
165  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
166  if (filter_node) {
167  // Filter preserves shape
168  CHECK_EQ(size_t(1), filter_node->inputCount());
169  const auto prev_out = get_node_output(filter_node->getInput(0));
170  return n_outputs(filter_node, prev_out.size());
171  }
172  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
173  if (aggregate_node) {
174  // Aggregate output count doesn't depend on the input
175  CHECK_EQ(size_t(1), aggregate_node->inputCount());
176  return n_outputs(aggregate_node, aggregate_node->size());
177  }
178  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
179  if (compound_node) {
180  // Compound output count doesn't depend on the input
181  CHECK_EQ(size_t(1), compound_node->inputCount());
182  return n_outputs(compound_node, compound_node->size());
183  }
184  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
185  if (join_node) {
186  // Join concatenates the outputs from the inputs and the output
187  // directly references the nodes in the input.
188  CHECK_EQ(size_t(2), join_node->inputCount());
189  auto lhs_out =
190  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
191  const auto rhs_out =
192  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
193  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
194  return lhs_out;
195  }
196  const auto table_func_node = dynamic_cast<const RelTableFunction*>(ra_node);
197  if (table_func_node) {
198  // Table Function output count doesn't depend on the input
199  return n_outputs(table_func_node, table_func_node->size());
200  }
201  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
202  if (sort_node) {
203  // Sort preserves shape
204  CHECK_EQ(size_t(1), sort_node->inputCount());
205  const auto prev_out = get_node_output(sort_node->getInput(0));
206  return n_outputs(sort_node, prev_out.size());
207  }
208  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
209  if (logical_values_node) {
210  CHECK_EQ(size_t(0), logical_values_node->inputCount());
211  return n_outputs(logical_values_node, logical_values_node->size());
212  }
213  const auto logical_union_node = dynamic_cast<const RelLogicalUnion*>(ra_node);
214  if (logical_union_node) {
215  return n_outputs(logical_union_node, logical_union_node->size());
216  }
217  LOG(FATAL) << "Unhandled ra_node type: " << ra_node->toString();
218  return {};
219 }
220 
222  if (!isSimple()) {
223  return false;
224  }
225  CHECK_EQ(size_t(1), inputCount());
226  const auto source = getInput(0);
227  if (dynamic_cast<const RelJoin*>(source)) {
228  return false;
229  }
230  const auto source_shape = get_node_output(source);
231  if (source_shape.size() != scalar_exprs_.size()) {
232  return false;
233  }
234  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
235  const auto& scalar_expr = scalar_exprs_[i];
236  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
237  CHECK(input);
238  CHECK_EQ(source, input->getSourceNode());
239  // We should add the additional check that input->getIndex() !=
240  // source_shape[i].getIndex(), but Calcite doesn't generate the right
241  // Sort-Project-Sort sequence when joins are involved.
242  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
243  return false;
244  }
245  }
246  return true;
247 }
248 
249 namespace {
250 
251 bool isRenamedInput(const RelAlgNode* node,
252  const size_t index,
253  const std::string& new_name) {
254  CHECK_LT(index, node->size());
255  if (auto join = dynamic_cast<const RelJoin*>(node)) {
256  CHECK_EQ(size_t(2), join->inputCount());
257  const auto lhs_size = join->getInput(0)->size();
258  if (index < lhs_size) {
259  return isRenamedInput(join->getInput(0), index, new_name);
260  }
261  CHECK_GE(index, lhs_size);
262  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
263  }
264 
265  if (auto scan = dynamic_cast<const RelScan*>(node)) {
266  return new_name != scan->getFieldName(index);
267  }
268 
269  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
270  return new_name != aggregate->getFieldName(index);
271  }
272 
273  if (auto project = dynamic_cast<const RelProject*>(node)) {
274  return new_name != project->getFieldName(index);
275  }
276 
277  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
278  return new_name != table_func->getFieldName(index);
279  }
280 
281  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
282  const auto& tuple_type = logical_values->getTupleType();
283  CHECK_LT(index, tuple_type.size());
284  return new_name != tuple_type[index].get_resname();
285  }
286 
287  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
288  dynamic_cast<const RelLogicalUnion*>(node));
289  return isRenamedInput(node->getInput(0), index, new_name);
290 }
291 
292 } // namespace
293 
295  if (!isSimple()) {
296  return false;
297  }
298  CHECK_EQ(scalar_exprs_.size(), fields_.size());
299  for (size_t i = 0; i < fields_.size(); ++i) {
300  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
301  CHECK(rex_in);
302  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
303  return true;
304  }
305  }
306  return false;
307 }
308 
309 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
310  std::shared_ptr<const RelAlgNode> input) {
311  RelAlgNode::replaceInput(old_input, input);
312  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
313  if (condition_) {
314  rebind_inputs.visit(condition_.get());
315  }
316 }
317 
318 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
319  std::shared_ptr<const RelAlgNode> input) {
320  RelAlgNode::replaceInput(old_input, input);
321  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
322  rebind_inputs.visit(filter_.get());
323 }
324 
325 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
326  std::shared_ptr<const RelAlgNode> input) {
327  RelAlgNode::replaceInput(old_input, input);
328  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
329  for (const auto& scalar_source : scalar_sources_) {
330  rebind_inputs.visit(scalar_source.get());
331  }
332  if (filter_expr_) {
333  rebind_inputs.visit(filter_expr_.get());
334  }
335 }
336 
338  : RelAlgNode(rhs)
340  , fields_(rhs.fields_)
341  , hint_applied_(false)
342  , hints_(std::make_unique<Hints>()) {
343  RexDeepCopyVisitor copier;
344  for (auto const& expr : rhs.scalar_exprs_) {
345  scalar_exprs_.push_back(copier.visit(expr.get()));
346  }
347  if (rhs.hint_applied_) {
348  for (auto const& kv : *rhs.hints_) {
349  addHint(kv.second);
350  }
351  }
352 }
353 
355  : RelAlgNode(rhs)
356  , tuple_type_(rhs.tuple_type_)
357  , values_(RexDeepCopyVisitor::copy(rhs.values_)) {}
358 
360  RexDeepCopyVisitor copier;
361  filter_ = copier.visit(rhs.filter_.get());
362 }
363 
365  : RelAlgNode(rhs)
366  , groupby_count_(rhs.groupby_count_)
367  , fields_(rhs.fields_)
368  , hint_applied_(false)
369  , hints_(std::make_unique<Hints>()) {
370  agg_exprs_.reserve(rhs.agg_exprs_.size());
371  for (auto const& agg : rhs.agg_exprs_) {
372  agg_exprs_.push_back(agg->deepCopy());
373  }
374  if (rhs.hint_applied_) {
375  for (auto const& kv : *rhs.hints_) {
376  addHint(kv.second);
377  }
378  }
379 }
380 
382  : RelAlgNode(rhs)
383  , join_type_(rhs.join_type_)
384  , hint_applied_(false)
385  , hints_(std::make_unique<Hints>()) {
386  RexDeepCopyVisitor copier;
387  condition_ = copier.visit(rhs.condition_.get());
388  if (rhs.hint_applied_) {
389  for (auto const& kv : *rhs.hints_) {
390  addHint(kv.second);
391  }
392  }
393 }
394 
395 namespace {
396 
397 std::vector<std::unique_ptr<const RexAgg>> copyAggExprs(
398  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs) {
399  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
400  agg_exprs_copy.reserve(agg_exprs.size());
401  for (auto const& agg_expr : agg_exprs) {
402  agg_exprs_copy.push_back(agg_expr->deepCopy());
403  }
404  return agg_exprs_copy;
405 }
406 
407 std::vector<std::unique_ptr<const RexScalar>> copyRexScalars(
408  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources) {
409  std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
410  scalar_sources_copy.reserve(scalar_sources.size());
411  RexDeepCopyVisitor copier;
412  for (auto const& scalar_source : scalar_sources) {
413  scalar_sources_copy.push_back(copier.visit(scalar_source.get()));
414  }
415  return scalar_sources_copy;
416 }
417 
418 std::vector<const Rex*> remapTargetPointers(
419  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_new,
420  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_new,
421  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_old,
422  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_old,
423  std::vector<const Rex*> const& target_exprs_old) {
424  std::vector<const Rex*> target_exprs(target_exprs_old);
425  std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
426  for (size_t i = 0; i < agg_exprs_new.size(); ++i) {
427  old_to_new_target.emplace(agg_exprs_old[i].get(), agg_exprs_new[i].get());
428  }
429  for (size_t i = 0; i < scalar_sources_new.size(); ++i) {
430  old_to_new_target.emplace(scalar_sources_old[i].get(), scalar_sources_new[i].get());
431  }
432  for (auto& target : target_exprs) {
433  auto target_it = old_to_new_target.find(target);
434  CHECK(target_it != old_to_new_target.end());
435  target = target_it->second;
436  }
437  return target_exprs;
438 }
439 
440 } // namespace
441 
443  : RelAlgNode(rhs)
445  , groupby_count_(rhs.groupby_count_)
446  , agg_exprs_(copyAggExprs(rhs.agg_exprs_))
447  , fields_(rhs.fields_)
448  , is_agg_(rhs.is_agg_)
449  , scalar_sources_(copyRexScalars(rhs.scalar_sources_))
450  , target_exprs_(remapTargetPointers(agg_exprs_,
451  scalar_sources_,
452  rhs.agg_exprs_,
453  rhs.scalar_sources_,
454  rhs.target_exprs_))
455  , hint_applied_(false)
456  , hints_(std::make_unique<Hints>()) {
457  RexDeepCopyVisitor copier;
458  filter_expr_ = rhs.filter_expr_ ? copier.visit(rhs.filter_expr_.get()) : nullptr;
459  if (rhs.hint_applied_) {
460  for (auto const& kv : *rhs.hints_) {
461  addHint(kv.second);
462  }
463  }
464 }
465 
466 void RelTableFunction::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
467  std::shared_ptr<const RelAlgNode> input) {
468  RelAlgNode::replaceInput(old_input, input);
469  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
470  for (const auto& target_expr : target_exprs_) {
471  rebind_inputs.visit(target_expr.get());
472  }
473  for (const auto& func_input : table_func_inputs_) {
474  rebind_inputs.visit(func_input.get());
475  }
476 }
477 
479  : RelAlgNode(rhs)
480  , function_name_(rhs.function_name_)
481  , fields_(rhs.fields_)
482  , col_inputs_(rhs.col_inputs_)
483  , table_func_inputs_(copyRexScalars(rhs.table_func_inputs_))
484  , target_exprs_(copyRexScalars(rhs.target_exprs_)) {
485  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
486  for (size_t i = 0; i < table_func_inputs_.size(); ++i) {
487  old_to_new_input.emplace(rhs.table_func_inputs_[i].get(),
488  table_func_inputs_[i].get());
489  }
490  for (auto& target : col_inputs_) {
491  auto target_it = old_to_new_input.find(target);
492  CHECK(target_it != old_to_new_input.end());
493  target = target_it->second;
494  }
495 }
496 
497 namespace std {
498 template <>
499 struct hash<std::pair<const RelAlgNode*, int>> {
500  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
501  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
502  return static_cast<int64_t>(*ptr_val) ^ input_col.second;
503  }
504 };
505 } // namespace std
506 
507 namespace {
508 
509 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
510  const size_t which_col) {
511  std::set<std::pair<const RelAlgNode*, int>> work_set;
512  auto walker = node;
513  auto curr_col = which_col;
514  while (true) {
515  work_set.insert(std::make_pair(walker, curr_col));
516  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
517  break;
518  }
519  CHECK_EQ(size_t(1), walker->inputCount());
520  auto only_source = walker->getInput(0);
521  if (auto project = dynamic_cast<const RelProject*>(walker)) {
522  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
523  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
524  if (join_source) {
525  CHECK_EQ(size_t(2), join_source->inputCount());
526  auto lhs = join_source->getInput(0);
527  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
528  join_source->getInput(1) == input->getSourceNode());
529  } else {
530  CHECK_EQ(input->getSourceNode(), only_source);
531  }
532  curr_col = input->getIndex();
533  } else {
534  break;
535  }
536  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
537  if (curr_col >= aggregate->getGroupByCount()) {
538  break;
539  }
540  }
541  walker = only_source;
542  }
543  return work_set;
544 }
545 
546 } // namespace
547 
548 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
549  if (collation_.size() != that.collation_.size()) {
550  return false;
551  }
552 
553  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
554  auto this_sort_key = collation_[i];
555  auto that_sort_key = that.collation_[i];
556  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
557  return false;
558  }
559  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
560  return false;
561  }
562  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
563  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
564  std::vector<std::pair<const RelAlgNode*, int>> intersect;
565  std::set_intersection(this_equiv_keys.begin(),
566  this_equiv_keys.end(),
567  that_equiv_keys.begin(),
568  that_equiv_keys.end(),
569  std::back_inserter(intersect));
570  if (intersect.empty()) {
571  return false;
572  }
573  }
574  return true;
575 }
576 
577 // class RelLogicalUnion methods
578 
580  : RelAlgNode(std::move(inputs)), is_all_(is_all) {
581  if (!g_enable_union) {
582  throw QueryNotSupported(
583  "UNION is not supported yet. There is an experimental enable-union option "
584  "available to enable UNION ALL queries.");
585  }
586  CHECK_EQ(2u, inputs_.size());
587  if (!is_all_) {
588  throw QueryNotSupported("UNION without ALL is not supported yet.");
589  }
590 }
591 
592 size_t RelLogicalUnion::size() const {
593  return inputs_.front()->size();
594 }
595 
596 std::string RelLogicalUnion::toString() const {
597  return cat(::typeName(this), "(is_all(", is_all_, "))");
598 }
599 
600 std::string RelLogicalUnion::getFieldName(const size_t i) const {
601  if (auto const* input = dynamic_cast<RelCompound const*>(inputs_[0].get())) {
602  return input->getFieldName(i);
603  } else if (auto const* input = dynamic_cast<RelProject const*>(inputs_[0].get())) {
604  return input->getFieldName(i);
605  } else if (auto const* input = dynamic_cast<RelLogicalUnion const*>(inputs_[0].get())) {
606  return input->getFieldName(i);
607  } else if (auto const* input = dynamic_cast<RelAggregate const*>(inputs_[0].get())) {
608  return input->getFieldName(i);
609  } else if (auto const* input = dynamic_cast<RelScan const*>(inputs_[0].get())) {
610  return input->getFieldName(i);
611  } else if (auto const* input =
612  dynamic_cast<RelTableFunction const*>(inputs_[0].get())) {
613  return input->getFieldName(i);
614  }
615  UNREACHABLE() << "Unhandled input type: " << inputs_.front()->toString();
616  return {};
617 }
618 
620  std::vector<TargetMetaInfo> const& tmis0 = inputs_[0]->getOutputMetainfo();
621  std::vector<TargetMetaInfo> const& tmis1 = inputs_[1]->getOutputMetainfo();
622  if (tmis0.size() != tmis1.size()) {
623  VLOG(2) << "tmis0.size() = " << tmis0.size() << " != " << tmis1.size()
624  << " = tmis1.size()";
625  throw std::runtime_error("Subqueries of a UNION must have matching data types.");
626  }
627  for (size_t i = 0; i < tmis0.size(); ++i) {
628  if (tmis0[i].get_type_info() != tmis1[i].get_type_info()) {
629  SQLTypeInfo const& ti0 = tmis0[i].get_type_info();
630  SQLTypeInfo const& ti1 = tmis1[i].get_type_info();
631  VLOG(2) << "Types do not match for UNION:\n tmis0[" << i
632  << "].get_type_info().to_string() = " << ti0.to_string() << "\n tmis1["
633  << i << "].get_type_info().to_string() = " << ti1.to_string();
634  if (ti0.is_dict_encoded_string() && ti1.is_dict_encoded_string() &&
635  ti0.get_comp_param() != ti1.get_comp_param()) {
636  throw std::runtime_error(
637  "Taking the UNION of different text-encoded dictionaries is not yet "
638  "supported. This may be resolved by using shared dictionaries. For example, "
639  "by making one a shared dictionary reference to the other.");
640  } else {
641  throw std::runtime_error(
642  "Subqueries of a UNION must have the exact same data types.");
643  }
644  }
645  }
646 }
647 
648 // Rest of code requires a raw pointer, but RexInput object needs to live somewhere.
650  size_t input_idx) const {
651  if (auto const* rex_input_ptr = dynamic_cast<RexInput const*>(rex_scalar)) {
652  RexInput rex_input(*rex_input_ptr);
653  rex_input.setSourceNode(getInput(input_idx));
654  scalar_exprs_.emplace_back(std::make_shared<RexInput const>(std::move(rex_input)));
655  return scalar_exprs_.back().get();
656  }
657  return rex_scalar;
658 }
659 
660 namespace {
661 
662 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
663  const auto& id = field(ra_node, "id");
664  return std::stoi(json_str(id));
665 }
666 
667 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
668  rapidjson::StringBuffer buffer;
669  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
670  node.Accept(writer);
671  return buffer.GetString();
672 }
673 
674 // The parse_* functions below de-serialize expressions as they come from Calcite.
675 // RelAlgDagBuilder will take care of making the representation easy to
676 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
677 
678 std::unique_ptr<RexAbstractInput> parse_abstract_input(
679  const rapidjson::Value& expr) noexcept {
680  const auto& input = field(expr, "input");
681  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
682 }
683 
684 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
685  CHECK(expr.IsObject());
686  const auto& literal = field(expr, "literal");
687  const auto type = to_sql_type(json_str(field(expr, "type")));
688  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
689  const auto scale = json_i64(field(expr, "scale"));
690  const auto precision = json_i64(field(expr, "precision"));
691  const auto type_scale = json_i64(field(expr, "type_scale"));
692  const auto type_precision = json_i64(field(expr, "type_precision"));
693  if (literal.IsNull()) {
694  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
695  }
696  switch (type) {
697  case kDECIMAL:
698  case kINTERVAL_DAY_TIME:
700  case kTIME:
701  case kTIMESTAMP:
702  case kDATE:
703  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
704  type,
705  target_type,
706  scale,
707  precision,
708  type_scale,
709  type_precision));
710  case kDOUBLE: {
711  if (literal.IsDouble()) {
712  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
713  type,
714  target_type,
715  scale,
716  precision,
717  type_scale,
718  type_precision));
719  }
720  CHECK(literal.IsInt64());
721  return std::unique_ptr<RexLiteral>(
722  new RexLiteral(static_cast<double>(json_i64(literal)),
723  type,
724  target_type,
725  scale,
726  precision,
727  type_scale,
728  type_precision));
729  }
730  case kTEXT:
731  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
732  type,
733  target_type,
734  scale,
735  precision,
736  type_scale,
737  type_precision));
738  case kBOOLEAN:
739  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
740  type,
741  target_type,
742  scale,
743  precision,
744  type_scale,
745  type_precision));
746  case kNULLT:
747  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
748  default:
749  CHECK(false);
750  }
751  CHECK(false);
752  return nullptr;
753 }
754 
755 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
757  RelAlgDagBuilder& root_dag_builder);
758 
759 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
760  if (type_obj.IsArray()) {
761  throw QueryNotSupported("Composite types are not currently supported.");
762  }
763  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
764  << json_node_to_string(type_obj);
765  const auto type = to_sql_type(json_str(field(type_obj, "type")));
766  const auto nullable = json_bool(field(type_obj, "nullable"));
767  const auto precision_it = type_obj.FindMember("precision");
768  const int precision =
769  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
770  const auto scale_it = type_obj.FindMember("scale");
771  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
772  SQLTypeInfo ti(type, !nullable);
773  ti.set_precision(precision);
774  ti.set_scale(scale);
775  return ti;
776 }
777 
778 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
779  const rapidjson::Value& arr,
781  RelAlgDagBuilder& root_dag_builder) {
782  std::vector<std::unique_ptr<const RexScalar>> exprs;
783  for (auto it = arr.Begin(); it != arr.End(); ++it) {
784  exprs.emplace_back(parse_scalar_expr(*it, cat, root_dag_builder));
785  }
786  return exprs;
787 }
788 
790  if (name == "ROW_NUMBER") {
792  }
793  if (name == "RANK") {
795  }
796  if (name == "DENSE_RANK") {
798  }
799  if (name == "PERCENT_RANK") {
801  }
802  if (name == "CUME_DIST") {
804  }
805  if (name == "NTILE") {
807  }
808  if (name == "LAG") {
810  }
811  if (name == "LEAD") {
813  }
814  if (name == "FIRST_VALUE") {
816  }
817  if (name == "LAST_VALUE") {
819  }
820  if (name == "AVG") {
822  }
823  if (name == "MIN") {
825  }
826  if (name == "MAX") {
828  }
829  if (name == "SUM") {
831  }
832  if (name == "COUNT") {
834  }
835  if (name == "$SUM0") {
837  }
838  throw std::runtime_error("Unsupported window function: " + name);
839 }
840 
841 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
842  const rapidjson::Value& arr,
844  RelAlgDagBuilder& root_dag_builder) {
845  std::vector<std::unique_ptr<const RexScalar>> exprs;
846  for (auto it = arr.Begin(); it != arr.End(); ++it) {
847  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), cat, root_dag_builder));
848  }
849  return exprs;
850 }
851 
852 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
853  return json_str(field(collation, "direction")) == std::string("DESCENDING")
856 }
857 
858 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
859  return json_str(field(collation, "nulls")) == std::string("FIRST")
862 }
863 
864 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
866  RelAlgDagBuilder& root_dag_builder) {
867  std::vector<SortField> collation;
868  size_t field_idx = 0;
869  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
870  const auto sort_dir = parse_sort_direction(*it);
871  const auto null_pos = parse_nulls_position(*it);
872  collation.emplace_back(field_idx, sort_dir, null_pos);
873  }
874  return collation;
875 }
876 
878  const rapidjson::Value& window_bound_obj,
880  RelAlgDagBuilder& root_dag_builder) {
881  CHECK(window_bound_obj.IsObject());
883  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
884  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
885  window_bound.following = json_bool(field(window_bound_obj, "following"));
886  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
887  const auto& offset_field = field(window_bound_obj, "offset");
888  if (offset_field.IsObject()) {
889  window_bound.offset = parse_scalar_expr(offset_field, cat, root_dag_builder);
890  } else {
891  CHECK(offset_field.IsNull());
892  }
893  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
894  return window_bound;
895 }
896 
897 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
899  RelAlgDagBuilder& root_dag_builder) {
900  const auto& operands = field(expr, "operands");
901  CHECK(operands.IsArray());
902  CHECK_GE(operands.Size(), unsigned(0));
903  const auto& subquery_ast = field(expr, "subquery");
904 
905  RelAlgDagBuilder subquery_dag(root_dag_builder, subquery_ast, cat, nullptr);
906  auto subquery = std::make_shared<RexSubQuery>(subquery_dag.getRootNodeShPtr());
907  root_dag_builder.registerSubquery(subquery);
908  return subquery->deepCopy();
909 }
910 
// Parses a serialized operator expression ("op", "operands", "type" members)
// into a RexOperator, RexFunctionOperator or RexWindowFunctionOperator.
// NOTE(review): this listing skips original line 912 between the two visible
// parameter lines; `cat` is used below but not declared in what is shown, so a
// catalog parameter presumably belongs there -- confirm against the real file.
911 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
913  RelAlgDagBuilder& root_dag_builder) {
914  const auto op_name = json_str(field(expr, "op"));
// PG_ANY / PG_ALL are not passed through to_sql_op; they are treated as
// function calls (kFUNCTION).
915  const bool is_quantifier =
916  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
917  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
918  const auto& operators_json_arr = field(expr, "operands");
919  CHECK(operators_json_arr.IsArray());
920  auto operands = parse_expr_array(operators_json_arr, cat, root_dag_builder);
921  const auto type_it = expr.FindMember("type");
922  CHECK(type_it != expr.MemberEnd());
923  auto ti = parse_type(type_it->value);
// For IN with an attached "subquery" member, the parsed sub-query is appended
// as one more operand after the regular ones.
924  if (op == kIN && expr.HasMember("subquery")) {
925  auto subquery = parse_subquery(expr, cat, root_dag_builder);
926  operands.emplace_back(std::move(subquery));
927  }
// The presence of "partition_keys" is what selects the window function path.
928  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
929  const auto& partition_keys_arr = field(expr, "partition_keys");
930  auto partition_keys = parse_expr_array(partition_keys_arr, cat, root_dag_builder);
931  const auto& order_keys_arr = field(expr, "order_keys");
// The same "order_keys" array is parsed twice: once for the key expressions
// and once for their collation.
932  auto order_keys = parse_window_order_exprs(order_keys_arr, cat, root_dag_builder);
933  const auto collation =
934  parse_window_order_collation(order_keys_arr, cat, root_dag_builder);
935  const auto kind = parse_window_function_kind(op_name);
936  const auto lower_bound =
937  parse_window_bound(field(expr, "lower_bound"), cat, root_dag_builder);
938  const auto upper_bound =
939  parse_window_bound(field(expr, "upper_bound"), cat, root_dag_builder);
940  bool is_rows = json_bool(field(expr, "is_rows"));
// The parsed type is always made nullable for window functions.
941  ti.set_notnull(false);
942  return std::make_unique<RexWindowFunctionOperator>(kind,
943  operands,
944  partition_keys,
945  order_keys,
946  collation,
947  lower_bound,
948  upper_bound,
949  is_rows,
950  ti);
951  }
// Non-window case: kFUNCTION keeps the operator name, anything else keeps the
// SQL operator code.
952  return std::unique_ptr<RexOperator>(op == kFUNCTION
953  ? new RexFunctionOperator(op_name, operands, ti)
954  : new RexOperator(op, operands, ti));
955 }
956 
// Parses a serialized CASE expression into a RexCase. The "operands" array is
// consumed as consecutive (when, then) pairs; an unpaired final operand becomes
// the else expression. With an even operand count no else expression is set.
// NOTE(review): this listing skips original line 958 between the two visible
// parameter lines; `cat` is used below but not declared in what is shown, so a
// catalog parameter presumably belongs there -- confirm against the real file.
957 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr,
959  RelAlgDagBuilder& root_dag_builder) {
960  const auto& operands = field(expr, "operands");
961  CHECK(operands.IsArray());
962  CHECK_GE(operands.Size(), unsigned(2));
963  std::unique_ptr<const RexScalar> else_expr;
964  std::vector<
965  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
966  expr_pair_list;
// The iterator is advanced inside the body (twice per full pair), hence the
// empty increment clause.
967  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
968  auto when_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
// Nothing left to pair with: what was just parsed is the else expression.
969  if (operands_it == operands.End()) {
970  else_expr = std::move(when_expr);
971  break;
972  }
973  auto then_expr = parse_scalar_expr(*operands_it++, cat, root_dag_builder);
974  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
975  }
976  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
977 }
978 
979 std::vector<std::string> strings_from_json_array(
980  const rapidjson::Value& json_str_arr) noexcept {
981  CHECK(json_str_arr.IsArray());
982  std::vector<std::string> fields;
983  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
984  ++json_str_arr_it) {
985  CHECK(json_str_arr_it->IsString());
986  fields.emplace_back(json_str_arr_it->GetString());
987  }
988  return fields;
989 }
990 
991 std::vector<size_t> indices_from_json_array(
992  const rapidjson::Value& json_idx_arr) noexcept {
993  CHECK(json_idx_arr.IsArray());
994  std::vector<size_t> indices;
995  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
996  ++json_idx_arr_it) {
997  CHECK(json_idx_arr_it->IsInt());
998  CHECK_GE(json_idx_arr_it->GetInt(), 0);
999  indices.emplace_back(json_idx_arr_it->GetInt());
1000  }
1001  return indices;
1002 }
1003 
1004 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
1005  const auto agg = to_agg_kind(json_str(field(expr, "agg")));
1006  const auto distinct = json_bool(field(expr, "distinct"));
1007  const auto agg_ti = parse_type(field(expr, "type"));
1008  const auto operands = indices_from_json_array(field(expr, "operands"));
1009  if (operands.size() > 1 && (operands.size() != 2 || agg != kAPPROX_COUNT_DISTINCT)) {
1010  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
1011  }
1012  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
1013 }
1014 
// Entry point for parsing any serialized scalar expression. Dispatches on the
// members present in the JSON object: "input" -> abstract input, "literal" ->
// literal, "op" -> CASE, $SCALAR_QUERY sub-query, or a generic operator.
// Throws QueryNotSupported when none of those members is present.
// NOTE(review): this listing skips original line 1016 between the two visible
// parameter lines; `cat` is used below but not declared in what is shown, so a
// catalog parameter presumably belongs there -- confirm against the real file.
1015 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1017  RelAlgDagBuilder& root_dag_builder) {
// NOTE(review): the IsObject() tests in the conditions below repeat this
// CHECK; they only matter if CHECK can be compiled out -- confirm.
1018  CHECK(expr.IsObject());
1019  if (expr.IsObject() && expr.HasMember("input")) {
1020  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
1021  }
1022  if (expr.IsObject() && expr.HasMember("literal")) {
1023  return std::unique_ptr<const RexScalar>(parse_literal(expr));
1024  }
1025  if (expr.IsObject() && expr.HasMember("op")) {
1026  const auto op_str = json_str(field(expr, "op"));
// CASE and scalar sub-queries have dedicated parsers; every other "op" value
// goes through parse_operator.
1027  if (op_str == std::string("CASE")) {
1028  return std::unique_ptr<const RexScalar>(parse_case(expr, cat, root_dag_builder));
1029  }
1030  if (op_str == std::string("$SCALAR_QUERY")) {
1031  return std::unique_ptr<const RexScalar>(
1032  parse_subquery(expr, cat, root_dag_builder));
1033  }
1034  return std::unique_ptr<const RexScalar>(parse_operator(expr, cat, root_dag_builder));
1035  }
1036  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
1037  " not supported");
1038 }
1039 
1040 JoinType to_join_type(const std::string& join_type_name) {
1041  if (join_type_name == "inner") {
1042  return JoinType::INNER;
1043  }
1044  if (join_type_name == "left") {
1045  return JoinType::LEFT;
1046  }
1047  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
1048 }
1049 
1050 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
1051 
1052 std::unique_ptr<const RexOperator> disambiguate_operator(
1053  const RexOperator* rex_operator,
1054  const RANodeOutput& ra_output) noexcept {
1055  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1056  for (size_t i = 0; i < rex_operator->size(); ++i) {
1057  auto operand = rex_operator->getOperand(i);
1058  if (dynamic_cast<const RexSubQuery*>(operand)) {
1059  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1060  } else {
1061  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
1062  }
1063  }
1064  const auto rex_window_function_operator =
1065  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1066  if (rex_window_function_operator) {
1067  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
1068  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1069  for (const auto& partition_key : partition_keys) {
1070  disambiguated_partition_keys.emplace_back(
1071  disambiguate_rex(partition_key.get(), ra_output));
1072  }
1073  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1074  const auto& order_keys = rex_window_function_operator->getOrderKeys();
1075  for (const auto& order_key : order_keys) {
1076  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
1077  }
1078  return rex_window_function_operator->disambiguatedOperands(
1079  disambiguated_operands,
1080  disambiguated_partition_keys,
1081  disambiguated_order_keys,
1082  rex_window_function_operator->getCollation());
1083  }
1084  return rex_operator->getDisambiguated(disambiguated_operands);
1085 }
1086 
1087 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
1088  const RANodeOutput& ra_output) {
1089  std::vector<
1090  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1091  disambiguated_expr_pair_list;
1092  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1093  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
1094  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
1095  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1096  std::move(disambiguated_then));
1097  }
1098  std::unique_ptr<const RexScalar> disambiguated_else{
1099  disambiguate_rex(rex_case->getElse(), ra_output)};
1100  return std::unique_ptr<const RexCase>(
1101  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1102 }
1103 
1104 // The inputs used by scalar expressions are given as indices in the serialized
1105 // representation of the query. This is hard to navigate; make the relationship
1106 // explicit by creating RexInput expressions which hold a pointer to the source
1107 // relational algebra node and the index relative to the output of that node.
1108 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
1109  const RANodeOutput& ra_output) {
1110  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
1111  if (rex_abstract_input) {
1112  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1113  return std::unique_ptr<const RexInput>(
1114  new RexInput(ra_output[rex_abstract_input->getIndex()]));
1115  }
1116  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
1117  if (rex_operator) {
1118  return disambiguate_operator(rex_operator, ra_output);
1119  }
1120  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
1121  if (rex_case) {
1122  return disambiguate_case(rex_case, ra_output);
1123  }
1124  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar);
1125  CHECK(rex_literal);
1126  return std::unique_ptr<const RexLiteral>(new RexLiteral(*rex_literal));
1127 }
1128 
1129 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
1130  CHECK_EQ(size_t(1), project_node->inputCount());
1131  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1132  for (size_t i = 0; i < project_node->size(); ++i) {
1133  const auto projected_expr = project_node->getProjectAt(i);
1134  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1135  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1136  } else {
1137  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
1138  }
1139  }
1140  project_node->setExpressions(disambiguated_exprs);
1141 }
1142 
// Rebinds every table function input expression of table_func_node against
// `input` and installs the rebound list on the node. Sub-query expressions are
// released from the node and carried over unchanged.
// NOTE(review): this listing skips original line 1143, which should hold the
// start of the signature (function name and the `table_func_node` parameter
// used below); bind_inputs calls this as bind_table_func_to_input(node, input).
// Confirm the exact declaration against the real file.
1144  const RANodeOutput& input) noexcept {
1145  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1146  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1147  const auto target_expr = table_func_node->getTableFuncInputAt(i);
1148  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1149  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1150  } else {
1151  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
1152  }
1153  }
1154  table_func_node->setTableFuncInputs(disambiguated_exprs);
1155 }
1156 
1157 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1158  for (auto ra_node : nodes) {
1159  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
1160  if (filter_node) {
1161  CHECK_EQ(size_t(1), filter_node->inputCount());
1162  auto disambiguated_condition = disambiguate_rex(
1163  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
1164  filter_node->setCondition(disambiguated_condition);
1165  continue;
1166  }
1167  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
1168  if (join_node) {
1169  CHECK_EQ(size_t(2), join_node->inputCount());
1170  auto disambiguated_condition =
1171  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
1172  join_node->setCondition(disambiguated_condition);
1173  continue;
1174  }
1175  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
1176  if (project_node) {
1177  bind_project_to_input(project_node.get(),
1178  get_node_output(project_node->getInput(0)));
1179  continue;
1180  }
1181  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1182  if (table_func_node) {
1183  /*
1184  Collect all inputs from table function input (non-literal)
1185  arguments.
1186  */
1187  RANodeOutput input;
1188  input.reserve(table_func_node->inputCount());
1189  for (size_t i = 0; i < table_func_node->inputCount(); i++) {
1190  auto node_output = get_node_output(table_func_node->getInput(i));
1191  input.insert(input.end(), node_output.begin(), node_output.end());
1192  }
1193  bind_table_func_to_input(table_func_node.get(), input);
1194  }
1195  }
1196 }
1197 
1198 void handleQueryHint(const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1199  RelAlgDagBuilder* dag_builder) noexcept {
1200  QueryHint query_hints;
1201  for (auto node : nodes) {
1202  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1203  if (agg_node) {
1204  if (agg_node->hasHintEnabled("cpu_mode")) {
1205  query_hints.cpu_mode = true;
1206  }
1207  }
1208  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
1209  if (project_node) {
1210  if (project_node->hasHintEnabled("cpu_mode")) {
1211  query_hints.cpu_mode = true;
1212  }
1213  }
1214  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
1215  if (scan_node) {
1216  if (scan_node->hasHintEnabled("cpu_mode")) {
1217  query_hints.cpu_mode = true;
1218  }
1219  }
1220  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
1221  if (join_node) {
1222  if (join_node->hasHintEnabled("cpu_mode")) {
1223  query_hints.cpu_mode = true;
1224  }
1225  }
1226  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
1227  if (compound_node) {
1228  if (compound_node->hasHintEnabled("cpu_mode")) {
1229  query_hints.cpu_mode = true;
1230  }
1231  }
1232  }
1233  if (query_hints.cpu_mode) {
1234  VLOG(1) << "A user forces to run the query on the CPU execution mode";
1235  }
1236  dag_builder->registerQueryHints(query_hints);
1237 }
1238 
1239 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1240  for (auto node : nodes) {
1241  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1242  if (!agg_node || agg_node->getAggExprsCount()) {
1243  continue;
1244  }
1245  CHECK_EQ(size_t(1), node->inputCount());
1246  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1247  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1248  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1249  agg_node->markAsNop();
1250  }
1251  }
1252 }
1253 
1254 namespace {
1255 
1256 std::vector<const Rex*> reproject_targets(
1257  const RelProject* simple_project,
1258  const std::vector<const Rex*>& target_exprs) noexcept {
1259  std::vector<const Rex*> result;
1260  for (size_t i = 0; i < simple_project->size(); ++i) {
1261  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
1262  CHECK(input_rex);
1263  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1264  result.push_back(target_exprs[input_rex->getIndex()]);
1265  }
1266  return result;
1267 }
1268 
// Visitor that rewrites RexInput nodes: an input whose source is node_to_keep_
// is replaced by the result of visiting the scalar source at the input's index;
// any other input is deep-copied.
// NOTE(review): this listing skips original lines 1269-1274 (class header and
// any preceding comment), 1276 (start of the constructor) and 1293 (a member
// declaration, presumably node_to_keep_). create_compound constructs this type
// as RexInputReplacementVisitor(last_node, scalar_sources); confirm the base
// class and the RetType alias against the real file.
1275  public:
1277  const RelAlgNode* node_to_keep,
1278  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1279  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
1280 
1281  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
1282  RetType visitInput(const RexInput* input) const final {
1283  if (input->getSourceNode() == node_to_keep_) {
1284  const auto index = input->getIndex();
1285  CHECK_LT(index, scalar_sources_.size());
1286  return visit(scalar_sources_[index].get());
1287  } else {
1288  return input->deepCopy();
1289  }
1290  }
1291 
1292  private:
// Held by reference: the referenced vector must outlive this visitor.
1294  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
1295 };
1296 
1297 } // namespace
1298 
// Fuses the nodes at the indices listed in `pattern` (2 to 4 entries, visited
// in the given order) into one RelCompound node. Filter, project and aggregate
// nodes contribute their condition, scalar expressions and aggregate
// expressions respectively; those are released from the original nodes. The
// compound node replaces the node at the last pattern index, the slots of the
// other pattern nodes are reset to null, and every remaining node that used the
// replaced node as an input is re-pointed at the compound node.
1299 void create_compound(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1300  const std::vector<size_t>& pattern) noexcept {
1301  CHECK_GE(pattern.size(), size_t(2));
1302  CHECK_LE(pattern.size(), size_t(4));
1303 
1304  std::unique_ptr<const RexScalar> filter_rex;
1305  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1306  size_t groupby_count{0};
1307  std::vector<std::string> fields;
1308  std::vector<const RexAgg*> agg_exprs;
1309  std::vector<const Rex*> target_exprs;
1310  bool first_project{true};
1311  bool is_agg{false};
1312  RelAlgNode* last_node{nullptr};
1313 
1314  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1315 
1316  for (const auto node_idx : pattern) {
1317  const auto ra_node = nodes[node_idx];
1318  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1319  if (ra_filter) {
// At most one filter is allowed per pattern; its condition is taken over.
1320  CHECK(!filter_rex);
1321  filter_rex.reset(ra_filter->getAndReleaseCondition());
1322  CHECK(filter_rex);
1323  last_node = ra_node.get();
1324  continue;
1325  }
1326  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1327  if (ra_project) {
// The most recent project seen supplies both the field names and the
// manipulation target used when the compound node is built below.
1328  fields = ra_project->getFields();
1329  manipulation_target = ra_project;
1330 
1331  if (first_project) {
1332  CHECK_EQ(size_t(1), ra_project->inputCount());
1333  // Rebind the input of the project to the input of the filter itself
1334  // since we know that we'll evaluate the filter on the fly, with no
1335  // intermediate buffer.
1336  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1337  if (filter_input) {
1338  CHECK_EQ(size_t(1), filter_input->inputCount());
1339  bind_project_to_input(ra_project.get(),
1340  get_node_output(filter_input->getInput(0)));
1341  }
// scalar_sources takes ownership of the expressions; target_exprs only holds
// raw pointers into it.
1342  scalar_sources = ra_project->getExpressionsAndRelease();
1343  for (const auto& scalar_expr : scalar_sources) {
1344  target_exprs.push_back(scalar_expr.get());
1345  }
1346  first_project = false;
1347  } else {
1348  if (ra_project->isSimple()) {
1349  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1350  } else {
1351  // TODO(adb): This is essentially a more general case of simple project, we
1352  // could likely merge the two
1353  std::vector<const Rex*> result;
1354  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1355  for (size_t i = 0; i < ra_project->size(); ++i) {
1356  const auto rex = ra_project->getProjectAt(i);
1357  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1358  const auto index = rex_input->getIndex();
1359  CHECK_LT(index, target_exprs.size());
1360  result.push_back(target_exprs[index]);
1361  } else {
// Non-input expressions are rewritten by the visitor and appended to
// scalar_sources, which keeps them alive for the compound node.
1362  scalar_sources.push_back(visitor.visit(rex));
1363  result.push_back(scalar_sources.back().get());
1364  }
1365  }
1366  target_exprs = result;
1367  }
1368  }
1369  last_node = ra_node.get();
1370  continue;
1371  }
1372  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1373  if (ra_aggregate) {
1374  is_agg = true;
1375  fields = ra_aggregate->getFields();
1376  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1377  groupby_count = ra_aggregate->getGroupByCount();
// Start the target list over: group-by keys first (as 1-based RexRef), then
// the aggregate expressions.
1378  decltype(target_exprs){}.swap(target_exprs);
1379  CHECK_LE(groupby_count, scalar_sources.size());
1380  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1381  const auto rex_ref = new RexRef(group_idx + 1);
1382  target_exprs.push_back(rex_ref);
// Ownership of the freshly allocated RexRef passes to scalar_sources here.
1383  scalar_sources.emplace_back(rex_ref);
1384  }
1385  for (const auto rex_agg : agg_exprs) {
1386  target_exprs.push_back(rex_agg);
1387  }
1388  last_node = ra_node.get();
1389  continue;
1390  }
1391  }
1392 
// NOTE(review): manipulation_target is only assigned in the project branch
// above and is dereferenced here without a check; this presumably relies on
// every pattern containing at least one project node -- confirm with callers.
1393  auto compound_node =
1394  std::make_shared<RelCompound>(filter_rex,
1395  target_exprs,
1396  groupby_count,
1397  agg_exprs,
1398  fields,
1399  scalar_sources,
1400  is_agg,
1401  manipulation_target->isUpdateViaSelect(),
1402  manipulation_target->isDeleteViaSelect(),
1403  manipulation_target->isVarlenUpdateRequired(),
1404  manipulation_target->getModifiedTableDescriptor(),
1405  manipulation_target->getTargetColumns());
// Keep the replaced node alive until all users have been re-pointed below.
1406  auto old_node = nodes[pattern.back()];
1407  nodes[pattern.back()] = compound_node;
1408  auto first_node = nodes[pattern.front()];
1409  CHECK_EQ(size_t(1), first_node->inputCount());
// The compound node's input is the input of the first node in the pattern.
1410  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1411  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1412  nodes[pattern[i]].reset();
1413  }
1414  for (auto node : nodes) {
1415  if (!node) {
1416  continue;
1417  }
1418  node->replaceInput(old_node, compound_node);
1419  }
1420 }
1421 
// Iterator over a vector of RA nodes that records which positions have been
// dereferenced and supports two ways of moving forward (see advance()).
// NOTE(review): this listing skips original lines 1438 (the return statement
// of the counting lambda), 1489 (the signature of the dereference operator
// whose body follows allVisited()) and 1498 (a member declaration, presumably
// owner_). Confirm those against the real file.
1422 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1423  using ElementType = std::shared_ptr<RelAlgNode>;
1424  using Super = std::vector<ElementType>::const_iterator;
1425  using Container = std::vector<ElementType>;
1426 
1427  public:
1428  enum class AdvancingMode { DUChain, InOrder };
1429 
// Starts at nodes.begin(). nodeCount_ is initialized from a lambda that counts
// the non-null entries of `nodes` at construction time.
1430  explicit RANodeIterator(const Container& nodes)
1431  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1432  size_t non_zero_count = 0;
1433  for (const auto& node : nodes) {
1434  if (node) {
1435  ++non_zero_count;
1436  }
1437  }
1439  }()) {}
1440 
// Position of the iterator as an index into the owning container.
1441  explicit operator size_t() {
1442  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1443  }
1444 
// Plain increment is disabled; callers must go through advance().
1445  RANodeIterator operator++() = delete;
1446 
// DUChain: move to the single later node that has the current node as an
// input; becomes end() when there is no such node or more than one use.
// InOrder: move to the lowest index that has not been visited yet, or end()
// when every index has been visited.
1447  void advance(AdvancingMode mode) {
1448  Super& super = *this;
1449  switch (mode) {
1450  case AdvancingMode::DUChain: {
1451  size_t use_count = 0;
1452  Super only_use = owner_.end();
1453  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1454  if (!*nodeIt) {
1455  continue;
1456  }
1457  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1458  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1459  ++use_count;
1460  if (1 == use_count) {
1461  only_use = nodeIt;
1462  } else {
// A second use was found: give up and park the iterator at end().
1463  super = owner_.end();
1464  return;
1465  }
1466  }
1467  }
1468  }
1469  super = only_use;
1470  break;
1471  }
1472  case AdvancingMode::InOrder:
1473  for (size_t i = 0; i != owner_.size(); ++i) {
1474  if (!visited_.count(i)) {
1475  super = owner_.begin();
1476  std::advance(super, i);
1477  return;
1478  }
1479  }
1480  super = owner_.end();
1481  break;
1482  default:
1483  CHECK(false);
1484  }
1485  }
1486 
// NOTE(review): visited_ holds indices of dereferenced positions while
// nodeCount_ counts only non-null nodes at construction; the equality
// presumably relies on how callers null out and visit slots -- confirm.
1487  bool allVisited() { return visited_.size() == nodeCount_; }
1488 
// Body of the dereference operator (signature not shown in this listing):
// records the current index as visited, then returns the element.
1490  visited_.insert(size_t(*this));
1491  Super& super = *this;
1492  return *super;
1493  }
1494 
1495  const ElementType* operator->() { return &(operator*()); }
1496 
1497  private:
1499  const size_t nodeCount_;
1500  std::unordered_set<size_t> visited_;
1501 };
1502 
1503 namespace {
1504 
1505 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1506  const size_t index,
1507  const bool first_rex_is_input) {
1508  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1509  if (index == 0 && agg_node->getGroupByCount() > 0) {
1510  return true;
1511  } else {
1512  // Is an aggregated target, only allow the project to be elided if the aggregate
1513  // target is simply passed through (i.e. if the top level expression attached to
1514  // the project node is a RexInput expression)
1515  return first_rex_is_input;
1516  }
1517  }
1518  return first_rex_is_input;
1519 }
1520 
// Boolean visitor used by coalesce_nodes (as CoalesceSecondaryProjectVisitor)
// on project expressions that are not a plain RexInput. Results of sub-
// expressions are combined with logical AND, starting from true; literals,
// sub-queries and refs evaluate to false.
// NOTE(review): this listing skips original lines 1521-1526, which should hold
// the class header (and any preceding comment); confirm the base class against
// the real file.
1527  public:
1528  bool visitInput(const RexInput* input) const final {
1529  // The top level expression node is checked before we apply the visitor. If we get
1530  // here, this input rex is a child of another rex node, and we handle the can be
1531  // coalesced check slightly differently
1532  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1533  }
1534 
1535  bool visitLiteral(const RexLiteral*) const final { return false; }
1536 
1537  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1538 
1539  bool visitRef(const RexRef*) const final { return false; }
1540 
1541  protected:
1542  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1543  return aggregate && next_result;
1544  }
1545 
1546  bool defaultResult() const final { return true; }
1547 };
1548 
1549 // Detect the window function SUM pattern: CASE WHEN COUNT() > 0 THEN SUM ELSE 0
// Concretely, the code below accepts a RexCase with exactly one branch whose
// THEN expression is a window function operator of kind SUM_INTERNAL; the WHEN
// and ELSE expressions are not inspected.
// NOTE(review): this listing skips original line 1550, which should hold the
// signature; callers in this file use is_window_function_sum(rex) as a boolean
// test. Confirm the exact declaration against the real file.
1551  const auto case_operator = dynamic_cast<const RexCase*>(rex);
1552  if (case_operator && case_operator->branchCount() == 1) {
1553  const auto then_window =
1554  dynamic_cast<const RexWindowFunctionOperator*>(case_operator->getThen(0));
1555  if (then_window && then_window->getKind() == SqlWindowFunctionKind::SUM_INTERNAL) {
1556  return true;
1557  }
1558  }
1559  return false;
1560 }
1561 
1562 // Detect both window function operators and window function operators embedded in case
1563 // statements (for null handling)
// Accepted shapes, checked in this order: a RexWindowFunctionOperator; a CAST
// whose single operand is itself accepted (recursive); the SUM pattern
// recognized by is_window_function_sum; and a division whose first operand is
// that SUM pattern and whose second operand is a COUNT window function.
// NOTE(review): this listing skips original line 1564, which should hold the
// signature; callers in this file use is_window_function_operator(rex) as a
// boolean test. Confirm the exact declaration against the real file.
1565  if (dynamic_cast<const RexWindowFunctionOperator*>(rex)) {
1566  return true;
1567  }
1568 
1569  // unwrap from casts, if they exist
1570  const auto rex_cast = dynamic_cast<const RexOperator*>(rex);
1571  if (rex_cast && rex_cast->getOperator() == kCAST) {
1572  CHECK_EQ(rex_cast->size(), size_t(1));
1573  return is_window_function_operator(rex_cast->getOperand(0));
1574  }
1575 
1576  if (is_window_function_sum(rex)) {
1577  return true;
1578  }
1579  // Check for Window Function AVG:
1580  // (CASE WHEN count > 0 THEN sum ELSE 0) / COUNT
1581  const RexOperator* divide_operator = dynamic_cast<const RexOperator*>(rex);
1582  if (divide_operator && divide_operator->getOperator() == kDIVIDE) {
1583  CHECK_EQ(divide_operator->size(), size_t(2));
1584  const auto case_operator =
1585  dynamic_cast<const RexCase*>(divide_operator->getOperand(0));
1586  const auto second_window =
1587  dynamic_cast<const RexWindowFunctionOperator*>(divide_operator->getOperand(1));
1588  if (case_operator && second_window &&
1589  second_window->getKind() == SqlWindowFunctionKind::COUNT) {
1590  if (is_window_function_sum(case_operator)) {
1591  return true;
1592  }
1593  }
1594  }
1595  return false;
1596 }
1597 
1598 } // namespace
1599 
// Scans `nodes` for chains of the form [filter] -> project -> [aggregate] ->
// [simple project] and replaces each chain of two or more nodes with a single
// compound node via create_compound. Filters listed in left_deep_joins are not
// used as the start of a chain.
// The scan is a small state machine: crt_state tracks which node kind was
// accepted last, and crt_pattern accumulates the indices of the accepted nodes.
// NOTE(review): this listing skips original line 1665 inside the Aggregate
// case; the two visible argument lines after it presumably belong to a negated
// input_can_be_coalesced(...) call in an `if` -- confirm against the real file.
1600 void coalesce_nodes(std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1601  const std::vector<const RelAlgNode*>& left_deep_joins) {
1602  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
1603  std::vector<size_t> crt_pattern;
1604  CoalesceState crt_state{CoalesceState::Initial};
1605 
// Back to the initial state with an empty pattern (swap releases the storage).
1606  auto reset_state = [&crt_pattern, &crt_state]() {
1607  crt_state = CoalesceState::Initial;
1608  decltype(crt_pattern)().swap(crt_pattern);
1609  };
1610 
// No increment clause: the iterator only moves through explicit advance()
// calls inside the switch.
1611  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
1612  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
1613  switch (crt_state) {
1614  case CoalesceState::Initial: {
1615  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
1616  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
1617  left_deep_joins.end()) {
1618  crt_pattern.push_back(size_t(nodeIt));
1619  crt_state = CoalesceState::Filter;
1620  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1621  } else if (std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1622  crt_pattern.push_back(size_t(nodeIt));
1623  crt_state = CoalesceState::FirstProject;
1624  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1625  } else {
1626  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1627  }
1628  break;
1629  }
1630  case CoalesceState::Filter: {
1631  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
// A project containing a window function expression is not merged with the
// preceding filter.
1632  if (project_node->hasWindowFunctionExpr()) {
1633  reset_state();
1634  break;
1635  }
1636  crt_pattern.push_back(size_t(nodeIt));
1637  crt_state = CoalesceState::FirstProject;
1638  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1639  } else {
1640  reset_state();
1641  }
1642  break;
1643  }
1644  case CoalesceState::FirstProject: {
1645  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
1646  crt_pattern.push_back(size_t(nodeIt));
1647  crt_state = CoalesceState::Aggregate;
1648  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
1649  } else {
// The chain ends here; a lone project (pattern of size 1) is left as is.
1650  if (crt_pattern.size() >= 2) {
1651  create_compound(nodes, crt_pattern);
1652  }
1653  reset_state();
1654  }
1655  break;
1656  }
1657  case CoalesceState::Aggregate: {
1658  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
1659  // TODO(adb): overloading the simple project terminology again here
1660  bool is_simple_project{true};
1661  for (size_t i = 0; i < project_node->size(); i++) {
1662  const auto scalar_rex = project_node->getProjectAt(i);
1663  // If the top level scalar rex is an input node, we can bypass the visitor
1664  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
1666  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
1667  is_simple_project = false;
1668  break;
1669  }
1670  continue;
1671  }
1672  CoalesceSecondaryProjectVisitor visitor;
1673  if (!visitor.visit(project_node->getProjectAt(i))) {
1674  is_simple_project = false;
1675  break;
1676  }
1677  }
1678  if (is_simple_project) {
1679  crt_pattern.push_back(size_t(nodeIt));
1680  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
1681  }
1682  }
// The chain is closed after the aggregate whether or not a trailing project
// was appended to it.
1683  CHECK_GE(crt_pattern.size(), size_t(2));
1684  create_compound(nodes, crt_pattern);
1685  reset_state();
1686  break;
1687  }
1688  default:
1689  CHECK(false);
1690  }
1691  }
// The loop can end while a chain is still open; finish it here.
1692  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
1693  if (crt_pattern.size() >= 2) {
1694  create_compound(nodes, crt_pattern);
1695  }
1696  CHECK(!crt_pattern.empty());
1697  }
1698 }
1699 
1707 class WindowFunctionDetectionVisitor : public RexVisitor<const RexScalar*> {
1708  protected:
1709  // Detect embedded window function expressions in operators
1710  const RexScalar* visitOperator(const RexOperator* rex_operator) const final {
1711  if (is_window_function_operator(rex_operator)) {
1712  return rex_operator;
1713  }
1714 
1715  const size_t operand_count = rex_operator->size();
1716  for (size_t i = 0; i < operand_count; ++i) {
1717  const auto operand = rex_operator->getOperand(i);
1718  if (is_window_function_operator(operand)) {
1719  // Handle both RexWindowFunctionOperators and window functions built up from
1720  // multiple RexScalar objects (e.g. AVG)
1721  return operand;
1722  }
1723  const auto operandResult = visit(operand);
1724  if (operandResult) {
1725  return operandResult;
1726  }
1727  }
1728 
1729  return defaultResult();
1730  }
1731 
1732  // Detect embedded window function expressions in case statements. Note that this may
1733  // manifest as a nested case statement inside a top level case statement, as some
1734  // window functions (sum, avg) are represented as a case statement. Use the
1735  // is_window_function_operator helper to detect complete window function expressions.
1736  const RexScalar* visitCase(const RexCase* rex_case) const final {
1737  if (is_window_function_operator(rex_case)) {
1738  return rex_case;
1739  }
1740 
1741  auto result = defaultResult();
1742  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1743  const auto when = rex_case->getWhen(i);
1744  result = is_window_function_operator(when) ? when : visit(when);
1745  if (result) {
1746  return result;
1747  }
1748  const auto then = rex_case->getThen(i);
1749  result = is_window_function_operator(then) ? then : visit(then);
1750  if (result) {
1751  return result;
1752  }
1753  }
1754  if (rex_case->getElse()) {
1755  auto else_expr = rex_case->getElse();
1756  result = is_window_function_operator(else_expr) ? else_expr : visit(else_expr);
1757  }
1758  return result;
1759  }
1760 
1761  const RexScalar* aggregateResult(const RexScalar* const& aggregate,
1762  const RexScalar* const& next_result) const final {
1763  // all methods calling aggregate result should be overriden
1764  UNREACHABLE();
1765  return nullptr;
1766  }
1767 
1768  const RexScalar* defaultResult() const final { return nullptr; }
1769 };
1770 
// Visitor that rebuilds an expression while substituting the first window
// function expression it meets with a replacement expression supplied at
// construction. The replacement is moved out on first use, so later window
// function expressions are copied rather than replaced. The destructor CHECKs
// that the replacement has been consumed.
// NOTE(review): this listing skips original lines 1771-1779, which should hold
// the class header (and any preceding comment); confirm the base class and the
// RetType alias against the real file.
1780  public:
1781  RexWindowFuncReplacementVisitor(std::unique_ptr<const RexScalar> replacement_rex)
1782  : replacement_rex_(std::move(replacement_rex)) {}
1783 
1784  ~RexWindowFuncReplacementVisitor() { CHECK(replacement_rex_ == nullptr); }
1785 
1786  protected:
1787  RetType visitOperator(const RexOperator* rex_operator) const final {
1788  if (should_replace_operand(rex_operator)) {
1789  return std::move(replacement_rex_);
1790  }
1791 
1792  const auto rex_window_function_operator =
1793  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1794  if (rex_window_function_operator) {
1795  // Deep copy the embedded window function operator
1796  return visitWindowFunctionOperator(rex_window_function_operator);
1797  }
1798 
1799  const size_t operand_count = rex_operator->size();
1800  std::vector<RetType> new_opnds;
1801  for (size_t i = 0; i < operand_count; ++i) {
1802  const auto operand = rex_operator->getOperand(i);
1803  if (should_replace_operand(operand)) {
1804  new_opnds.push_back(std::move(replacement_rex_));
1805  } else {
1806  new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
1807  }
1808  }
1809  return rex_operator->getDisambiguated(new_opnds);
1810  }
1811 
1812  RetType visitCase(const RexCase* rex_case) const final {
1813  if (should_replace_operand(rex_case)) {
1814  return std::move(replacement_rex_);
1815  }
1816 
1817  std::vector<std::pair<RetType, RetType>> new_pair_list;
1818  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1819  auto when_operand = rex_case->getWhen(i);
1820  auto then_operand = rex_case->getThen(i);
1821  new_pair_list.emplace_back(
1822  should_replace_operand(when_operand) ? std::move(replacement_rex_)
1823  : visit(when_operand),
1824  should_replace_operand(then_operand) ? std::move(replacement_rex_)
1825  : visit(then_operand));
1826  }
// NOTE(review): getElse() is passed on without a null check here -- confirm
// that an ELSE expression is always present at this point.
1827  auto new_else = should_replace_operand(rex_case->getElse())
1828  ? std::move(replacement_rex_)
1829  : visit(rex_case->getElse());
1830  return std::make_unique<RexCase>(new_pair_list, new_else);
1831  }
1832 
1833  private:
// True only while the replacement is still available and rex is a complete
// window function expression.
1834  bool should_replace_operand(const RexScalar* rex) const {
1835  return replacement_rex_ && is_window_function_operator(rex);
1836  }
1837 
// mutable because the const visit methods move the replacement out.
1838  mutable std::unique_ptr<const RexScalar> replacement_rex_;
1839 };
1840 
1851  public:
1852  RexInputBackpropagationVisitor(RelProject* node) : node_(node) { CHECK(node_); }
1853 
1854  protected:
1855  RetType visitInput(const RexInput* rex_input) const final {
1856  if (rex_input->getSourceNode() != node_) {
1857  const auto cur_index = rex_input->getIndex();
1858  auto cur_source_node = rex_input->getSourceNode();
1859  std::string field_name = "";
1860  if (auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
1861  field_name = cur_project_node->getFieldName(cur_index);
1862  }
1863  node_->appendInput(field_name, rex_input->deepCopy());
1864  return std::make_unique<RexInput>(node_, node_->size() - 1);
1865  } else {
1866  return rex_input->deepCopy();
1867  }
1868  }
1869 
1870  private:
1871  mutable RelProject* node_;
1872 };
1873 
1890  std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
  // Work on a std::list copy of the nodes: a new project node is inserted in the
  // middle of the sequence while it is being traversed.
1891  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
1892 
1894  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
1895  const auto node = *node_itr;
1896  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
1897  if (!window_func_project_node) {
1898  continue;
1899  }
1900 
1901  // map scalar expression index in the project node to window function ptr
1902  std::unordered_map<size_t, const RexScalar*> embedded_window_function_expressions;
1903 
1904  // Iterate the target exprs of the project node and check for window function
1905  // expressions. If an embedded expression exists, save it in the
1906  // embedded_window_function_expressions map and split the expression into a window
1907  // function expression and a parent expression in a subsequent project node
1908  for (size_t i = 0; i < window_func_project_node->size(); i++) {
1909  const auto scalar_rex = window_func_project_node->getProjectAt(i);
1910  if (is_window_function_operator(scalar_rex)) {
1911  // top level window function exprs are fine
1912  continue;
1913  }
1914 
1915  if (const auto window_func_rex = visitor.visit(scalar_rex)) {
1916  const auto ret = embedded_window_function_expressions.insert(
1917  std::make_pair(i, window_func_rex));
1918  CHECK(ret.second);
1919  }
1920  }
1921 
1922  if (!embedded_window_function_expressions.empty()) {
1923  std::vector<std::unique_ptr<const RexScalar>> new_scalar_exprs;
1924 
  // Take the expressions out of the project node; the (partly rewritten) list is
  // handed back through setExpressions() below.
1925  auto window_func_scalar_exprs =
1926  window_func_project_node->getExpressionsAndRelease();
1927  for (size_t rex_idx = 0; rex_idx < window_func_scalar_exprs.size(); ++rex_idx) {
1928  const auto embedded_window_func_expr_pair =
1929  embedded_window_function_expressions.find(rex_idx);
1930  if (embedded_window_func_expr_pair ==
1931  embedded_window_function_expressions.end()) {
  // No embedded window function at this index: the new project simply reads
  // column rex_idx of the window function project node.
1932  new_scalar_exprs.emplace_back(
1933  std::make_unique<const RexInput>(window_func_project_node.get(), rex_idx));
1934  } else {
1935  const auto window_func_rex_idx = embedded_window_func_expr_pair->first;
1936  CHECK_LT(window_func_rex_idx, window_func_scalar_exprs.size());
1937 
1938  const auto& window_func_rex = embedded_window_func_expr_pair->second;
1939 
  // Copy the window function first: window_func_rex points into the parent
  // expression, whose slot is overwritten at the end of this branch.
1940  RexDeepCopyVisitor copier;
1941  auto window_func_rex_copy = copier.visit(window_func_rex);
1942 
1943  auto window_func_parent_expr =
1944  window_func_scalar_exprs[window_func_rex_idx].get();
1945 
1946  // Replace window func rex with an input rex
1947  auto window_func_result_input = std::make_unique<const RexInput>(
1948  window_func_project_node.get(), window_func_rex_idx);
1949  RexWindowFuncReplacementVisitor replacer(std::move(window_func_result_input));
1950  auto new_parent_rex = replacer.visit(window_func_parent_expr);
1951 
1952  // Put the parent expr in the new scalar exprs
1953  new_scalar_exprs.emplace_back(std::move(new_parent_rex));
1954 
1955  // Put the window func expr in cur scalar exprs
1956  window_func_scalar_exprs[window_func_rex_idx] = std::move(window_func_rex_copy);
1957  }
1958  }
1959 
1960  CHECK_EQ(window_func_scalar_exprs.size(), new_scalar_exprs.size());
1961  window_func_project_node->setExpressions(window_func_scalar_exprs);
1962 
1963  // Ensure any inputs from the node containing the expression (the "new" node)
1964  // exist on the window function project node, e.g. if we had a binary operation
1965  // involving an aggregate value or column not included in the top level
1966  // projection list.
1967  RexInputBackpropagationVisitor input_visitor(window_func_project_node.get());
1968  for (size_t i = 0; i < new_scalar_exprs.size(); i++) {
1969  if (dynamic_cast<const RexInput*>(new_scalar_exprs[i].get())) {
1970  // ignore top level inputs, these were copied directly from the previous
1971  // node
1972  continue;
1973  }
1974  new_scalar_exprs[i] = input_visitor.visit(new_scalar_exprs[i].get());
1975  }
1976 
1977  // Build the new project node and insert it into the list after the project node
1978  // containing the window function
1979  auto new_project =
1980  std::make_shared<RelProject>(new_scalar_exprs,
1981  window_func_project_node->getFields(),
1982  window_func_project_node);
1983  node_list.insert(std::next(node_itr), new_project);
1984 
1985  // Rebind all the following inputs
  // std::next(node_itr, 2) skips the window function project node itself and the
  // project node that was just inserted right after it.
1986  for (auto rebind_itr = std::next(node_itr, 2); rebind_itr != node_list.end();
1987  rebind_itr++) {
1988  (*rebind_itr)->replaceInput(window_func_project_node, new_project);
1989  }
1990  }
1991  }
  // Write the (possibly extended) node sequence back to the caller's vector.
1992  nodes.assign(node_list.begin(), node_list.end());
1993 }
1994 
1995 using RexInputSet = std::unordered_set<RexInput>;
1996 
/// Visitor whose result type is a RexInputSet: it reports the RexInput nodes it
/// visits, merging partial results by set union.
1997 class RexInputCollector : public RexVisitor<RexInputSet> {
1998  public:
  // A visited input yields a one-element set holding a copy of that input.
1999  RexInputSet visitInput(const RexInput* input) const override {
2000  return RexInputSet{*input};
2001  }
2002 
2003  protected:
2005  const RexInputSet& next_result) const override {
  // Merge step: copy the accumulated set and add every element of next_result.
2006  auto result = aggregate;
2007  result.insert(next_result.begin(), next_result.end());
2008  return result;
2009  }
2010 };
2011 
2019 void add_window_function_pre_project(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
2020  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2021 
2022  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2023  const auto node = *node_itr;
2024  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2025  if (!window_func_project_node) {
2026  continue;
2027  }
2028  if (!window_func_project_node->hasWindowFunctionExpr()) {
2029  // the first projection node in the query plan does not have a window function
2030  // expression -- this step is not requierd.
2031  return;
2032  }
2033 
2034  const auto prev_node_itr = std::prev(node_itr);
2035  const auto prev_node = *prev_node_itr;
2036  CHECK(prev_node);
2037 
2038  RexInputSet inputs;
2039  RexInputCollector input_collector;
2040  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2041  auto new_inputs = input_collector.visit(window_func_project_node->getProjectAt(i));
2042  inputs.insert(new_inputs.begin(), new_inputs.end());
2043  }
2044 
2045  // Note: Technically not required since we are mapping old inputs to new input
2046  // indices, but makes the re-mapping of inputs easier to follow.
2047  std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2048  std::sort(sorted_inputs.begin(),
2049  sorted_inputs.end(),
2050  [](const auto& a, const auto& b) { return a.getIndex() < b.getIndex(); });
2051 
2052  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2053  std::vector<std::string> fields;
2054  std::unordered_map<unsigned, unsigned> old_index_to_new_index;
2055  for (auto& input : sorted_inputs) {
2056  CHECK_EQ(input.getSourceNode(), prev_node.get());
2057  CHECK(old_index_to_new_index
2058  .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2059  .second);
2060  scalar_exprs.emplace_back(input.deepCopy());
2061  fields.emplace_back("");
2062  }
2063 
2064  auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2065  node_list.insert(node_itr, new_project);
2066  window_func_project_node->replaceInput(
2067  prev_node, new_project, old_index_to_new_index);
2068 
2069  break;
2070  }
2071 
2072  nodes.assign(node_list.begin(), node_list.end());
2073 }
2074 
2075 int64_t get_int_literal_field(const rapidjson::Value& obj,
2076  const char field[],
2077  const int64_t default_val) noexcept {
2078  const auto it = obj.FindMember(field);
2079  if (it == obj.MemberEnd()) {
2080  return default_val;
2081  }
2082  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
2083  CHECK_EQ(kDECIMAL, lit->getType());
2084  CHECK_EQ(unsigned(0), lit->getScale());
2085  CHECK_EQ(unsigned(0), lit->getTypeScale());
2086  return lit->getVal<int64_t>();
2087 }
2088 
2089 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
2090  const auto& inputs_json = field(node, "inputs");
2091  CHECK(inputs_json.IsArray() && !inputs_json.Size());
2092 }
2093 
2095  const rapidjson::Value& scan_ra) {
  // The "table" member must be a two-element array; element [1] is passed to the
  // catalog as the table name.
2096  const auto& table_json = field(scan_ra, "table");
2097  CHECK(table_json.IsArray());
2098  CHECK_EQ(unsigned(2), table_json.Size());
2099  const auto td = cat.getMetadataForTable(table_json[1].GetString());
  // The catalog lookup must have produced a descriptor.
2100  CHECK(td);
2101  return td;
2102 }
2103 
2104 std::vector<std::string> getFieldNamesFromScanNode(const rapidjson::Value& scan_ra) {
2105  const auto& fields_json = field(scan_ra, "fieldNames");
2106  return strings_from_json_array(fields_json);
2107 }
2108 
2109 } // namespace
2110 
  // True as soon as one of the scalar expressions is itself a window function
  // operator; only the top-level expressions are inspected here.
2112  for (const auto& expr : scalar_exprs_) {
2113  if (is_window_function_operator(expr.get())) {
2114  return true;
2115  }
2116  }
2117  return false;
2118 }
2119 namespace details {
2120 
2122  public:
2124 
2125  std::vector<std::shared_ptr<RelAlgNode>> run(const rapidjson::Value& rels,
2126  RelAlgDagBuilder& root_dag_builder) {
2127  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
2128  const auto& crt_node = *rels_it;
2129  const auto id = node_id(crt_node);
2130  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2131  CHECK(crt_node.IsObject());
2132  std::shared_ptr<RelAlgNode> ra_node = nullptr;
2133  const auto rel_op = json_str(field(crt_node, "relOp"));
2134  if (rel_op == std::string("EnumerableTableScan") ||
2135  rel_op == std::string("LogicalTableScan")) {
2136  ra_node = dispatchTableScan(crt_node);
2137  } else if (rel_op == std::string("LogicalProject")) {
2138  ra_node = dispatchProject(crt_node, root_dag_builder);
2139  } else if (rel_op == std::string("LogicalFilter")) {
2140  ra_node = dispatchFilter(crt_node, root_dag_builder);
2141  } else if (rel_op == std::string("LogicalAggregate")) {
2142  ra_node = dispatchAggregate(crt_node);
2143  } else if (rel_op == std::string("LogicalJoin")) {
2144  ra_node = dispatchJoin(crt_node, root_dag_builder);
2145  } else if (rel_op == std::string("LogicalSort")) {
2146  ra_node = dispatchSort(crt_node);
2147  } else if (rel_op == std::string("LogicalValues")) {
2148  ra_node = dispatchLogicalValues(crt_node);
2149  } else if (rel_op == std::string("LogicalTableModify")) {
2150  ra_node = dispatchModify(crt_node);
2151  } else if (rel_op == std::string("LogicalTableFunctionScan")) {
2152  ra_node = dispatchTableFunction(crt_node, root_dag_builder);
2153  } else if (rel_op == std::string("LogicalUnion")) {
2154  ra_node = dispatchUnion(crt_node);
2155  } else {
2156  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
2157  }
2158  nodes_.push_back(ra_node);
2159  }
2160 
2161  return std::move(nodes_);
2162  }
2163 
2164  private:
2165  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
2166  check_empty_inputs_field(scan_ra);
2167  CHECK(scan_ra.IsObject());
2168  const auto td = getTableFromScanNode(cat_, scan_ra);
2169  const auto field_names = getFieldNamesFromScanNode(scan_ra);
2170  if (scan_ra.HasMember("hints")) {
2171  auto scan_node = std::make_shared<RelScan>(td, field_names);
2172  getRelAlgHints(scan_ra, scan_node);
2173  return scan_node;
2174  }
2175  return std::make_shared<RelScan>(td, field_names);
2176  }
2177 
2178  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra,
2179  RelAlgDagBuilder& root_dag_builder) {
2180  const auto inputs = getRelAlgInputs(proj_ra);
2181  CHECK_EQ(size_t(1), inputs.size());
2182  const auto& exprs_json = field(proj_ra, "exprs");
2183  CHECK(exprs_json.IsArray());
2184  std::vector<std::unique_ptr<const RexScalar>> exprs;
2185  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
2186  ++exprs_json_it) {
2187  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
2188  }
2189  const auto& fields = field(proj_ra, "fields");
2190  if (proj_ra.HasMember("hints")) {
2191  auto project_node = std::make_shared<RelProject>(
2192  exprs, strings_from_json_array(fields), inputs.front());
2193  getRelAlgHints(proj_ra, project_node);
2194  return project_node;
2195  }
2196  return std::make_shared<RelProject>(
2197  exprs, strings_from_json_array(fields), inputs.front());
2198  }
2199 
2200  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra,
2201  RelAlgDagBuilder& root_dag_builder) {
2202  const auto inputs = getRelAlgInputs(filter_ra);
2203  CHECK_EQ(size_t(1), inputs.size());
2204  const auto id = node_id(filter_ra);
2205  CHECK(id);
2206  auto condition =
2207  parse_scalar_expr(field(filter_ra, "condition"), cat_, root_dag_builder);
2208  return std::make_shared<RelFilter>(condition, inputs.front());
2209  }
2210 
2211  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
2212  const auto inputs = getRelAlgInputs(agg_ra);
2213  CHECK_EQ(size_t(1), inputs.size());
2214  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
2215  const auto group = indices_from_json_array(field(agg_ra, "group"));
2216  for (size_t i = 0; i < group.size(); ++i) {
2217  CHECK_EQ(i, group[i]);
2218  }
2219  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
2220  throw QueryNotSupported("GROUP BY extensions not supported");
2221  }
2222  const auto& aggs_json_arr = field(agg_ra, "aggs");
2223  CHECK(aggs_json_arr.IsArray());
2224  std::vector<std::unique_ptr<const RexAgg>> aggs;
2225  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
2226  aggs_json_arr_it != aggs_json_arr.End();
2227  ++aggs_json_arr_it) {
2228  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
2229  }
2230  if (agg_ra.HasMember("hints")) {
2231  auto agg_node =
2232  std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2233  getRelAlgHints(agg_ra, agg_node);
2234  return agg_node;
2235  }
2236  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2237  }
2238 
2239  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra,
2240  RelAlgDagBuilder& root_dag_builder) {
2241  const auto inputs = getRelAlgInputs(join_ra);
2242  CHECK_EQ(size_t(2), inputs.size());
2243  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
2244  auto filter_rex =
2245  parse_scalar_expr(field(join_ra, "condition"), cat_, root_dag_builder);
2246  if (join_ra.HasMember("hints")) {
2247  auto join_node =
2248  std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2249  getRelAlgHints(join_ra, join_node);
2250  return join_node;
2251  }
2252  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2253  }
2254 
2255  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
2256  const auto inputs = getRelAlgInputs(sort_ra);
2257  CHECK_EQ(size_t(1), inputs.size());
2258  std::vector<SortField> collation;
2259  const auto& collation_arr = field(sort_ra, "collation");
2260  CHECK(collation_arr.IsArray());
2261  for (auto collation_arr_it = collation_arr.Begin();
2262  collation_arr_it != collation_arr.End();
2263  ++collation_arr_it) {
2264  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
2265  const auto sort_dir = parse_sort_direction(*collation_arr_it);
2266  const auto null_pos = parse_nulls_position(*collation_arr_it);
2267  collation.emplace_back(field_idx, sort_dir, null_pos);
2268  }
2269  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
2270  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
2271  auto ret = std::make_shared<RelSort>(
2272  collation, limit > 0 ? limit : 0, offset, inputs.front());
2273  ret->setEmptyResult(limit == 0);
2274  return ret;
2275  }
2276 
  // Builds a RelModify from a table-modify JSON node with a single input. Views
  // are rejected with std::runtime_error.
2277  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
2278  const auto inputs = getRelAlgInputs(logical_modify_ra);
2279  CHECK_EQ(size_t(1), inputs.size());
2280 
  // The table is looked up from the node's "table" member, as for scan nodes.
2281  const auto table_descriptor = getTableFromScanNode(cat_, logical_modify_ra);
2282  if (table_descriptor->isView) {
2283  throw std::runtime_error("UPDATE of a view is unsupported.");
2284  }
2285 
2286  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
2287  std::string op = json_str(field(logical_modify_ra, "operation"));
2288  RelModify::TargetColumnList target_column_list;
2289 
  // Only UPDATE carries a list of target columns; for any other operation the
  // list stays empty.
2290  if (op == "UPDATE") {
2291  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
2292  CHECK(update_columns.IsArray());
2293 
2294  for (auto column_arr_it = update_columns.Begin();
2295  column_arr_it != update_columns.End();
2296  ++column_arr_it) {
2297  target_column_list.push_back(column_arr_it->GetString());
2298  }
2299  }
2300 
2301  auto modify_node = std::make_shared<RelModify>(
2302  cat_, table_descriptor, flattened, op, target_column_list, inputs[0]);
  // Apply the operation-specific changes to the input node; any operation without
  // a dedicated branch is rejected in the default case.
2303  switch (modify_node->getOperation()) {
2305  modify_node->applyDeleteModificationsToInputNode();
2306  break;
2307  }
2309  modify_node->applyUpdateModificationsToInputNode();
2310  break;
2311  }
2312  default:
2313  throw std::runtime_error("Unsupported RelModify operation: " +
2314  json_node_to_string(logical_modify_ra));
2315  }
2316 
2317  return modify_node;
2318  }
2319 
2320  std::shared_ptr<RelTableFunction> dispatchTableFunction(
2321  const rapidjson::Value& table_func_ra,
2322  RelAlgDagBuilder& root_dag_builder) {
2323  const auto inputs = getRelAlgInputs(table_func_ra);
2324  const auto& invocation = field(table_func_ra, "invocation");
2325  CHECK(invocation.IsObject());
2326 
2327  const auto& operands = field(invocation, "operands");
2328  CHECK(operands.IsArray());
2329  CHECK_GE(operands.Size(), unsigned(0));
2330 
2331  std::vector<const Rex*> col_inputs;
2332  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
2333  std::vector<std::string> fields;
2334 
2335  for (auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
2336  ++exprs_json_it) {
2337  const auto& expr_json = *exprs_json_it;
2338  CHECK(expr_json.IsObject());
2339  if (expr_json.HasMember("op")) {
2340  const auto op_str = json_str(field(expr_json, "op"));
2341  if (op_str == "CAST" && expr_json.HasMember("type")) {
2342  const auto& expr_type = field(expr_json, "type");
2343  CHECK(expr_type.IsObject());
2344  CHECK(expr_type.HasMember("type"));
2345  const auto& expr_type_name = json_str(field(expr_type, "type"));
2346  if (expr_type_name == "CURSOR") {
2347  CHECK(expr_json.HasMember("operands"));
2348  const auto& expr_operands = field(expr_json, "operands");
2349  CHECK(expr_operands.IsArray());
2350  if (expr_operands.Size() != 1) {
2351  throw std::runtime_error(
2352  "Table functions currently only support one ResultSet input");
2353  }
2354 
2355  const auto prior_node = prev(table_func_ra);
2356  CHECK(prior_node);
2357  // Forward the values from the prior node as RexInputs
2358  for (size_t i = 0; i < prior_node->size(); i++) {
2359  table_func_inputs.emplace_back(
2360  std::make_unique<RexAbstractInput>(col_inputs.size()));
2361  col_inputs.emplace_back(table_func_inputs.back().get());
2362  }
2363  continue;
2364  }
2365  }
2366  }
2367  table_func_inputs.emplace_back(
2368  parse_scalar_expr(*exprs_json_it, cat_, root_dag_builder));
2369  }
2370 
2371  const auto& op_name = field(invocation, "op");
2372  CHECK(op_name.IsString());
2373 
2374  std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
2375  const auto& row_types = field(table_func_ra, "rowType");
2376  CHECK(row_types.IsArray());
2377  CHECK_GE(row_types.Size(), unsigned(0));
2378  const auto& row_types_array = row_types.GetArray();
2379  for (size_t i = 0; i < row_types_array.Size(); i++) {
2380  // We don't care about the type information in rowType -- replace each output with
2381  // a reference to be resolved later in the translator
2382  table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
2383  fields.emplace_back("");
2384  }
2385 
2386  return std::make_shared<RelTableFunction>(op_name.GetString(),
2387  inputs,
2388  fields,
2389  col_inputs,
2390  table_func_inputs,
2391  table_function_projected_outputs);
2392  }
2393 
2394  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
2395  const rapidjson::Value& logical_values_ra) {
2396  const auto& tuple_type_arr = field(logical_values_ra, "type");
2397  CHECK(tuple_type_arr.IsArray());
2398  std::vector<TargetMetaInfo> tuple_type;
2399  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
2400  tuple_type_arr_it != tuple_type_arr.End();
2401  ++tuple_type_arr_it) {
2402  const auto component_type = parse_type(*tuple_type_arr_it);
2403  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
2404  tuple_type.emplace_back(component_name, component_type);
2405  }
2406  const auto& inputs_arr = field(logical_values_ra, "inputs");
2407  CHECK(inputs_arr.IsArray());
2408  const auto& tuples_arr = field(logical_values_ra, "tuples");
2409  CHECK(tuples_arr.IsArray());
2410 
2411  if (inputs_arr.Size()) {
2412  throw QueryNotSupported("Inputs not supported in logical values yet.");
2413  }
2414 
2415  std::vector<RelLogicalValues::RowValues> values;
2416  if (tuples_arr.Size()) {
2417  for (const auto& row : tuples_arr.GetArray()) {
2418  CHECK(row.IsArray());
2419  const auto values_json = row.GetArray();
2420  if (!values.empty()) {
2421  CHECK_EQ(values[0].size(), values_json.Size());
2422  }
2423  values.emplace_back(RelLogicalValues::RowValues{});
2424  for (const auto& value : values_json) {
2425  CHECK(value.IsObject());
2426  CHECK(value.HasMember("literal"));
2427  values.back().emplace_back(parse_literal(value));
2428  }
2429  }
2430  }
2431 
2432  return std::make_shared<RelLogicalValues>(tuple_type, values);
2433  }
2434 
2435  std::shared_ptr<RelLogicalUnion> dispatchUnion(
2436  const rapidjson::Value& logical_union_ra) {
2437  auto inputs = getRelAlgInputs(logical_union_ra);
2438  auto const& all_type_bool = field(logical_union_ra, "all");
2439  CHECK(all_type_bool.IsBool());
2440  return std::make_shared<RelLogicalUnion>(std::move(inputs), all_type_bool.GetBool());
2441  }
2442 
2443  RelAlgInputs getRelAlgInputs(const rapidjson::Value& node) {
2444  if (node.HasMember("inputs")) {
2445  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
2446  RelAlgInputs ra_inputs;
2447  for (const auto& str_id : str_input_ids) {
2448  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
2449  }
2450  return ra_inputs;
2451  }
2452  return {prev(node)};
2453  }
2454 
/// Splits the leading "key=value" option off `str`.
///
/// @param str  remaining option text; the consumed option and the ", " separator
///             that follows it are removed. When `pos` is npos (last option) the
///             string is emptied.
/// @param pos  position of the ", " separator ending the option, or
///             std::string::npos when the option runs to the end of `str`.
/// @return {key, value}; the value is empty when the option has no '='.
std::pair<std::string, std::string> getKVOptionPair(std::string& str, size_t& pos) {
  // Length of the ", " separator between options.
  const size_t option_separator_len = 2;

  const std::string option = str.substr(0, pos);
  const size_t delim_pos = option.find('=');
  std::string key = option.substr(0, delim_pos);
  // Without the guard, npos + 1 wraps to 0 and the value would repeat the key.
  std::string val =
      delim_pos == std::string::npos ? std::string{} : option.substr(delim_pos + 1);

  if (pos == std::string::npos) {
    // Last option: everything was consumed. (npos + 2 would wrap around to 1 and
    // leave a stale remainder behind.)
    str.clear();
  } else {
    str.erase(0, pos + option_separator_len);
  }
  return {key, val};
}
2464 
2465  HintExplained parseHintString(std::string& hint_string) {
2466  std::string white_space_delim = " ";
2467  int l = hint_string.length();
2468  hint_string = hint_string.erase(0, 1).substr(0, l - 2);
2469  size_t pos = 0;
2470  if ((pos = hint_string.find("options:")) != std::string::npos) {
2471  // need to parse hint options
2472  std::vector<std::string> tokens;
2473  std::string hint_name = hint_string.substr(0, hint_string.find(white_space_delim));
2474  bool kv_list_op = false;
2475  std::string raw_options = hint_string.substr(pos + 8, hint_string.length() - 2);
2476  if (raw_options.find('{') != std::string::npos) {
2477  kv_list_op = true;
2478  } else {
2479  CHECK(raw_options.find('[') != std::string::npos);
2480  }
2481  auto t1 = raw_options.erase(0, 1);
2482  raw_options = t1.substr(0, t1.length() - 1);
2483  std::string op_delim = ", ";
2484  if (kv_list_op) {
2485  // kv options
2486  std::unordered_map<std::string, std::string> kv_options;
2487  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
2488  auto kv_pair = getKVOptionPair(raw_options, pos);
2489  kv_options.emplace(kv_pair.first, kv_pair.second);
2490  }
2491  // handle the last kv pair
2492  auto kv_pair = getKVOptionPair(raw_options, pos);
2493  kv_options.emplace(kv_pair.first, kv_pair.second);
2494  return {hint_name, true, false, true, kv_options};
2495  } else {
2496  std::vector<std::string> list_options;
2497  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
2498  list_options.emplace_back(raw_options.substr(0, pos));
2499  raw_options.erase(0, pos + white_space_delim.length() + 1);
2500  }
2501  // handle the last option
2502  list_options.emplace_back(raw_options.substr(0, pos));
2503  return {hint_name, true, false, false, list_options};
2504  }
2505  } else {
2506  // marker hint: no extra option for this hint
2507  std::string hint_name = hint_string.substr(0, hint_string.find(white_space_delim));
2508  return {hint_name, true, true, false};
2509  }
2510  }
2511 
2512  void getRelAlgHints(const rapidjson::Value& json_node,
2513  std::shared_ptr<RelAlgNode> node) {
2514  std::string hint_explained = json_str(field(json_node, "hints"));
2515  size_t pos = 0;
2516  std::string delim = "|";
2517  std::vector<std::string> hint_list;
2518  while ((pos = hint_explained.find(delim)) != std::string::npos) {
2519  hint_list.emplace_back(hint_explained.substr(0, pos));
2520  hint_explained.erase(0, pos + delim.length());
2521  }
2522  // handling the last one
2523  hint_list.emplace_back(hint_explained.substr(0, pos));
2524 
2525  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
2526  if (agg_node) {
2527  for (std::string& hint : hint_list) {
2528  auto parsed_hint = parseHintString(hint);
2529  agg_node->addHint(parsed_hint);
2530  }
2531  }
2532  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
2533  if (project_node) {
2534  for (std::string& hint : hint_list) {
2535  auto parsed_hint = parseHintString(hint);
2536  project_node->addHint(parsed_hint);
2537  }
2538  }
2539  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
2540  if (scan_node) {
2541  for (std::string& hint : hint_list) {
2542  auto parsed_hint = parseHintString(hint);
2543  scan_node->addHint(parsed_hint);
2544  }
2545  }
2546  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
2547  if (join_node) {
2548  for (std::string& hint : hint_list) {
2549  auto parsed_hint = parseHintString(hint);
2550  join_node->addHint(parsed_hint);
2551  }
2552  }
2553 
2554  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
2555  if (compound_node) {
2556  for (std::string& hint : hint_list) {
2557  auto parsed_hint = parseHintString(hint);
2558  compound_node->addHint(parsed_hint);
2559  }
2560  }
2561  }
2562 
2563  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
2564  const auto id = node_id(crt_node);
2565  CHECK(id);
2566  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2567  return nodes_.back();
2568  }
2569 
2571  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
2572 };
2573 
2574 } // namespace details
2575 
// Builds the DAG from the relational algebra JSON text `query_ra`.
// Throws std::runtime_error when the text is not valid JSON.
2576 RelAlgDagBuilder::RelAlgDagBuilder(const std::string& query_ra,
2578  const RenderInfo* render_info)
2579  : cat_(cat), render_info_(render_info) {
2580  rapidjson::Document query_ast;
2581  query_ast.Parse(query_ra.c_str());
2582  VLOG(2) << "Parsing query RA JSON: " << query_ra;
2583  if (query_ast.HasParseError()) {
  // NOTE(review): the result of this call is discarded; the error code is fetched
  // again below for the log message.
2584  query_ast.GetParseError();
2585  LOG(ERROR) << "Failed to parse RA tree from Calcite (offset "
2586  << query_ast.GetErrorOffset() << "):\n"
2587  << rapidjson::GetParseError_En(query_ast.GetParseError());
2588  VLOG(1) << "Failed to parse query RA: " << query_ra;
2589  throw std::runtime_error(
2590  "Failed to parse relational algebra tree. Possible query syntax error.");
2591  }
2592  CHECK(query_ast.IsObject());
  // This builder is passed to build() as the lead builder.
2594  build(query_ast, *this);
2595 }
2596 
2598  const rapidjson::Value& query_ast,
2600  const RenderInfo* render_info)
2601  : cat_(cat), render_info_(render_info) {
  // The JSON is already parsed; build() receives `root_dag_builder` rather than
  // this object as the lead builder.
2602  build(query_ast, root_dag_builder);
2603 }
2604 
// Turns the "rels" array of `query_ast` into nodes via RelAlgDispatcher and then
// runs the post-processing passes listed below on them.
2605 void RelAlgDagBuilder::build(const rapidjson::Value& query_ast,
2606  RelAlgDagBuilder& lead_dag_builder) {
2607  const auto& rels = field(query_ast, "rels");
2608  CHECK(rels.IsArray());
2609  try {
2610  nodes_ = details::RelAlgDispatcher(cat_).run(rels, lead_dag_builder);
2611  } catch (const QueryNotSupported&) {
  // NOTE(review): this handler only rethrows the exception unchanged.
2612  throw;
2613  }
2614  CHECK(!nodes_.empty());
2616 
2617  if (render_info_) {
2618  // Alter the RA for render. Do this before any flattening/optimizations are done to
2619  // the tree.
2621  }
2622 
2623  handleQueryHint(nodes_, this);
2624  mark_nops(nodes_);
  // Collect the roots of left-deep joins; those whose root is a RelFilter are
  // additionally tracked in filtered_left_deep_joins.
2629  std::vector<const RelAlgNode*> filtered_left_deep_joins;
2630  std::vector<const RelAlgNode*> left_deep_joins;
2631  for (const auto& node : nodes_) {
2632  const auto left_deep_join_root = get_left_deep_join_root(node);
2633  // The filter which starts a left-deep join pattern must not be coalesced
2634  // since it contains (part of) the join condition.
2635  if (left_deep_join_root) {
2636  left_deep_joins.push_back(left_deep_join_root.get());
2637  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
2638  filtered_left_deep_joins.push_back(left_deep_join_root.get());
2639  }
2640  }
2641  }
2642  if (filtered_left_deep_joins.empty()) {
2644  }
2645  eliminate_dead_columns(nodes_);
2646  eliminate_dead_subqueries(subqueries_, nodes_.back().get());
2648  if (g_cluster) {
2650  }
2651  coalesce_nodes(nodes_, left_deep_joins);
  // The last node must be referenced by nodes_ only at this point.
2652  CHECK(nodes_.back().use_count() == 1);
2653  create_left_deep_join(nodes_);
2654 }
2655 
2657  std::function<void(RelAlgNode const*)> const& callback) const {
  // Invoke the callback on every non-null node, in the order the nodes are stored.
2658  for (auto const& node : nodes_) {
2659  if (node) {
2660  callback(node.get());
2661  }
2662  }
2663 }
2664 
  // Forward the reset to every non-null node.
2666  for (auto& node : nodes_) {
2667  if (node) {
2668  node->resetQueryExecutionState();
2669  }
2670  }
2671 }
2672 
2673 // Return tree with depth represented by indentations.
2674 std::string tree_string(const RelAlgNode* ra, const size_t depth) {
2675  std::string result = std::string(2 * depth, ' ') + ra->toString() + '\n';
2676  for (size_t i = 0; i < ra->inputCount(); ++i) {
2677  result += tree_string(ra->getInput(i), depth + 1);
2678  }
2679  return result;
2680 }
2681 
2682 std::string RexSubQuery::toString() const {
2683  return cat(::typeName(this), "(", ::toString(ra_.get()), ")");
2684 }
2685 
// Formats the input as its type name followed by "(node=..., in_index=...)".
2686 std::string RexInput::toString() const {
2687  return cat(::typeName(this),
2688  "(node=",
2689  ::toString(node_),
2690  ", in_index=",
2692  ")");
2693 }
2694 
// Formats the node as its type name followed by a parenthesized list: the filter
// expression (or "null" when there is none) and entries labelled target_exprs,
// agg_exps, fields, scalar_sources and is_agg.
2695 std::string RelCompound::toString() const {
2696  return cat(::typeName(this),
2697  "(",
2698  (filter_expr_ ? filter_expr_->toString() : "null"),
2699  ", target_exprs=",
2701  ", ",
2703  ", agg_exps=",
2705  ", fields=",
2707  ", scalar_sources=",
2709  ", is_agg=",
2711  ")");
2712 }
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
SQLTypes to_sql_type(const std::string &type_name)
void handleQueryHint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDagBuilder *dag_builder) noexcept
std::shared_ptr< const RelAlgNode > getRootNodeShPtr() const
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
const RexScalar * getThen(const size_t idx) const
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
#define CHECK_EQ(x, y)
Definition: Logger.h:205
JoinType to_join_type(const std::string &join_type_name)
const Catalog_Namespace::Catalog & cat_
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexSubQuery > deepCopy() const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
JoinType
Definition: sqldefs.h:107
std::vector< const Rex * > remapTargetPointers(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
std::string cat(Ts &&...args)
std::string toString(const ExtArgumentType &sig_type)
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:97
Definition: sqltypes.h:51
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const RexScalar * getElse() const
RelCompound(std::unique_ptr< const RexScalar > &filter_expr, const std::vector< const Rex * > &target_exprs, const size_t groupby_count, const std::vector< const RexAgg * > &agg_exprs, const std::vector< std::string > &fields, std::vector< std::unique_ptr< const RexScalar >> &scalar_sources, const bool is_agg, bool update_disguised_as_select=false, bool delete_disguised_as_select=false, bool varlen_update_required=false, TableDescriptor const *manipulation_target_table=nullptr, ColumnNameList target_columns=ColumnNameList())
static thread_local unsigned crt_id_
void addHint(const HintExplained &hint_explained)
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
RexScalar const * copyAndRedirectSource(RexScalar const *, size_t input_idx) const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
SQLAgg to_agg_kind(const std::string &agg_name)
std::shared_ptr< RelLogicalUnion > dispatchUnion(const rapidjson::Value &logical_union_ra)
#define LOG(tag)
Definition: Logger.h:188
NullSortedPosition
std::vector< std::string > TargetColumnList
bool g_enable_union
size_t size() const
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const RexScalar * getOperand(const size_t idx) const
std::vector< const Rex * > col_inputs_
bool hasEquivCollationOf(const RelSort &that) const
const std::vector< std::string > fields_
std::string toString() const override
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
void build(const rapidjson::Value &query_ast, RelAlgDagBuilder &root_dag_builder)
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
std::string join(T const &container, std::string const &delim)
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
#define UNREACHABLE()
Definition: Logger.h:241
bool hint_applied_
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
#define CHECK_GE(x, y)
Definition: Logger.h:210
std::vector< std::string > fields_
Definition: sqldefs.h:49
const RexScalar * getWhen(const size_t idx) const
void addHint(const HintExplained &hint_explained)
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
RetType visitOperator(const RexOperator *rex_operator) const final
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
const Catalog_Namespace::Catalog & cat_
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void checkForMatchingMetaInfoTypes() const
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
void addHint(const HintExplained &hint_explained)
std::string to_string(char const *&&v)
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
void addHint(const HintExplained &hint_explained)
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
RexInputSet visitInput(const RexInput *input) const override
std::vector< std::shared_ptr< RexSubQuery > > subqueries_
RexWindowFuncReplacementVisitor(std::unique_ptr< const RexScalar > replacement_rex)
const RenderInfo * render_info_
std::string to_string() const
Definition: sqltypes.h:465
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const RexScalar * visitOperator(const RexOperator *rex_operator) const final
unsigned getIndex() const
SQLOps getOperator() const
std::vector< std::shared_ptr< RelAlgNode > > nodes_
const TableDescriptor * getTableFromScanNode(const Catalog_Namespace::Catalog &cat, const rapidjson::Value &scan_ra)
SortDirection parse_sort_direction(const rapidjson::Value &collation)
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
bool isRenaming() const
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void setIndex(const unsigned in_index) const
SQLOps to_sql_op(const std::string &op_str)
void add_window_function_pre_project(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::unique_ptr< Hints > hints_
void set_scale(int s)
Definition: sqltypes.h:425
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
void * visitInput(const RexInput *rex_input) const override
std::unique_ptr< Hints > hints_
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:54
size_t branchCount() const
std::unordered_set< RexInput > RexInputSet
const RelAlgNode * getInput(const size_t idx) const
RelFilter(std::unique_ptr< const RexScalar > &filter, std::shared_ptr< const RelAlgNode > input)
bool cpu_mode
Definition: QueryHint.h:21
std::vector< std::shared_ptr< RelAlgNode > > nodes_
std::string toString() const override
RelAggregate(const size_t groupby_count, std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
std::unique_ptr< const RexScalar > filter_
bool isSimple() const
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
const size_t groupby_count_
RexRebindReindexInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input, std::unordered_map< unsigned, unsigned > old_to_new_index_map)
unsigned getId() const
const RelAlgNode * node_
HintExplained parseHintString(std::string &hint_string)
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
void eachNode(std::function< void(RelAlgNode const *)> const &) const
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
std::unordered_map< std::string, HintExplained > Hints
SortDirection
const RexScalar * getProjectAt(const size_t idx) const
std::vector< std::unique_ptr< const RexAgg > > copyAggExprs(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
std::vector< std::shared_ptr< const RelAlgNode >> RelAlgInputs
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::unique_ptr< Hints > hints_
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
const ConstRexScalarPtrVector & getPartitionKeys() const
std::string tree_string(const RelAlgNode *ra, const size_t depth)
#define CHECK_LE(x, y)
Definition: Logger.h:208
std::unique_ptr< Hints > hints_
const std::vector< const Rex * > target_exprs_
std::vector< ElementType >::const_iterator Super
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
RelLogicalUnion(RelAlgInputs, bool is_all)
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDagBuilder &root_dag_builder)
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::unique_ptr< const RexScalar > filter_expr_
void setSourceNode(const RelAlgNode *node) const
const RexScalar * aggregateResult(const RexScalar *const &aggregate, const RexScalar *const &next_result) const final
bool hasWindowFunctionExpr() const
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
void setExecutionResult(const std::shared_ptr< const ExecutionResult > result)
RelLogicalValues(const std::vector< TargetMetaInfo > &tuple_type, std::vector< RowValues > &values)
RelAlgDagBuilder()=delete
SqlWindowFunctionKind
Definition: sqldefs.h:82
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:339
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexInputSet aggregateResult(const RexInputSet &aggregate, const RexInputSet &next_result) const override
Definition: sqldefs.h:53
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
RelTableFunction(const std::string &function_name, RelAlgInputs inputs, std::vector< std::string > &fields, std::vector< const Rex * > col_inputs, std::vector< std::unique_ptr< const RexScalar >> &table_func_inputs, std::vector< std::unique_ptr< const RexScalar >> &target_exprs)
const std::vector< std::string > & getFields() const
std::string getFieldName(const size_t i) const
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
bool g_enable_watchdog false
Definition: Execute.cpp:73
virtual std::string toString() const =0
#define CHECK(condition)
Definition: Logger.h:197
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
RelProject(std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
unsigned node_id(const rapidjson::Value &ra_node) noexcept
RANodeOutput get_node_output(const RelAlgNode *ra_node)
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern) noexcept
bool g_cluster
void alterRAForRender(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const RenderInfo &render_info)
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDagBuilder &root_dag_builder)
void getRelAlgHints(const rapidjson::Value &json_node, std::shared_ptr< RelAlgNode > node)
RelAlgDispatcher(const Catalog_Namespace::Catalog &cat)
Common Enum definitions for SQL processing.
bool is_dict_encoded_string() const
Definition: sqltypes.h:518
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::vector< RexInput > RANodeOutput
std::vector< std::unique_ptr< const RexScalar >> RowValues
const size_t inputCount() const
specifies the content in-memory of a row in the table metadata table
string name
Definition: setup.py:35
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDagBuilder &root_dag_builder)
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
size_t size() const override
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
RelAlgInputs getRelAlgInputs(const rapidjson::Value &node)
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDagBuilder &root_dag_builder)
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:139
std::vector< std::unique_ptr< const RexScalar > > copyRexScalars(std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
#define VLOG(n)
Definition: Logger.h:291
RelJoin(std::shared_ptr< const RelAlgNode > lhs, std::shared_ptr< const RelAlgNode > rhs, std::unique_ptr< const RexScalar > &condition, const JoinType join_type)
RelAlgInputs inputs_
void set_precision(int d)
Definition: sqltypes.h:423
std::string toString() const override
std::pair< std::string, std::string > getKVOptionPair(std::string &str, size_t &pos)
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
bool isIdentity() const
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
const bool is_agg_
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins)
const RexScalar * visitCase(const RexCase *rex_case) const final
std::string toString() const override
static void resetRelAlgFirstId() noexcept
std::string json_node_to_string(const rapidjson::Value &node) noexcept