OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgDag.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgDag.h"
19 #include "Catalog/Catalog.h"
21 #include "JsonAccessors.h"
23 #include "RelAlgOptimizer.h"
24 #include "RelLeftDeepInnerJoin.h"
25 #include "RexVisitor.h"
26 #include "Shared/sqldefs.h"
27 
28 #include <rapidjson/error/en.h>
29 #include <rapidjson/error/error.h>
30 #include <rapidjson/stringbuffer.h>
31 #include <rapidjson/writer.h>
32 
33 #include <string>
34 #include <unordered_set>
35 
36 extern bool g_cluster;
37 extern bool g_enable_union;
38 
39 namespace {
40 
41 const unsigned FIRST_RA_NODE_ID = 1;
42 
43 } // namespace
44 
45 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
46 
49 }
50 
52  const std::shared_ptr<const ExecutionResult> result) {
53  auto row_set = result->getRows();
54  CHECK(row_set);
55  CHECK_EQ(size_t(1), row_set->colCount());
56  *(type_.get()) = row_set->getColType(0);
57  (*(result_.get())) = result;
58 }
59 
60 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
61  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
62 }
63 
64 unsigned RexSubQuery::getId() const {
65  return ra_->getId();
66 }
67 
68 namespace {
69 
70 class RexRebindInputsVisitor : public RexVisitor<void*> {
71  public:
72  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
73  : old_input_(old_input), new_input_(new_input) {}
74 
75  virtual ~RexRebindInputsVisitor() = default;
76 
77  void* visitInput(const RexInput* rex_input) const override {
78  const auto old_source = rex_input->getSourceNode();
79  if (old_source == old_input_) {
80  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
81  if (left_deep_join) {
82  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
83  return nullptr;
84  }
85  rex_input->setSourceNode(new_input_);
86  }
87  return nullptr;
88  };
89 
90  private:
91  const RelAlgNode* old_input_;
93 };
94 
95 // Creates an output with n columns.
96 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
97  std::vector<RexInput> outputs;
98  outputs.reserve(n);
99  for (size_t i = 0; i < n; ++i) {
100  outputs.emplace_back(node, i);
101  }
102  return outputs;
103 }
104 
106  public:
108  const RelAlgNode* old_input,
109  const RelAlgNode* new_input,
110  std::unordered_map<unsigned, unsigned> old_to_new_index_map)
111  : RexRebindInputsVisitor(old_input, new_input), mapping_(old_to_new_index_map) {}
112 
113  void* visitInput(const RexInput* rex_input) const override {
114  RexRebindInputsVisitor::visitInput(rex_input);
115  auto mapping_itr = mapping_.find(rex_input->getIndex());
116  CHECK(mapping_itr != mapping_.end());
117  rex_input->setIndex(mapping_itr->second);
118  return nullptr;
119  }
120 
121  private:
122  const std::unordered_map<unsigned, unsigned> mapping_;
123 };
124 
126  : public RexVisitorBase<std::unique_ptr<const RexScalar>> {
127  public:
128  enum class WindowExprType { PARTITION_KEY, ORDER_KEY };
130  std::shared_ptr<RelProject> new_project,
131  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs_for_new_project,
132  std::vector<std::string>& fields_for_new_project,
133  std::unordered_map<size_t, size_t>& expr_offset_cache)
134  : new_project_(new_project)
135  , scalar_exprs_for_new_project_(scalar_exprs_for_new_project)
136  , fields_for_new_project_(fields_for_new_project)
137  , expr_offset_cache_(expr_offset_cache)
138  , found_case_expr_window_operand_(false)
139  , has_partition_expr_(false) {}
140 
141  size_t pushDownExpressionImpl(const RexScalar* expr) const {
142  auto hash = expr->toHash();
143  auto it = expr_offset_cache_.find(hash);
144  auto new_offset = -1;
145  if (it == expr_offset_cache_.end()) {
146  CHECK(
147  expr_offset_cache_.emplace(hash, scalar_exprs_for_new_project_.size()).second);
148  new_offset = scalar_exprs_for_new_project_.size();
149  fields_for_new_project_.emplace_back("");
150  scalar_exprs_for_new_project_.emplace_back(deep_copier_.visit(expr));
151  } else {
152  // we already pushed down the same expression, so reuse it
153  new_offset = it->second;
154  }
155  return new_offset;
156  }
157 
159  size_t expr_offset) const {
160  // given window expr offset and inner expr's offset,
161  // return a (push-downed) expression's offset from the new projection node
162  switch (type) {
163  case WindowExprType::PARTITION_KEY: {
164  auto it = pushed_down_partition_key_offset_.find(expr_offset);
165  CHECK(it != pushed_down_partition_key_offset_.end());
166  return it->second;
167  }
168  case WindowExprType::ORDER_KEY: {
169  auto it = pushed_down_order_key_offset_.find(expr_offset);
170  CHECK(it != pushed_down_order_key_offset_.end());
171  return it->second;
172  }
173  default:
174  UNREACHABLE();
175  return std::nullopt;
176  }
177  }
178 
180  const RexWindowFunctionOperator* window_expr) const {
181  // step 1. push "all" target expressions of the window_func_project_node down to the
182  // new child projection
183  // each window expr is a separate target expression of the projection node
184  // and they have their own inner expression related to partition / order clauses
185  // so we capture their offsets to correctly rebind their input
186  pushed_down_window_operands_offset_.clear();
187  pushed_down_partition_key_offset_.clear();
188  pushed_down_order_key_offset_.clear();
189  for (size_t offset = 0; offset < window_expr->size(); ++offset) {
190  auto expr = window_expr->getOperand(offset);
191  auto literal_expr = dynamic_cast<const RexLiteral*>(expr);
192  auto case_expr = dynamic_cast<const RexCase*>(expr);
193  if (case_expr) {
194  // when columnar output is enabled, pushdown case expr can incur an issue
195  // during columnarization, so we record this and try to force rowwise-output
196  // until we fix the issue
197  // todo (yoonmin) : relax this
198  found_case_expr_window_operand_ = true;
199  }
200  if (!literal_expr) {
201  auto new_offset = pushDownExpressionImpl(expr);
202  pushed_down_window_operands_offset_.emplace(offset, new_offset);
203  }
204  }
205  size_t offset = 0;
206  for (const auto& partition_key : window_expr->getPartitionKeys()) {
207  auto new_offset = pushDownExpressionImpl(partition_key.get());
208  pushed_down_partition_key_offset_.emplace(offset, new_offset);
209  ++offset;
210  }
211  has_partition_expr_ = !window_expr->getPartitionKeys().empty();
212  offset = 0;
213  for (const auto& order_key : window_expr->getOrderKeys()) {
214  auto new_offset = pushDownExpressionImpl(order_key.get());
215  pushed_down_order_key_offset_.emplace(offset, new_offset);
216  ++offset;
217  }
218 
219  // step 2. rebind projected targets of the window_func_project_node with the new
220  // project node
221  std::vector<std::unique_ptr<const RexScalar>> window_operands;
222  auto deconst_window_expr = const_cast<RexWindowFunctionOperator*>(window_expr);
223  for (size_t idx = 0; idx < window_expr->size(); ++idx) {
224  auto it = pushed_down_window_operands_offset_.find(idx);
225  if (it != pushed_down_window_operands_offset_.end()) {
226  auto new_input = std::make_unique<const RexInput>(new_project_.get(), it->second);
227  CHECK(new_input);
228  window_operands.emplace_back(std::move(new_input));
229  } else {
230  auto copied_expr = deep_copier_.visit(window_expr->getOperand(idx));
231  window_operands.emplace_back(std::move(copied_expr));
232  }
233  }
234  deconst_window_expr->replaceOperands(std::move(window_operands));
235 
236  for (size_t idx = 0; idx < window_expr->getPartitionKeys().size(); ++idx) {
237  auto new_offset = getOffsetForPushedDownExpr(WindowExprType::PARTITION_KEY, idx);
238  CHECK(new_offset);
239  auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
240  CHECK(new_input);
241  deconst_window_expr->replacePartitionKey(idx, std::move(new_input));
242  }
243 
244  for (size_t idx = 0; idx < window_expr->getOrderKeys().size(); ++idx) {
245  auto new_offset = getOffsetForPushedDownExpr(WindowExprType::ORDER_KEY, idx);
246  CHECK(new_offset);
247  auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
248  CHECK(new_input);
249  deconst_window_expr->replaceOrderKey(idx, std::move(new_input));
250  }
251  }
252 
253  std::unique_ptr<const RexScalar> visitInput(const RexInput* rex_input) const override {
254  auto new_offset = pushDownExpressionImpl(rex_input);
255  CHECK_LT(new_offset, scalar_exprs_for_new_project_.size());
256  auto hash = rex_input->toHash();
257  auto it = expr_offset_cache_.find(hash);
258  CHECK(it != expr_offset_cache_.end());
259  CHECK_EQ(new_offset, it->second);
260  auto new_input = std::make_unique<const RexInput>(new_project_.get(), new_offset);
261  CHECK(new_input);
262  return new_input;
263  }
264 
265  std::unique_ptr<const RexScalar> visitLiteral(
266  const RexLiteral* rex_literal) const override {
267  return deep_copier_.visit(rex_literal);
268  }
269 
270  std::unique_ptr<const RexScalar> visitRef(const RexRef* rex_ref) const override {
271  return deep_copier_.visit(rex_ref);
272  }
273 
274  std::unique_ptr<const RexScalar> visitSubQuery(
275  const RexSubQuery* rex_subquery) const override {
276  return deep_copier_.visit(rex_subquery);
277  }
278 
279  std::unique_ptr<const RexScalar> visitCase(const RexCase* rex_case) const override {
280  std::vector<
281  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
282  new_expr_pair_list;
283  std::unique_ptr<const RexScalar> new_else_expr;
284  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
285  const auto when = rex_case->getWhen(i);
286  auto new_when = PushDownGenericExpressionInWindowFunction::visit(when);
287  const auto then = rex_case->getThen(i);
288  auto new_then = PushDownGenericExpressionInWindowFunction::visit(then);
289  new_expr_pair_list.emplace_back(std::move(new_when), std::move(new_then));
290  }
291  if (rex_case->getElse()) {
292  new_else_expr = deep_copier_.visit(rex_case->getElse());
293  }
294  auto new_case = std::make_unique<const RexCase>(new_expr_pair_list, new_else_expr);
295  return new_case;
296  }
297 
298  std::unique_ptr<const RexScalar> visitOperator(
299  const RexOperator* rex_operator) const override {
300  const auto rex_window_func_operator =
301  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
302  if (rex_window_func_operator) {
303  pushDownExpressionInWindowFunction(rex_window_func_operator);
304  return deep_copier_.visit(rex_operator);
305  } else {
306  std::unique_ptr<const RexOperator> new_operator{nullptr};
307  std::vector<std::unique_ptr<const RexScalar>> new_operands;
308  for (size_t i = 0; i < rex_operator->size(); ++i) {
309  const auto operand = rex_operator->getOperand(i);
310  auto new_operand = PushDownGenericExpressionInWindowFunction::visit(operand);
311  new_operands.emplace_back(std::move(new_operand));
312  }
313  if (auto function_op = dynamic_cast<const RexFunctionOperator*>(rex_operator)) {
314  new_operator = std::make_unique<const RexFunctionOperator>(
315  function_op->getName(), new_operands, rex_operator->getType());
316  } else {
317  new_operator = std::make_unique<const RexOperator>(
318  rex_operator->getOperator(), new_operands, rex_operator->getType());
319  }
320  CHECK(new_operator);
321  return new_operator;
322  }
323  }
324 
325  bool hasCaseExprAsWindowOperand() { return found_case_expr_window_operand_; }
326 
327  bool hasPartitionExpression() { return has_partition_expr_; }
328 
329  private:
330  std::unique_ptr<const RexScalar> defaultResult() const override { return nullptr; }
331 
332  std::shared_ptr<RelProject> new_project_;
333  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs_for_new_project_;
334  std::vector<std::string>& fields_for_new_project_;
335  std::unordered_map<size_t, size_t>& expr_offset_cache_;
337  mutable bool has_partition_expr_;
338  mutable std::unordered_map<size_t, size_t> pushed_down_window_operands_offset_;
339  mutable std::unordered_map<size_t, size_t> pushed_down_partition_key_offset_;
340  mutable std::unordered_map<size_t, size_t> pushed_down_order_key_offset_;
342 };
343 
344 } // namespace
345 
347  std::shared_ptr<const RelAlgNode> old_input,
348  std::shared_ptr<const RelAlgNode> input,
349  std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map) {
350  RelAlgNode::replaceInput(old_input, input);
351  std::unique_ptr<RexRebindInputsVisitor> rebind_inputs;
352  if (old_to_new_index_map) {
353  rebind_inputs = std::make_unique<RexRebindReindexInputsVisitor>(
354  old_input.get(), input.get(), *old_to_new_index_map);
355  } else {
356  rebind_inputs =
357  std::make_unique<RexRebindInputsVisitor>(old_input.get(), input.get());
358  }
359  CHECK(rebind_inputs);
360  for (const auto& scalar_expr : scalar_exprs_) {
361  rebind_inputs->visit(scalar_expr.get());
362  }
363 }
364 
365 void RelProject::appendInput(std::string new_field_name,
366  std::unique_ptr<const RexScalar> new_input) {
367  fields_.emplace_back(std::move(new_field_name));
368  scalar_exprs_.emplace_back(std::move(new_input));
369 }
370 
372  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
373  if (scan_node) {
374  // Scan node has no inputs, output contains all columns in the table.
375  CHECK_EQ(size_t(0), scan_node->inputCount());
376  return n_outputs(scan_node, scan_node->size());
377  }
378  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
379  if (project_node) {
380  // Project output count doesn't depend on the input
381  CHECK_EQ(size_t(1), project_node->inputCount());
382  return n_outputs(project_node, project_node->size());
383  }
384  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
385  if (filter_node) {
386  // Filter preserves shape
387  CHECK_EQ(size_t(1), filter_node->inputCount());
388  const auto prev_out = get_node_output(filter_node->getInput(0));
389  return n_outputs(filter_node, prev_out.size());
390  }
391  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
392  if (aggregate_node) {
393  // Aggregate output count doesn't depend on the input
394  CHECK_EQ(size_t(1), aggregate_node->inputCount());
395  return n_outputs(aggregate_node, aggregate_node->size());
396  }
397  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
398  if (compound_node) {
399  // Compound output count doesn't depend on the input
400  CHECK_EQ(size_t(1), compound_node->inputCount());
401  return n_outputs(compound_node, compound_node->size());
402  }
403  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
404  if (join_node) {
405  // Join concatenates the outputs from the inputs and the output
406  // directly references the nodes in the input.
407  CHECK_EQ(size_t(2), join_node->inputCount());
408  auto lhs_out =
409  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
410  const auto rhs_out =
411  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
412  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
413  return lhs_out;
414  }
415  const auto table_func_node = dynamic_cast<const RelTableFunction*>(ra_node);
416  if (table_func_node) {
417  // Table Function output count doesn't depend on the input
418  return n_outputs(table_func_node, table_func_node->size());
419  }
420  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
421  if (sort_node) {
422  // Sort preserves shape
423  CHECK_EQ(size_t(1), sort_node->inputCount());
424  const auto prev_out = get_node_output(sort_node->getInput(0));
425  return n_outputs(sort_node, prev_out.size());
426  }
427  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
428  if (logical_values_node) {
429  CHECK_EQ(size_t(0), logical_values_node->inputCount());
430  return n_outputs(logical_values_node, logical_values_node->size());
431  }
432  const auto logical_union_node = dynamic_cast<const RelLogicalUnion*>(ra_node);
433  if (logical_union_node) {
434  return n_outputs(logical_union_node, logical_union_node->size());
435  }
436  LOG(FATAL) << "Unhandled ra_node type: " << ::toString(ra_node);
437  return {};
438 }
439 
441  if (!isSimple()) {
442  return false;
443  }
444  CHECK_EQ(size_t(1), inputCount());
445  const auto source = getInput(0);
446  if (dynamic_cast<const RelJoin*>(source)) {
447  return false;
448  }
449  const auto source_shape = get_node_output(source);
450  if (source_shape.size() != scalar_exprs_.size()) {
451  return false;
452  }
453  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
454  const auto& scalar_expr = scalar_exprs_[i];
455  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
456  CHECK(input);
457  CHECK_EQ(source, input->getSourceNode());
458  // We should add the additional check that input->getIndex() !=
459  // source_shape[i].getIndex(), but Calcite doesn't generate the right
460  // Sort-Project-Sort sequence when joins are involved.
461  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
462  return false;
463  }
464  }
465  return true;
466 }
467 
468 namespace {
469 
470 bool isRenamedInput(const RelAlgNode* node,
471  const size_t index,
472  const std::string& new_name) {
473  CHECK_LT(index, node->size());
474  if (auto join = dynamic_cast<const RelJoin*>(node)) {
475  CHECK_EQ(size_t(2), join->inputCount());
476  const auto lhs_size = join->getInput(0)->size();
477  if (index < lhs_size) {
478  return isRenamedInput(join->getInput(0), index, new_name);
479  }
480  CHECK_GE(index, lhs_size);
481  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
482  }
483 
484  if (auto scan = dynamic_cast<const RelScan*>(node)) {
485  return new_name != scan->getFieldName(index);
486  }
487 
488  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
489  return new_name != aggregate->getFieldName(index);
490  }
491 
492  if (auto project = dynamic_cast<const RelProject*>(node)) {
493  return new_name != project->getFieldName(index);
494  }
495 
496  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
497  return new_name != table_func->getFieldName(index);
498  }
499 
500  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
501  const auto& tuple_type = logical_values->getTupleType();
502  CHECK_LT(index, tuple_type.size());
503  return new_name != tuple_type[index].get_resname();
504  }
505 
506  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
507  dynamic_cast<const RelLogicalUnion*>(node));
508  return isRenamedInput(node->getInput(0), index, new_name);
509 }
510 
511 } // namespace
512 
514  if (!isSimple()) {
515  return false;
516  }
517  CHECK_EQ(scalar_exprs_.size(), fields_.size());
518  for (size_t i = 0; i < fields_.size(); ++i) {
519  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
520  CHECK(rex_in);
521  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
522  return true;
523  }
524  }
525  return false;
526 }
527 
528 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
529  std::shared_ptr<const RelAlgNode> input) {
530  RelAlgNode::replaceInput(old_input, input);
531  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
532  if (condition_) {
533  rebind_inputs.visit(condition_.get());
534  }
535 }
536 
537 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
538  std::shared_ptr<const RelAlgNode> input) {
539  RelAlgNode::replaceInput(old_input, input);
540  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
541  rebind_inputs.visit(filter_.get());
542 }
543 
544 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
545  std::shared_ptr<const RelAlgNode> input) {
546  RelAlgNode::replaceInput(old_input, input);
547  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
548  for (const auto& scalar_source : scalar_sources_) {
549  rebind_inputs.visit(scalar_source.get());
550  }
551  if (filter_expr_) {
552  rebind_inputs.visit(filter_expr_.get());
553  }
554 }
555 
557  : RelAlgNode(rhs)
559  , fields_(rhs.fields_)
560  , hint_applied_(false)
561  , hints_(std::make_unique<Hints>())
562  , has_pushed_down_window_expr_(rhs.has_pushed_down_window_expr_) {
563  RexDeepCopyVisitor copier;
564  for (auto const& expr : rhs.scalar_exprs_) {
565  scalar_exprs_.push_back(copier.visit(expr.get()));
566  }
567  if (rhs.hint_applied_) {
568  for (auto const& kv : *rhs.hints_) {
569  addHint(kv.second);
570  }
571  }
572 }
573 
575  : RelAlgNode(rhs)
576  , tuple_type_(rhs.tuple_type_)
577  , values_(RexDeepCopyVisitor::copy(rhs.values_)) {}
578 
580  RexDeepCopyVisitor copier;
581  filter_ = copier.visit(rhs.filter_.get());
582 }
583 
585  : RelAlgNode(rhs)
586  , groupby_count_(rhs.groupby_count_)
587  , fields_(rhs.fields_)
588  , hint_applied_(false)
589  , hints_(std::make_unique<Hints>()) {
590  agg_exprs_.reserve(rhs.agg_exprs_.size());
591  for (auto const& agg : rhs.agg_exprs_) {
592  agg_exprs_.push_back(agg->deepCopy());
593  }
594  if (rhs.hint_applied_) {
595  for (auto const& kv : *rhs.hints_) {
596  addHint(kv.second);
597  }
598  }
599 }
600 
602  : RelAlgNode(rhs)
603  , join_type_(rhs.join_type_)
604  , hint_applied_(false)
605  , hints_(std::make_unique<Hints>()) {
606  RexDeepCopyVisitor copier;
607  condition_ = copier.visit(rhs.condition_.get());
608  if (rhs.hint_applied_) {
609  for (auto const& kv : *rhs.hints_) {
610  addHint(kv.second);
611  }
612  }
613 }
614 
615 namespace {
616 
617 std::vector<std::unique_ptr<const RexAgg>> copyAggExprs(
618  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs) {
619  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
620  agg_exprs_copy.reserve(agg_exprs.size());
621  for (auto const& agg_expr : agg_exprs) {
622  agg_exprs_copy.push_back(agg_expr->deepCopy());
623  }
624  return agg_exprs_copy;
625 }
626 
627 std::vector<std::unique_ptr<const RexScalar>> copyRexScalars(
628  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources) {
629  std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
630  scalar_sources_copy.reserve(scalar_sources.size());
631  RexDeepCopyVisitor copier;
632  for (auto const& scalar_source : scalar_sources) {
633  scalar_sources_copy.push_back(copier.visit(scalar_source.get()));
634  }
635  return scalar_sources_copy;
636 }
637 
638 std::vector<const Rex*> remapTargetPointers(
639  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_new,
640  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_new,
641  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_old,
642  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_old,
643  std::vector<const Rex*> const& target_exprs_old) {
644  std::vector<const Rex*> target_exprs(target_exprs_old);
645  std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
646  for (size_t i = 0; i < agg_exprs_new.size(); ++i) {
647  old_to_new_target.emplace(agg_exprs_old[i].get(), agg_exprs_new[i].get());
648  }
649  for (size_t i = 0; i < scalar_sources_new.size(); ++i) {
650  old_to_new_target.emplace(scalar_sources_old[i].get(), scalar_sources_new[i].get());
651  }
652  for (auto& target : target_exprs) {
653  auto target_it = old_to_new_target.find(target);
654  CHECK(target_it != old_to_new_target.end());
655  target = target_it->second;
656  }
657  return target_exprs;
658 }
659 
660 } // namespace
661 
663  : RelAlgNode(rhs)
665  , groupby_count_(rhs.groupby_count_)
666  , agg_exprs_(copyAggExprs(rhs.agg_exprs_))
667  , fields_(rhs.fields_)
668  , is_agg_(rhs.is_agg_)
669  , scalar_sources_(copyRexScalars(rhs.scalar_sources_))
670  , target_exprs_(remapTargetPointers(agg_exprs_,
671  scalar_sources_,
672  rhs.agg_exprs_,
673  rhs.scalar_sources_,
674  rhs.target_exprs_))
675  , hint_applied_(false)
676  , hints_(std::make_unique<Hints>()) {
677  RexDeepCopyVisitor copier;
678  filter_expr_ = rhs.filter_expr_ ? copier.visit(rhs.filter_expr_.get()) : nullptr;
679  if (rhs.hint_applied_) {
680  for (auto const& kv : *rhs.hints_) {
681  addHint(kv.second);
682  }
683  }
684 }
685 
686 void RelTableFunction::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
687  std::shared_ptr<const RelAlgNode> input) {
688  RelAlgNode::replaceInput(old_input, input);
689  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
690  for (const auto& target_expr : target_exprs_) {
691  rebind_inputs.visit(target_expr.get());
692  }
693  for (const auto& func_input : table_func_inputs_) {
694  rebind_inputs.visit(func_input.get());
695  }
696 }
697 
699  int32_t literal_args = 0;
700  for (const auto& arg : table_func_inputs_) {
701  const auto rex_literal = dynamic_cast<const RexLiteral*>(arg.get());
702  if (rex_literal) {
703  literal_args += 1;
704  }
705  }
706  return literal_args;
707 }
708 
709 namespace {
710 
712  std::vector<const Rex*>& column_inputs,
713  const std::vector<std::unique_ptr<const RexScalar>>& old_table_func_inputs,
714  const std::vector<std::unique_ptr<const RexScalar>>& new_table_func_inputs) {
715  CHECK_EQ(old_table_func_inputs.size(), new_table_func_inputs.size());
716  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
717  for (size_t i = 0; i < old_table_func_inputs.size(); ++i) {
718  old_to_new_input.emplace(old_table_func_inputs[i].get(),
719  new_table_func_inputs[i].get());
720  }
721  for (auto& target : column_inputs) {
722  auto target_it = old_to_new_input.find(target);
723  CHECK(target_it != old_to_new_input.end());
724  target = target_it->second;
725  }
726 }
727 
728 } // namespace
729 
731  std::vector<std::unique_ptr<const RexScalar>>&& exprs) {
732  // this should only be called in the event of disambiguating inputs, which means the
733  // RexScalar types in the exprs argument should be the exact same as those previously.
734  // So we can then assume all col_inputs_ would be in the same place. So just re-adjust
735  // the pointers.
737  table_func_inputs_ = std::move(exprs);
738 }
739 
741  : RelAlgNode(rhs)
742  , function_name_(rhs.function_name_)
743  , fields_(rhs.fields_)
744  , col_inputs_(rhs.col_inputs_)
745  , table_func_inputs_(copyRexScalars(rhs.table_func_inputs_))
746  , target_exprs_(copyRexScalars(rhs.target_exprs_)) {
748 }
749 
750 namespace std {
751 template <>
752 struct hash<std::pair<const RelAlgNode*, int>> {
753  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
754  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
755  auto h = static_cast<size_t>(*ptr_val);
756  boost::hash_combine(h, input_col.second);
757  return h;
758  }
759 };
760 } // namespace std
761 
762 namespace {
763 
764 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
765  const size_t which_col) {
766  std::set<std::pair<const RelAlgNode*, int>> work_set;
767  auto walker = node;
768  auto curr_col = which_col;
769  while (true) {
770  work_set.insert(std::make_pair(walker, curr_col));
771  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
772  break;
773  }
774  CHECK_EQ(size_t(1), walker->inputCount());
775  auto only_source = walker->getInput(0);
776  if (auto project = dynamic_cast<const RelProject*>(walker)) {
777  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
778  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
779  if (join_source) {
780  CHECK_EQ(size_t(2), join_source->inputCount());
781  auto lhs = join_source->getInput(0);
782  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
783  join_source->getInput(1) == input->getSourceNode());
784  } else {
785  CHECK_EQ(input->getSourceNode(), only_source);
786  }
787  curr_col = input->getIndex();
788  } else {
789  break;
790  }
791  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
792  if (curr_col >= aggregate->getGroupByCount()) {
793  break;
794  }
795  }
796  walker = only_source;
797  }
798  return work_set;
799 }
800 
801 } // namespace
802 
803 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
804  if (collation_.size() != that.collation_.size()) {
805  return false;
806  }
807 
808  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
809  auto this_sort_key = collation_[i];
810  auto that_sort_key = that.collation_[i];
811  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
812  return false;
813  }
814  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
815  return false;
816  }
817  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
818  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
819  std::vector<std::pair<const RelAlgNode*, int>> intersect;
820  std::set_intersection(this_equiv_keys.begin(),
821  this_equiv_keys.end(),
822  that_equiv_keys.begin(),
823  that_equiv_keys.end(),
824  std::back_inserter(intersect));
825  if (intersect.empty()) {
826  return false;
827  }
828  }
829  return true;
830 }
831 
832 // class RelLogicalUnion methods
833 
835  : RelAlgNode(std::move(inputs)), is_all_(is_all) {
836  if (!g_enable_union) {
837  throw QueryNotSupported(
838  "The DEPRECATED enable-union option is set to off. Please remove this option as "
839  "it may be disabled in the future.");
840  }
841  CHECK_LE(2u, inputs_.size());
842  if (!is_all_) {
843  throw QueryNotSupported("UNION without ALL is not supported yet.");
844  }
846  throw QueryNotSupported(
847  "Unsupported CAST in UNION: Currently, we only allow casting text type to "
848  "dictionary-encoded strings.");
849  }
850 }
851 
852 size_t RelLogicalUnion::size() const {
853  return inputs_.front()->size();
854 }
855 
857  return cat(::typeName(this), "(is_all(", is_all_, "))");
858 }
859 
860 std::string RelLogicalUnion::getFieldName(const size_t i) const {
861  if (auto const* compound = dynamic_cast<RelCompound const*>(inputs_[0].get())) {
862  return compound->getFieldName(i);
863  } else if (auto const* project = dynamic_cast<RelProject const*>(inputs_[0].get())) {
864  return project->getFieldName(i);
865  } else if (auto const* logical_union =
866  dynamic_cast<RelLogicalUnion const*>(inputs_[0].get())) {
867  return logical_union->getFieldName(i);
868  } else if (auto const* aggregate =
869  dynamic_cast<RelAggregate const*>(inputs_[0].get())) {
870  return aggregate->getFieldName(i);
871  } else if (auto const* scan = dynamic_cast<RelScan const*>(inputs_[0].get())) {
872  return scan->getFieldName(i);
873  } else if (auto const* table_func =
874  dynamic_cast<RelTableFunction const*>(inputs_[0].get())) {
875  return table_func->getFieldName(i);
876  }
877  UNREACHABLE() << "Unhandled input type: " << ::toString(inputs_.front());
878  return {};
879 }
880 
881 namespace {
882 std::vector<bool> get_notnulls(std::vector<TargetMetaInfo> const& tmis0) {
883  std::vector<bool> notnulls(tmis0.size());
884  for (size_t j = 0; j < tmis0.size(); ++j) {
885  notnulls[j] = tmis0[j].get_type_info().get_notnull();
886  }
887  return notnulls;
888 }
889 
891  ti0.set_notnull({}); // Actual value doesn't matter
892  ti1.set_notnull({}); // as long as they are the same.
893  return ti0 == ti1;
894 }
895 
896 void set_notnulls(std::vector<TargetMetaInfo>* tmis0, std::vector<bool> const& notnulls) {
897  for (size_t j = 0; j < tmis0->size(); ++j) {
898  SQLTypeInfo ti = (*tmis0)[j].get_type_info();
899  SQLTypeInfo physical_ti = (*tmis0)[j].get_physical_type_info();
900  ti.set_notnull(notnulls[j]);
901  physical_ti.set_notnull(notnulls[j]);
902  (*tmis0)[j] = TargetMetaInfo((*tmis0)[j].get_resname(), ti, physical_ti);
903  }
904 }
905 } // namespace
906 
907 // The returned std::vector<TargetMetaInfo> is identical to
908 // inputs_[0]->getOutputMetainfo() except for its SQLTypeInfo::notnull values, which is
909 // the logical AND over each input. In other words, the returned columns are notnull iff
910 // all corresponding input columns are notnull.
911 std::vector<TargetMetaInfo> RelLogicalUnion::getCompatibleMetainfoTypes() const {
912  std::vector<TargetMetaInfo> tmis0 = inputs_[0]->getOutputMetainfo();
913  std::vector<bool> notnulls = get_notnulls(tmis0);
914  for (size_t i = 1; i < inputs_.size(); ++i) {
915  std::vector<TargetMetaInfo> const& tmisi = inputs_[i]->getOutputMetainfo();
916  if (tmis0.size() != tmisi.size()) {
917  LOG(INFO) << "tmis0.size()=" << tmis0.size() << " != " << tmisi.size()
918  << "=tmisi.size() for i=" << i;
919  throw std::runtime_error("Subqueries of a UNION must have matching data types.");
920  }
921  for (size_t j = 0; j < tmis0.size(); ++j) {
922  SQLTypeInfo const& ti0 = tmis0[j].get_type_info();
923  SQLTypeInfo const& ti1 = tmisi[j].get_type_info();
924  // Allow types of different nullability to be UNIONed.
925  if (!same_ignoring_notnull(ti0, ti1)) {
926  LOG(INFO) << "Types do not match for UNION:\n tmis0[" << j
927  << "].get_type_info().to_string() = " << ti0.to_string() << "\n tmis"
928  << i << '[' << j
929  << "].get_type_info().to_string() = " << ti1.to_string();
930  // The only permitted difference is when both columns are dictionary-encoded.
931  if (!(ti0.is_dict_encoded_string() && ti1.is_dict_encoded_string())) {
932  throw std::runtime_error(
933  "Subqueries of a UNION must have the exact same data types.");
934  }
935  }
936  notnulls[j] = notnulls[j] && ti1.get_notnull();
937  }
938  }
939  set_notnulls(&tmis0, notnulls); // Set each SQLTypeInfo::notnull to compatible values.
940  return tmis0;
941 }
942 
943 // Rest of code requires a raw pointer, but RexInput object needs to live somewhere.
945  size_t input_idx) const {
946  if (auto const* rex_input_ptr = dynamic_cast<RexInput const*>(rex_scalar)) {
947  RexInput rex_input(*rex_input_ptr);
948  rex_input.setSourceNode(getInput(input_idx));
949  scalar_exprs_.emplace_back(std::make_shared<RexInput const>(std::move(rex_input)));
950  return scalar_exprs_.back().get();
951  }
952  return rex_scalar;
953 }
954 
956  for (auto const& input : inputs_) {
957  if (auto* proj_node = dynamic_cast<RelProject const*>(input.get())) {
958  for (size_t i = 0; i < proj_node->size(); i++) {
959  if (auto* oper = dynamic_cast<RexOperator const*>(proj_node->getProjectAt(i))) {
960  if (oper->getOperator() == SQLOps::kCAST && oper->getType().is_string() &&
961  !oper->getType().is_dict_encoded_string()) {
962  return false;
963  }
964  }
965  }
966  }
967  }
968  return true;
969 }
970 
971 namespace {
972 
973 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
974  const auto& id = field(ra_node, "id");
975  return std::stoi(json_str(id));
976 }
977 
978 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
979  rapidjson::StringBuffer buffer;
980  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
981  node.Accept(writer);
982  return buffer.GetString();
983 }
984 
985 // The parse_* functions below de-serialize expressions as they come from Calcite.
986 // RelAlgDagBuilder will take care of making the representation easy to
987 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
988 
989 std::unique_ptr<RexAbstractInput> parse_abstract_input(
990  const rapidjson::Value& expr) noexcept {
991  const auto& input = field(expr, "input");
992  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
993 }
994 
995 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
996  CHECK(expr.IsObject());
997  const auto& literal = field(expr, "literal");
998  const auto type = to_sql_type(json_str(field(expr, "type")));
999  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
1000  const auto scale = json_i64(field(expr, "scale"));
1001  const auto precision = json_i64(field(expr, "precision"));
1002  const auto type_scale = json_i64(field(expr, "type_scale"));
1003  const auto type_precision = json_i64(field(expr, "type_precision"));
1004  if (literal.IsNull()) {
1005  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
1006  }
1007  switch (type) {
1008  case kINT:
1009  case kBIGINT:
1010  case kDECIMAL:
1011  case kINTERVAL_DAY_TIME:
1012  case kINTERVAL_YEAR_MONTH:
1013  case kTIME:
1014  case kTIMESTAMP:
1015  case kDATE:
1016  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
1017  type,
1018  target_type,
1019  scale,
1020  precision,
1021  type_scale,
1022  type_precision));
1023  case kDOUBLE: {
1024  if (literal.IsDouble()) {
1025  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
1026  type,
1027  target_type,
1028  scale,
1029  precision,
1030  type_scale,
1031  type_precision));
1032  } else if (literal.IsInt64()) {
1033  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetInt64()),
1034  type,
1035  target_type,
1036  scale,
1037  precision,
1038  type_scale,
1039  type_precision);
1040 
1041  } else if (literal.IsUint64()) {
1042  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetUint64()),
1043  type,
1044  target_type,
1045  scale,
1046  precision,
1047  type_scale,
1048  type_precision);
1049  }
1050  UNREACHABLE() << "Unhandled type: " << literal.GetType();
1051  }
1052  case kTEXT:
1053  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
1054  type,
1055  target_type,
1056  scale,
1057  precision,
1058  type_scale,
1059  type_precision));
1060  case kBOOLEAN:
1061  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
1062  type,
1063  target_type,
1064  scale,
1065  precision,
1066  type_scale,
1067  type_precision));
1068  case kNULLT:
1069  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
1070  default:
1071  CHECK(false);
1072  }
1073  CHECK(false);
1074  return nullptr;
1075 }
1076 
1077 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1078  RelAlgDag& root_dag);
1079 
1080 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
1081  if (type_obj.IsArray()) {
1082  throw QueryNotSupported("Composite types are not currently supported.");
1083  }
1084  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
1085  << json_node_to_string(type_obj);
1086  const auto type = to_sql_type(json_str(field(type_obj, "type")));
1087  const auto nullable = json_bool(field(type_obj, "nullable"));
1088  const auto precision_it = type_obj.FindMember("precision");
1089  const int precision =
1090  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
1091  const auto scale_it = type_obj.FindMember("scale");
1092  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
1093  SQLTypeInfo ti(type, !nullable);
1094  ti.set_precision(precision);
1095  ti.set_scale(scale);
1096  return ti;
1097 }
1098 
1099 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
1100  const rapidjson::Value& arr,
1101  RelAlgDag& root_dag) {
1102  std::vector<std::unique_ptr<const RexScalar>> exprs;
1103  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1104  exprs.emplace_back(parse_scalar_expr(*it, root_dag));
1105  }
1106  return exprs;
1107 }
1108 
1110  if (name == "ROW_NUMBER") {
1112  }
1113  if (name == "RANK") {
1115  }
1116  if (name == "DENSE_RANK") {
1118  }
1119  if (name == "PERCENT_RANK") {
1121  }
1122  if (name == "CUME_DIST") {
1124  }
1125  if (name == "NTILE") {
1127  }
1128  if (name == "LAG") {
1130  }
1131  if (name == "LAG_IN_FRAME") {
1133  }
1134  if (name == "LEAD") {
1136  }
1137  if (name == "LEAD_IN_FRAME") {
1139  }
1140  if (name == "FIRST_VALUE") {
1142  }
1143  if (name == "LAST_VALUE") {
1145  }
1146  if (name == "NTH_VALUE") {
1148  }
1149  if (name == "NTH_VALUE_IN_FRAME") {
1151  }
1152  if (name == "FIRST_VALUE_IN_FRAME") {
1154  }
1155  if (name == "LAST_VALUE_IN_FRAME") {
1157  }
1158  if (name == "AVG") {
1160  }
1161  if (name == "MIN") {
1163  }
1164  if (name == "MAX") {
1166  }
1167  if (name == "SUM") {
1169  }
1170  if (name == "COUNT") {
1172  }
1173  if (name == "COUNT_IF") {
1175  }
1176  if (name == "SUM_IF") {
1178  }
1179  if (name == "$SUM0") {
1181  }
1182  if (name == "FORWARD_FILL") {
1184  }
1185  if (name == "BACKWARD_FILL") {
1187  }
1188  if (name == "CONDITIONAL_CHANGE_EVENT") {
1190  }
1191  throw std::runtime_error("Unsupported window function: " + name);
1192 }
1193 
1194 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
1195  const rapidjson::Value& arr,
1196  RelAlgDag& root_dag) {
1197  std::vector<std::unique_ptr<const RexScalar>> exprs;
1198  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1199  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), root_dag));
1200  }
1201  return exprs;
1202 }
1203 
1204 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
1205  return json_str(field(collation, "direction")) == std::string("DESCENDING")
1208 }
1209 
1210 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
1211  return json_str(field(collation, "nulls")) == std::string("FIRST")
1214 }
1215 
1216 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
1217  RelAlgDag& root_dag) {
1218  std::vector<SortField> collation;
1219  size_t field_idx = 0;
1220  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
1221  const auto sort_dir = parse_sort_direction(*it);
1222  const auto null_pos = parse_nulls_position(*it);
1223  collation.emplace_back(field_idx, sort_dir, null_pos);
1224  }
1225  return collation;
1226 }
1227 
1229  const rapidjson::Value& window_bound_obj,
1230  RelAlgDag& root_dag) {
1231  CHECK(window_bound_obj.IsObject());
1233  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
1234  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
1235  window_bound.following = json_bool(field(window_bound_obj, "following"));
1236  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
1237  const auto& offset_field = field(window_bound_obj, "offset");
1238  if (offset_field.IsObject()) {
1239  window_bound.bound_expr = parse_scalar_expr(offset_field, root_dag);
1240  } else {
1241  CHECK(offset_field.IsNull());
1242  }
1243  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
1244  return window_bound;
1245 }
1246 
1247 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
1248  RelAlgDag& root_dag) {
1249  const auto& operands = field(expr, "operands");
1250  CHECK(operands.IsArray());
1251  CHECK_GE(operands.Size(), unsigned(0));
1252  const auto& subquery_ast = field(expr, "subquery");
1253 
1254  auto subquery_dag = RelAlgDagBuilder::buildDagForSubquery(root_dag, subquery_ast);
1255  const auto subquery_root_node = subquery_dag->getRootNodeShPtr();
1256  auto subquery = std::make_shared<RexSubQuery>(subquery_root_node);
1257  auto query_hint = subquery_dag->getQueryHint(subquery_dag->getRootNodeShPtr().get());
1258  root_dag.registerSubquery(subquery);
1259  const auto subquery_global_hint = subquery_dag->getGlobalHints();
1260  if (subquery_global_hint.isAnyQueryHintDelivered()) {
1261  // we need to propagate global query hint found in this subquery to its parent
1262  const auto new_global_hint = root_dag.getGlobalHints() || subquery_global_hint;
1263  root_dag.setGlobalQueryHints(new_global_hint);
1264  }
1265  const auto subquery_local_hint = subquery_dag->getQueryHint(subquery_root_node.get());
1266  if (subquery_local_hint) {
1267  // register local query hint of this subquery to its parent to correctly
1268  // enables them when executing this subquery
1269  root_dag.registerQueryHint(subquery_root_node.get(), *subquery_local_hint);
1270  }
1271  return subquery->deepCopy();
1272 }
1273 
1274 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
1275  RelAlgDag& root_dag) {
1276  const auto op_name = json_str(field(expr, "op"));
1277  const bool is_quantifier =
1278  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
1279  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
1280  const auto& operators_json_arr = field(expr, "operands");
1281  CHECK(operators_json_arr.IsArray());
1282  auto operands = parse_expr_array(operators_json_arr, root_dag);
1283  const auto type_it = expr.FindMember("type");
1284  CHECK(type_it != expr.MemberEnd());
1285  auto ti = parse_type(type_it->value);
1286  if (op == kIN && expr.HasMember("subquery")) {
1287  auto subquery = parse_subquery(expr, root_dag);
1288  operands.emplace_back(std::move(subquery));
1289  }
1290  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
1291  const auto& partition_keys_arr = field(expr, "partition_keys");
1292  auto partition_keys = parse_expr_array(partition_keys_arr, root_dag);
1293  const auto& order_keys_arr = field(expr, "order_keys");
1294  auto order_keys = parse_window_order_exprs(order_keys_arr, root_dag);
1295  const auto collation = parse_window_order_collation(order_keys_arr, root_dag);
1296  const auto kind = parse_window_function_kind(op_name);
1297  const auto lower_bound = parse_window_bound(field(expr, "lower_bound"), root_dag);
1298  const auto upper_bound = parse_window_bound(field(expr, "upper_bound"), root_dag);
1299  bool is_rows = json_bool(field(expr, "is_rows"));
1300  ti.set_notnull(false);
1301  return std::make_unique<RexWindowFunctionOperator>(kind,
1302  operands,
1303  partition_keys,
1304  order_keys,
1305  collation,
1306  lower_bound,
1307  upper_bound,
1308  is_rows,
1309  ti);
1310  }
1311  return std::unique_ptr<RexOperator>(op == kFUNCTION
1312  ? new RexFunctionOperator(op_name, operands, ti)
1313  : new RexOperator(op, operands, ti));
1314 }
1315 
1316 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr, RelAlgDag& root_dag) {
1317  const auto& operands = field(expr, "operands");
1318  CHECK(operands.IsArray());
1319  CHECK_GE(operands.Size(), unsigned(2));
1320  std::unique_ptr<const RexScalar> else_expr;
1321  std::vector<
1322  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1323  expr_pair_list;
1324  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
1325  auto when_expr = parse_scalar_expr(*operands_it++, root_dag);
1326  if (operands_it == operands.End()) {
1327  else_expr = std::move(when_expr);
1328  break;
1329  }
1330  auto then_expr = parse_scalar_expr(*operands_it++, root_dag);
1331  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
1332  }
1333  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
1334 }
1335 
1336 std::vector<std::string> strings_from_json_array(
1337  const rapidjson::Value& json_str_arr) noexcept {
1338  CHECK(json_str_arr.IsArray());
1339  std::vector<std::string> fields;
1340  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
1341  ++json_str_arr_it) {
1342  CHECK(json_str_arr_it->IsString());
1343  fields.emplace_back(json_str_arr_it->GetString());
1344  }
1345  return fields;
1346 }
1347 
1348 std::vector<size_t> indices_from_json_array(
1349  const rapidjson::Value& json_idx_arr) noexcept {
1350  CHECK(json_idx_arr.IsArray());
1351  std::vector<size_t> indices;
1352  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
1353  ++json_idx_arr_it) {
1354  CHECK(json_idx_arr_it->IsInt());
1355  CHECK_GE(json_idx_arr_it->GetInt(), 0);
1356  indices.emplace_back(json_idx_arr_it->GetInt());
1357  }
1358  return indices;
1359 }
1360 
1361 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
1362  const auto agg_str = json_str(field(expr, "agg"));
1363  if (agg_str == "APPROX_QUANTILE") {
1364  LOG(INFO) << "APPROX_QUANTILE is deprecated. Please use APPROX_PERCENTILE instead.";
1365  }
1366  const auto agg = to_agg_kind(agg_str);
1367  const auto distinct = json_bool(field(expr, "distinct"));
1368  const auto agg_ti = parse_type(field(expr, "type"));
1369  const auto operands = indices_from_json_array(field(expr, "operands"));
1370  bool const allow_multiple_args =
1371  shared::is_any<kAPPROX_COUNT_DISTINCT, kAPPROX_QUANTILE, kSUM_IF>(agg);
1372  if (operands.size() > 1 && (operands.size() != 2 || !allow_multiple_args)) {
1373  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
1374  }
1375  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
1376 }
1377 
1378 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1379  RelAlgDag& root_dag) {
1380  CHECK(expr.IsObject());
1381  if (expr.IsObject() && expr.HasMember("input")) {
1382  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
1383  }
1384  if (expr.IsObject() && expr.HasMember("literal")) {
1385  return std::unique_ptr<const RexScalar>(parse_literal(expr));
1386  }
1387  if (expr.IsObject() && expr.HasMember("op")) {
1388  const auto op_str = json_str(field(expr, "op"));
1389  if (op_str == std::string("CASE")) {
1390  return std::unique_ptr<const RexScalar>(parse_case(expr, root_dag));
1391  }
1392  if (op_str == std::string("$SCALAR_QUERY")) {
1393  return std::unique_ptr<const RexScalar>(parse_subquery(expr, root_dag));
1394  }
1395  return std::unique_ptr<const RexScalar>(parse_operator(expr, root_dag));
1396  }
1397  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
1398  " not supported");
1399 }
1400 
1401 JoinType to_join_type(const std::string& join_type_name) {
1402  if (join_type_name == "inner") {
1403  return JoinType::INNER;
1404  }
1405  if (join_type_name == "left") {
1406  return JoinType::LEFT;
1407  }
1408  if (join_type_name == "semi") {
1409  return JoinType::SEMI;
1410  }
1411  if (join_type_name == "anti") {
1412  return JoinType::ANTI;
1413  }
1414  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
1415 }
1416 
1417 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
1418 
1419 std::unique_ptr<const RexOperator> disambiguate_operator(
1420  const RexOperator* rex_operator,
1421  const RANodeOutput& ra_output) noexcept {
1422  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1423  for (size_t i = 0; i < rex_operator->size(); ++i) {
1424  auto operand = rex_operator->getOperand(i);
1425  if (dynamic_cast<const RexSubQuery*>(operand)) {
1426  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1427  } else {
1428  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
1429  }
1430  }
1431  const auto rex_window_function_operator =
1432  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1433  if (rex_window_function_operator) {
1434  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
1435  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1436  for (const auto& partition_key : partition_keys) {
1437  disambiguated_partition_keys.emplace_back(
1438  disambiguate_rex(partition_key.get(), ra_output));
1439  }
1440  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1441  const auto& order_keys = rex_window_function_operator->getOrderKeys();
1442  for (const auto& order_key : order_keys) {
1443  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
1444  }
1445  return rex_window_function_operator->disambiguatedOperands(
1446  disambiguated_operands,
1447  disambiguated_partition_keys,
1448  disambiguated_order_keys,
1449  rex_window_function_operator->getCollation());
1450  }
1451  return rex_operator->getDisambiguated(disambiguated_operands);
1452 }
1453 
1454 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
1455  const RANodeOutput& ra_output) {
1456  std::vector<
1457  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1458  disambiguated_expr_pair_list;
1459  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1460  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
1461  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
1462  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1463  std::move(disambiguated_then));
1464  }
1465  std::unique_ptr<const RexScalar> disambiguated_else{
1466  disambiguate_rex(rex_case->getElse(), ra_output)};
1467  return std::unique_ptr<const RexCase>(
1468  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1469 }
1470 
1471 // The inputs used by scalar expressions are given as indices in the serialized
1472 // representation of the query. This is hard to navigate; make the relationship
1473 // explicit by creating RexInput expressions which hold a pointer to the source
1474 // relational algebra node and the index relative to the output of that node.
1475 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
1476  const RANodeOutput& ra_output) {
1477  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
1478  if (rex_abstract_input) {
1479  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1480  return std::unique_ptr<const RexInput>(
1481  new RexInput(ra_output[rex_abstract_input->getIndex()]));
1482  }
1483  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
1484  if (rex_operator) {
1485  return disambiguate_operator(rex_operator, ra_output);
1486  }
1487  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
1488  if (rex_case) {
1489  return disambiguate_case(rex_case, ra_output);
1490  }
1491  if (auto const rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar)) {
1492  return rex_literal->deepCopy();
1493  } else if (auto const rex_subquery = dynamic_cast<const RexSubQuery*>(rex_scalar)) {
1494  return rex_subquery->deepCopy();
1495  } else {
1496  throw QueryNotSupported("Unable to disambiguate expression of type " +
1497  std::string(typeid(*rex_scalar).name()));
1498  }
1499 }
1500 
1501 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
1502  CHECK_EQ(size_t(1), project_node->inputCount());
1503  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1504  for (size_t i = 0; i < project_node->size(); ++i) {
1505  const auto projected_expr = project_node->getProjectAt(i);
1506  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1507  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1508  } else {
1509  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
1510  }
1511  }
1512  project_node->setExpressions(disambiguated_exprs);
1513 }
1514 
1516  const RANodeOutput& input) noexcept {
1517  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1518  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1519  const auto target_expr = table_func_node->getTableFuncInputAt(i);
1520  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1521  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1522  } else {
1523  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
1524  }
1525  }
1526  table_func_node->setTableFuncInputs(std::move(disambiguated_exprs));
1527 }
1528 
1529 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1530  for (auto ra_node : nodes) {
1531  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
1532  if (filter_node) {
1533  CHECK_EQ(size_t(1), filter_node->inputCount());
1534  auto disambiguated_condition = disambiguate_rex(
1535  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
1536  filter_node->setCondition(disambiguated_condition);
1537  continue;
1538  }
1539  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
1540  if (join_node) {
1541  CHECK_EQ(size_t(2), join_node->inputCount());
1542  auto disambiguated_condition =
1543  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
1544  join_node->setCondition(disambiguated_condition);
1545  continue;
1546  }
1547  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
1548  if (project_node) {
1549  bind_project_to_input(project_node.get(),
1550  get_node_output(project_node->getInput(0)));
1551  continue;
1552  }
1553  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1554  if (table_func_node) {
1555  /*
1556  Collect all inputs from table function input (non-literal)
1557  arguments.
1558  */
1559  RANodeOutput input;
1560  input.reserve(table_func_node->inputCount());
1561  for (size_t i = 0; i < table_func_node->inputCount(); i++) {
1562  auto node_output = get_node_output(table_func_node->getInput(i));
1563  input.insert(input.end(), node_output.begin(), node_output.end());
1564  }
1565  bind_table_func_to_input(table_func_node.get(), input);
1566  }
1567  }
1568 }
1569 
1570 void handle_query_hint(const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1571  RelAlgDag& rel_alg_dag) noexcept {
1572  // query hint is delivered by the above three nodes
1573  // when a query block has top-sort node, a hint is registered to
1574  // one of the node which locates at the nearest from the sort node
1575  RegisteredQueryHint global_query_hint;
1576  for (auto node : nodes) {
1577  Hints* hint_delivered = nullptr;
1578  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1579  if (agg_node) {
1580  if (agg_node->hasDeliveredHint()) {
1581  hint_delivered = agg_node->getDeliveredHints();
1582  }
1583  }
1584  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
1585  if (project_node) {
1586  if (project_node->hasDeliveredHint()) {
1587  hint_delivered = project_node->getDeliveredHints();
1588  }
1589  }
1590  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
1591  if (compound_node) {
1592  if (compound_node->hasDeliveredHint()) {
1593  hint_delivered = compound_node->getDeliveredHints();
1594  }
1595  }
1596  if (hint_delivered && !hint_delivered->empty()) {
1597  rel_alg_dag.registerQueryHints(node, hint_delivered, global_query_hint);
1598  }
1599  }
1600  // the current rel_alg_dag may contain global query hints from the subquery
1601  // so we combine the current global hint we collected with the original one together
1602  // to propagate global query hints correctly
1603  const auto existing_global_query_hints = rel_alg_dag.getGlobalHints();
1604  const auto new_global_query_hints = existing_global_query_hints || global_query_hint;
1605  rel_alg_dag.setGlobalQueryHints(new_global_query_hints);
1606 }
1607 
1608 void compute_node_hash(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
1609  // compute each rel node's hash value in advance to avoid inconsistency of their hash
1610  // values depending on the toHash's caller
1611  // specifically, we manipulate our logical query plan before retrieving query step
1612  // sequence but once we compute a hash value we cached it so there is no way to update
1613  // it after the plan has been changed starting from the top node, we compute the hash
1614  // value (top-down manner)
1615  std::for_each(
1616  nodes.rbegin(), nodes.rend(), [](const std::shared_ptr<RelAlgNode>& node) {
1617  auto node_hash = node->toHash();
1618  CHECK_NE(node_hash, static_cast<size_t>(0));
1619  });
1620 }
1621 
1622 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1623  for (auto node : nodes) {
1624  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1625  if (!agg_node || agg_node->getAggExprsCount()) {
1626  continue;
1627  }
1628  CHECK_EQ(size_t(1), node->inputCount());
1629  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1630  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1631  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1632  agg_node->markAsNop();
1633  }
1634  }
1635 }
1636 
1637 namespace {
1638 
1639 std::vector<const Rex*> reproject_targets(
1640  const RelProject* simple_project,
1641  const std::vector<const Rex*>& target_exprs) noexcept {
1642  std::vector<const Rex*> result;
1643  for (size_t i = 0; i < simple_project->size(); ++i) {
1644  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
1645  CHECK(input_rex);
1646  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1647  result.push_back(target_exprs[input_rex->getIndex()]);
1648  }
1649  return result;
1650 }
1651 
1658  public:
1660  const RelAlgNode* node_to_keep,
1661  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1662  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
1663 
1664  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
1665  RetType visitInput(const RexInput* input) const final {
1666  if (input->getSourceNode() == node_to_keep_) {
1667  const auto index = input->getIndex();
1668  CHECK_LT(index, scalar_sources_.size());
1669  return visit(scalar_sources_[index].get());
1670  } else {
1671  return input->deepCopy();
1672  }
1673  }
1674 
1675  private:
1677  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
1678 };
1679 
1680 } // namespace
1681 
1683  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1684  const std::vector<size_t>& pattern,
1685  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
1686  query_hints) noexcept {
1687  CHECK_GE(pattern.size(), size_t(2));
1688  CHECK_LE(pattern.size(), size_t(4));
1689 
1690  std::unique_ptr<const RexScalar> filter_rex;
1691  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1692  size_t groupby_count{0};
1693  std::vector<std::string> fields;
1694  std::vector<const RexAgg*> agg_exprs;
1695  std::vector<const Rex*> target_exprs;
1696  bool first_project{true};
1697  bool is_agg{false};
1698  RelAlgNode* last_node{nullptr};
1699 
1700  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1701  size_t node_hash{0};
1702  unsigned node_id{0};
1703  bool hint_registered{false};
1704  RegisteredQueryHint registered_query_hint = RegisteredQueryHint::defaults();
1705  for (const auto node_idx : pattern) {
1706  const auto ra_node = nodes[node_idx];
1707  auto registered_query_hint_map_it = query_hints.find(ra_node->toHash());
1708  if (registered_query_hint_map_it != query_hints.end()) {
1709  auto& registered_query_hint_map = registered_query_hint_map_it->second;
1710  auto registered_query_hint_it = registered_query_hint_map.find(ra_node->getId());
1711  if (registered_query_hint_it != registered_query_hint_map.end()) {
1712  hint_registered = true;
1713  node_hash = registered_query_hint_map_it->first;
1714  node_id = registered_query_hint_it->first;
1715  registered_query_hint = registered_query_hint_it->second;
1716  }
1717  }
1718  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1719  if (ra_filter) {
1720  CHECK(!filter_rex);
1721  filter_rex.reset(ra_filter->getAndReleaseCondition());
1722  CHECK(filter_rex);
1723  last_node = ra_node.get();
1724  continue;
1725  }
1726  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1727  if (ra_project) {
1728  fields = ra_project->getFields();
1729  manipulation_target = ra_project;
1730 
1731  if (first_project) {
1732  CHECK_EQ(size_t(1), ra_project->inputCount());
1733  // Rebind the input of the project to the input of the filter itself
1734  // since we know that we'll evaluate the filter on the fly, with no
1735  // intermediate buffer.
1736  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1737  if (filter_input) {
1738  CHECK_EQ(size_t(1), filter_input->inputCount());
1739  bind_project_to_input(ra_project.get(),
1740  get_node_output(filter_input->getInput(0)));
1741  }
1742  scalar_sources = ra_project->getExpressionsAndRelease();
1743  for (const auto& scalar_expr : scalar_sources) {
1744  target_exprs.push_back(scalar_expr.get());
1745  }
1746  first_project = false;
1747  } else {
1748  if (ra_project->isSimple()) {
1749  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1750  } else {
1751  // TODO(adb): This is essentially a more general case of simple project, we
1752  // could likely merge the two
1753  std::vector<const Rex*> result;
1754  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1755  for (size_t i = 0; i < ra_project->size(); ++i) {
1756  const auto rex = ra_project->getProjectAt(i);
1757  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1758  const auto index = rex_input->getIndex();
1759  CHECK_LT(index, target_exprs.size());
1760  result.push_back(target_exprs[index]);
1761  } else {
1762  scalar_sources.push_back(visitor.visit(rex));
1763  result.push_back(scalar_sources.back().get());
1764  }
1765  }
1766  target_exprs = result;
1767  }
1768  }
1769  last_node = ra_node.get();
1770  continue;
1771  }
1772  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1773  if (ra_aggregate) {
1774  is_agg = true;
1775  fields = ra_aggregate->getFields();
1776  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1777  groupby_count = ra_aggregate->getGroupByCount();
1778  decltype(target_exprs){}.swap(target_exprs);
1779  CHECK_LE(groupby_count, scalar_sources.size());
1780  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1781  const auto rex_ref = new RexRef(group_idx + 1);
1782  target_exprs.push_back(rex_ref);
1783  scalar_sources.emplace_back(rex_ref);
1784  }
1785  for (const auto rex_agg : agg_exprs) {
1786  target_exprs.push_back(rex_agg);
1787  }
1788  last_node = ra_node.get();
1789  continue;
1790  }
1791  }
1792 
1793  auto compound_node =
1794  std::make_shared<RelCompound>(filter_rex,
1795  target_exprs,
1796  groupby_count,
1797  agg_exprs,
1798  fields,
1799  scalar_sources,
1800  is_agg,
1801  manipulation_target->isUpdateViaSelect(),
1802  manipulation_target->isDeleteViaSelect(),
1803  manipulation_target->isVarlenUpdateRequired(),
1804  manipulation_target->getModifiedTableDescriptor(),
1805  manipulation_target->getTargetColumns(),
1806  manipulation_target->getModifiedTableCatalog());
1807  auto old_node = nodes[pattern.back()];
1808  nodes[pattern.back()] = compound_node;
1809  auto first_node = nodes[pattern.front()];
1810  CHECK_EQ(size_t(1), first_node->inputCount());
1811  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1812  if (hint_registered) {
1813  // pass the registered hint from the origin node to newly created compound node
1814  // where it is coalesced
1815  auto registered_query_hint_map_it = query_hints.find(node_hash);
1816  CHECK(registered_query_hint_map_it != query_hints.end());
1817  auto registered_query_hint_map = registered_query_hint_map_it->second;
1818  if (registered_query_hint_map.size() > 1) {
1819  registered_query_hint_map.erase(node_id);
1820  } else {
1821  CHECK_EQ(registered_query_hint_map.size(), static_cast<size_t>(1));
1822  query_hints.erase(node_hash);
1823  }
1824  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
1825  hint_map.emplace(compound_node->getId(), registered_query_hint);
1826  query_hints.emplace(compound_node->toHash(), hint_map);
1827  }
1828  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1829  nodes[pattern[i]].reset();
1830  }
1831  for (auto node : nodes) {
1832  if (!node) {
1833  continue;
1834  }
1835  node->replaceInput(old_node, compound_node);
1836  }
1837 }
1838 
1839 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1840  using ElementType = std::shared_ptr<RelAlgNode>;
1841  using Super = std::vector<ElementType>::const_iterator;
1842  using Container = std::vector<ElementType>;
1843 
1844  public:
1845  enum class AdvancingMode { DUChain, InOrder };
1846 
1847  explicit RANodeIterator(const Container& nodes)
1848  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1849  size_t non_zero_count = 0;
1850  for (const auto& node : nodes) {
1851  if (node) {
1852  ++non_zero_count;
1853  }
1854  }
1856  }()) {}
1857 
1858  explicit operator size_t() {
1859  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1860  }
1861 
1862  RANodeIterator operator++() = delete;
1863 
1864  void advance(AdvancingMode mode) {
1865  Super& super = *this;
1866  switch (mode) {
1867  case AdvancingMode::DUChain: {
1868  size_t use_count = 0;
1869  Super only_use = owner_.end();
1870  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1871  if (!*nodeIt) {
1872  continue;
1873  }
1874  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1875  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1876  ++use_count;
1877  if (1 == use_count) {
1878  only_use = nodeIt;
1879  } else {
1880  super = owner_.end();
1881  return;
1882  }
1883  }
1884  }
1885  }
1886  super = only_use;
1887  break;
1888  }
1889  case AdvancingMode::InOrder:
1890  for (size_t i = 0; i != owner_.size(); ++i) {
1891  if (!visited_.count(i)) {
1892  super = owner_.begin();
1893  std::advance(super, i);
1894  return;
1895  }
1896  }
1897  super = owner_.end();
1898  break;
1899  default:
1900  CHECK(false);
1901  }
1902  }
1903 
1904  bool allVisited() { return visited_.size() == nodeCount_; }
1905 
1907  visited_.insert(size_t(*this));
1908  Super& super = *this;
1909  return *super;
1910  }
1911 
1912  const ElementType* operator->() { return &(operator*()); }
1913 
1914  private:
1916  const size_t nodeCount_;
1917  std::unordered_set<size_t> visited_;
1918 };
1919 
1920 namespace {
1921 
1922 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1923  const size_t index,
1924  const bool first_rex_is_input) {
1925  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1926  if (index == 0 && agg_node->getGroupByCount() > 0) {
1927  return true;
1928  } else {
1929  // Is an aggregated target, only allow the project to be elided if the aggregate
1930  // target is simply passed through (i.e. if the top level expression attached to
1931  // the project node is a RexInput expression)
1932  return first_rex_is_input;
1933  }
1934  }
1935  return first_rex_is_input;
1936 }
1937 
1943 class CoalesceSecondaryProjectVisitor : public RexVisitor<bool> {
1944  public:
1945  bool visitInput(const RexInput* input) const final {
1946  // The top level expression node is checked before we apply the visitor. If we get
1947  // here, this input rex is a child of another rex node, and we handle the can be
1948  // coalesced check slightly differently
1949  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1950  }
1951 
1952  bool visitLiteral(const RexLiteral*) const final { return false; }
1953 
1954  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1955 
1956  bool visitRef(const RexRef*) const final { return false; }
1957 
1958  protected:
1959  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1960  return aggregate && next_result;
1961  }
1962 
1963  bool defaultResult() const final { return true; }
1964 };
1965 
1966 // Detect the window function SUM pattern: CASE WHEN COUNT() > 0 THEN SUM ELSE 0
1968  const auto case_operator = dynamic_cast<const RexCase*>(rex);
1969  if (case_operator && case_operator->branchCount() == 1) {
1970  const auto then_window =
1971  dynamic_cast<const RexWindowFunctionOperator*>(case_operator->getThen(0));
1972  if (then_window && then_window->getKind() == SqlWindowFunctionKind::SUM_INTERNAL) {
1973  return true;
1974  }
1975  }
1976  return false;
1977 }
1978 
1979 // Check for Window Function AVG:
1980 // (CASE WHEN count > 0 THEN sum ELSE 0) / COUNT
1982  const RexOperator* divide_operator = dynamic_cast<const RexOperator*>(rex);
1983  if (divide_operator && divide_operator->getOperator() == kDIVIDE) {
1984  CHECK_EQ(divide_operator->size(), size_t(2));
1985  const auto case_operator =
1986  dynamic_cast<const RexCase*>(divide_operator->getOperand(0));
1987  const auto second_window =
1988  dynamic_cast<const RexWindowFunctionOperator*>(divide_operator->getOperand(1));
1989  if (case_operator && second_window &&
1990  second_window->getKind() == SqlWindowFunctionKind::COUNT) {
1991  if (is_window_function_sum(case_operator)) {
1992  return true;
1993  }
1994  }
1995  }
1996  return false;
1997 }
1998 
1999 // Detect both window function operators and window function operators embedded in case
2000 // statements (for null handling)
2002  return dynamic_cast<const RexWindowFunctionOperator*>(rex) ||
2004 }
2005 
2006 } // namespace
2007 
2009  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2010  const std::vector<const RelAlgNode*>& left_deep_joins,
2011  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2012  query_hints) {
2013  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
2014  std::vector<size_t> crt_pattern;
2015  CoalesceState crt_state{CoalesceState::Initial};
2016 
2017  auto reset_state = [&crt_pattern, &crt_state]() {
2018  crt_state = CoalesceState::Initial;
2019  std::vector<size_t>().swap(crt_pattern);
2020  };
2021 
2022  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
2023  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
2024  switch (crt_state) {
2025  case CoalesceState::Initial: {
2026  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
2027  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
2028  left_deep_joins.end()) {
2029  crt_pattern.push_back(size_t(nodeIt));
2030  crt_state = CoalesceState::Filter;
2031  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2032  } else if (auto project_node =
2033  std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2034  if (project_node->hasWindowFunctionExpr()) {
2035  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2036  } else {
2037  crt_pattern.push_back(size_t(nodeIt));
2038  crt_state = CoalesceState::FirstProject;
2039  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2040  }
2041  } else {
2042  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2043  }
2044  break;
2045  }
2046  case CoalesceState::Filter: {
2047  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2048  // Given we now add preceding projects for all window functions following
2049  // RelFilter nodes, the following should never occur
2050  CHECK(!project_node->hasWindowFunctionExpr());
2051  crt_pattern.push_back(size_t(nodeIt));
2052  crt_state = CoalesceState::FirstProject;
2053  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2054  } else {
2055  reset_state();
2056  }
2057  break;
2058  }
2059  case CoalesceState::FirstProject: {
2060  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
2061  crt_pattern.push_back(size_t(nodeIt));
2062  crt_state = CoalesceState::Aggregate;
2063  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2064  } else {
2065  if (crt_pattern.size() >= 2) {
2066  create_compound(nodes, crt_pattern, query_hints);
2067  }
2068  reset_state();
2069  }
2070  break;
2071  }
2072  case CoalesceState::Aggregate: {
2073  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2074  if (!project_node->hasWindowFunctionExpr()) {
2075  // TODO(adb): overloading the simple project terminology again here
2076  bool is_simple_project{true};
2077  for (size_t i = 0; i < project_node->size(); i++) {
2078  const auto scalar_rex = project_node->getProjectAt(i);
2079  // If the top level scalar rex is an input node, we can bypass the visitor
2080  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
2082  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
2083  is_simple_project = false;
2084  break;
2085  }
2086  continue;
2087  }
2088  CoalesceSecondaryProjectVisitor visitor;
2089  if (!visitor.visit(project_node->getProjectAt(i))) {
2090  is_simple_project = false;
2091  break;
2092  }
2093  }
2094  if (is_simple_project) {
2095  crt_pattern.push_back(size_t(nodeIt));
2096  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2097  }
2098  }
2099  }
2100  CHECK_GE(crt_pattern.size(), size_t(2));
2101  create_compound(nodes, crt_pattern, query_hints);
2102  reset_state();
2103  break;
2104  }
2105  default:
2106  CHECK(false);
2107  }
2108  }
2109  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
2110  if (crt_pattern.size() >= 2) {
2111  create_compound(nodes, crt_pattern, query_hints);
2112  }
2113  CHECK(!crt_pattern.empty());
2114  }
2115 }
2116 
2117 namespace {
2119  RelAlgNode const* node,
2120  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs,
2121  std::vector<std::string>& fields) {
2122  for (size_t i = 0; i < node->size(); i++) {
2123  auto new_rex_input = std::make_unique<RexInput>(node, i);
2124  scalar_exprs.emplace_back(std::move(new_rex_input));
2125  fields.emplace_back("");
2126  }
2127 }
2128 } // namespace
2129 
2130 // Handle a query plan pattern "Aggregate-Join" which is not supported b/c Join node
2131 // (i.e., RelLeftDeepJoin) has multiple inputs (at least two if it has a single binary
2132 // join) We expect that the Aggregate node has a single input node, so we inject a new
2133 // Project node to satisfy the condition After the transformation, we have
2134 // "Aggregate-Project-Join" and can continue the execution w/o any crash or exception
2136  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2137  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2138  query_hints) {
2139  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2140  bool replace_nodes = false;
2141  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2142  auto node = *node_itr;
2143  if (auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node)) {
2144  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2145  std::vector<std::string> fields;
2146  std::shared_ptr<RelProject> new_project;
2147  CHECK_EQ(agg_node->getInputs().size(), size_t(1));
2148  CHECK_NE(*node_itr, *node_list.begin());
2149  const auto prev_node = *std::prev(node_itr);
2150  CHECK(prev_node);
2151  auto const input_node_ptr = agg_node->getAndOwnInput(0);
2152  if (auto join_node =
2153  std::dynamic_pointer_cast<RelLeftDeepInnerJoin const>(input_node_ptr)) {
2154  for (auto const* join_input_node : join_node->getInputs()) {
2155  create_rex_input_for_new_project_node(join_input_node, scalar_exprs, fields);
2156  }
2157  if (!scalar_exprs.empty()) {
2158  replace_nodes = true;
2159  new_project = std::make_shared<RelProject>(scalar_exprs, fields, join_node);
2160  agg_node->replaceInput(join_node, new_project);
2161  node_list.insert(node_itr, new_project);
2162  }
2163  }
2164  }
2165  }
2166  if (replace_nodes) {
2167  nodes.assign(node_list.begin(), node_list.end());
2168  }
2169 }
2170 
2171 void eliminate_redundant_projection(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
2172  for (auto& node : nodes) {
2173  if (auto* proj_node = dynamic_cast<RelProject*>(node.get())) {
2174  if (proj_node->isSimple()) {
2175  if (auto child_proj_node =
2176  dynamic_cast<RelProject const*>(proj_node->getInput(0))) {
2177  std::vector<std::unique_ptr<RexScalar const>> scalar_exprs;
2178  RexDeepCopyVisitor copier;
2179  for (size_t i = 0; i < proj_node->size(); i++) {
2180  auto rex_abs_input =
2181  dynamic_cast<RexAbstractInput const*>(proj_node->getProjectAt(i));
2182  scalar_exprs.push_back(
2183  copier.visit(child_proj_node->getProjectAt(rex_abs_input->getIndex())));
2184  }
2185  CHECK_EQ(scalar_exprs.size(), proj_node->getFields().size());
2186  proj_node->setExpressions(scalar_exprs);
2187  proj_node->replaceInput(proj_node->getAndOwnInput(0),
2188  child_proj_node->getAndOwnInput(0));
2189  }
2190  }
2191  }
2192  }
2193 }
2194 
2195 class WindowFunctionCollector : public RexVisitor<void*> {
2196  public:
2198  std::unordered_map<size_t, const RexScalar*>& collected_window_func,
2199  bool only_add_window_expr)
2200  : collected_window_func_(collected_window_func)
2201  , only_add_window_expr_(only_add_window_expr) {}
2202 
2203  protected:
2204  // Detect embedded window function expressions in operators
2205  void* visitOperator(const RexOperator* rex_operator) const final {
2206  if (is_window_function_operator(rex_operator)) {
2207  tryAddWindowExpr(rex_operator);
2208  }
2209  const size_t operand_count = rex_operator->size();
2210  for (size_t i = 0; i < operand_count; ++i) {
2211  const auto operand = rex_operator->getOperand(i);
2212  if (is_window_function_operator(operand)) {
2213  // Handle both RexWindowFunctionOperators and window functions built up from
2214  // multiple RexScalar objects (e.g. AVG)
2215  tryAddWindowExpr(operand);
2216  } else {
2217  visit(operand);
2218  }
2219  }
2220  return defaultResult();
2221  }
2222 
2223  // Detect embedded window function expressions in case statements. Note that this may
2224  // manifest as a nested case statement inside a top level case statement, as some
2225  // window functions (sum, avg) are represented as a case statement. Use the
2226  // is_window_function_operator helper to detect complete window function expressions.
2227  void* visitCase(const RexCase* rex_case) const final {
2228  if (is_window_function_operator(rex_case)) {
2229  tryAddWindowExpr(rex_case);
2230  if (!only_add_window_expr_) {
2231  return nullptr;
2232  }
2233  }
2234 
2235  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
2236  const auto when = rex_case->getWhen(i);
2237  if (is_window_function_operator(when)) {
2238  tryAddWindowExpr(when);
2239  } else {
2240  visit(when);
2241  }
2242  const auto then = rex_case->getThen(i);
2243  if (is_window_function_operator(then)) {
2244  tryAddWindowExpr(then);
2245  } else {
2246  visit(then);
2247  }
2248  }
2249  if (rex_case->getElse()) {
2250  auto else_expr = rex_case->getElse();
2251  if (is_window_function_operator(else_expr)) {
2252  tryAddWindowExpr(else_expr);
2253  } else {
2254  visit(else_expr);
2255  }
2256  }
2257  return defaultResult();
2258  }
2259 
2260  void tryAddWindowExpr(RexScalar const* expr) const {
2261  if (!only_add_window_expr_) {
2262  collected_window_func_.emplace(expr->toHash(), expr);
2263  } else {
2264  if (auto window_expr = dynamic_cast<RexWindowFunctionOperator const*>(expr)) {
2265  collected_window_func_.emplace(window_expr->toHash(), window_expr);
2266  }
2267  }
2268  }
2269 
2270  void* defaultResult() const final { return nullptr; }
2271 
2272  private:
2273  std::unordered_map<size_t, const RexScalar*>& collected_window_func_;
2275 };
2276 
2278  public:
2280  std::unordered_set<size_t>& collected_window_func_hash,
2281  std::vector<std::unique_ptr<const RexScalar>>& new_rex_input_for_window_func,
2282  std::unordered_map<size_t, size_t>& window_func_to_new_rex_input_idx_map,
2283  RelProject* new_project,
2284  std::unordered_map<size_t, std::unique_ptr<const RexInput>>&
2285  new_rex_input_from_child_node)
2286  : collected_window_func_hash_(collected_window_func_hash)
2287  , new_rex_input_for_window_func_(new_rex_input_for_window_func)
2288  , window_func_to_new_rex_input_idx_map_(window_func_to_new_rex_input_idx_map)
2289  , new_project_(new_project)
2290  , new_rex_input_from_child_node_(new_rex_input_from_child_node) {
2291  CHECK_EQ(collected_window_func_hash_.size(),
2292  window_func_to_new_rex_input_idx_map_.size());
2293  for (auto hash : collected_window_func_hash_) {
2294  auto rex_it = window_func_to_new_rex_input_idx_map_.find(hash);
2295  CHECK(rex_it != window_func_to_new_rex_input_idx_map_.end());
2296  CHECK_LT(rex_it->second, new_rex_input_for_window_func_.size());
2297  }
2298  CHECK(new_project_);
2299  }
2300 
2301  protected:
2302  RetType visitInput(const RexInput* rex_input) const final {
2303  if (rex_input->getSourceNode() != new_project_) {
2304  const auto cur_index = rex_input->getIndex();
2305  auto cur_source_node = rex_input->getSourceNode();
2306  std::string field_name = "";
2307  if (auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
2308  field_name = cur_project_node->getFieldName(cur_index);
2309  }
2310  auto rex_input_hash = rex_input->toHash();
2311  auto rex_input_it = new_rex_input_from_child_node_.find(rex_input_hash);
2312  if (rex_input_it == new_rex_input_from_child_node_.end()) {
2313  auto new_rex_input =
2314  std::make_unique<RexInput>(new_project_, new_project_->size());
2315  new_project_->appendInput(field_name, rex_input->deepCopy());
2316  new_rex_input_from_child_node_.emplace(rex_input_hash, new_rex_input->deepCopy());
2317  return new_rex_input;
2318  } else {
2319  return rex_input_it->second->deepCopy();
2320  }
2321  } else {
2322  return rex_input->deepCopy();
2323  }
2324  }
2325 
2326  RetType visitOperator(const RexOperator* rex_operator) const final {
2327  auto new_rex_idx = is_collected_window_function(rex_operator->toHash());
2328  if (new_rex_idx) {
2329  return get_new_rex_input(*new_rex_idx);
2330  }
2331 
2332  const auto rex_window_function_operator =
2333  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
2334  if (rex_window_function_operator) {
2335  // Deep copy the embedded window function operator
2336  return visitWindowFunctionOperator(rex_window_function_operator);
2337  }
2338 
2339  const size_t operand_count = rex_operator->size();
2340  std::vector<RetType> new_opnds;
2341  for (size_t i = 0; i < operand_count; ++i) {
2342  const auto operand = rex_operator->getOperand(i);
2343  auto new_rex_idx_for_operand = is_collected_window_function(operand->toHash());
2344  if (new_rex_idx_for_operand) {
2345  new_opnds.push_back(get_new_rex_input(*new_rex_idx_for_operand));
2346  } else {
2347  new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
2348  }
2349  }
2350  return rex_operator->getDisambiguated(new_opnds);
2351  }
2352 
2353  RetType visitCase(const RexCase* rex_case) const final {
2354  auto new_rex_idx = is_collected_window_function(rex_case->toHash());
2355  if (new_rex_idx) {
2356  return get_new_rex_input(*new_rex_idx);
2357  }
2358 
2359  std::vector<std::pair<RetType, RetType>> new_pair_list;
2360  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
2361  auto when_operand = rex_case->getWhen(i);
2362  auto new_rex_idx_for_when_operand =
2363  is_collected_window_function(when_operand->toHash());
2364 
2365  auto then_operand = rex_case->getThen(i);
2366  auto new_rex_idx_for_then_operand =
2367  is_collected_window_function(then_operand->toHash());
2368 
2369  new_pair_list.emplace_back(
2370  new_rex_idx_for_when_operand ? get_new_rex_input(*new_rex_idx_for_when_operand)
2371  : visit(when_operand),
2372  new_rex_idx_for_then_operand ? get_new_rex_input(*new_rex_idx_for_then_operand)
2373  : visit(then_operand));
2374  }
2375  auto new_rex_idx_for_else_operand =
2376  is_collected_window_function(rex_case->getElse()->toHash());
2377  auto new_else = new_rex_idx_for_else_operand
2378  ? get_new_rex_input(*new_rex_idx_for_else_operand)
2379  : visit(rex_case->getElse());
2380  return std::make_unique<RexCase>(new_pair_list, new_else);
2381  }
2382 
2383  private:
2384  std::optional<size_t> is_collected_window_function(size_t rex_hash) const {
2385  auto rex_it = window_func_to_new_rex_input_idx_map_.find(rex_hash);
2386  if (rex_it != window_func_to_new_rex_input_idx_map_.end()) {
2387  return rex_it->second;
2388  }
2389  return std::nullopt;
2390  }
2391 
2392  std::unique_ptr<const RexScalar> get_new_rex_input(size_t rex_idx) const {
2393  CHECK_GE(rex_idx, 0UL);
2394  CHECK_LT(rex_idx, new_rex_input_for_window_func_.size());
2395  auto& new_rex_input = new_rex_input_for_window_func_.at(rex_idx);
2396  CHECK(new_rex_input);
2397  auto copied_rex_input = copier_.visit(new_rex_input.get());
2398  return copied_rex_input;
2399  }
2400 
2401  std::unordered_set<size_t>& collected_window_func_hash_;
2402  // we should have new rex_input for each window function collected
2403  std::vector<std::unique_ptr<const RexScalar>>& new_rex_input_for_window_func_;
2404  // an index to get a new rex_input for the collected window function
2405  std::unordered_map<size_t, size_t>& window_func_to_new_rex_input_idx_map_;
2407  std::unordered_map<size_t, std::unique_ptr<const RexInput>>&
2410 };
2411 
2413  std::shared_ptr<RelProject> prev_node,
2414  std::shared_ptr<RelProject> new_node,
2415  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2416  query_hints) {
2417  auto delivered_hints = prev_node->getDeliveredHints();
2418  bool needs_propagate_hints = !delivered_hints->empty();
2419  if (needs_propagate_hints) {
2420  for (auto& kv : *delivered_hints) {
2421  new_node->addHint(kv.second);
2422  }
2423  auto prev_it = query_hints.find(prev_node->toHash());
2424  // query hint for the prev projection node should be registered
2425  CHECK(prev_it != query_hints.end());
2426  auto prev_hint_it = prev_it->second.find(prev_node->getId());
2427  CHECK(prev_hint_it != prev_it->second.end());
2428  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
2429  hint_map.emplace(new_node->getId(), prev_hint_it->second);
2430  query_hints.emplace(new_node->toHash(), hint_map);
2431  }
2432 }
2433 
2457  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2458  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2459  query_hints) {
2460  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2461  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2462  const auto node = *node_itr;
2463  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2464  if (!window_func_project_node) {
2465  continue;
2466  }
2467 
2468  const auto prev_node_itr = std::prev(node_itr);
2469  const auto prev_node = *prev_node_itr;
2470  CHECK(prev_node);
2471 
2472  // map scalar expression index in the project node to window function ptr
2473  std::unordered_map<size_t, const RexScalar*> collected_window_func;
2474  WindowFunctionCollector collector(collected_window_func, false);
2475  // Iterate the target exprs of the project node and check for window function
2476  // expressions. If an embedded expression exists, collect it
2477  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2478  const auto scalar_rex = window_func_project_node->getProjectAt(i);
2479  if (is_window_function_operator(scalar_rex)) {
2480  // top level window function exprs are fine
2481  continue;
2482  }
2483  collector.visit(scalar_rex);
2484  }
2485 
2486  if (!collected_window_func.empty()) {
2487  // we have a nested window function expression
2488  std::unordered_set<size_t> collected_window_func_hash;
2489  // the current window function needs a set of new rex input which references
2490  // expressions in the newly introduced projection node
2491  std::vector<std::unique_ptr<const RexScalar>> new_rex_input_for_window_func;
2492  // a target projection expression of the newly created projection node
2493  std::vector<std::unique_ptr<const RexScalar>> new_scalar_expr_for_window_project;
2494  // a map between nested window function (hash val) and
2495  // its rex index stored in the `new_rex_input_for_window_func`
2496  std::unordered_map<size_t, size_t> window_func_to_new_rex_input_idx_map;
2497  // a map between RexInput of the current window function projection node (hash
2498  // val) and its corresponding new RexInput which is pushed down to the new
2499  // projection node
2500  std::unordered_map<size_t, std::unique_ptr<const RexInput>>
2501  new_rex_input_from_child_node;
2502  RexDeepCopyVisitor copier;
2503 
2504  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2505  std::vector<std::string> dummy_fields;
2506  std::vector<std::string> new_project_field_names;
2507  // create a new project node, it will contain window function expressions
2508  auto new_project =
2509  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2510  // insert this new project node between the current window project node and its
2511  // child node
2512  node_list.insert(node_itr, new_project);
2513 
2514  // retrieve various information to replace expressions in the current window
2515  // function project node w/ considering scalar expressions in the new project node
2516  std::for_each(collected_window_func.begin(),
2517  collected_window_func.end(),
2518  [&new_project_field_names,
2519  &collected_window_func_hash,
2520  &new_rex_input_for_window_func,
2521  &new_scalar_expr_for_window_project,
2522  &copier,
2523  &new_project,
2524  &window_func_to_new_rex_input_idx_map](const auto& kv) {
2525  // compute window function expr's hash, and create a new rex_input
2526  // for it
2527  collected_window_func_hash.insert(kv.first);
2528 
2529  // map an old expression in the window function project node
2530  // to an index of the corresponding new RexInput
2531  const auto rex_idx = new_rex_input_for_window_func.size();
2532  window_func_to_new_rex_input_idx_map.emplace(kv.first, rex_idx);
2533 
2534  // create a new RexInput and make it as one of new expression of
2535  // the newly created project node
2536  new_rex_input_for_window_func.emplace_back(
2537  std::make_unique<const RexInput>(new_project.get(), rex_idx));
2538  new_scalar_expr_for_window_project.push_back(
2539  std::move(copier.visit(kv.second)));
2540  new_project_field_names.emplace_back("");
2541  });
2542  new_project->setExpressions(new_scalar_expr_for_window_project);
2543  new_project->setFields(std::move(new_project_field_names));
2544 
2545  auto window_func_scalar_exprs =
2546  window_func_project_node->getExpressionsAndRelease();
2547  RexWindowFuncReplacementVisitor replacer(collected_window_func_hash,
2548  new_rex_input_for_window_func,
2549  window_func_to_new_rex_input_idx_map,
2550  new_project.get(),
2551  new_rex_input_from_child_node);
2552  size_t rex_idx = 0;
2553  for (auto& scalar_expr : window_func_scalar_exprs) {
2554  // try to replace the old expressions in the window function project node
2555  // with expressions of the newly created project node
2556  auto new_parent_rex = replacer.visit(scalar_expr.get());
2557  window_func_scalar_exprs[rex_idx] = std::move(new_parent_rex);
2558  rex_idx++;
2559  }
2560  // Update the previous window project node
2561  window_func_project_node->setExpressions(window_func_scalar_exprs);
2562  window_func_project_node->replaceInput(prev_node, new_project);
2563  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2564  new_project->setPushedDownWindowExpr();
2565  }
2566  }
2567  nodes.assign(node_list.begin(), node_list.end());
2568 }
2569 
2570 using RexInputSet = std::unordered_set<RexInput>;
2571 
2572 class RexInputCollector : public RexVisitor<RexInputSet> {
2573  public:
2574  RexInputSet visitInput(const RexInput* input) const override {
2575  return RexInputSet{*input};
2576  }
2577 
2578  protected:
2580  const RexInputSet& next_result) const override {
2581  auto result = aggregate;
2582  result.insert(next_result.begin(), next_result.end());
2583  return result;
2584  }
2585 };
2586 
2587 namespace {
2589  bool& has_generic_expr_in_window_func) {
2590  for (auto const& partition_key : window_expr->getPartitionKeys()) {
2591  auto partition_input = dynamic_cast<RexInput const*>(partition_key.get());
2592  if (!partition_input) {
2593  return true;
2594  }
2595  }
2596  for (auto const& order_key : window_expr->getOrderKeys()) {
2597  auto order_input = dynamic_cast<RexInput const*>(order_key.get());
2598  if (!order_input) {
2599  return true;
2600  }
2601  }
2602  for (size_t k = 0; k < window_expr->size(); k++) {
2603  if (!shared::dynamic_castable_to_any<RexInput, RexLiteral>(
2604  window_expr->getOperand(k))) {
2605  has_generic_expr_in_window_func = true;
2606  return true;
2607  }
2608  }
2609  return false;
2610 }
2611 
2612 std::pair<bool, bool> need_pushdown_generic_expr(
2613  RelProject const* window_func_project_node) {
2614  bool has_generic_expr_in_window_func = false;
2615  bool res = false;
2616  for (size_t i = 0; i < window_func_project_node->size(); ++i) {
2617  auto const projected_target = window_func_project_node->getProjectAt(i);
2618  if (auto const* window_expr =
2619  dynamic_cast<RexWindowFunctionOperator const*>(projected_target)) {
2620  res =
2621  find_generic_expr_in_window_func(window_expr, has_generic_expr_in_window_func);
2622  } else if (auto const* case_expr = dynamic_cast<RexCase const*>(projected_target)) {
2623  std::unordered_map<size_t, const RexScalar*> collected_window_func;
2624  WindowFunctionCollector collector(collected_window_func, true);
2625  collector.visit(case_expr);
2626  for (auto const& kv : collected_window_func) {
2627  auto const* candidate_window_expr =
2628  dynamic_cast<RexWindowFunctionOperator const*>(kv.second);
2629  CHECK(candidate_window_expr);
2630  res = find_generic_expr_in_window_func(candidate_window_expr,
2631  has_generic_expr_in_window_func);
2632  }
2633  }
2634  }
2635  return std::make_pair(has_generic_expr_in_window_func, res);
2636 }
2637 }; // namespace
2651  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2652  const bool always_add_project_if_first_project_is_window_expr,
2653  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2654  query_hints) {
2655  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2656  size_t project_node_counter{0};
2657  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2658  const auto node = *node_itr;
2659 
2660  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2661  if (!window_func_project_node) {
2662  continue;
2663  }
2664  project_node_counter++;
2665  if (!window_func_project_node->hasWindowFunctionExpr()) {
2666  // this projection node does not have a window function
2667  // expression -- skip to the next node in the DAG.
2668  continue;
2669  }
2670 
2671  const auto prev_node_itr = std::prev(node_itr);
2672  const auto prev_node = *prev_node_itr;
2673  CHECK(prev_node);
2674 
2675  auto filter_node = std::dynamic_pointer_cast<RelFilter>(prev_node);
2676  auto join_node = std::dynamic_pointer_cast<RelJoin>(prev_node);
2677 
2678  auto scan_node = std::dynamic_pointer_cast<RelScan>(prev_node);
2679  const bool has_multi_fragment_scan_input =
2680  (scan_node &&
2681  (scan_node->getNumShards() > 0 || scan_node->getNumFragments() > 1));
2682  auto const [has_generic_expr_in_window_func, needs_expr_pushdown] =
2683  need_pushdown_generic_expr(window_func_project_node.get());
2684 
2685  // We currently add a preceding project node in one of two conditions:
2686  // 1. always_add_project_if_first_project_is_window_expr = true, which
2687  // we currently only set for distributed, but could also be set to support
2688  // multi-frag window function inputs, either if we can detect that an input table
2689  // is multi-frag up front, or using a retry mechanism like we do for join filter
2690  // push down.
2691  // TODO(todd): Investigate a viable approach for the above.
2692  // 2. Regardless of #1, if the window function project node is preceded by a
2693  // filter node. This is required both for correctness and to avoid pulling
2694  // all source input columns into memory since non-coalesced filter node
2695  // inputs are currently not pruned or eliminated via dead column elimination.
2696  // Note that we expect any filter node followed by a project node to be coalesced
2697  // into a single compound node in RelAlgDag::coalesce_nodes, and that action
2698  // prunes unused inputs.
2699  // TODO(todd): Investigate whether the shotgun filter node issue affects other
2700  // query plans, i.e. filters before joins, and whether there is a more general
2701  // approach to solving this (will still need the preceding project node for
2702  // window functions preceded by filter nodes for correctness though)
2703  // 3. Similar to the above, when the window function project node is preceded
2704  // by a join node.
2705  // 4. when partition by / order by clauses have a general expression instead of
2706  // referencing column
2707 
2708  if (!((always_add_project_if_first_project_is_window_expr &&
2709  project_node_counter == 1) ||
2710  filter_node || join_node || has_multi_fragment_scan_input ||
2711  needs_expr_pushdown)) {
2712  continue;
2713  }
2714 
2715  if (needs_expr_pushdown || join_node) {
2716  // previous logic cannot cover join_node case well, so use the newly introduced
2717  // push-down expression logic to safely add pre_project node before processing
2718  // window function
2719  std::unordered_map<size_t, size_t> expr_offset_cache;
2720  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_new_project;
2721  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_window_project;
2722  std::vector<std::string> fields_for_window_project;
2723  std::vector<std::string> fields_for_new_project;
2724 
2725  // step 0. create new project node with an empty scalar expr to rebind target
2726  // exprs
2727  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2728  std::vector<std::string> dummy_fields;
2729  auto new_project =
2730  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2731 
2732  // step 1 - 2
2733  PushDownGenericExpressionInWindowFunction visitor(new_project,
2734  scalar_exprs_for_new_project,
2735  fields_for_new_project,
2736  expr_offset_cache);
2737  for (size_t i = 0; i < window_func_project_node->size(); ++i) {
2738  auto projected_target = window_func_project_node->getProjectAt(i);
2739  auto new_projection_target = visitor.visit(projected_target);
2740  scalar_exprs_for_window_project.emplace_back(
2741  std::move(new_projection_target.release()));
2742  }
2743  new_project->setExpressions(scalar_exprs_for_new_project);
2744  new_project->setFields(std::move(fields_for_new_project));
2745  bool has_groupby = false;
2746  auto aggregate = std::dynamic_pointer_cast<RelAggregate>(prev_node);
2747  if (aggregate) {
2748  has_groupby = aggregate->getGroupByCount() > 0;
2749  }
2750  // force rowwise output to prevent computing incorrect query result
2751  if (has_groupby && visitor.hasPartitionExpression()) {
2752  // we currently may compute incorrect result with columnar output when
2753  // 1) the window function has partition expression, and
2754  // 2) a parent node of the window function projection node has group by
2755  // expression todo (yoonmin) : relax this
2756  VLOG(1)
2757  << "Query output overridden to row-wise format due to presence of a window "
2758  "function with partition expression and group-by expression.";
2759  new_project->forceRowwiseOutput();
2760  } else if (has_generic_expr_in_window_func) {
2761  VLOG(1) << "Query output overridden to row-wise format due to presence of a "
2762  "generic expression as an input expression of the window "
2763  "function.";
2764  new_project->forceRowwiseOutput();
2765  } else if (visitor.hasCaseExprAsWindowOperand()) {
2766  VLOG(1)
2767  << "Query output overridden to row-wise format due to presence of a window "
2768  "function with a case statement as its operand.";
2769  new_project->forceRowwiseOutput();
2770  }
2771 
2772  // step 3. finalize
2773  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2774  new_project->setPushedDownWindowExpr();
2775  node_list.insert(node_itr, new_project);
2776  window_func_project_node->replaceInput(prev_node, new_project);
2777  window_func_project_node->setExpressions(scalar_exprs_for_window_project);
2778  } else {
2779  // only push rex_inputs listed in the window function down to a new project node
2780  RexInputSet inputs;
2781  RexInputCollector input_collector;
2782  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2783  auto new_inputs =
2784  input_collector.visit(window_func_project_node->getProjectAt(i));
2785  inputs.insert(new_inputs.begin(), new_inputs.end());
2786  }
2787 
2788  // Note: Technically not required since we are mapping old inputs to new input
2789  // indices, but makes the re-mapping of inputs easier to follow.
2790  std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2791  std::sort(sorted_inputs.begin(),
2792  sorted_inputs.end(),
2793  [](const auto& a, const auto& b) { return a.getIndex() < b.getIndex(); });
2794 
2795  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2796  std::vector<std::string> fields;
2797  std::unordered_map<unsigned, unsigned> old_index_to_new_index;
2798  for (auto& input : sorted_inputs) {
2799  CHECK_EQ(input.getSourceNode(), prev_node.get());
2800  CHECK(old_index_to_new_index
2801  .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2802  .second);
2803  scalar_exprs.emplace_back(input.deepCopy());
2804  fields.emplace_back("");
2805  }
2806 
2807  auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2808  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2809  new_project->setPushedDownWindowExpr();
2810  node_list.insert(node_itr, new_project);
2811  window_func_project_node->replaceInput(
2812  prev_node, new_project, old_index_to_new_index);
2813  }
2814  }
2815  nodes.assign(node_list.begin(), node_list.end());
2816 }
2817 
2818 int64_t get_int_literal_field(const rapidjson::Value& obj,
2819  const char field[],
2820  const int64_t default_val) noexcept {
2821  const auto it = obj.FindMember(field);
2822  if (it == obj.MemberEnd()) {
2823  return default_val;
2824  }
2825  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
2826  CHECK_EQ(kDECIMAL, lit->getType());
2827  CHECK_EQ(unsigned(0), lit->getScale());
2828  CHECK_EQ(unsigned(0), lit->getTargetScale());
2829  return lit->getVal<int64_t>();
2830 }
2831 
2832 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
2833  const auto& inputs_json = field(node, "inputs");
2834  CHECK(inputs_json.IsArray() && !inputs_json.Size());
2835 }
2836 
2837 const std::pair<const Catalog_Namespace::Catalog*, const TableDescriptor*>
2838 getCatalogAndTableFromScanNode(const rapidjson::Value& scan_ra) {
2839  const auto& table_json = field(scan_ra, "table");
2840  CHECK(table_json.IsArray());
2841  CHECK_EQ(unsigned(2), table_json.Size());
2842  const auto cat =
2843  Catalog_Namespace::SysCatalog::instance().getCatalog(table_json[0].GetString());
2844  CHECK(cat);
2845  const auto td = cat->getMetadataForTable(table_json[1].GetString());
2846  CHECK(td);
2847  return {cat.get(), td};
2848 }
2849 
2850 std::vector<std::string> getFieldNamesFromScanNode(const rapidjson::Value& scan_ra) {
2851  const auto& fields_json = field(scan_ra, "fieldNames");
2852  return strings_from_json_array(fields_json);
2853 }
2854 
2855 } // namespace
2856 
2858  for (const auto& expr : scalar_exprs_) {
2859  if (is_window_function_operator(expr.get())) {
2860  return true;
2861  }
2862  }
2863  return false;
2864 }
2865 namespace details {
2866 
2868  public:
2870 
2871  std::vector<std::shared_ptr<RelAlgNode>> run(const rapidjson::Value& rels,
2872  RelAlgDag& root_dag) {
2873  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
2874  const auto& crt_node = *rels_it;
2875  const auto id = node_id(crt_node);
2876  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2877  CHECK(crt_node.IsObject());
2878  std::shared_ptr<RelAlgNode> ra_node = nullptr;
2879  const auto rel_op = json_str(field(crt_node, "relOp"));
2880  if (rel_op == std::string("EnumerableTableScan") ||
2881  rel_op == std::string("LogicalTableScan")) {
2882  ra_node = dispatchTableScan(crt_node);
2883  } else if (rel_op == std::string("LogicalProject")) {
2884  ra_node = dispatchProject(crt_node, root_dag);
2885  } else if (rel_op == std::string("LogicalFilter")) {
2886  ra_node = dispatchFilter(crt_node, root_dag);
2887  } else if (rel_op == std::string("LogicalAggregate")) {
2888  ra_node = dispatchAggregate(crt_node);
2889  } else if (rel_op == std::string("LogicalJoin")) {
2890  ra_node = dispatchJoin(crt_node, root_dag);
2891  } else if (rel_op == std::string("LogicalSort")) {
2892  ra_node = dispatchSort(crt_node);
2893  } else if (rel_op == std::string("LogicalValues")) {
2894  ra_node = dispatchLogicalValues(crt_node);
2895  } else if (rel_op == std::string("LogicalTableModify")) {
2896  ra_node = dispatchModify(crt_node);
2897  } else if (rel_op == std::string("LogicalTableFunctionScan")) {
2898  ra_node = dispatchTableFunction(crt_node, root_dag);
2899  } else if (rel_op == std::string("LogicalUnion")) {
2900  ra_node = dispatchUnion(crt_node);
2901  } else {
2902  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
2903  }
2904  nodes_.push_back(ra_node);
2905  }
2906 
2907  return std::move(nodes_);
2908  }
2909 
2910  private:
2911  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
2912  check_empty_inputs_field(scan_ra);
2913  CHECK(scan_ra.IsObject());
2914  const auto [cat, td] = getCatalogAndTableFromScanNode(scan_ra);
2915  const auto field_names = getFieldNamesFromScanNode(scan_ra);
2916  if (scan_ra.HasMember("hints")) {
2917  auto scan_node = std::make_shared<RelScan>(td, field_names, *cat);
2918  getRelAlgHints(scan_ra, scan_node);
2919  return scan_node;
2920  }
2921  return std::make_shared<RelScan>(td, field_names, *cat);
2922  }
2923 
2924  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra,
2925  RelAlgDag& root_dag) {
2926  const auto inputs = getRelAlgInputs(proj_ra);
2927  CHECK_EQ(size_t(1), inputs.size());
2928  const auto& exprs_json = field(proj_ra, "exprs");
2929  CHECK(exprs_json.IsArray());
2930  std::vector<std::unique_ptr<const RexScalar>> exprs;
2931  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
2932  ++exprs_json_it) {
2933  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, root_dag));
2934  }
2935  const auto& fields = field(proj_ra, "fields");
2936  if (proj_ra.HasMember("hints")) {
2937  auto project_node = std::make_shared<RelProject>(
2938  exprs, strings_from_json_array(fields), inputs.front());
2939  getRelAlgHints(proj_ra, project_node);
2940  return project_node;
2941  }
2942  return std::make_shared<RelProject>(
2943  exprs, strings_from_json_array(fields), inputs.front());
2944  }
2945 
2946  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra,
2947  RelAlgDag& root_dag) {
2948  const auto inputs = getRelAlgInputs(filter_ra);
2949  CHECK_EQ(size_t(1), inputs.size());
2950  const auto id = node_id(filter_ra);
2951  CHECK(id);
2952  auto condition = parse_scalar_expr(field(filter_ra, "condition"), root_dag);
2953  return std::make_shared<RelFilter>(condition, inputs.front());
2954  }
2955 
2956  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
2957  const auto inputs = getRelAlgInputs(agg_ra);
2958  CHECK_EQ(size_t(1), inputs.size());
2959  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
2960  const auto group = indices_from_json_array(field(agg_ra, "group"));
2961  for (size_t i = 0; i < group.size(); ++i) {
2962  CHECK_EQ(i, group[i]);
2963  }
2964  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
2965  throw QueryNotSupported("GROUP BY extensions not supported");
2966  }
2967  const auto& aggs_json_arr = field(agg_ra, "aggs");
2968  CHECK(aggs_json_arr.IsArray());
2969  std::vector<std::unique_ptr<const RexAgg>> aggs;
2970  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
2971  aggs_json_arr_it != aggs_json_arr.End();
2972  ++aggs_json_arr_it) {
2973  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
2974  }
2975  if (agg_ra.HasMember("hints")) {
2976  auto agg_node =
2977  std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2978  getRelAlgHints(agg_ra, agg_node);
2979  return agg_node;
2980  }
2981  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2982  }
2983 
2984  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra,
2985  RelAlgDag& root_dag) {
2986  const auto inputs = getRelAlgInputs(join_ra);
2987  CHECK_EQ(size_t(2), inputs.size());
2988  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
2989  auto filter_rex = parse_scalar_expr(field(join_ra, "condition"), root_dag);
2990  if (join_ra.HasMember("hints")) {
2991  auto join_node =
2992  std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2993  getRelAlgHints(join_ra, join_node);
2994  return join_node;
2995  }
2996  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2997  }
2998 
2999  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
3000  const auto inputs = getRelAlgInputs(sort_ra);
3001  CHECK_EQ(size_t(1), inputs.size());
3002  std::vector<SortField> collation;
3003  const auto& collation_arr = field(sort_ra, "collation");
3004  CHECK(collation_arr.IsArray());
3005  for (auto collation_arr_it = collation_arr.Begin();
3006  collation_arr_it != collation_arr.End();
3007  ++collation_arr_it) {
3008  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
3009  const auto sort_dir = parse_sort_direction(*collation_arr_it);
3010  const auto null_pos = parse_nulls_position(*collation_arr_it);
3011  collation.emplace_back(field_idx, sort_dir, null_pos);
3012  }
3013  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
3014  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
3015  auto ret = std::make_shared<RelSort>(
3016  collation,
3017  limit >= 0 ? std::make_optional<size_t>(limit) : std::nullopt,
3018  offset,
3019  inputs.front());
3020  return ret;
3021  }
3022 
3023  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
3024  const auto inputs = getRelAlgInputs(logical_modify_ra);
3025  CHECK_EQ(size_t(1), inputs.size());
3026 
3027  const auto [cat, table_descriptor] =
3028  getCatalogAndTableFromScanNode(logical_modify_ra);
3029  if (table_descriptor->isView) {
3030  throw std::runtime_error("UPDATE of a view is unsupported.");
3031  }
3032 
3033  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
3034  std::string op = json_str(field(logical_modify_ra, "operation"));
3035  RelModify::TargetColumnList target_column_list;
3036 
3037  if (op == "UPDATE") {
3038  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
3039  CHECK(update_columns.IsArray());
3040 
3041  for (auto column_arr_it = update_columns.Begin();
3042  column_arr_it != update_columns.End();
3043  ++column_arr_it) {
3044  target_column_list.push_back(column_arr_it->GetString());
3045  }
3046  }
3047 
3048  auto modify_node = std::make_shared<RelModify>(
3049  *cat, table_descriptor, flattened, op, target_column_list, inputs[0]);
3050  switch (modify_node->getOperation()) {
3052  modify_node->applyDeleteModificationsToInputNode();
3053  break;
3054  }
3056  modify_node->applyUpdateModificationsToInputNode();
3057  break;
3058  }
3059  default:
3060  throw std::runtime_error("Unsupported RelModify operation: " +
3061  json_node_to_string(logical_modify_ra));
3062  }
3063 
3064  return modify_node;
3065  }
3066 
3067  std::shared_ptr<RelTableFunction> dispatchTableFunction(
3068  const rapidjson::Value& table_func_ra,
3069  RelAlgDag& root_dag) {
3070  const auto inputs = getRelAlgInputs(table_func_ra);
3071  const auto& invocation = field(table_func_ra, "invocation");
3072  CHECK(invocation.IsObject());
3073 
3074  const auto& operands = field(invocation, "operands");
3075  CHECK(operands.IsArray());
3076  CHECK_GE(operands.Size(), unsigned(0));
3077 
3078  std::vector<const Rex*> col_inputs;
3079  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
3080  std::vector<std::string> fields;
3081 
3082  for (auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
3083  ++exprs_json_it) {
3084  const auto& expr_json = *exprs_json_it;
3085  CHECK(expr_json.IsObject());
3086  if (expr_json.HasMember("op")) {
3087  const auto op_str = json_str(field(expr_json, "op"));
3088  if (op_str == "CAST" && expr_json.HasMember("type")) {
3089  const auto& expr_type = field(expr_json, "type");
3090  CHECK(expr_type.IsObject());
3091  CHECK(expr_type.HasMember("type"));
3092  const auto& expr_type_name = json_str(field(expr_type, "type"));
3093  if (expr_type_name == "CURSOR") {
3094  CHECK(expr_json.HasMember("operands"));
3095  const auto& expr_operands = field(expr_json, "operands");
3096  CHECK(expr_operands.IsArray());
3097  if (expr_operands.Size() != 1) {
3098  throw std::runtime_error(
3099  "Table functions currently only support one ResultSet input");
3100  }
3101  auto pos = field(expr_operands[0], "input").GetInt();
3102  CHECK_LT(pos, inputs.size());
3103  for (size_t i = inputs[pos]->size(); i > 0; i--) {
3104  table_func_inputs.emplace_back(
3105  std::make_unique<RexAbstractInput>(col_inputs.size()));
3106  col_inputs.emplace_back(table_func_inputs.back().get());
3107  }
3108  continue;
3109  }
3110  }
3111  }
3112  table_func_inputs.emplace_back(parse_scalar_expr(*exprs_json_it, root_dag));
3113  }
3114 
3115  const auto& op_name = field(invocation, "op");
3116  CHECK(op_name.IsString());
3117 
3118  std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
3119  const auto& row_types = field(table_func_ra, "rowType");
3120  CHECK(row_types.IsArray());
3121  CHECK_GE(row_types.Size(), unsigned(0));
3122  const auto& row_types_array = row_types.GetArray();
3123  for (size_t i = 0; i < row_types_array.Size(); i++) {
3124  // We don't care about the type information in rowType -- replace each output with
3125  // a reference to be resolved later in the translator
3126  table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
3127  fields.emplace_back("");
3128  }
3129  return std::make_shared<RelTableFunction>(op_name.GetString(),
3130  inputs,
3131  fields,
3132  col_inputs,
3133  table_func_inputs,
3134  table_function_projected_outputs);
3135  }
3136 
3137  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
3138  const rapidjson::Value& logical_values_ra) {
3139  const auto& tuple_type_arr = field(logical_values_ra, "type");
3140  CHECK(tuple_type_arr.IsArray());
3141  std::vector<TargetMetaInfo> tuple_type;
3142  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
3143  tuple_type_arr_it != tuple_type_arr.End();
3144  ++tuple_type_arr_it) {
3145  auto component_type = parse_type(*tuple_type_arr_it);
3146  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
3147  if (component_type.is_none_encoded_string()) {
3148  component_type.set_compression(kENCODING_DICT);
3149  component_type.set_comp_param(TRANSIENT_DICT_ID);
3150  component_type.setStringDictKey({TRANSIENT_DICT_DB_ID, TRANSIENT_DICT_ID});
3151  component_type.set_size(4);
3152  }
3153  tuple_type.emplace_back(component_name, component_type);
3154  }
3155  const auto& inputs_arr = field(logical_values_ra, "inputs");
3156  CHECK(inputs_arr.IsArray());
3157  const auto& tuples_arr = field(logical_values_ra, "tuples");
3158  CHECK(tuples_arr.IsArray());
3159 
3160  if (inputs_arr.Size()) {
3161  throw QueryNotSupported("Inputs not supported in logical values yet.");
3162  }
3163 
3164  std::vector<RelLogicalValues::RowValues> values;
3165  if (tuples_arr.Size()) {
3166  for (const auto& row : tuples_arr.GetArray()) {
3167  CHECK(row.IsArray());
3168  const auto values_json = row.GetArray();
3169  if (!values.empty()) {
3170  CHECK_EQ(values[0].size(), values_json.Size());
3171  }
3172  values.emplace_back(RelLogicalValues::RowValues{});
3173  for (const auto& value : values_json) {
3174  CHECK(value.IsObject());
3175  CHECK(value.HasMember("literal"));
3176  values.back().emplace_back(parse_literal(value));
3177  }
3178  }
3179  }
3180 
3181  return std::make_shared<RelLogicalValues>(tuple_type, values);
3182  }
3183 
3184  std::shared_ptr<RelLogicalUnion> dispatchUnion(
3185  const rapidjson::Value& logical_union_ra) {
3186  auto inputs = getRelAlgInputs(logical_union_ra);
3187  auto const& all_type_bool = field(logical_union_ra, "all");
3188  CHECK(all_type_bool.IsBool());
3189  return std::make_shared<RelLogicalUnion>(std::move(inputs), all_type_bool.GetBool());
3190  }
3191 
3192  RelAlgInputs getRelAlgInputs(const rapidjson::Value& node) {
3193  if (node.HasMember("inputs")) {
3194  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
3195  RelAlgInputs ra_inputs;
3196  for (const auto& str_id : str_input_ids) {
3197  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
3198  }
3199  return ra_inputs;
3200  }
3201  return {prev(node)};
3202  }
3203 
3204  std::pair<std::string, std::string> getKVOptionPair(std::string& str, size_t& pos) {
3205  auto option = str.substr(0, pos);
3206  std::string delim = "=";
3207  size_t delim_pos = option.find(delim);
3208  auto key = option.substr(0, delim_pos);
3209  auto val = option.substr(delim_pos + 1, option.length());
3210  str.erase(0, pos + delim.length() + 1);
3211  return {key, val};
3212  }
3213 
3214  ExplainedQueryHint parseHintString(std::string& hint_string) {
3215  std::string white_space_delim = " ";
3216  int l = hint_string.length();
3217  hint_string = hint_string.erase(0, 1).substr(0, l - 2);
3218  size_t pos = 0;
3219  auto global_hint_checker = [&](const std::string& input_hint_name) -> HintIdentifier {
3220  bool global_hint = false;
3221  std::string hint_name = input_hint_name;
3222  auto global_hint_identifier = hint_name.substr(0, 2);
3223  if (global_hint_identifier.compare("g_") == 0) {
3224  global_hint = true;
3225  hint_name = hint_name.substr(2, hint_string.length());
3226  }
3227  return {global_hint, hint_name};
3228  };
3229  auto parsed_hint =
3230  global_hint_checker(hint_string.substr(0, hint_string.find(white_space_delim)));
3231  auto hint_type = RegisteredQueryHint::translateQueryHint(parsed_hint.hint_name);
3232  if ((pos = hint_string.find("options:")) != std::string::npos) {
3233  // need to parse hint options
3234  std::vector<std::string> tokens;
3235  bool kv_list_op = false;
3236  std::string raw_options = hint_string.substr(pos + 8, hint_string.length() - 2);
3237  if (raw_options.find('{') != std::string::npos) {
3238  kv_list_op = true;
3239  } else {
3240  CHECK(raw_options.find('[') != std::string::npos);
3241  }
3242  auto t1 = raw_options.erase(0, 1);
3243  raw_options = t1.substr(0, t1.length() - 1);
3244  std::string op_delim = ", ";
3245  if (kv_list_op) {
3246  // kv options
3247  std::unordered_map<std::string, std::string> kv_options;
3248  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3249  auto kv_pair = getKVOptionPair(raw_options, pos);
3250  kv_options.emplace(kv_pair.first, kv_pair.second);
3251  }
3252  // handle the last kv pair
3253  auto kv_pair = getKVOptionPair(raw_options, pos);
3254  kv_options.emplace(kv_pair.first, kv_pair.second);
3255  return {hint_type, parsed_hint.global_hint, false, true, kv_options};
3256  } else {
3257  std::vector<std::string> list_options;
3258  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3259  list_options.emplace_back(raw_options.substr(0, pos));
3260  raw_options.erase(0, pos + white_space_delim.length() + 1);
3261  }
3262  // handle the last option
3263  list_options.emplace_back(raw_options.substr(0, pos));
3264  return {hint_type, parsed_hint.global_hint, false, false, list_options};
3265  }
3266  } else {
3267  // marker hint: no extra option for this hint
3268  return {hint_type, parsed_hint.global_hint, true, false};
3269  }
3270  }
3271 
3272  void getRelAlgHints(const rapidjson::Value& json_node,
3273  std::shared_ptr<RelAlgNode> node) {
3274  std::string hint_explained = json_str(field(json_node, "hints"));
3275  size_t pos = 0;
3276  std::string delim = "|";
3277  std::vector<std::string> hint_list;
3278  while ((pos = hint_explained.find(delim)) != std::string::npos) {
3279  hint_list.emplace_back(hint_explained.substr(0, pos));
3280  hint_explained.erase(0, pos + delim.length());
3281  }
3282  // handling the last one
3283  hint_list.emplace_back(hint_explained.substr(0, pos));
3284 
3285  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
3286  if (agg_node) {
3287  for (std::string& hint : hint_list) {
3288  auto parsed_hint = parseHintString(hint);
3289  agg_node->addHint(parsed_hint);
3290  }
3291  }
3292  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
3293  if (project_node) {
3294  for (std::string& hint : hint_list) {
3295  auto parsed_hint = parseHintString(hint);
3296  project_node->addHint(parsed_hint);
3297  }
3298  }
3299  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
3300  if (scan_node) {
3301  for (std::string& hint : hint_list) {
3302  auto parsed_hint = parseHintString(hint);
3303  scan_node->addHint(parsed_hint);
3304  }
3305  }
3306  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
3307  if (join_node) {
3308  for (std::string& hint : hint_list) {
3309  auto parsed_hint = parseHintString(hint);
3310  join_node->addHint(parsed_hint);
3311  }
3312  }
3313 
3314  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
3315  if (compound_node) {
3316  for (std::string& hint : hint_list) {
3317  auto parsed_hint = parseHintString(hint);
3318  compound_node->addHint(parsed_hint);
3319  }
3320  }
3321  }
3322 
3323  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
3324  const auto id = node_id(crt_node);
3325  CHECK(id);
3326  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
3327  return nodes_.back();
3328  }
3329 
3330  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
3331 };
3332 
3333 } // namespace details
3334 
3335 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::buildDag(const std::string& query_ra,
3336  const bool optimize_dag) {
3337  rapidjson::Document query_ast;
3338  query_ast.Parse(query_ra.c_str());
3339  VLOG(2) << "Parsing query RA JSON: " << query_ra;
3340  if (query_ast.HasParseError()) {
3341  query_ast.GetParseError();
3342  LOG(ERROR) << "Failed to parse RA tree from Calcite (offset "
3343  << query_ast.GetErrorOffset() << "):\n"
3344  << rapidjson::GetParseError_En(query_ast.GetParseError());
3345  VLOG(1) << "Failed to parse query RA: " << query_ra;
3346  throw std::runtime_error(
3347  "Failed to parse relational algebra tree. Possible query syntax error.");
3348  }
3349  CHECK(query_ast.IsObject());
3351 
3352  return build(query_ast, nullptr, optimize_dag);
3353 }
3354 
3355 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::buildDagForSubquery(
3356  RelAlgDag& root_dag,
3357  const rapidjson::Value& query_ast) {
3358  return build(query_ast, &root_dag, true);
3359 }
3360 
3361 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::build(const rapidjson::Value& query_ast,
3362  RelAlgDag* root_dag,
3363  const bool optimize_dag) {
3364  const auto& rels = field(query_ast, "rels");
3365  CHECK(rels.IsArray());
3366 
3367  auto rel_alg_dag_ptr = std::make_unique<RelAlgDag>();
3368  auto& rel_alg_dag = *rel_alg_dag_ptr;
3369  auto& nodes = getNodes(rel_alg_dag);
3370 
3371  try {
3372  nodes = details::RelAlgDispatcher().run(rels, root_dag ? *root_dag : rel_alg_dag);
3373  } catch (const QueryNotSupported&) {
3374  throw;
3375  }
3376  CHECK(!nodes.empty());
3377  bind_inputs(nodes);
3378 
3380 
3381  if (optimize_dag) {
3382  optimizeDag(rel_alg_dag);
3383  }
3384 
3385  return rel_alg_dag_ptr;
3386 }
3387 
3389  auto const build_state = rel_alg_dag.getBuildState();
3390  if (build_state == RelAlgDag::BuildState::kBuiltOptimized) {
3391  return;
3392  }
3393 
3395  << static_cast<int>(build_state);
3396 
3397  auto& nodes = getNodes(rel_alg_dag);
3398  auto& subqueries = getSubqueries(rel_alg_dag);
3399  auto& query_hints = getQueryHints(rel_alg_dag);
3400 
3401  compute_node_hash(nodes);
3402  handle_query_hint(nodes, rel_alg_dag);
3403  mark_nops(nodes);
3404  simplify_sort(nodes);
3406  eliminate_identical_copy(nodes);
3407  fold_filters(nodes);
3408  std::vector<const RelAlgNode*> filtered_left_deep_joins;
3409  std::vector<const RelAlgNode*> left_deep_joins;
3410  for (const auto& node : nodes) {
3411  const auto left_deep_join_root = get_left_deep_join_root(node);
3412  // The filter which starts a left-deep join pattern must not be coalesced
3413  // since it contains (part of) the join condition.
3414  if (left_deep_join_root) {
3415  left_deep_joins.push_back(left_deep_join_root.get());
3416  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
3417  filtered_left_deep_joins.push_back(left_deep_join_root.get());
3418  }
3419  }
3420  }
3421  if (filtered_left_deep_joins.empty()) {
3423  }
3424  eliminate_dead_columns(nodes);
3425  eliminate_dead_subqueries(subqueries, nodes.back().get());
3426  separate_window_function_expressions(nodes, query_hints);
3428  nodes,
3429  g_cluster /* always_add_project_if_first_project_is_window_expr */,
3430  query_hints);
3431  coalesce_nodes(nodes, left_deep_joins, query_hints);
3432  CHECK(nodes.back().use_count() == 1);
3433  create_left_deep_join(nodes);
3434  handle_agg_over_join(nodes, query_hints);
3436 
3438 }
3439 
3440 void RelAlgDag::eachNode(std::function<void(RelAlgNode const*)> const& callback) const {
3441  for (auto const& node : nodes_) {
3442  if (node) {
3443  callback(node.get());
3444  }
3445  }
3446 }
3447 
3449  for (auto& node : nodes_) {
3450  if (node) {
3451  node->resetQueryExecutionState();
3452  }
3453  }
3454 }
3455 
3456 // Return tree with depth represented by indentations.
3457 std::string tree_string(const RelAlgNode* ra, const size_t depth) {
3458  std::string result = std::string(2 * depth, ' ') + ::toString(ra) + '\n';
3459  for (size_t i = 0; i < ra->inputCount(); ++i) {
3460  result += tree_string(ra->getInput(i), depth + 1);
3461  }
3462  return result;
3463 }
3464 
3465 std::string RexSubQuery::toString(RelRexToStringConfig config) const {
3466  if (!config.attributes_only) {
3467  return cat(::typeName(this), "(", ra_->toString(config), ")");
3468  } else {
3469  return cat(::typeName(this), "()");
3470  }
3471 }
3472 
3473 std::string RexInput::toString(RelRexToStringConfig config) const {
3474  const auto scan_node = dynamic_cast<const RelScan*>(node_);
3475  if (scan_node) {
3476  auto field_name = scan_node->getFieldName(getIndex());
3477  auto table_name = scan_node->getTableDescriptor()->tableName;
3478  return ::typeName(this) + "(" + table_name + "." + field_name + ")";
3479  }
3480  auto node_id_in_plan = node_->getIdInPlanTree();
3481  auto node_id_str =
3482  node_id_in_plan ? std::to_string(*node_id_in_plan) : std::to_string(node_->getId());
3483  auto node_str = config.skip_input_nodes ? "(input_node_id=" + node_id_str
3484  : "(input_node=" + node_->toString(config);
3485  return cat(::typeName(this), node_str, ", in_index=", std::to_string(getIndex()), ")");
3486 }
3487 
3488 std::string RelCompound::toString(RelRexToStringConfig config) const {
3489  if (!config.attributes_only) {
3490  auto ret = cat(::typeName(this),
3491  ", filter_expr=",
3492  (filter_expr_ ? filter_expr_->toString(config) : "null"),
3493  ", target_exprs=");
3494  for (auto& expr : target_exprs_) {
3495  ret += expr->toString(config) + " ";
3496  }
3497  ret += ", agg_exps=";
3498  for (auto& expr : agg_exprs_) {
3499  ret += expr->toString(config) + " ";
3500  }
3501  ret += ", scalar_sources=";
3502  for (auto& expr : scalar_sources_) {
3503  ret += expr->toString(config) + " ";
3504  }
3505  return cat(ret,
3506  ", ",
3508  ", ",
3509  ", fields=",
3510  ::toString(fields_),
3511  ", is_agg=",
3513  } else {
3514  return cat(::typeName(this),
3515  "(",
3517  ", fields=",
3518  ::toString(fields_),
3519  ", is_agg=",
3521  ")");
3522  }
3523 }
3524 
3525 std::size_t hash_value(RexAbstractInput const& rex_ab_input) {
3526  if (rex_ab_input.hash_) {
3527  return *rex_ab_input.hash_;
3528  }
3529  rex_ab_input.hash_ = typeid(RexAbstractInput).hash_code();
3530  boost::hash_combine(*rex_ab_input.hash_, rex_ab_input.in_index_);
3531  return *rex_ab_input.hash_;
3532 }
3533 
3534 std::size_t hash_value(RexLiteral const& rex_literal) {
3535  if (rex_literal.hash_) {
3536  return *rex_literal.hash_;
3537  }
3538  rex_literal.hash_ = typeid(RexLiteral).hash_code();
3539  boost::apply_visitor(
3540  [&rex_literal](auto&& current_val) {
3541  using T = std::decay_t<decltype(current_val)>;
3542  if constexpr (!std::is_same_v<boost::blank, T>) {
3543  static_assert(std::is_same_v<int64_t, T> || std::is_same_v<double, T> ||
3544  std::is_same_v<std::string, T> || std::is_same_v<bool, T>);
3545  boost::hash_combine(*rex_literal.hash_, current_val);
3546  }
3547  },
3548  rex_literal.literal_);
3549  boost::hash_combine(*rex_literal.hash_, rex_literal.type_);
3550  boost::hash_combine(*rex_literal.hash_, rex_literal.target_type_);
3551  boost::hash_combine(*rex_literal.hash_, rex_literal.scale_);
3552  boost::hash_combine(*rex_literal.hash_, rex_literal.precision_);
3553  boost::hash_combine(*rex_literal.hash_, rex_literal.target_scale_);
3554  boost::hash_combine(*rex_literal.hash_, rex_literal.target_precision_);
3555  return *rex_literal.hash_;
3556 }
3557 
3558 std::size_t hash_value(RexOperator const& rex_op) {
3559  if (rex_op.hash_) {
3560  return *rex_op.hash_;
3561  }
3562  rex_op.hash_ = typeid(RexOperator).hash_code();
3563  boost::hash_combine(*rex_op.hash_, rex_op.op_);
3564  boost::hash_combine(*rex_op.hash_, rex_op.operands_);
3565  boost::hash_combine(*rex_op.hash_, rex_op.getType().get_type_name());
3566  return *rex_op.hash_;
3567 }
3568 
3569 std::size_t hash_value(RexCase const& rex_case) {
3570  if (rex_case.hash_) {
3571  return *rex_case.hash_;
3572  }
3573  rex_case.hash_ = typeid(RexCase).hash_code();
3574  boost::hash_combine(*rex_case.hash_, rex_case.expr_pair_list_);
3575  boost::hash_combine(*rex_case.hash_, rex_case.else_expr_);
3576  return *rex_case.hash_;
3577 }
3578 
3579 std::size_t hash_value(RexFunctionOperator const& rex_op) {
3580  if (rex_op.hash_) {
3581  return *rex_op.hash_;
3582  }
3583  rex_op.hash_ = typeid(RexFunctionOperator).hash_code();
3584  boost::hash_combine(*rex_op.hash_, ::toString(rex_op.op_));
3585  boost::hash_combine(*rex_op.hash_, rex_op.getType().get_type_name());
3586  boost::hash_combine(*rex_op.hash_, rex_op.operands_);
3587  boost::hash_combine(*rex_op.hash_, rex_op.name_);
3588  return *rex_op.hash_;
3589 }
3590 
3591 std::size_t hash_value(SortField const& sort_field) {
3592  auto hash = boost::hash_value(sort_field.field_);
3593  boost::hash_combine(hash, sort_field.sort_dir_ == SortDirection::Ascending ? "a" : "d");
3594  boost::hash_combine(hash,
3595  sort_field.nulls_pos_ == NullSortedPosition::First ? "f" : "l");
3596  return hash;
3597 }
3598 
3599 std::size_t hash_value(RexWindowFunctionOperator const& rex_window) {
3600  if (rex_window.hash_) {
3601  return *rex_window.hash_;
3602  }
3603  rex_window.hash_ = typeid(RexWindowFunctionOperator).hash_code();
3604  boost::hash_combine(*rex_window.hash_, rex_window.getType().get_type_name());
3605  boost::hash_combine(*rex_window.hash_, rex_window.getName());
3606  boost::hash_combine(*rex_window.hash_, rex_window.is_rows_);
3607  boost::hash_combine(*rex_window.hash_, rex_window.collation_);
3608  boost::hash_combine(*rex_window.hash_, rex_window.operands_);
3609  boost::hash_combine(*rex_window.hash_, rex_window.partition_keys_);
3610  boost::hash_combine(*rex_window.hash_, rex_window.order_keys_);
3611  auto get_window_bound_hash =
3613  auto h = boost::hash_value(bound.bound_expr);
3614  boost::hash_combine(h, bound.unbounded);
3615  boost::hash_combine(h, bound.preceding);
3616  boost::hash_combine(h, bound.following);
3617  boost::hash_combine(h, bound.is_current_row);
3618  boost::hash_combine(h, bound.order_key);
3619  return h;
3620  };
3621  boost::hash_combine(*rex_window.hash_,
3622  get_window_bound_hash(rex_window.frame_start_bound_));
3623  boost::hash_combine(*rex_window.hash_,
3624  get_window_bound_hash(rex_window.frame_end_bound_));
3625  return *rex_window.hash_;
3626 }
3627 
3628 std::size_t hash_value(RexRef const& rex_ref) {
3629  if (rex_ref.hash_) {
3630  return *rex_ref.hash_;
3631  }
3632  rex_ref.hash_ = typeid(RexRef).hash_code();
3633  boost::hash_combine(*rex_ref.hash_, rex_ref.index_);
3634  return *rex_ref.hash_;
3635 }
3636 
3637 std::size_t hash_value(RexAgg const& rex_agg) {
3638  if (rex_agg.hash_) {
3639  return *rex_agg.hash_;
3640  }
3641  rex_agg.hash_ = typeid(RexAgg).hash_code();
3642  boost::hash_combine(*rex_agg.hash_, rex_agg.operands_);
3643  boost::hash_combine(*rex_agg.hash_, rex_agg.agg_);
3644  boost::hash_combine(*rex_agg.hash_, rex_agg.distinct_);
3645  boost::hash_combine(*rex_agg.hash_, rex_agg.type_.get_type_name());
3646  return *rex_agg.hash_;
3647 }
3648 
3649 std::size_t hash_value(RexSubQuery const& rex_subq) {
3650  if (rex_subq.hash_) {
3651  return *rex_subq.hash_;
3652  }
3653  rex_subq.hash_ = typeid(RexSubQuery).hash_code();
3654  boost::hash_combine(*rex_subq.hash_, rex_subq.ra_);
3655  return *rex_subq.hash_;
3656 }
3657 
3658 std::size_t hash_value(RexInput const& rex_input) {
3659  if (rex_input.hash_) {
3660  return *rex_input.hash_;
3661  }
3662  rex_input.hash_ = typeid(RexInput).hash_code();
3663  boost::hash_combine(*rex_input.hash_, rex_input.node_);
3664  boost::hash_combine(*rex_input.hash_, rex_input.getIndex());
3665  return *rex_input.hash_;
3666 }
3667 
3668 std::size_t hash_value(RelScan const& rel_scan) {
3669  if (rel_scan.hash_) {
3670  return *rel_scan.hash_;
3671  }
3672  rel_scan.hash_ = typeid(RelScan).hash_code();
3673  boost::hash_combine(*rel_scan.hash_, rel_scan.td_->tableId);
3674  boost::hash_combine(*rel_scan.hash_, rel_scan.td_->tableName);
3675  boost::hash_combine(*rel_scan.hash_, ::toString(rel_scan.field_names_));
3676  return *rel_scan.hash_;
3677 }
3678 
3679 std::size_t hash_value(RelProject const& rel_project) {
3680  if (rel_project.hash_) {
3681  return *rel_project.hash_;
3682  }
3683  rel_project.hash_ = typeid(RelProject).hash_code();
3684  boost::hash_combine(*rel_project.hash_, rel_project.scalar_exprs_);
3685  boost::hash_combine(*rel_project.hash_, rel_project.fields_);
3686  boost::hash_combine(*rel_project.hash_, rel_project.inputs_);
3687  return *rel_project.hash_;
3688 }
3689 
3690 std::size_t hash_value(RelAggregate const& rel_agg) {
3691  if (rel_agg.hash_) {
3692  return *rel_agg.hash_;
3693  }
3694  rel_agg.hash_ = typeid(RelAggregate).hash_code();
3695  boost::hash_combine(*rel_agg.hash_, rel_agg.groupby_count_);
3696  boost::hash_combine(*rel_agg.hash_, rel_agg.agg_exprs_);
3697  boost::hash_combine(*rel_agg.hash_, rel_agg.fields_);
3698  boost::hash_combine(*rel_agg.hash_, rel_agg.inputs_);
3699  return *rel_agg.hash_;
3700 }
3701 
3702 std::size_t hash_value(RelJoin const& rel_join) {
3703  if (rel_join.hash_) {
3704  return *rel_join.hash_;
3705  }
3706  rel_join.hash_ = typeid(RelJoin).hash_code();
3707  boost::hash_combine(*rel_join.hash_, rel_join.condition_);
3708  boost::hash_combine(*rel_join.hash_, rel_join.inputs_);
3709  boost::hash_combine(*rel_join.hash_, ::toString(rel_join.getJoinType()));
3710  return *rel_join.hash_;
3711 }
3712 
3713 std::size_t hash_value(RelTranslatedJoin const& rel_tr_join) {
3714  if (rel_tr_join.hash_) {
3715  return *rel_tr_join.hash_;
3716  }
3717  rel_tr_join.hash_ = typeid(RelTranslatedJoin).hash_code();
3718  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.lhs_);
3719  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.rhs_);
3720  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.outer_join_cond_);
3721  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.nested_loop_);
3722  boost::hash_combine(*rel_tr_join.hash_, ::toString(rel_tr_join.join_type_));
3723  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.op_type_);
3724  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.qualifier_);
3725  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.op_typeinfo_);
3726  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.filter_ops_);
3727  return *rel_tr_join.hash_;
3728 }
3729 
3730 std::size_t hash_value(RelFilter const& rel_filter) {
3731  if (rel_filter.hash_) {
3732  return *rel_filter.hash_;
3733  }
3734  rel_filter.hash_ = typeid(RelFilter).hash_code();
3735  boost::hash_combine(*rel_filter.hash_, rel_filter.filter_);
3736  boost::hash_combine(*rel_filter.hash_, rel_filter.inputs_);
3737  return *rel_filter.hash_;
3738 }
3739 
3740 std::size_t hash_value(RelLeftDeepInnerJoin const& rel_join) {
3741  if (rel_join.hash_) {
3742  return *rel_join.hash_;
3743  }
3744  rel_join.hash_ = typeid(RelLeftDeepInnerJoin).hash_code();
3745  boost::hash_combine(*rel_join.hash_, rel_join.condition_);
3746  boost::hash_combine(*rel_join.hash_, rel_join.outer_conditions_per_level_);
3747  boost::hash_combine(*rel_join.hash_, rel_join.original_filter_);
3748  boost::hash_combine(*rel_join.hash_, rel_join.inputs_);
3749  return *rel_join.hash_;
3750 }
3751 
3752 std::size_t hash_value(RelCompound const& rel_compound) {
3753  if (rel_compound.hash_) {
3754  return *rel_compound.hash_;
3755  }
3756  rel_compound.hash_ = typeid(RelCompound).hash_code();
3757  boost::hash_combine(*rel_compound.hash_, rel_compound.filter_expr_);
3758  boost::hash_combine(*rel_compound.hash_, rel_compound.is_agg_);
3759  boost::hash_combine(*rel_compound.hash_, rel_compound.target_exprs_);
3760  boost::hash_combine(*rel_compound.hash_, rel_compound.agg_exprs_);
3761  boost::hash_combine(*rel_compound.hash_, rel_compound.scalar_sources_);
3762  boost::hash_combine(*rel_compound.hash_, rel_compound.groupby_count_);
3763  boost::hash_combine(*rel_compound.hash_, rel_compound.fields_);
3764  boost::hash_combine(*rel_compound.hash_, rel_compound.inputs_);
3765  return *rel_compound.hash_;
3766 }
3767 
3768 std::size_t hash_value(RelSort const& rel_sort) {
3769  if (rel_sort.hash_) {
3770  return *rel_sort.hash_;
3771  }
3772  rel_sort.hash_ = typeid(RelSort).hash_code();
3773  boost::hash_combine(*rel_sort.hash_, rel_sort.collation_);
3774  boost::hash_combine(*rel_sort.hash_, rel_sort.limit_.has_value());
3775  boost::hash_combine(*rel_sort.hash_, rel_sort.limit_.value_or(0));
3776  boost::hash_combine(*rel_sort.hash_, rel_sort.offset_);
3777  boost::hash_combine(*rel_sort.hash_, rel_sort.inputs_);
3778  return *rel_sort.hash_;
3779 }
3780 
3781 std::size_t hash_value(RelModify const& rel_modify) {
3782  if (rel_modify.hash_) {
3783  return *rel_modify.hash_;
3784  }
3785  rel_modify.hash_ = typeid(RelModify).hash_code();
3786  boost::hash_combine(*rel_modify.hash_, rel_modify.table_descriptor_->tableName);
3787  boost::hash_combine(*rel_modify.hash_, rel_modify.table_descriptor_->tableId);
3788  boost::hash_combine(*rel_modify.hash_, rel_modify.flattened_);
3789  boost::hash_combine(*rel_modify.hash_,
3791  boost::hash_combine(*rel_modify.hash_, rel_modify.target_column_list_);
3792  boost::hash_combine(*rel_modify.hash_, rel_modify.inputs_);
3793  return *rel_modify.hash_;
3794 }
3795 
3796 std::size_t hash_value(RelTableFunction const& rel_tf) {
3797  if (rel_tf.hash_) {
3798  return *rel_tf.hash_;
3799  }
3800  rel_tf.hash_ = typeid(RelTableFunction).hash_code();
3801  boost::hash_combine(*rel_tf.hash_, rel_tf.function_name_);
3802  boost::hash_combine(*rel_tf.hash_, rel_tf.table_func_inputs_);
3803  boost::hash_combine(*rel_tf.hash_, rel_tf.target_exprs_);
3804  boost::hash_combine(*rel_tf.hash_, rel_tf.fields_);
3805  boost::hash_combine(*rel_tf.hash_, rel_tf.inputs_);
3806  return *rel_tf.hash_;
3807 }
3808 
3809 std::size_t hash_value(RelLogicalValues const& rel_lv) {
3810  if (rel_lv.hash_) {
3811  return *rel_lv.hash_;
3812  }
3813  rel_lv.hash_ = typeid(RelLogicalValues).hash_code();
3814  for (auto& target_meta_info : rel_lv.tuple_type_) {
3815  boost::hash_combine(*rel_lv.hash_, target_meta_info.get_resname());
3816  boost::hash_combine(*rel_lv.hash_, target_meta_info.get_type_info().get_type_name());
3817  }
3818  return *rel_lv.hash_;
3819 }
3820 
3821 std::size_t hash_value(RelLogicalUnion const& rel_lv) {
3822  if (rel_lv.hash_) {
3823  return *rel_lv.hash_;
3824  }
3825  rel_lv.hash_ = typeid(RelLogicalUnion).hash_code();
3826  boost::hash_combine(*rel_lv.hash_, rel_lv.is_all_);
3827  boost::hash_combine(*rel_lv.hash_, rel_lv.inputs_);
3828  return *rel_lv.hash_;
3829 }
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
Definition: RelAlgDag.h:2776
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
const size_t getGroupByCount() const
Definition: RelAlgDag.h:1508
SQLTypes to_sql_type(const std::string &type_name)
void setGlobalQueryHints(const RegisteredQueryHint &global_hints)
Definition: RelAlgDag.h:3340
std::optional< size_t > is_collected_window_function(size_t rex_hash) const
Definition: RelAlgDag.cpp:2384
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1210
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
Definition: RelAlgDag.h:1731
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
Definition: RelAlgDag.cpp:1419
RexWindowBound frame_start_bound_
Definition: RelAlgDag.h:725
RelCompound(const TableDescriptor *td, const Catalog_Namespace::Catalog *catalog)
Definition: RelAlgDag.h:2021
const RexScalar * getThen(const size_t idx) const
Definition: RelAlgDag.h:440
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
Definition: RelAlgDag.cpp:2956
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2946
void * visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:113
RexRebindReindexInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input, std::unordered_map< unsigned, unsigned > old_to_new_index_map)
Definition: RelAlgDag.cpp:107
void set_notnulls(std::vector< TargetMetaInfo > *tmis0, std::vector< bool > const &notnulls)
Definition: RelAlgDag.cpp:896
std::vector< std::unique_ptr< const RexScalar > > outer_conditions_per_level_
Definition: RelAlgDag.h:2007
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
Definition: RelAlgDag.cpp:1622
std::unique_ptr< RexSubQuery > deepCopy() const
Definition: RelAlgDag.cpp:60
unsigned in_index_
Definition: RelAlgDag.h:188
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.h:1374
JoinType
Definition: sqldefs.h:174
static std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > & getQueryHints(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:3385
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
Definition: RelAlgDag.h:2645
std::string cat(Ts &&...args)
std::optional< size_t > getOffsetForPushedDownExpr(WindowExprType type, size_t expr_offset) const
Definition: RelAlgDag.cpp:158
RexWindowFuncReplacementVisitor(std::unordered_set< size_t > &collected_window_func_hash, std::vector< std::unique_ptr< const RexScalar >> &new_rex_input_for_window_func, std::unordered_map< size_t, size_t > &window_func_to_new_rex_input_idx_map, RelProject *new_project, std::unordered_map< size_t, std::unique_ptr< const RexInput >> &new_rex_input_from_child_node)
Definition: RelAlgDag.cpp:2279
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1194
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< bool > get_notnulls(std::vector< TargetMetaInfo > const &tmis0)
Definition: RelAlgDag.cpp:882
Definition: sqltypes.h:76
std::vector< std::unique_ptr< const RexScalar > > & scalar_exprs_for_new_project_
Definition: RelAlgDag.cpp:333
size_t size() const override
Definition: RelAlgDag.h:1320
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:2136
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
std::string tableName
const bool nested_loop_
Definition: RelAlgDag.h:1856
size_t groupby_count_
Definition: RelAlgDag.h:1606
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
Definition: RelAlgDag.cpp:1922
void eliminate_redundant_projection(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: RelAlgDag.cpp:2171
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3488
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *rex_input) const final
Definition: RelAlgDag.cpp:2302
std::vector< RexInput > RANodeOutput
Definition: RelAlgDag.h:3432
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
Definition: RelAlgDag.cpp:1454
const RexScalar * getElse() const
Definition: RelAlgDag.h:445
static thread_local unsigned crt_id_
Definition: RelAlgDag.h:953
NullSortedPosition nulls_pos_
Definition: RelAlgDag.h:571
std::unique_ptr< const RexScalar > visitOperator(const RexOperator *rex_operator) const override
Definition: RelAlgDag.cpp:298
std::string function_name_
Definition: RelAlgDag.h:2639
const RexScalar * outer_join_cond_
Definition: RelAlgDag.h:1855
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
Definition: RelAlgDag.cpp:1109
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2911
RelProject(const TableDescriptor *td, const Catalog_Namespace::Catalog *catalog)
Definition: RelAlgDag.h:1261
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
RexScalar const * copyAndRedirectSource(RexScalar const *, size_t input_idx) const
Definition: RelAlgDag.cpp:944
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:686
SQLAgg to_agg_kind(const std::string &agg_name)
std::shared_ptr< RelLogicalUnion > dispatchUnion(const rapidjson::Value &logical_union_ra)
Definition: RelAlgDag.cpp:3184
#define LOG(tag)
Definition: Logger.h:285
std::vector< std::string > TargetColumnList
Definition: RelAlgDag.h:2291
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1316
const SQLTypeInfo & getType() const
Definition: RelAlgDag.h:378
std::unique_ptr< const RexScalar > get_new_rex_input(size_t rex_idx) const
Definition: RelAlgDag.cpp:2392
size_t size() const
Definition: RelAlgDag.h:364
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1436
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2924
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:51
const RexScalar * getOperand(const size_t idx) const
Definition: RelAlgDag.h:366
std::vector< const Rex * > col_inputs_
Definition: RelAlgDag.h:2642
const JoinType join_type_
Definition: RelAlgDag.h:1857
std::string json_node_to_string(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:978
bool hasEquivCollationOf(const RelSort &that) const
Definition: RelAlgDag.cpp:803
JoinType to_join_type(const std::string &join_type_name)
Definition: RelAlgDag.cpp:1401
void resetQueryExecutionState()
Definition: RelAlgDag.cpp:3448
std::shared_ptr< RelFilter > original_filter_
Definition: RelAlgDag.h:2008
std::pair< bool, bool > need_pushdown_generic_expr(RelProject const *window_func_project_node)
Definition: RelAlgDag.cpp:2612
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:46
std::unique_ptr< const RexScalar > else_expr_
Definition: RelAlgDag.h:469
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.h:3352
std::string join(T const &container, std::string const &delim)
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1247
#define UNREACHABLE()
Definition: Logger.h:338
void handle_query_hint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDag &rel_alg_dag) noexcept
Definition: RelAlgDag.cpp:1570
bool hint_applied_
Definition: RelAlgDag.h:1733
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
#define CHECK_GE(x, y)
Definition: Logger.h:306
SortDirection
Definition: RelAlgDag.h:531
void registerQueryHint(const RelAlgNode *node, const RegisteredQueryHint &query_hint)
Definition: RelAlgDag.h:3298
std::vector< std::string > fields_
Definition: RelAlgDag.h:1460
const std::pair< const Catalog_Namespace::Catalog *, const TableDescriptor * > getCatalogAndTableFromScanNode(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2838
void pushDownExpressionInWindowFunction(const RexWindowFunctionOperator *window_expr) const
Definition: RelAlgDag.cpp:179
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1701
Definition: sqldefs.h:48
std::unique_ptr< const RexScalar > visitCase(const RexCase *rex_case) const override
Definition: RelAlgDag.cpp:279
const RexScalar * getWhen(const size_t idx) const
Definition: RelAlgDag.h:435
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
Definition: RelAlgDag.cpp:1348
const RegisteredQueryHint & getGlobalHints() const
Definition: RelAlgDag.h:3338
#define TRANSIENT_DICT_DB_ID
Definition: DbObjectKeys.h:25
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
Definition: RelAlgDag.cpp:365
void propagate_hints_to_new_project(std::shared_ptr< RelProject > prev_node, std::shared_ptr< RelProject > new_node, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2412
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
Definition: RelAlgDag.cpp:470
std::string toString(const QueryDescriptionType &type)
Definition: Types.h:64
std::unique_ptr< const RexScalar > defaultResult() const override
Definition: RelAlgDag.cpp:330
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1576
const RelAlgNode * rhs_
Definition: RelAlgDag.h:1851
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:1361
const TableDescriptor * td_
Definition: RelAlgDag.h:1174
std::optional< size_t > getIdInPlanTree() const
Definition: RelAlgDag.h:134
std::unordered_map< size_t, const RexScalar * > & collected_window_func_
Definition: RelAlgDag.cpp:2273
const std::string op_type_
Definition: RelAlgDag.h:1858
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
Definition: RelAlgDag.h:2172
void * visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:77
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
Definition: RelAlgDag.cpp:989
static std::vector< std::shared_ptr< RexSubQuery > > & getSubqueries(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:3379
RexInputSet aggregateResult(const RexInputSet &aggregate, const RexInputSet &next_result) const override
Definition: RelAlgDag.cpp:2579
std::vector< std::string > field_names_
Definition: RelAlgDag.h:1175
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
Definition: RelAlgDag.cpp:1475
std::unique_ptr< const RexScalar > visitLiteral(const RexLiteral *rex_literal) const override
Definition: RelAlgDag.cpp:265
bool flattened_
Definition: RelAlgDag.h:2495
std::string to_string(char const *&&v)
void add_window_function_pre_project(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const bool always_add_project_if_first_project_is_window_expr, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2650
const std::string getFieldName(const size_t i) const
Definition: RelAlgDag.h:1127
std::unique_ptr< const RexScalar > visitSubQuery(const RexSubQuery *rex_subquery) const override
Definition: RelAlgDag.cpp:274
const std::string qualifier_
Definition: RelAlgDag.h:1859
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
Definition: RelAlgDag.h:2278
constexpr double a
Definition: Utm.h:32
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2984
RelLogicalValues()=default
std::vector< std::unique_ptr< const RexScalar > > & new_rex_input_for_window_func_
Definition: RelAlgDag.cpp:2403
std::unordered_set< RexInput > RexInputSet
Definition: RelAlgDag.cpp:2570
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
std::string to_string() const
Definition: sqltypes.h:526
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:33
unsigned getIndex() const
Definition: RelAlgDag.h:174
size_t field_
Definition: RelAlgDag.h:569
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2456
void markAsNop()
Definition: RelAlgDag.h:925
static SysCatalog & instance()
Definition: SysCatalog.h:343
bool aggregateResult(const bool &aggregate, const bool &next_result) const final
Definition: RelAlgDag.cpp:1959
SQLOps getOperator() const
Definition: RelAlgDag.h:376
void setExecutionResult(const ExecutionResultShPtr result)
Definition: RelAlgDag.cpp:51
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:3067
std::unordered_map< size_t, std::unique_ptr< const RexInput > > & new_rex_input_from_child_node_
Definition: RelAlgDag.cpp:2408
virtual size_t toHash() const override
Definition: RelAlgDag.h:461
unsigned getId() const
Definition: RelAlgDag.h:869
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
Definition: RelAlgDag.cpp:764
void create_rex_input_for_new_project_node(RelAlgNode const *node, std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs, std::vector< std::string > &fields)
Definition: RelAlgDag.cpp:2118
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1274
static std::unique_ptr< RelAlgDag > buildDagForSubquery(RelAlgDag &root_dag, const rapidjson::Value &query_ast)
Definition: RelAlgDag.cpp:3355
std::unique_ptr< const RexScalar > visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:253
static QueryHint translateQueryHint(const std::string &hint_name)
Definition: QueryHint.h:367
size_t index_
Definition: RelAlgDag.h:762
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
#define CHECK_NE(x, y)
Definition: Logger.h:302
bool isRenaming() const
Definition: RelAlgDag.cpp:513
void setIndex(const unsigned in_index) const
Definition: RelAlgDag.h:176
std::vector< size_t > operands_
Definition: RelAlgDag.h:821
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1599
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2008
std::vector< std::string > fields_
Definition: RelAlgDag.h:2169
SQLOps to_sql_op(const std::string &op_str)
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1610
void set_scale(int s)
Definition: sqltypes.h:473
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:41
const TableDescriptor * table_descriptor_
Definition: RelAlgDag.h:2494
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1734
std::vector< std::unique_ptr< const RexScalar > > copyRexScalars(std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
Definition: RelAlgDag.cpp:627
std::vector< std::shared_ptr< const RelAlgNode >> RelAlgInputs
Definition: RelAlgDag.h:71
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
Definition: RelAlgDag.h:1459
RetType visitOperator(const RexOperator *rex_operator) const final
Definition: RelAlgDag.cpp:2326
const RelAlgNode * lhs_
Definition: RelAlgDag.h:1850
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:56
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1413
size_t branchCount() const
Definition: RelAlgDag.h:433
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:877
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
Definition: RelAlgDag.cpp:1080
Checked json field retrieval.
const std::vector< std::shared_ptr< const Analyzer::Expr > > filter_ops_
Definition: RelAlgDag.h:1854
void * visitCase(const RexCase *rex_case) const final
Definition: RelAlgDag.cpp:2227
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.cpp:3330
std::unique_ptr< const RexScalar > filter_
Definition: RelAlgDag.h:1945
bool isSimple() const
Definition: RelAlgDag.h:1307
std::vector< const Rex * > remapTargetPointers(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
Definition: RelAlgDag.cpp:638
std::vector< std::unique_ptr< const RexScalar > > operands_
Definition: RelAlgDag.h:400
std::optional< size_t > hash_
Definition: RelAlgDag.h:151
std::vector< std::string > fields_
Definition: RelAlgDag.h:2640
std::vector< const Rex * > target_exprs_
Definition: RelAlgDag.h:2174
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
Definition: RelAlgDag.cpp:1529
std::optional< size_t > hash_
Definition: RelAlgDag.h:947
unsigned getId() const
Definition: RelAlgDag.cpp:64
const RelAlgNode * node_
Definition: RelAlgDag.h:1079
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
Definition: RelAlgDag.h:908
bool find_generic_expr_in_window_func(RexWindowFunctionOperator const *window_expr, bool &has_generic_expr_in_window_func)
Definition: RelAlgDag.cpp:2588
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1501
RexInputSet visitInput(const RexInput *input) const override
Definition: RelAlgDag.cpp:2574
static std::string yieldModifyOperationString(ModifyOperation const op)
Definition: RelAlgDag.h:2293
bool distinct_
Definition: RelAlgDag.h:819
std::vector< TargetMetaInfo > getCompatibleMetainfoTypes() const
Definition: RelAlgDag.cpp:911
virtual std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const =0
static std::unique_ptr< RelAlgDag > buildDag(const std::string &query_ra, const bool optimize_dag)
Definition: RelAlgDag.cpp:3335
std::string tree_string(const RelAlgNode *ra, const size_t depth)
Definition: RelAlgDag.cpp:3457
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
Definition: RelAlgDag.h:1607
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1216
void compute_node_hash(const std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: RelAlgDag.cpp:1608
Hints * getDeliveredHints()
Definition: RelAlgDag.h:2159
SQLAgg agg_
Definition: RelAlgDag.h:818
void setTableFuncInputs(std::vector< std::unique_ptr< const RexScalar >> &&exprs)
Definition: RelAlgDag.cpp:730
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:537
PushDownGenericExpressionInWindowFunction(std::shared_ptr< RelProject > new_project, std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs_for_new_project, std::vector< std::string > &fields_for_new_project, std::unordered_map< size_t, size_t > &expr_offset_cache)
Definition: RelAlgDag.cpp:129
const RexScalar * getProjectAt(const size_t idx) const
Definition: RelAlgDag.h:1352
bool hint_applied_
Definition: RelAlgDag.h:1609
size_t offset_
Definition: RelAlgDag.h:2280
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:79
Definition: sqltypes.h:80
static RegisteredQueryHint defaults()
Definition: QueryHint.h:364
static std::unique_ptr< RelAlgDag > build(const rapidjson::Value &query_ast, RelAlgDag *root_dag, const bool optimize_dag)
Definition: RelAlgDag.cpp:3361
int32_t countRexLiteralArgs() const
Definition: RelAlgDag.cpp:698
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:2176
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
Definition: RelAlgDag.cpp:1639
const ConstRexScalarPtrVector & getPartitionKeys() const
Definition: RelAlgDag.h:643
bool allStringCastsAreToDictionaryEncodedStrings() const
Definition: RelAlgDag.cpp:955
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2871
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:304
const std::unordered_map< unsigned, unsigned > mapping_
Definition: RelAlgDag.cpp:122
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1462
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
Definition: RelAlgDag.cpp:2818
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:544
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
Definition: RelAlgDag.h:2830
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
Definition: RelAlgDag.h:2168
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1099
std::unique_ptr< const RexScalar > filter_expr_
Definition: RelAlgDag.h:2166
static std::vector< std::shared_ptr< RelAlgNode > > & getNodes(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:3375
void setSourceNode(const RelAlgNode *node) const
Definition: RelAlgDag.h:1061
bool hasWindowFunctionExpr() const
Definition: RelAlgDag.cpp:2857
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
Definition: RelAlgDag.cpp:3023
std::vector< ElementType >::const_iterator Super
Definition: RelAlgDag.cpp:1841
RelFilter()=default
std::vector< std::unique_ptr< const RexAgg > > copyAggExprs(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
Definition: RelAlgDag.cpp:617
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:995
ConstRexScalarPtrVector order_keys_
Definition: RelAlgDag.h:723
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
Definition: RelAlgDag.cpp:1336
std::unordered_map< QueryHint, ExplainedQueryHint > Hints
Definition: QueryHint.h:390
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
Definition: RelAlgDag.h:1056
std::vector< SortField > collation_
Definition: RelAlgDag.h:724
RexWindowBound frame_end_bound_
Definition: RelAlgDag.h:726
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:856
SQLTypeInfo type_
Definition: RelAlgDag.h:820
std::string get_type_name() const
Definition: sqltypes.h:482
virtual size_t toHash() const override
Definition: RelAlgDag.h:1074
std::vector< std::pair< std::unique_ptr< const RexScalar >, std::unique_ptr< const RexScalar > > > expr_pair_list_
Definition: RelAlgDag.h:468
std::string typeName(const T *v)
Definition: toString.h:106
ExplainedQueryHint parseHintString(std::string &hint_string)
Definition: RelAlgDag.cpp:3214
SqlWindowFunctionKind
Definition: sqldefs.h:122
void * visitOperator(const RexOperator *rex_operator) const final
Definition: RelAlgDag.cpp:2205
std::unique_ptr< const RexScalar > condition_
Definition: RelAlgDag.h:2006
SortDirection sort_dir_
Definition: RelAlgDag.h:570
Definition: sqldefs.h:52
void eachNode(std::function< void(RelAlgNode const *)> const &) const
Definition: RelAlgDag.cpp:3440
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3465
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
Definition: RelAlgDag.cpp:2999
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3525
const std::vector< std::string > & getFields() const
Definition: RelAlgDag.h:1366
std::unique_ptr< const RexScalar > visitRef(const RexRef *rex_ref) const override
Definition: RelAlgDag.cpp:270
std::string getFieldName(const size_t i) const
Definition: RelAlgDag.cpp:860
static void optimizeDag(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.cpp:3388
void handle_agg_over_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2135
bool g_enable_watchdog false
Definition: Execute.cpp:80
void set_notnull(bool n)
Definition: sqltypes.h:475
ModifyOperation operation_
Definition: RelAlgDag.h:2496
#define CHECK(condition)
Definition: Logger.h:291
const ConstRexScalarPtrVector & getOrderKeys() const
Definition: RelAlgDag.h:653
const std::string op_typeinfo_
Definition: RelAlgDag.h:1860
std::string name_
Definition: RelAlgDag.h:526
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1378
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
Definition: RelAlgDag.cpp:1659
bool g_enable_union
ConstRexScalarPtrVector partition_keys_
Definition: RelAlgDag.h:722
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints) noexcept
Definition: RelAlgDag.cpp:1682
bool g_cluster
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
Definition: RelAlgDag.cpp:96
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
Definition: RelAlgDag.cpp:3323
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:528
void getRelAlgHints(const rapidjson::Value &json_node, std::shared_ptr< RelAlgNode > node)
Definition: RelAlgDag.cpp:3272
SortDirection parse_sort_direction(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1204
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1228
Common Enum definitions for SQL processing.
bool is_dict_encoded_string() const
Definition: sqltypes.h:641
Definition: sqltypes.h:72
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3473
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
Definition: RelAlgDag.cpp:72
std::optional< size_t > limit_
Definition: RelAlgDag.h:2279
std::vector< std::unique_ptr< const RexScalar >> RowValues
Definition: RelAlgDag.h:2656
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1515
const std::string & getName() const
Definition: RelAlgDag.h:506
RetType visitCase(const RexCase *rex_case) const final
Definition: RelAlgDag.cpp:2353
const size_t inputCount() const
Definition: RelAlgDag.h:875
string name
Definition: setup.in.py:72
constexpr double n
Definition: Utm.h:38
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:2832
WindowFunctionCollector(std::unordered_map< size_t, const RexScalar * > &collected_window_func, bool only_add_window_expr)
Definition: RelAlgDag.cpp:2197
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
unsigned node_id(const rapidjson::Value &ra_node) noexcept
Definition: RelAlgDag.cpp:973
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
size_t size() const override
Definition: RelAlgDag.cpp:852
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
Definition: RelAlgDag.cpp:753
std::unordered_map< size_t, size_t > & window_func_to_new_rex_input_idx_map_
Definition: RelAlgDag.cpp:2405
RelAlgInputs getRelAlgInputs(const rapidjson::Value &node)
Definition: RelAlgDag.cpp:3192
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2850
SQLOps op_
Definition: RelAlgDag.h:399
RelTableFunction()=default
TargetColumnList target_column_list_
Definition: RelAlgDag.h:2497
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
Definition: RelAlgDag.cpp:3137
std::vector< std::string > fields_
Definition: RelAlgDag.h:1608
JoinType getJoinType() const
Definition: RelAlgDag.h:1650
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:140
NullSortedPosition
Definition: RelAlgDag.h:533
RANodeOutput get_node_output(const RelAlgNode *ra_node)
Definition: RelAlgDag.cpp:371
virtual size_t toHash() const =0
#define VLOG(n)
Definition: Logger.h:388
BuildState getBuildState() const
Definition: RelAlgDag.h:2805
RelAlgInputs inputs_
Definition: RelAlgDag.h:945
void reset_table_function_inputs(std::vector< const Rex * > &column_inputs, const std::vector< std::unique_ptr< const RexScalar >> &old_table_func_inputs, const std::vector< std::unique_ptr< const RexScalar >> &new_table_func_inputs)
Definition: RelAlgDag.cpp:711
void set_precision(int d)
Definition: sqltypes.h:471
std::vector< TargetMetaInfo > tuple_type_
Definition: RelAlgDag.h:2735
std::pair< std::string, std::string > getKVOptionPair(std::string &str, size_t &pos)
Definition: RelAlgDag.cpp:3204
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool isIdentity() const
Definition: RelAlgDag.cpp:440
bool same_ignoring_notnull(SQLTypeInfo ti0, SQLTypeInfo ti1)
Definition: RelAlgDag.cpp:890
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
Definition: RelAlgDag.h:2648
static void setBuildState(RelAlgDag &rel_alg_dag, const RelAlgDag::BuildState build_state)
Definition: RelAlgDag.h:3389
static void resetRelAlgFirstId() noexcept
Definition: RelAlgDag.cpp:47