OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgDag.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgDag.h"
19 #include "Catalog/Catalog.h"
21 #include "JsonAccessors.h"
22 #include "RelAlgOptimizer.h"
23 #include "RelLeftDeepInnerJoin.h"
24 #include "RexVisitor.h"
25 #include "Shared/sqldefs.h"
26 
27 #include <rapidjson/error/en.h>
28 #include <rapidjson/error/error.h>
29 #include <rapidjson/stringbuffer.h>
30 #include <rapidjson/writer.h>
31 
32 #include <string>
33 #include <unordered_set>
34 
35 extern bool g_cluster;
36 extern bool g_enable_union;
37 
38 namespace {
39 
40 const unsigned FIRST_RA_NODE_ID = 1;
41 
42 } // namespace
43 
44 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
45 
48 }
49 
51  const std::shared_ptr<const ExecutionResult> result) {
52  auto row_set = result->getRows();
53  CHECK(row_set);
54  CHECK_EQ(size_t(1), row_set->colCount());
55  *(type_.get()) = row_set->getColType(0);
56  (*(result_.get())) = result;
57 }
58 
59 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
60  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
61 }
62 
63 unsigned RexSubQuery::getId() const {
64  return ra_->getId();
65 }
66 
67 namespace {
68 
69 class RexRebindInputsVisitor : public RexVisitor<void*> {
70  public:
71  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
72  : old_input_(old_input), new_input_(new_input) {}
73 
74  virtual ~RexRebindInputsVisitor() = default;
75 
76  void* visitInput(const RexInput* rex_input) const override {
77  const auto old_source = rex_input->getSourceNode();
78  if (old_source == old_input_) {
79  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
80  if (left_deep_join) {
81  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
82  return nullptr;
83  }
84  rex_input->setSourceNode(new_input_);
85  }
86  return nullptr;
87  };
88 
89  private:
90  const RelAlgNode* old_input_;
92 };
93 
94 // Creates an output with n columns.
95 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
96  std::vector<RexInput> outputs;
97  outputs.reserve(n);
98  for (size_t i = 0; i < n; ++i) {
99  outputs.emplace_back(node, i);
100  }
101  return outputs;
102 }
103 
105  public:
107  const RelAlgNode* old_input,
108  const RelAlgNode* new_input,
109  std::unordered_map<unsigned, unsigned> old_to_new_index_map)
110  : RexRebindInputsVisitor(old_input, new_input), mapping_(old_to_new_index_map) {}
111 
112  void* visitInput(const RexInput* rex_input) const override {
113  RexRebindInputsVisitor::visitInput(rex_input);
114  auto mapping_itr = mapping_.find(rex_input->getIndex());
115  CHECK(mapping_itr != mapping_.end());
116  rex_input->setIndex(mapping_itr->second);
117  return nullptr;
118  }
119 
120  private:
121  const std::unordered_map<unsigned, unsigned> mapping_;
122 };
123 
125  : public RexVisitorBase<std::unique_ptr<const RexScalar>> {
126  public:
127  enum class WindowExprType { PARTITION_KEY, ORDER_KEY };
129  std::shared_ptr<RelProject> new_project,
130  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs_for_new_project,
131  std::vector<std::string>& fields_for_new_project,
132  std::unordered_map<size_t, size_t>& expr_offset_cache)
133  : new_project_(new_project)
134  , scalar_exprs_for_new_project_(scalar_exprs_for_new_project)
135  , fields_for_new_project_(fields_for_new_project)
136  , expr_offset_cache_(expr_offset_cache)
137  , found_case_expr_window_operand_(false)
138  , has_partition_expr_(false) {}
139 
140  size_t pushDownExpressionImpl(const RexScalar* expr) const {
141  auto hash = expr->toHash();
142  auto it = expr_offset_cache_.find(hash);
143  auto new_offset = -1;
144  if (it == expr_offset_cache_.end()) {
145  CHECK(
146  expr_offset_cache_.emplace(hash, scalar_exprs_for_new_project_.size()).second);
147  new_offset = scalar_exprs_for_new_project_.size();
148  fields_for_new_project_.emplace_back("");
149  scalar_exprs_for_new_project_.emplace_back(deep_copier_.visit(expr));
150  } else {
151  // we already pushed down the same expression, so reuse it
152  new_offset = it->second;
153  }
154  return new_offset;
155  }
156 
158  size_t expr_offset) const {
159  // given window expr offset and inner expr's offset,
160  // return a (push-downed) expression's offset from the new projection node
161  switch (type) {
162  case WindowExprType::PARTITION_KEY: {
163  auto it = pushed_down_partition_key_offset_.find(expr_offset);
164  CHECK(it != pushed_down_partition_key_offset_.end());
165  return it->second;
166  }
167  case WindowExprType::ORDER_KEY: {
168  auto it = pushed_down_order_key_offset_.find(expr_offset);
169  CHECK(it != pushed_down_order_key_offset_.end());
170  return it->second;
171  }
172  default:
173  UNREACHABLE();
174  return std::nullopt;
175  }
176  }
177 
179  const RexWindowFunctionOperator* window_expr) const {
180  // step 1. push "all" target expressions of the window_func_project_node down to the
181  // new child projection
182  // each window expr is a separate target expression of the projection node
183  // and they have their own inner expression related to partition / order clauses
184  // so we capture their offsets to correctly rebind their input
185  pushed_down_window_operands_offset_.clear();
186  pushed_down_partition_key_offset_.clear();
187  pushed_down_order_key_offset_.clear();
188  for (size_t offset = 0; offset < window_expr->size(); ++offset) {
189  auto expr = window_expr->getOperand(offset);
190  auto literal_expr = dynamic_cast<const RexLiteral*>(expr);
191  auto case_expr = dynamic_cast<const RexCase*>(expr);
192  if (case_expr) {
193  // when columnar output is enabled, pushdown case expr can incur an issue
194  // during columnarization, so we record this and try to force rowwise-output
195  // until we fix the issue
196  // todo (yoonmin) : relax this
197  found_case_expr_window_operand_ = true;
198  }
199  if (!literal_expr) {
200  auto new_offset = pushDownExpressionImpl(expr);
201  pushed_down_window_operands_offset_.emplace(offset, new_offset);
202  }
203  }
204  size_t offset = 0;
205  for (const auto& partition_key : window_expr->getPartitionKeys()) {
206  auto new_offset = pushDownExpressionImpl(partition_key.get());
207  pushed_down_partition_key_offset_.emplace(offset, new_offset);
208  ++offset;
209  }
210  has_partition_expr_ = !window_expr->getPartitionKeys().empty();
211  offset = 0;
212  for (const auto& order_key : window_expr->getOrderKeys()) {
213  auto new_offset = pushDownExpressionImpl(order_key.get());
214  pushed_down_order_key_offset_.emplace(offset, new_offset);
215  ++offset;
216  }
217 
218  // step 2. rebind projected targets of the window_func_project_node with the new
219  // project node
220  std::vector<std::unique_ptr<const RexScalar>> window_operands;
221  auto deconst_window_expr = const_cast<RexWindowFunctionOperator*>(window_expr);
222  for (size_t idx = 0; idx < window_expr->size(); ++idx) {
223  auto it = pushed_down_window_operands_offset_.find(idx);
224  if (it != pushed_down_window_operands_offset_.end()) {
225  auto new_input = std::make_unique<const RexInput>(new_project_.get(), it->second);
226  CHECK(new_input);
227  window_operands.emplace_back(std::move(new_input));
228  } else {
229  auto copied_expr = deep_copier_.visit(window_expr->getOperand(idx));
230  window_operands.emplace_back(std::move(copied_expr));
231  }
232  }
233  deconst_window_expr->replaceOperands(std::move(window_operands));
234 
235  for (size_t idx = 0; idx < window_expr->getPartitionKeys().size(); ++idx) {
236  auto new_offset = getOffsetForPushedDownExpr(WindowExprType::PARTITION_KEY, idx);
237  CHECK(new_offset);
238  auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
239  CHECK(new_input);
240  deconst_window_expr->replacePartitionKey(idx, std::move(new_input));
241  }
242 
243  for (size_t idx = 0; idx < window_expr->getOrderKeys().size(); ++idx) {
244  auto new_offset = getOffsetForPushedDownExpr(WindowExprType::ORDER_KEY, idx);
245  CHECK(new_offset);
246  auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
247  CHECK(new_input);
248  deconst_window_expr->replaceOrderKey(idx, std::move(new_input));
249  }
250  }
251 
252  std::unique_ptr<const RexScalar> visitInput(const RexInput* rex_input) const override {
253  auto new_offset = pushDownExpressionImpl(rex_input);
254  CHECK_LT(new_offset, scalar_exprs_for_new_project_.size());
255  auto hash = rex_input->toHash();
256  auto it = expr_offset_cache_.find(hash);
257  CHECK(it != expr_offset_cache_.end());
258  CHECK_EQ(new_offset, it->second);
259  auto new_input = std::make_unique<const RexInput>(new_project_.get(), new_offset);
260  CHECK(new_input);
261  return new_input;
262  }
263 
264  std::unique_ptr<const RexScalar> visitLiteral(
265  const RexLiteral* rex_literal) const override {
266  return deep_copier_.visit(rex_literal);
267  }
268 
269  std::unique_ptr<const RexScalar> visitRef(const RexRef* rex_ref) const override {
270  return deep_copier_.visit(rex_ref);
271  }
272 
273  std::unique_ptr<const RexScalar> visitSubQuery(
274  const RexSubQuery* rex_subquery) const override {
275  return deep_copier_.visit(rex_subquery);
276  }
277 
278  std::unique_ptr<const RexScalar> visitCase(const RexCase* rex_case) const override {
279  std::vector<
280  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
281  new_expr_pair_list;
282  std::unique_ptr<const RexScalar> new_else_expr;
283  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
284  const auto when = rex_case->getWhen(i);
285  auto new_when = PushDownGenericExpressionInWindowFunction::visit(when);
286  const auto then = rex_case->getThen(i);
287  auto new_then = PushDownGenericExpressionInWindowFunction::visit(then);
288  new_expr_pair_list.emplace_back(std::move(new_when), std::move(new_then));
289  }
290  if (rex_case->getElse()) {
291  new_else_expr = deep_copier_.visit(rex_case->getElse());
292  }
293  auto new_case = std::make_unique<const RexCase>(new_expr_pair_list, new_else_expr);
294  return new_case;
295  }
296 
297  std::unique_ptr<const RexScalar> visitOperator(
298  const RexOperator* rex_operator) const override {
299  const auto rex_window_func_operator =
300  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
301  if (rex_window_func_operator) {
302  pushDownExpressionInWindowFunction(rex_window_func_operator);
303  return deep_copier_.visit(rex_operator);
304  } else {
305  std::unique_ptr<const RexOperator> new_operator{nullptr};
306  std::vector<std::unique_ptr<const RexScalar>> new_operands;
307  for (size_t i = 0; i < rex_operator->size(); ++i) {
308  const auto operand = rex_operator->getOperand(i);
309  auto new_operand = PushDownGenericExpressionInWindowFunction::visit(operand);
310  new_operands.emplace_back(std::move(new_operand));
311  }
312  if (auto function_op = dynamic_cast<const RexFunctionOperator*>(rex_operator)) {
313  new_operator = std::make_unique<const RexFunctionOperator>(
314  function_op->getName(), new_operands, rex_operator->getType());
315  } else {
316  new_operator = std::make_unique<const RexOperator>(
317  rex_operator->getOperator(), new_operands, rex_operator->getType());
318  }
319  CHECK(new_operator);
320  return new_operator;
321  }
322  }
323 
324  bool hasCaseExprAsWindowOperand() { return found_case_expr_window_operand_; }
325 
326  bool hasPartitionExpression() { return has_partition_expr_; }
327 
328  private:
329  std::unique_ptr<const RexScalar> defaultResult() const override { return nullptr; }
330 
331  std::shared_ptr<RelProject> new_project_;
332  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs_for_new_project_;
333  std::vector<std::string>& fields_for_new_project_;
334  std::unordered_map<size_t, size_t>& expr_offset_cache_;
336  mutable bool has_partition_expr_;
337  mutable std::unordered_map<size_t, size_t> pushed_down_window_operands_offset_;
338  mutable std::unordered_map<size_t, size_t> pushed_down_partition_key_offset_;
339  mutable std::unordered_map<size_t, size_t> pushed_down_order_key_offset_;
341 };
342 
343 } // namespace
344 
346  std::shared_ptr<const RelAlgNode> old_input,
347  std::shared_ptr<const RelAlgNode> input,
348  std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map) {
349  RelAlgNode::replaceInput(old_input, input);
350  std::unique_ptr<RexRebindInputsVisitor> rebind_inputs;
351  if (old_to_new_index_map) {
352  rebind_inputs = std::make_unique<RexRebindReindexInputsVisitor>(
353  old_input.get(), input.get(), *old_to_new_index_map);
354  } else {
355  rebind_inputs =
356  std::make_unique<RexRebindInputsVisitor>(old_input.get(), input.get());
357  }
358  CHECK(rebind_inputs);
359  for (const auto& scalar_expr : scalar_exprs_) {
360  rebind_inputs->visit(scalar_expr.get());
361  }
362 }
363 
364 void RelProject::appendInput(std::string new_field_name,
365  std::unique_ptr<const RexScalar> new_input) {
366  fields_.emplace_back(std::move(new_field_name));
367  scalar_exprs_.emplace_back(std::move(new_input));
368 }
369 
371  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
372  if (scan_node) {
373  // Scan node has no inputs, output contains all columns in the table.
374  CHECK_EQ(size_t(0), scan_node->inputCount());
375  return n_outputs(scan_node, scan_node->size());
376  }
377  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
378  if (project_node) {
379  // Project output count doesn't depend on the input
380  CHECK_EQ(size_t(1), project_node->inputCount());
381  return n_outputs(project_node, project_node->size());
382  }
383  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
384  if (filter_node) {
385  // Filter preserves shape
386  CHECK_EQ(size_t(1), filter_node->inputCount());
387  const auto prev_out = get_node_output(filter_node->getInput(0));
388  return n_outputs(filter_node, prev_out.size());
389  }
390  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
391  if (aggregate_node) {
392  // Aggregate output count doesn't depend on the input
393  CHECK_EQ(size_t(1), aggregate_node->inputCount());
394  return n_outputs(aggregate_node, aggregate_node->size());
395  }
396  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
397  if (compound_node) {
398  // Compound output count doesn't depend on the input
399  CHECK_EQ(size_t(1), compound_node->inputCount());
400  return n_outputs(compound_node, compound_node->size());
401  }
402  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
403  if (join_node) {
404  // Join concatenates the outputs from the inputs and the output
405  // directly references the nodes in the input.
406  CHECK_EQ(size_t(2), join_node->inputCount());
407  auto lhs_out =
408  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
409  const auto rhs_out =
410  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
411  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
412  return lhs_out;
413  }
414  const auto table_func_node = dynamic_cast<const RelTableFunction*>(ra_node);
415  if (table_func_node) {
416  // Table Function output count doesn't depend on the input
417  return n_outputs(table_func_node, table_func_node->size());
418  }
419  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
420  if (sort_node) {
421  // Sort preserves shape
422  CHECK_EQ(size_t(1), sort_node->inputCount());
423  const auto prev_out = get_node_output(sort_node->getInput(0));
424  return n_outputs(sort_node, prev_out.size());
425  }
426  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
427  if (logical_values_node) {
428  CHECK_EQ(size_t(0), logical_values_node->inputCount());
429  return n_outputs(logical_values_node, logical_values_node->size());
430  }
431  const auto logical_union_node = dynamic_cast<const RelLogicalUnion*>(ra_node);
432  if (logical_union_node) {
433  return n_outputs(logical_union_node, logical_union_node->size());
434  }
435  LOG(FATAL) << "Unhandled ra_node type: " << ::toString(ra_node);
436  return {};
437 }
438 
440  if (!isSimple()) {
441  return false;
442  }
443  CHECK_EQ(size_t(1), inputCount());
444  const auto source = getInput(0);
445  if (dynamic_cast<const RelJoin*>(source)) {
446  return false;
447  }
448  const auto source_shape = get_node_output(source);
449  if (source_shape.size() != scalar_exprs_.size()) {
450  return false;
451  }
452  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
453  const auto& scalar_expr = scalar_exprs_[i];
454  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
455  CHECK(input);
456  CHECK_EQ(source, input->getSourceNode());
457  // We should add the additional check that input->getIndex() !=
458  // source_shape[i].getIndex(), but Calcite doesn't generate the right
459  // Sort-Project-Sort sequence when joins are involved.
460  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
461  return false;
462  }
463  }
464  return true;
465 }
466 
467 namespace {
468 
469 bool isRenamedInput(const RelAlgNode* node,
470  const size_t index,
471  const std::string& new_name) {
472  CHECK_LT(index, node->size());
473  if (auto join = dynamic_cast<const RelJoin*>(node)) {
474  CHECK_EQ(size_t(2), join->inputCount());
475  const auto lhs_size = join->getInput(0)->size();
476  if (index < lhs_size) {
477  return isRenamedInput(join->getInput(0), index, new_name);
478  }
479  CHECK_GE(index, lhs_size);
480  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
481  }
482 
483  if (auto scan = dynamic_cast<const RelScan*>(node)) {
484  return new_name != scan->getFieldName(index);
485  }
486 
487  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
488  return new_name != aggregate->getFieldName(index);
489  }
490 
491  if (auto project = dynamic_cast<const RelProject*>(node)) {
492  return new_name != project->getFieldName(index);
493  }
494 
495  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
496  return new_name != table_func->getFieldName(index);
497  }
498 
499  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
500  const auto& tuple_type = logical_values->getTupleType();
501  CHECK_LT(index, tuple_type.size());
502  return new_name != tuple_type[index].get_resname();
503  }
504 
505  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
506  dynamic_cast<const RelLogicalUnion*>(node));
507  return isRenamedInput(node->getInput(0), index, new_name);
508 }
509 
510 } // namespace
511 
513  if (!isSimple()) {
514  return false;
515  }
516  CHECK_EQ(scalar_exprs_.size(), fields_.size());
517  for (size_t i = 0; i < fields_.size(); ++i) {
518  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
519  CHECK(rex_in);
520  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
521  return true;
522  }
523  }
524  return false;
525 }
526 
527 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
528  std::shared_ptr<const RelAlgNode> input) {
529  RelAlgNode::replaceInput(old_input, input);
530  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
531  if (condition_) {
532  rebind_inputs.visit(condition_.get());
533  }
534 }
535 
536 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
537  std::shared_ptr<const RelAlgNode> input) {
538  RelAlgNode::replaceInput(old_input, input);
539  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
540  rebind_inputs.visit(filter_.get());
541 }
542 
543 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
544  std::shared_ptr<const RelAlgNode> input) {
545  RelAlgNode::replaceInput(old_input, input);
546  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
547  for (const auto& scalar_source : scalar_sources_) {
548  rebind_inputs.visit(scalar_source.get());
549  }
550  if (filter_expr_) {
551  rebind_inputs.visit(filter_expr_.get());
552  }
553 }
554 
556  : RelAlgNode(rhs)
558  , fields_(rhs.fields_)
559  , hint_applied_(false)
560  , hints_(std::make_unique<Hints>())
561  , has_pushed_down_window_expr_(rhs.has_pushed_down_window_expr_) {
562  RexDeepCopyVisitor copier;
563  for (auto const& expr : rhs.scalar_exprs_) {
564  scalar_exprs_.push_back(copier.visit(expr.get()));
565  }
566  if (rhs.hint_applied_) {
567  for (auto const& kv : *rhs.hints_) {
568  addHint(kv.second);
569  }
570  }
571 }
572 
574  : RelAlgNode(rhs)
575  , tuple_type_(rhs.tuple_type_)
576  , values_(RexDeepCopyVisitor::copy(rhs.values_)) {}
577 
579  RexDeepCopyVisitor copier;
580  filter_ = copier.visit(rhs.filter_.get());
581 }
582 
584  : RelAlgNode(rhs)
585  , groupby_count_(rhs.groupby_count_)
586  , fields_(rhs.fields_)
587  , hint_applied_(false)
588  , hints_(std::make_unique<Hints>()) {
589  agg_exprs_.reserve(rhs.agg_exprs_.size());
590  for (auto const& agg : rhs.agg_exprs_) {
591  agg_exprs_.push_back(agg->deepCopy());
592  }
593  if (rhs.hint_applied_) {
594  for (auto const& kv : *rhs.hints_) {
595  addHint(kv.second);
596  }
597  }
598 }
599 
601  : RelAlgNode(rhs)
602  , join_type_(rhs.join_type_)
603  , hint_applied_(false)
604  , hints_(std::make_unique<Hints>()) {
605  RexDeepCopyVisitor copier;
606  condition_ = copier.visit(rhs.condition_.get());
607  if (rhs.hint_applied_) {
608  for (auto const& kv : *rhs.hints_) {
609  addHint(kv.second);
610  }
611  }
612 }
613 
614 namespace {
615 
616 std::vector<std::unique_ptr<const RexAgg>> copyAggExprs(
617  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs) {
618  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
619  agg_exprs_copy.reserve(agg_exprs.size());
620  for (auto const& agg_expr : agg_exprs) {
621  agg_exprs_copy.push_back(agg_expr->deepCopy());
622  }
623  return agg_exprs_copy;
624 }
625 
626 std::vector<std::unique_ptr<const RexScalar>> copyRexScalars(
627  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources) {
628  std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
629  scalar_sources_copy.reserve(scalar_sources.size());
630  RexDeepCopyVisitor copier;
631  for (auto const& scalar_source : scalar_sources) {
632  scalar_sources_copy.push_back(copier.visit(scalar_source.get()));
633  }
634  return scalar_sources_copy;
635 }
636 
637 std::vector<const Rex*> remapTargetPointers(
638  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_new,
639  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_new,
640  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_old,
641  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_old,
642  std::vector<const Rex*> const& target_exprs_old) {
643  std::vector<const Rex*> target_exprs(target_exprs_old);
644  std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
645  for (size_t i = 0; i < agg_exprs_new.size(); ++i) {
646  old_to_new_target.emplace(agg_exprs_old[i].get(), agg_exprs_new[i].get());
647  }
648  for (size_t i = 0; i < scalar_sources_new.size(); ++i) {
649  old_to_new_target.emplace(scalar_sources_old[i].get(), scalar_sources_new[i].get());
650  }
651  for (auto& target : target_exprs) {
652  auto target_it = old_to_new_target.find(target);
653  CHECK(target_it != old_to_new_target.end());
654  target = target_it->second;
655  }
656  return target_exprs;
657 }
658 
659 } // namespace
660 
662  : RelAlgNode(rhs)
664  , groupby_count_(rhs.groupby_count_)
665  , agg_exprs_(copyAggExprs(rhs.agg_exprs_))
666  , fields_(rhs.fields_)
667  , is_agg_(rhs.is_agg_)
668  , scalar_sources_(copyRexScalars(rhs.scalar_sources_))
669  , target_exprs_(remapTargetPointers(agg_exprs_,
670  scalar_sources_,
671  rhs.agg_exprs_,
672  rhs.scalar_sources_,
673  rhs.target_exprs_))
674  , hint_applied_(false)
675  , hints_(std::make_unique<Hints>()) {
676  RexDeepCopyVisitor copier;
677  filter_expr_ = rhs.filter_expr_ ? copier.visit(rhs.filter_expr_.get()) : nullptr;
678  if (rhs.hint_applied_) {
679  for (auto const& kv : *rhs.hints_) {
680  addHint(kv.second);
681  }
682  }
683 }
684 
685 void RelTableFunction::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
686  std::shared_ptr<const RelAlgNode> input) {
687  RelAlgNode::replaceInput(old_input, input);
688  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
689  for (const auto& target_expr : target_exprs_) {
690  rebind_inputs.visit(target_expr.get());
691  }
692  for (const auto& func_input : table_func_inputs_) {
693  rebind_inputs.visit(func_input.get());
694  }
695 }
696 
698  int32_t literal_args = 0;
699  for (const auto& arg : table_func_inputs_) {
700  const auto rex_literal = dynamic_cast<const RexLiteral*>(arg.get());
701  if (rex_literal) {
702  literal_args += 1;
703  }
704  }
705  return literal_args;
706 }
707 
708 namespace {
709 
711  std::vector<const Rex*>& column_inputs,
712  const std::vector<std::unique_ptr<const RexScalar>>& old_table_func_inputs,
713  const std::vector<std::unique_ptr<const RexScalar>>& new_table_func_inputs) {
714  CHECK_EQ(old_table_func_inputs.size(), new_table_func_inputs.size());
715  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
716  for (size_t i = 0; i < old_table_func_inputs.size(); ++i) {
717  old_to_new_input.emplace(old_table_func_inputs[i].get(),
718  new_table_func_inputs[i].get());
719  }
720  for (auto& target : column_inputs) {
721  auto target_it = old_to_new_input.find(target);
722  CHECK(target_it != old_to_new_input.end());
723  target = target_it->second;
724  }
725 }
726 
727 } // namespace
728 
730  std::vector<std::unique_ptr<const RexScalar>>&& exprs) {
731  // this should only be called in the event of disambiguating inputs, which means the
732  // RexScalar types in the exprs argument should be the exact same as those previously.
733  // So we can then assume all col_inputs_ would be in the same place. So just re-adjust
734  // the pointers.
736  table_func_inputs_ = std::move(exprs);
737 }
738 
740  : RelAlgNode(rhs)
741  , function_name_(rhs.function_name_)
742  , fields_(rhs.fields_)
743  , col_inputs_(rhs.col_inputs_)
744  , table_func_inputs_(copyRexScalars(rhs.table_func_inputs_))
745  , target_exprs_(copyRexScalars(rhs.target_exprs_)) {
747 }
748 
749 namespace std {
750 template <>
751 struct hash<std::pair<const RelAlgNode*, int>> {
752  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
753  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
754  auto h = static_cast<size_t>(*ptr_val);
755  boost::hash_combine(h, input_col.second);
756  return h;
757  }
758 };
759 } // namespace std
760 
761 namespace {
762 
763 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
764  const size_t which_col) {
765  std::set<std::pair<const RelAlgNode*, int>> work_set;
766  auto walker = node;
767  auto curr_col = which_col;
768  while (true) {
769  work_set.insert(std::make_pair(walker, curr_col));
770  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
771  break;
772  }
773  CHECK_EQ(size_t(1), walker->inputCount());
774  auto only_source = walker->getInput(0);
775  if (auto project = dynamic_cast<const RelProject*>(walker)) {
776  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
777  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
778  if (join_source) {
779  CHECK_EQ(size_t(2), join_source->inputCount());
780  auto lhs = join_source->getInput(0);
781  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
782  join_source->getInput(1) == input->getSourceNode());
783  } else {
784  CHECK_EQ(input->getSourceNode(), only_source);
785  }
786  curr_col = input->getIndex();
787  } else {
788  break;
789  }
790  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
791  if (curr_col >= aggregate->getGroupByCount()) {
792  break;
793  }
794  }
795  walker = only_source;
796  }
797  return work_set;
798 }
799 
800 } // namespace
801 
802 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
803  if (collation_.size() != that.collation_.size()) {
804  return false;
805  }
806 
807  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
808  auto this_sort_key = collation_[i];
809  auto that_sort_key = that.collation_[i];
810  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
811  return false;
812  }
813  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
814  return false;
815  }
816  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
817  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
818  std::vector<std::pair<const RelAlgNode*, int>> intersect;
819  std::set_intersection(this_equiv_keys.begin(),
820  this_equiv_keys.end(),
821  that_equiv_keys.begin(),
822  that_equiv_keys.end(),
823  std::back_inserter(intersect));
824  if (intersect.empty()) {
825  return false;
826  }
827  }
828  return true;
829 }
830 
831 // class RelLogicalUnion methods
832 
834  : RelAlgNode(std::move(inputs)), is_all_(is_all) {
835  if (!g_enable_union) {
836  throw QueryNotSupported(
837  "The DEPRECATED enable-union option is set to off. Please remove this option as "
838  "it may be disabled in the future.");
839  }
840  CHECK_LE(2u, inputs_.size());
841  if (!is_all_) {
842  throw QueryNotSupported("UNION without ALL is not supported yet.");
843  }
844 }
845 
846 size_t RelLogicalUnion::size() const {
847  return inputs_.front()->size();
848 }
849 
851  return cat(::typeName(this), "(is_all(", is_all_, "))");
852 }
853 
854 size_t RelLogicalUnion::toHash() const {
855  if (!hash_) {
856  hash_ = typeid(RelLogicalUnion).hash_code();
857  boost::hash_combine(*hash_, is_all_);
858  }
859  return *hash_;
860 }
861 
862 std::string RelLogicalUnion::getFieldName(const size_t i) const {
863  if (auto const* input = dynamic_cast<RelCompound const*>(inputs_[0].get())) {
864  return input->getFieldName(i);
865  } else if (auto const* input = dynamic_cast<RelProject const*>(inputs_[0].get())) {
866  return input->getFieldName(i);
867  } else if (auto const* input = dynamic_cast<RelLogicalUnion const*>(inputs_[0].get())) {
868  return input->getFieldName(i);
869  } else if (auto const* input = dynamic_cast<RelAggregate const*>(inputs_[0].get())) {
870  return input->getFieldName(i);
871  } else if (auto const* input = dynamic_cast<RelScan const*>(inputs_[0].get())) {
872  return input->getFieldName(i);
873  } else if (auto const* input =
874  dynamic_cast<RelTableFunction const*>(inputs_[0].get())) {
875  return input->getFieldName(i);
876  }
877  UNREACHABLE() << "Unhandled input type: " << ::toString(inputs_.front());
878  return {};
879 }
880 
881 namespace {
882 std::vector<bool> get_notnulls(std::vector<TargetMetaInfo> const& tmis0) {
883  std::vector<bool> notnulls(tmis0.size());
884  for (size_t j = 0; j < tmis0.size(); ++j) {
885  notnulls[j] = tmis0[j].get_type_info().get_notnull();
886  }
887  return notnulls;
888 }
889 
891  ti0.set_notnull({}); // Actual value doesn't matter
892  ti1.set_notnull({}); // as long as they are the same.
893  return ti0 == ti1;
894 }
895 
896 void set_notnulls(std::vector<TargetMetaInfo>* tmis0, std::vector<bool> const& notnulls) {
897  for (size_t j = 0; j < tmis0->size(); ++j) {
898  SQLTypeInfo ti = (*tmis0)[j].get_type_info();
899  SQLTypeInfo physical_ti = (*tmis0)[j].get_physical_type_info();
900  ti.set_notnull(notnulls[j]);
901  physical_ti.set_notnull(notnulls[j]);
902  (*tmis0)[j] = TargetMetaInfo((*tmis0)[j].get_resname(), ti, physical_ti);
903  }
904 }
905 } // namespace
906 
907 // The returned std::vector<TargetMetaInfo> is identical to
908 // inputs_[0]->getOutputMetainfo() except for its SQLTypeInfo::notnull values, which is
909 // the logical AND over each input. In other words, the returned columns are notnull iff
910 // all corresponding input columns are notnull.
911 std::vector<TargetMetaInfo> RelLogicalUnion::getCompatibleMetainfoTypes() const {
912  std::vector<TargetMetaInfo> tmis0 = inputs_[0]->getOutputMetainfo();
913  std::vector<bool> notnulls = get_notnulls(tmis0);
914  for (size_t i = 1; i < inputs_.size(); ++i) {
915  std::vector<TargetMetaInfo> const& tmisi = inputs_[i]->getOutputMetainfo();
916  if (tmis0.size() != tmisi.size()) {
917  LOG(INFO) << "tmis0.size()=" << tmis0.size() << " != " << tmisi.size()
918  << "=tmisi.size() for i=" << i;
919  throw std::runtime_error("Subqueries of a UNION must have matching data types.");
920  }
921  for (size_t j = 0; j < tmis0.size(); ++j) {
922  SQLTypeInfo const& ti0 = tmis0[j].get_type_info();
923  SQLTypeInfo const& ti1 = tmisi[j].get_type_info();
924  // Allow types of different nullability to be UNIONed.
925  if (!same_ignoring_notnull(ti0, ti1)) {
926  LOG(INFO) << "Types do not match for UNION:\n tmis0[" << j
927  << "].get_type_info().to_string() = " << ti0.to_string() << "\n tmis"
928  << i << '[' << j
929  << "].get_type_info().to_string() = " << ti1.to_string();
930  // The only permitted difference is when both columns are dictionary-encoded.
931  if (!(ti0.is_dict_encoded_string() && ti1.is_dict_encoded_string())) {
932  throw std::runtime_error(
933  "Subqueries of a UNION must have the exact same data types.");
934  }
935  }
936  notnulls[j] = notnulls[j] && ti1.get_notnull();
937  }
938  }
939  set_notnulls(&tmis0, notnulls); // Set each SQLTypeInfo::notnull to compatible values.
940  return tmis0;
941 }
942 
943 // Rest of code requires a raw pointer, but RexInput object needs to live somewhere.
945  size_t input_idx) const {
946  if (auto const* rex_input_ptr = dynamic_cast<RexInput const*>(rex_scalar)) {
947  RexInput rex_input(*rex_input_ptr);
948  rex_input.setSourceNode(getInput(input_idx));
949  scalar_exprs_.emplace_back(std::make_shared<RexInput const>(std::move(rex_input)));
950  return scalar_exprs_.back().get();
951  }
952  return rex_scalar;
953 }
954 
955 namespace {
956 
957 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
958  const auto& id = field(ra_node, "id");
959  return std::stoi(json_str(id));
960 }
961 
962 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
963  rapidjson::StringBuffer buffer;
964  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
965  node.Accept(writer);
966  return buffer.GetString();
967 }
968 
969 // The parse_* functions below de-serialize expressions as they come from Calcite.
970 // RelAlgDagBuilder will take care of making the representation easy to
971 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
972 
973 std::unique_ptr<RexAbstractInput> parse_abstract_input(
974  const rapidjson::Value& expr) noexcept {
975  const auto& input = field(expr, "input");
976  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
977 }
978 
979 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
980  CHECK(expr.IsObject());
981  const auto& literal = field(expr, "literal");
982  const auto type = to_sql_type(json_str(field(expr, "type")));
983  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
984  const auto scale = json_i64(field(expr, "scale"));
985  const auto precision = json_i64(field(expr, "precision"));
986  const auto type_scale = json_i64(field(expr, "type_scale"));
987  const auto type_precision = json_i64(field(expr, "type_precision"));
988  if (literal.IsNull()) {
989  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
990  }
991  switch (type) {
992  case kINT:
993  case kBIGINT:
994  case kDECIMAL:
995  case kINTERVAL_DAY_TIME:
997  case kTIME:
998  case kTIMESTAMP:
999  case kDATE:
1000  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
1001  type,
1002  target_type,
1003  scale,
1004  precision,
1005  type_scale,
1006  type_precision));
1007  case kDOUBLE: {
1008  if (literal.IsDouble()) {
1009  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
1010  type,
1011  target_type,
1012  scale,
1013  precision,
1014  type_scale,
1015  type_precision));
1016  } else if (literal.IsInt64()) {
1017  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetInt64()),
1018  type,
1019  target_type,
1020  scale,
1021  precision,
1022  type_scale,
1023  type_precision);
1024 
1025  } else if (literal.IsUint64()) {
1026  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetUint64()),
1027  type,
1028  target_type,
1029  scale,
1030  precision,
1031  type_scale,
1032  type_precision);
1033  }
1034  UNREACHABLE() << "Unhandled type: " << literal.GetType();
1035  }
1036  case kTEXT:
1037  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
1038  type,
1039  target_type,
1040  scale,
1041  precision,
1042  type_scale,
1043  type_precision));
1044  case kBOOLEAN:
1045  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
1046  type,
1047  target_type,
1048  scale,
1049  precision,
1050  type_scale,
1051  type_precision));
1052  case kNULLT:
1053  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
1054  default:
1055  CHECK(false);
1056  }
1057  CHECK(false);
1058  return nullptr;
1059 }
1060 
1061 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1063  RelAlgDag& root_dag);
1064 
1065 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
1066  if (type_obj.IsArray()) {
1067  throw QueryNotSupported("Composite types are not currently supported.");
1068  }
1069  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
1070  << json_node_to_string(type_obj);
1071  const auto type = to_sql_type(json_str(field(type_obj, "type")));
1072  const auto nullable = json_bool(field(type_obj, "nullable"));
1073  const auto precision_it = type_obj.FindMember("precision");
1074  const int precision =
1075  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
1076  const auto scale_it = type_obj.FindMember("scale");
1077  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
1078  SQLTypeInfo ti(type, !nullable);
1079  ti.set_precision(precision);
1080  ti.set_scale(scale);
1081  return ti;
1082 }
1083 
1084 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
1085  const rapidjson::Value& arr,
1087  RelAlgDag& root_dag) {
1088  std::vector<std::unique_ptr<const RexScalar>> exprs;
1089  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1090  exprs.emplace_back(parse_scalar_expr(*it, cat, root_dag));
1091  }
1092  return exprs;
1093 }
1094 
1096  if (name == "ROW_NUMBER") {
1098  }
1099  if (name == "RANK") {
1101  }
1102  if (name == "DENSE_RANK") {
1104  }
1105  if (name == "PERCENT_RANK") {
1107  }
1108  if (name == "CUME_DIST") {
1110  }
1111  if (name == "NTILE") {
1113  }
1114  if (name == "LAG") {
1116  }
1117  if (name == "LAG_IN_FRAME") {
1119  }
1120  if (name == "LEAD") {
1122  }
1123  if (name == "LEAD_IN_FRAME") {
1125  }
1126  if (name == "FIRST_VALUE") {
1128  }
1129  if (name == "LAST_VALUE") {
1131  }
1132  if (name == "NTH_VALUE") {
1134  }
1135  if (name == "AVG") {
1137  }
1138  if (name == "MIN") {
1140  }
1141  if (name == "MAX") {
1143  }
1144  if (name == "SUM") {
1146  }
1147  if (name == "COUNT") {
1149  }
1150  if (name == "COUNT_IF") {
1152  }
1153  if (name == "SUM_IF") {
1155  }
1156  if (name == "$SUM0") {
1158  }
1159  throw std::runtime_error("Unsupported window function: " + name);
1160 }
1161 
1162 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
1163  const rapidjson::Value& arr,
1165  RelAlgDag& root_dag) {
1166  std::vector<std::unique_ptr<const RexScalar>> exprs;
1167  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1168  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), cat, root_dag));
1169  }
1170  return exprs;
1171 }
1172 
1173 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
1174  return json_str(field(collation, "direction")) == std::string("DESCENDING")
1177 }
1178 
1179 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
1180  return json_str(field(collation, "nulls")) == std::string("FIRST")
1183 }
1184 
1185 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
1187  RelAlgDag& root_dag) {
1188  std::vector<SortField> collation;
1189  size_t field_idx = 0;
1190  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
1191  const auto sort_dir = parse_sort_direction(*it);
1192  const auto null_pos = parse_nulls_position(*it);
1193  collation.emplace_back(field_idx, sort_dir, null_pos);
1194  }
1195  return collation;
1196 }
1197 
1199  const rapidjson::Value& window_bound_obj,
1201  RelAlgDag& root_dag) {
1202  CHECK(window_bound_obj.IsObject());
1204  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
1205  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
1206  window_bound.following = json_bool(field(window_bound_obj, "following"));
1207  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
1208  const auto& offset_field = field(window_bound_obj, "offset");
1209  if (offset_field.IsObject()) {
1210  window_bound.bound_expr = parse_scalar_expr(offset_field, cat, root_dag);
1211  } else {
1212  CHECK(offset_field.IsNull());
1213  }
1214  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
1215  return window_bound;
1216 }
1217 
1218 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
1220  RelAlgDag& root_dag) {
1221  const auto& operands = field(expr, "operands");
1222  CHECK(operands.IsArray());
1223  CHECK_GE(operands.Size(), unsigned(0));
1224  const auto& subquery_ast = field(expr, "subquery");
1225 
1226  auto subquery_dag = RelAlgDagBuilder::buildDagForSubquery(root_dag, subquery_ast, cat);
1227  const auto subquery_root_node = subquery_dag->getRootNodeShPtr();
1228  auto subquery = std::make_shared<RexSubQuery>(subquery_root_node);
1229  auto query_hint = subquery_dag->getQueryHint(subquery_dag->getRootNodeShPtr().get());
1230  root_dag.registerSubquery(subquery);
1231  const auto subquery_global_hint = subquery_dag->getGlobalHints();
1232  if (subquery_global_hint.isAnyQueryHintDelivered()) {
1233  // we need to propagate global query hint found in this subquery to its parent
1234  const auto new_global_hint = root_dag.getGlobalHints() || subquery_global_hint;
1235  root_dag.setGlobalQueryHints(new_global_hint);
1236  }
1237  const auto subquery_local_hint = subquery_dag->getQueryHint(subquery_root_node.get());
1238  if (subquery_local_hint) {
1239  // register local query hint of this subquery to its parent to correctly
1240  // enables them when executing this subquery
1241  root_dag.registerQueryHint(subquery_root_node.get(), *subquery_local_hint);
1242  }
1243  return subquery->deepCopy();
1244 }
1245 
1246 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
1248  RelAlgDag& root_dag) {
1249  const auto op_name = json_str(field(expr, "op"));
1250  const bool is_quantifier =
1251  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
1252  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
1253  const auto& operators_json_arr = field(expr, "operands");
1254  CHECK(operators_json_arr.IsArray());
1255  auto operands = parse_expr_array(operators_json_arr, cat, root_dag);
1256  const auto type_it = expr.FindMember("type");
1257  CHECK(type_it != expr.MemberEnd());
1258  auto ti = parse_type(type_it->value);
1259  if (op == kIN && expr.HasMember("subquery")) {
1260  auto subquery = parse_subquery(expr, cat, root_dag);
1261  operands.emplace_back(std::move(subquery));
1262  }
1263  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
1264  const auto& partition_keys_arr = field(expr, "partition_keys");
1265  auto partition_keys = parse_expr_array(partition_keys_arr, cat, root_dag);
1266  const auto& order_keys_arr = field(expr, "order_keys");
1267  auto order_keys = parse_window_order_exprs(order_keys_arr, cat, root_dag);
1268  const auto collation = parse_window_order_collation(order_keys_arr, cat, root_dag);
1269  const auto kind = parse_window_function_kind(op_name);
1270  const auto lower_bound =
1271  parse_window_bound(field(expr, "lower_bound"), cat, root_dag);
1272  const auto upper_bound =
1273  parse_window_bound(field(expr, "upper_bound"), cat, root_dag);
1274  bool is_rows = json_bool(field(expr, "is_rows"));
1275  ti.set_notnull(false);
1276  return std::make_unique<RexWindowFunctionOperator>(kind,
1277  operands,
1278  partition_keys,
1279  order_keys,
1280  collation,
1281  lower_bound,
1282  upper_bound,
1283  is_rows,
1284  ti);
1285  }
1286  return std::unique_ptr<RexOperator>(op == kFUNCTION
1287  ? new RexFunctionOperator(op_name, operands, ti)
1288  : new RexOperator(op, operands, ti));
1289 }
1290 
1291 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr,
1293  RelAlgDag& root_dag) {
1294  const auto& operands = field(expr, "operands");
1295  CHECK(operands.IsArray());
1296  CHECK_GE(operands.Size(), unsigned(2));
1297  std::unique_ptr<const RexScalar> else_expr;
1298  std::vector<
1299  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1300  expr_pair_list;
1301  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
1302  auto when_expr = parse_scalar_expr(*operands_it++, cat, root_dag);
1303  if (operands_it == operands.End()) {
1304  else_expr = std::move(when_expr);
1305  break;
1306  }
1307  auto then_expr = parse_scalar_expr(*operands_it++, cat, root_dag);
1308  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
1309  }
1310  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
1311 }
1312 
1313 std::vector<std::string> strings_from_json_array(
1314  const rapidjson::Value& json_str_arr) noexcept {
1315  CHECK(json_str_arr.IsArray());
1316  std::vector<std::string> fields;
1317  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
1318  ++json_str_arr_it) {
1319  CHECK(json_str_arr_it->IsString());
1320  fields.emplace_back(json_str_arr_it->GetString());
1321  }
1322  return fields;
1323 }
1324 
1325 std::vector<size_t> indices_from_json_array(
1326  const rapidjson::Value& json_idx_arr) noexcept {
1327  CHECK(json_idx_arr.IsArray());
1328  std::vector<size_t> indices;
1329  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
1330  ++json_idx_arr_it) {
1331  CHECK(json_idx_arr_it->IsInt());
1332  CHECK_GE(json_idx_arr_it->GetInt(), 0);
1333  indices.emplace_back(json_idx_arr_it->GetInt());
1334  }
1335  return indices;
1336 }
1337 
1338 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
1339  const auto agg_str = json_str(field(expr, "agg"));
1340  if (agg_str == "APPROX_QUANTILE") {
1341  LOG(INFO) << "APPROX_QUANTILE is deprecated. Please use APPROX_PERCENTILE instead.";
1342  }
1343  const auto agg = to_agg_kind(agg_str);
1344  const auto distinct = json_bool(field(expr, "distinct"));
1345  const auto agg_ti = parse_type(field(expr, "type"));
1346  const auto operands = indices_from_json_array(field(expr, "operands"));
1347  bool const allow_multiple_args =
1348  shared::is_any<kAPPROX_COUNT_DISTINCT, kAPPROX_QUANTILE, kSUM_IF>(agg);
1349  if (operands.size() > 1 && (operands.size() != 2 || !allow_multiple_args)) {
1350  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
1351  }
1352  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
1353 }
1354 
1355 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1357  RelAlgDag& root_dag) {
1358  CHECK(expr.IsObject());
1359  if (expr.IsObject() && expr.HasMember("input")) {
1360  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
1361  }
1362  if (expr.IsObject() && expr.HasMember("literal")) {
1363  return std::unique_ptr<const RexScalar>(parse_literal(expr));
1364  }
1365  if (expr.IsObject() && expr.HasMember("op")) {
1366  const auto op_str = json_str(field(expr, "op"));
1367  if (op_str == std::string("CASE")) {
1368  return std::unique_ptr<const RexScalar>(parse_case(expr, cat, root_dag));
1369  }
1370  if (op_str == std::string("$SCALAR_QUERY")) {
1371  return std::unique_ptr<const RexScalar>(parse_subquery(expr, cat, root_dag));
1372  }
1373  return std::unique_ptr<const RexScalar>(parse_operator(expr, cat, root_dag));
1374  }
1375  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
1376  " not supported");
1377 }
1378 
1379 JoinType to_join_type(const std::string& join_type_name) {
1380  if (join_type_name == "inner") {
1381  return JoinType::INNER;
1382  }
1383  if (join_type_name == "left") {
1384  return JoinType::LEFT;
1385  }
1386  if (join_type_name == "semi") {
1387  return JoinType::SEMI;
1388  }
1389  if (join_type_name == "anti") {
1390  return JoinType::ANTI;
1391  }
1392  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
1393 }
1394 
1395 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
1396 
1397 std::unique_ptr<const RexOperator> disambiguate_operator(
1398  const RexOperator* rex_operator,
1399  const RANodeOutput& ra_output) noexcept {
1400  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1401  for (size_t i = 0; i < rex_operator->size(); ++i) {
1402  auto operand = rex_operator->getOperand(i);
1403  if (dynamic_cast<const RexSubQuery*>(operand)) {
1404  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1405  } else {
1406  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
1407  }
1408  }
1409  const auto rex_window_function_operator =
1410  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1411  if (rex_window_function_operator) {
1412  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
1413  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1414  for (const auto& partition_key : partition_keys) {
1415  disambiguated_partition_keys.emplace_back(
1416  disambiguate_rex(partition_key.get(), ra_output));
1417  }
1418  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1419  const auto& order_keys = rex_window_function_operator->getOrderKeys();
1420  for (const auto& order_key : order_keys) {
1421  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
1422  }
1423  return rex_window_function_operator->disambiguatedOperands(
1424  disambiguated_operands,
1425  disambiguated_partition_keys,
1426  disambiguated_order_keys,
1427  rex_window_function_operator->getCollation());
1428  }
1429  return rex_operator->getDisambiguated(disambiguated_operands);
1430 }
1431 
1432 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
1433  const RANodeOutput& ra_output) {
1434  std::vector<
1435  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1436  disambiguated_expr_pair_list;
1437  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1438  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
1439  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
1440  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1441  std::move(disambiguated_then));
1442  }
1443  std::unique_ptr<const RexScalar> disambiguated_else{
1444  disambiguate_rex(rex_case->getElse(), ra_output)};
1445  return std::unique_ptr<const RexCase>(
1446  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1447 }
1448 
1449 // The inputs used by scalar expressions are given as indices in the serialized
1450 // representation of the query. This is hard to navigate; make the relationship
1451 // explicit by creating RexInput expressions which hold a pointer to the source
1452 // relational algebra node and the index relative to the output of that node.
1453 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
1454  const RANodeOutput& ra_output) {
1455  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
1456  if (rex_abstract_input) {
1457  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1458  return std::unique_ptr<const RexInput>(
1459  new RexInput(ra_output[rex_abstract_input->getIndex()]));
1460  }
1461  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
1462  if (rex_operator) {
1463  return disambiguate_operator(rex_operator, ra_output);
1464  }
1465  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
1466  if (rex_case) {
1467  return disambiguate_case(rex_case, ra_output);
1468  }
1469  if (auto const rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar)) {
1470  return rex_literal->deepCopy();
1471  } else if (auto const rex_subquery = dynamic_cast<const RexSubQuery*>(rex_scalar)) {
1472  return rex_subquery->deepCopy();
1473  } else {
1474  throw QueryNotSupported("Unable to disambiguate expression of type " +
1475  std::string(typeid(*rex_scalar).name()));
1476  }
1477 }
1478 
1479 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
1480  CHECK_EQ(size_t(1), project_node->inputCount());
1481  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1482  for (size_t i = 0; i < project_node->size(); ++i) {
1483  const auto projected_expr = project_node->getProjectAt(i);
1484  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1485  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1486  } else {
1487  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
1488  }
1489  }
1490  project_node->setExpressions(disambiguated_exprs);
1491 }
1492 
1494  const RANodeOutput& input) noexcept {
1495  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1496  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1497  const auto target_expr = table_func_node->getTableFuncInputAt(i);
1498  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1499  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1500  } else {
1501  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
1502  }
1503  }
1504  table_func_node->setTableFuncInputs(std::move(disambiguated_exprs));
1505 }
1506 
1507 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1508  for (auto ra_node : nodes) {
1509  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
1510  if (filter_node) {
1511  CHECK_EQ(size_t(1), filter_node->inputCount());
1512  auto disambiguated_condition = disambiguate_rex(
1513  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
1514  filter_node->setCondition(disambiguated_condition);
1515  continue;
1516  }
1517  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
1518  if (join_node) {
1519  CHECK_EQ(size_t(2), join_node->inputCount());
1520  auto disambiguated_condition =
1521  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
1522  join_node->setCondition(disambiguated_condition);
1523  continue;
1524  }
1525  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
1526  if (project_node) {
1527  bind_project_to_input(project_node.get(),
1528  get_node_output(project_node->getInput(0)));
1529  continue;
1530  }
1531  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1532  if (table_func_node) {
1533  /*
1534  Collect all inputs from table function input (non-literal)
1535  arguments.
1536  */
1537  RANodeOutput input;
1538  input.reserve(table_func_node->inputCount());
1539  for (size_t i = 0; i < table_func_node->inputCount(); i++) {
1540  auto node_output = get_node_output(table_func_node->getInput(i));
1541  input.insert(input.end(), node_output.begin(), node_output.end());
1542  }
1543  bind_table_func_to_input(table_func_node.get(), input);
1544  }
1545  }
1546 }
1547 
1548 void handle_query_hint(const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1549  RelAlgDag& rel_alg_dag) noexcept {
1550  // query hint is delivered by the above three nodes
1551  // when a query block has top-sort node, a hint is registered to
1552  // one of the node which locates at the nearest from the sort node
1553  RegisteredQueryHint global_query_hint;
1554  for (auto node : nodes) {
1555  Hints* hint_delivered = nullptr;
1556  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1557  if (agg_node) {
1558  if (agg_node->hasDeliveredHint()) {
1559  hint_delivered = agg_node->getDeliveredHints();
1560  }
1561  }
1562  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
1563  if (project_node) {
1564  if (project_node->hasDeliveredHint()) {
1565  hint_delivered = project_node->getDeliveredHints();
1566  }
1567  }
1568  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
1569  if (compound_node) {
1570  if (compound_node->hasDeliveredHint()) {
1571  hint_delivered = compound_node->getDeliveredHints();
1572  }
1573  }
1574  if (hint_delivered && !hint_delivered->empty()) {
1575  rel_alg_dag.registerQueryHints(node, hint_delivered, global_query_hint);
1576  }
1577  }
1578  // the current rel_alg_dag may contain global query hints from the subquery
1579  // so we combine the current global hint we collected with the original one together
1580  // to propagate global query hints correctly
1581  const auto existing_global_query_hints = rel_alg_dag.getGlobalHints();
1582  const auto new_global_query_hints = existing_global_query_hints || global_query_hint;
1583  rel_alg_dag.setGlobalQueryHints(new_global_query_hints);
1584 }
1585 
1586 void compute_node_hash(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
1587  // compute each rel node's hash value in advance to avoid inconsistency of their hash
1588  // values depending on the toHash's caller
1589  // specifically, we manipulate our logical query plan before retrieving query step
1590  // sequence but once we compute a hash value we cached it so there is no way to update
1591  // it after the plan has been changed starting from the top node, we compute the hash
1592  // value (top-down manner)
1593  std::for_each(
1594  nodes.rbegin(), nodes.rend(), [](const std::shared_ptr<RelAlgNode>& node) {
1595  auto node_hash = node->toHash();
1596  CHECK_NE(node_hash, static_cast<size_t>(0));
1597  });
1598 }
1599 
1600 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1601  for (auto node : nodes) {
1602  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1603  if (!agg_node || agg_node->getAggExprsCount()) {
1604  continue;
1605  }
1606  CHECK_EQ(size_t(1), node->inputCount());
1607  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1608  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1609  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1610  agg_node->markAsNop();
1611  }
1612  }
1613 }
1614 
1615 namespace {
1616 
1617 std::vector<const Rex*> reproject_targets(
1618  const RelProject* simple_project,
1619  const std::vector<const Rex*>& target_exprs) noexcept {
1620  std::vector<const Rex*> result;
1621  for (size_t i = 0; i < simple_project->size(); ++i) {
1622  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
1623  CHECK(input_rex);
1624  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1625  result.push_back(target_exprs[input_rex->getIndex()]);
1626  }
1627  return result;
1628 }
1629 
1636  public:
1638  const RelAlgNode* node_to_keep,
1639  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1640  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
1641 
1642  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
1643  RetType visitInput(const RexInput* input) const final {
1644  if (input->getSourceNode() == node_to_keep_) {
1645  const auto index = input->getIndex();
1646  CHECK_LT(index, scalar_sources_.size());
1647  return visit(scalar_sources_[index].get());
1648  } else {
1649  return input->deepCopy();
1650  }
1651  }
1652 
1653  private:
1655  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
1656 };
1657 
1658 } // namespace
1659 
1661  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1662  const std::vector<size_t>& pattern,
1663  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
1664  query_hints) noexcept {
1665  CHECK_GE(pattern.size(), size_t(2));
1666  CHECK_LE(pattern.size(), size_t(4));
1667 
1668  std::unique_ptr<const RexScalar> filter_rex;
1669  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1670  size_t groupby_count{0};
1671  std::vector<std::string> fields;
1672  std::vector<const RexAgg*> agg_exprs;
1673  std::vector<const Rex*> target_exprs;
1674  bool first_project{true};
1675  bool is_agg{false};
1676  RelAlgNode* last_node{nullptr};
1677 
1678  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1679  size_t node_hash{0};
1680  unsigned node_id{0};
1681  bool hint_registered{false};
1682  RegisteredQueryHint registered_query_hint = RegisteredQueryHint::defaults();
1683  for (const auto node_idx : pattern) {
1684  const auto ra_node = nodes[node_idx];
1685  auto registered_query_hint_map_it = query_hints.find(ra_node->toHash());
1686  if (registered_query_hint_map_it != query_hints.end()) {
1687  auto& registered_query_hint_map = registered_query_hint_map_it->second;
1688  auto registered_query_hint_it = registered_query_hint_map.find(ra_node->getId());
1689  if (registered_query_hint_it != registered_query_hint_map.end()) {
1690  hint_registered = true;
1691  node_hash = registered_query_hint_map_it->first;
1692  node_id = registered_query_hint_it->first;
1693  registered_query_hint = registered_query_hint_it->second;
1694  }
1695  }
1696  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1697  if (ra_filter) {
1698  CHECK(!filter_rex);
1699  filter_rex.reset(ra_filter->getAndReleaseCondition());
1700  CHECK(filter_rex);
1701  last_node = ra_node.get();
1702  continue;
1703  }
1704  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1705  if (ra_project) {
1706  fields = ra_project->getFields();
1707  manipulation_target = ra_project;
1708 
1709  if (first_project) {
1710  CHECK_EQ(size_t(1), ra_project->inputCount());
1711  // Rebind the input of the project to the input of the filter itself
1712  // since we know that we'll evaluate the filter on the fly, with no
1713  // intermediate buffer.
1714  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1715  if (filter_input) {
1716  CHECK_EQ(size_t(1), filter_input->inputCount());
1717  bind_project_to_input(ra_project.get(),
1718  get_node_output(filter_input->getInput(0)));
1719  }
1720  scalar_sources = ra_project->getExpressionsAndRelease();
1721  for (const auto& scalar_expr : scalar_sources) {
1722  target_exprs.push_back(scalar_expr.get());
1723  }
1724  first_project = false;
1725  } else {
1726  if (ra_project->isSimple()) {
1727  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1728  } else {
1729  // TODO(adb): This is essentially a more general case of simple project, we
1730  // could likely merge the two
1731  std::vector<const Rex*> result;
1732  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1733  for (size_t i = 0; i < ra_project->size(); ++i) {
1734  const auto rex = ra_project->getProjectAt(i);
1735  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1736  const auto index = rex_input->getIndex();
1737  CHECK_LT(index, target_exprs.size());
1738  result.push_back(target_exprs[index]);
1739  } else {
1740  scalar_sources.push_back(visitor.visit(rex));
1741  result.push_back(scalar_sources.back().get());
1742  }
1743  }
1744  target_exprs = result;
1745  }
1746  }
1747  last_node = ra_node.get();
1748  continue;
1749  }
1750  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1751  if (ra_aggregate) {
1752  is_agg = true;
1753  fields = ra_aggregate->getFields();
1754  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1755  groupby_count = ra_aggregate->getGroupByCount();
1756  decltype(target_exprs){}.swap(target_exprs);
1757  CHECK_LE(groupby_count, scalar_sources.size());
1758  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1759  const auto rex_ref = new RexRef(group_idx + 1);
1760  target_exprs.push_back(rex_ref);
1761  scalar_sources.emplace_back(rex_ref);
1762  }
1763  for (const auto rex_agg : agg_exprs) {
1764  target_exprs.push_back(rex_agg);
1765  }
1766  last_node = ra_node.get();
1767  continue;
1768  }
1769  }
1770 
1771  auto compound_node =
1772  std::make_shared<RelCompound>(filter_rex,
1773  target_exprs,
1774  groupby_count,
1775  agg_exprs,
1776  fields,
1777  scalar_sources,
1778  is_agg,
1779  manipulation_target->isUpdateViaSelect(),
1780  manipulation_target->isDeleteViaSelect(),
1781  manipulation_target->isVarlenUpdateRequired(),
1782  manipulation_target->getModifiedTableDescriptor(),
1783  manipulation_target->getTargetColumns());
1784  auto old_node = nodes[pattern.back()];
1785  nodes[pattern.back()] = compound_node;
1786  auto first_node = nodes[pattern.front()];
1787  CHECK_EQ(size_t(1), first_node->inputCount());
1788  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1789  if (hint_registered) {
1790  // pass the registered hint from the origin node to newly created compound node
1791  // where it is coalesced
1792  auto registered_query_hint_map_it = query_hints.find(node_hash);
1793  CHECK(registered_query_hint_map_it != query_hints.end());
1794  auto registered_query_hint_map = registered_query_hint_map_it->second;
1795  if (registered_query_hint_map.size() > 1) {
1796  registered_query_hint_map.erase(node_id);
1797  } else {
1798  CHECK_EQ(registered_query_hint_map.size(), static_cast<size_t>(1));
1799  query_hints.erase(node_hash);
1800  }
1801  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
1802  hint_map.emplace(compound_node->getId(), registered_query_hint);
1803  query_hints.emplace(compound_node->toHash(), hint_map);
1804  }
1805  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1806  nodes[pattern[i]].reset();
1807  }
1808  for (auto node : nodes) {
1809  if (!node) {
1810  continue;
1811  }
1812  node->replaceInput(old_node, compound_node);
1813  }
1814 }
1815 
1816 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1817  using ElementType = std::shared_ptr<RelAlgNode>;
1818  using Super = std::vector<ElementType>::const_iterator;
1819  using Container = std::vector<ElementType>;
1820 
1821  public:
1822  enum class AdvancingMode { DUChain, InOrder };
1823 
1824  explicit RANodeIterator(const Container& nodes)
1825  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1826  size_t non_zero_count = 0;
1827  for (const auto& node : nodes) {
1828  if (node) {
1829  ++non_zero_count;
1830  }
1831  }
1833  }()) {}
1834 
1835  explicit operator size_t() {
1836  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1837  }
1838 
1839  RANodeIterator operator++() = delete;
1840 
1841  void advance(AdvancingMode mode) {
1842  Super& super = *this;
1843  switch (mode) {
1844  case AdvancingMode::DUChain: {
1845  size_t use_count = 0;
1846  Super only_use = owner_.end();
1847  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1848  if (!*nodeIt) {
1849  continue;
1850  }
1851  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1852  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1853  ++use_count;
1854  if (1 == use_count) {
1855  only_use = nodeIt;
1856  } else {
1857  super = owner_.end();
1858  return;
1859  }
1860  }
1861  }
1862  }
1863  super = only_use;
1864  break;
1865  }
1866  case AdvancingMode::InOrder:
1867  for (size_t i = 0; i != owner_.size(); ++i) {
1868  if (!visited_.count(i)) {
1869  super = owner_.begin();
1870  std::advance(super, i);
1871  return;
1872  }
1873  }
1874  super = owner_.end();
1875  break;
1876  default:
1877  CHECK(false);
1878  }
1879  }
1880 
1881  bool allVisited() { return visited_.size() == nodeCount_; }
1882 
1884  visited_.insert(size_t(*this));
1885  Super& super = *this;
1886  return *super;
1887  }
1888 
1889  const ElementType* operator->() { return &(operator*()); }
1890 
1891  private:
1893  const size_t nodeCount_;
1894  std::unordered_set<size_t> visited_;
1895 };
1896 
1897 namespace {
1898 
1899 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1900  const size_t index,
1901  const bool first_rex_is_input) {
1902  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1903  if (index == 0 && agg_node->getGroupByCount() > 0) {
1904  return true;
1905  } else {
1906  // Is an aggregated target, only allow the project to be elided if the aggregate
1907  // target is simply passed through (i.e. if the top level expression attached to
1908  // the project node is a RexInput expression)
1909  return first_rex_is_input;
1910  }
1911  }
1912  return first_rex_is_input;
1913 }
1914 
1920 class CoalesceSecondaryProjectVisitor : public RexVisitor<bool> {
1921  public:
1922  bool visitInput(const RexInput* input) const final {
1923  // The top level expression node is checked before we apply the visitor. If we get
1924  // here, this input rex is a child of another rex node, and we handle the can be
1925  // coalesced check slightly differently
1926  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1927  }
1928 
1929  bool visitLiteral(const RexLiteral*) const final { return false; }
1930 
1931  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1932 
1933  bool visitRef(const RexRef*) const final { return false; }
1934 
1935  protected:
1936  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1937  return aggregate && next_result;
1938  }
1939 
1940  bool defaultResult() const final { return true; }
1941 };
1942 
1943 // Detect the window function SUM pattern: CASE WHEN COUNT() > 0 THEN SUM ELSE 0
1945  const auto case_operator = dynamic_cast<const RexCase*>(rex);
1946  if (case_operator && case_operator->branchCount() == 1) {
1947  const auto then_window =
1948  dynamic_cast<const RexWindowFunctionOperator*>(case_operator->getThen(0));
1949  if (then_window && then_window->getKind() == SqlWindowFunctionKind::SUM_INTERNAL) {
1950  return true;
1951  }
1952  }
1953  return false;
1954 }
1955 
1956 // Check for Window Function AVG:
1957 // (CASE WHEN count > 0 THEN sum ELSE 0) / COUNT
1959  const RexOperator* divide_operator = dynamic_cast<const RexOperator*>(rex);
1960  if (divide_operator && divide_operator->getOperator() == kDIVIDE) {
1961  CHECK_EQ(divide_operator->size(), size_t(2));
1962  const auto case_operator =
1963  dynamic_cast<const RexCase*>(divide_operator->getOperand(0));
1964  const auto second_window =
1965  dynamic_cast<const RexWindowFunctionOperator*>(divide_operator->getOperand(1));
1966  if (case_operator && second_window &&
1967  second_window->getKind() == SqlWindowFunctionKind::COUNT) {
1968  if (is_window_function_sum(case_operator)) {
1969  return true;
1970  }
1971  }
1972  }
1973  return false;
1974 }
1975 
1976 // Detect both window function operators and window function operators embedded in case
1977 // statements (for null handling)
1979  if (dynamic_cast<const RexWindowFunctionOperator*>(rex)) {
1980  return true;
1981  }
1982 
1983  // unwrap from casts, if they exist
1984  const auto rex_cast = dynamic_cast<const RexOperator*>(rex);
1985  if (rex_cast && rex_cast->getOperator() == kCAST) {
1986  CHECK_EQ(rex_cast->size(), size_t(1));
1987  return is_window_function_operator(rex_cast->getOperand(0));
1988  }
1989 
1991  return true;
1992  }
1993 
1994  return false;
1995 }
1996 
1997 } // namespace
1998 
2000  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2001  const std::vector<const RelAlgNode*>& left_deep_joins,
2002  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2003  query_hints) {
2004  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
2005  std::vector<size_t> crt_pattern;
2006  CoalesceState crt_state{CoalesceState::Initial};
2007 
2008  auto reset_state = [&crt_pattern, &crt_state]() {
2009  crt_state = CoalesceState::Initial;
2010  std::vector<size_t>().swap(crt_pattern);
2011  };
2012 
2013  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
2014  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
2015  switch (crt_state) {
2016  case CoalesceState::Initial: {
2017  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
2018  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
2019  left_deep_joins.end()) {
2020  crt_pattern.push_back(size_t(nodeIt));
2021  crt_state = CoalesceState::Filter;
2022  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2023  } else if (auto project_node =
2024  std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2025  if (project_node->hasWindowFunctionExpr()) {
2026  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2027  } else {
2028  crt_pattern.push_back(size_t(nodeIt));
2029  crt_state = CoalesceState::FirstProject;
2030  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2031  }
2032  } else {
2033  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2034  }
2035  break;
2036  }
2037  case CoalesceState::Filter: {
2038  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2039  // Given we now add preceding projects for all window functions following
2040  // RelFilter nodes, the following should never occur
2041  CHECK(!project_node->hasWindowFunctionExpr());
2042  crt_pattern.push_back(size_t(nodeIt));
2043  crt_state = CoalesceState::FirstProject;
2044  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2045  } else {
2046  reset_state();
2047  }
2048  break;
2049  }
2050  case CoalesceState::FirstProject: {
2051  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
2052  crt_pattern.push_back(size_t(nodeIt));
2053  crt_state = CoalesceState::Aggregate;
2054  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2055  } else {
2056  if (crt_pattern.size() >= 2) {
2057  create_compound(nodes, crt_pattern, query_hints);
2058  }
2059  reset_state();
2060  }
2061  break;
2062  }
2063  case CoalesceState::Aggregate: {
2064  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2065  if (!project_node->hasWindowFunctionExpr()) {
2066  // TODO(adb): overloading the simple project terminology again here
2067  bool is_simple_project{true};
2068  for (size_t i = 0; i < project_node->size(); i++) {
2069  const auto scalar_rex = project_node->getProjectAt(i);
2070  // If the top level scalar rex is an input node, we can bypass the visitor
2071  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
2073  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
2074  is_simple_project = false;
2075  break;
2076  }
2077  continue;
2078  }
2079  CoalesceSecondaryProjectVisitor visitor;
2080  if (!visitor.visit(project_node->getProjectAt(i))) {
2081  is_simple_project = false;
2082  break;
2083  }
2084  }
2085  if (is_simple_project) {
2086  crt_pattern.push_back(size_t(nodeIt));
2087  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2088  }
2089  }
2090  }
2091  CHECK_GE(crt_pattern.size(), size_t(2));
2092  create_compound(nodes, crt_pattern, query_hints);
2093  reset_state();
2094  break;
2095  }
2096  default:
2097  CHECK(false);
2098  }
2099  }
2100  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
2101  if (crt_pattern.size() >= 2) {
2102  create_compound(nodes, crt_pattern, query_hints);
2103  }
2104  CHECK(!crt_pattern.empty());
2105  }
2106 }
2107 
2108 class WindowFunctionCollector : public RexVisitor<void*> {
2109  public:
2111  std::unordered_map<size_t, const RexScalar*>& collected_window_func,
2112  bool only_add_window_expr)
2113  : collected_window_func_(collected_window_func)
2114  , only_add_window_expr_(only_add_window_expr) {}
2115 
2116  protected:
2117  // Detect embedded window function expressions in operators
2118  void* visitOperator(const RexOperator* rex_operator) const final {
2119  if (is_window_function_operator(rex_operator)) {
2120  tryAddWindowExpr(rex_operator);
2121  }
2122  const size_t operand_count = rex_operator->size();
2123  for (size_t i = 0; i < operand_count; ++i) {
2124  const auto operand = rex_operator->getOperand(i);
2125  if (is_window_function_operator(operand)) {
2126  // Handle both RexWindowFunctionOperators and window functions built up from
2127  // multiple RexScalar objects (e.g. AVG)
2128  tryAddWindowExpr(operand);
2129  } else {
2130  visit(operand);
2131  }
2132  }
2133  return defaultResult();
2134  }
2135 
2136  // Detect embedded window function expressions in case statements. Note that this may
2137  // manifest as a nested case statement inside a top level case statement, as some
2138  // window functions (sum, avg) are represented as a case statement. Use the
2139  // is_window_function_operator helper to detect complete window function expressions.
2140  void* visitCase(const RexCase* rex_case) const final {
2141  if (is_window_function_operator(rex_case)) {
2142  tryAddWindowExpr(rex_case);
2143  if (!only_add_window_expr_) {
2144  return nullptr;
2145  }
2146  }
2147 
2148  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
2149  const auto when = rex_case->getWhen(i);
2150  if (is_window_function_operator(when)) {
2151  tryAddWindowExpr(when);
2152  } else {
2153  visit(when);
2154  }
2155  const auto then = rex_case->getThen(i);
2156  if (is_window_function_operator(then)) {
2157  tryAddWindowExpr(then);
2158  } else {
2159  visit(then);
2160  }
2161  }
2162  if (rex_case->getElse()) {
2163  auto else_expr = rex_case->getElse();
2164  if (is_window_function_operator(else_expr)) {
2165  tryAddWindowExpr(else_expr);
2166  } else {
2167  visit(else_expr);
2168  }
2169  }
2170  return defaultResult();
2171  }
2172 
2173  void tryAddWindowExpr(RexScalar const* expr) const {
2174  if (!only_add_window_expr_) {
2175  collected_window_func_.emplace(expr->toHash(), expr);
2176  } else {
2177  if (auto window_expr = dynamic_cast<RexWindowFunctionOperator const*>(expr)) {
2178  collected_window_func_.emplace(window_expr->toHash(), window_expr);
2179  }
2180  }
2181  }
2182 
2183  void* defaultResult() const final { return nullptr; }
2184 
2185  private:
2186  std::unordered_map<size_t, const RexScalar*>& collected_window_func_;
2188 };
2189 
2191  public:
2193  std::unordered_set<size_t>& collected_window_func_hash,
2194  std::vector<std::unique_ptr<const RexScalar>>& new_rex_input_for_window_func,
2195  std::unordered_map<size_t, size_t>& window_func_to_new_rex_input_idx_map,
2196  RelProject* new_project,
2197  std::unordered_map<size_t, std::unique_ptr<const RexInput>>&
2198  new_rex_input_from_child_node)
2199  : collected_window_func_hash_(collected_window_func_hash)
2200  , new_rex_input_for_window_func_(new_rex_input_for_window_func)
2201  , window_func_to_new_rex_input_idx_map_(window_func_to_new_rex_input_idx_map)
2202  , new_project_(new_project)
2203  , new_rex_input_from_child_node_(new_rex_input_from_child_node) {
2204  CHECK_EQ(collected_window_func_hash_.size(),
2205  window_func_to_new_rex_input_idx_map_.size());
2206  for (auto hash : collected_window_func_hash_) {
2207  auto rex_it = window_func_to_new_rex_input_idx_map_.find(hash);
2208  CHECK(rex_it != window_func_to_new_rex_input_idx_map_.end());
2209  CHECK_LT(rex_it->second, new_rex_input_for_window_func_.size());
2210  }
2211  CHECK(new_project_);
2212  }
2213 
2214  protected:
2215  RetType visitInput(const RexInput* rex_input) const final {
2216  if (rex_input->getSourceNode() != new_project_) {
2217  const auto cur_index = rex_input->getIndex();
2218  auto cur_source_node = rex_input->getSourceNode();
2219  std::string field_name = "";
2220  if (auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
2221  field_name = cur_project_node->getFieldName(cur_index);
2222  }
2223  auto rex_input_hash = rex_input->toHash();
2224  auto rex_input_it = new_rex_input_from_child_node_.find(rex_input_hash);
2225  if (rex_input_it == new_rex_input_from_child_node_.end()) {
2226  auto new_rex_input =
2227  std::make_unique<RexInput>(new_project_, new_project_->size());
2228  new_project_->appendInput(field_name, rex_input->deepCopy());
2229  new_rex_input_from_child_node_.emplace(rex_input_hash, new_rex_input->deepCopy());
2230  return new_rex_input;
2231  } else {
2232  return rex_input_it->second->deepCopy();
2233  }
2234  } else {
2235  return rex_input->deepCopy();
2236  }
2237  }
2238 
2239  RetType visitOperator(const RexOperator* rex_operator) const final {
2240  auto new_rex_idx = is_collected_window_function(rex_operator->toHash());
2241  if (new_rex_idx) {
2242  return get_new_rex_input(*new_rex_idx);
2243  }
2244 
2245  const auto rex_window_function_operator =
2246  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
2247  if (rex_window_function_operator) {
2248  // Deep copy the embedded window function operator
2249  return visitWindowFunctionOperator(rex_window_function_operator);
2250  }
2251 
2252  const size_t operand_count = rex_operator->size();
2253  std::vector<RetType> new_opnds;
2254  for (size_t i = 0; i < operand_count; ++i) {
2255  const auto operand = rex_operator->getOperand(i);
2256  auto new_rex_idx_for_operand = is_collected_window_function(operand->toHash());
2257  if (new_rex_idx_for_operand) {
2258  new_opnds.push_back(get_new_rex_input(*new_rex_idx_for_operand));
2259  } else {
2260  new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
2261  }
2262  }
2263  return rex_operator->getDisambiguated(new_opnds);
2264  }
2265 
2266  RetType visitCase(const RexCase* rex_case) const final {
2267  auto new_rex_idx = is_collected_window_function(rex_case->toHash());
2268  if (new_rex_idx) {
2269  return get_new_rex_input(*new_rex_idx);
2270  }
2271 
2272  std::vector<std::pair<RetType, RetType>> new_pair_list;
2273  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
2274  auto when_operand = rex_case->getWhen(i);
2275  auto new_rex_idx_for_when_operand =
2276  is_collected_window_function(when_operand->toHash());
2277 
2278  auto then_operand = rex_case->getThen(i);
2279  auto new_rex_idx_for_then_operand =
2280  is_collected_window_function(then_operand->toHash());
2281 
2282  new_pair_list.emplace_back(
2283  new_rex_idx_for_when_operand ? get_new_rex_input(*new_rex_idx_for_when_operand)
2284  : visit(when_operand),
2285  new_rex_idx_for_then_operand ? get_new_rex_input(*new_rex_idx_for_then_operand)
2286  : visit(then_operand));
2287  }
2288  auto new_rex_idx_for_else_operand =
2289  is_collected_window_function(rex_case->getElse()->toHash());
2290  auto new_else = new_rex_idx_for_else_operand
2291  ? get_new_rex_input(*new_rex_idx_for_else_operand)
2292  : visit(rex_case->getElse());
2293  return std::make_unique<RexCase>(new_pair_list, new_else);
2294  }
2295 
2296  private:
2297  std::optional<size_t> is_collected_window_function(size_t rex_hash) const {
2298  auto rex_it = window_func_to_new_rex_input_idx_map_.find(rex_hash);
2299  if (rex_it != window_func_to_new_rex_input_idx_map_.end()) {
2300  return rex_it->second;
2301  }
2302  return std::nullopt;
2303  }
2304 
2305  std::unique_ptr<const RexScalar> get_new_rex_input(size_t rex_idx) const {
2306  CHECK_GE(rex_idx, 0UL);
2307  CHECK_LT(rex_idx, new_rex_input_for_window_func_.size());
2308  auto& new_rex_input = new_rex_input_for_window_func_.at(rex_idx);
2309  CHECK(new_rex_input);
2310  auto copied_rex_input = copier_.visit(new_rex_input.get());
2311  return copied_rex_input;
2312  }
2313 
2314  std::unordered_set<size_t>& collected_window_func_hash_;
2315  // we should have new rex_input for each window function collected
2316  std::vector<std::unique_ptr<const RexScalar>>& new_rex_input_for_window_func_;
2317  // an index to get a new rex_input for the collected window function
2318  std::unordered_map<size_t, size_t>& window_func_to_new_rex_input_idx_map_;
2320  std::unordered_map<size_t, std::unique_ptr<const RexInput>>&
2323 };
2324 
2326  std::shared_ptr<RelProject> prev_node,
2327  std::shared_ptr<RelProject> new_node,
2328  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2329  query_hints) {
2330  auto delivered_hints = prev_node->getDeliveredHints();
2331  bool needs_propagate_hints = !delivered_hints->empty();
2332  if (needs_propagate_hints) {
2333  for (auto& kv : *delivered_hints) {
2334  new_node->addHint(kv.second);
2335  }
2336  auto prev_it = query_hints.find(prev_node->toHash());
2337  // query hint for the prev projection node should be registered
2338  CHECK(prev_it != query_hints.end());
2339  auto prev_hint_it = prev_it->second.find(prev_node->getId());
2340  CHECK(prev_hint_it != prev_it->second.end());
2341  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
2342  hint_map.emplace(new_node->getId(), prev_hint_it->second);
2343  query_hints.emplace(new_node->toHash(), hint_map);
2344  }
2345 }
2346 
2370  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2371  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2372  query_hints) {
2373  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2374  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2375  const auto node = *node_itr;
2376  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2377  if (!window_func_project_node) {
2378  continue;
2379  }
2380 
2381  const auto prev_node_itr = std::prev(node_itr);
2382  const auto prev_node = *prev_node_itr;
2383  CHECK(prev_node);
2384 
2385  // map scalar expression index in the project node to window function ptr
2386  std::unordered_map<size_t, const RexScalar*> collected_window_func;
2387  WindowFunctionCollector collector(collected_window_func, false);
2388  // Iterate the target exprs of the project node and check for window function
2389  // expressions. If an embedded expression exists, collect it
2390  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2391  const auto scalar_rex = window_func_project_node->getProjectAt(i);
2392  if (is_window_function_operator(scalar_rex)) {
2393  // top level window function exprs are fine
2394  continue;
2395  }
2396  collector.visit(scalar_rex);
2397  }
2398 
2399  if (!collected_window_func.empty()) {
2400  // we have a nested window function expression
2401  std::unordered_set<size_t> collected_window_func_hash;
2402  // the current window function needs a set of new rex input which references
2403  // expressions in the newly introduced projection node
2404  std::vector<std::unique_ptr<const RexScalar>> new_rex_input_for_window_func;
2405  // a target projection expression of the newly created projection node
2406  std::vector<std::unique_ptr<const RexScalar>> new_scalar_expr_for_window_project;
2407  // a map between nested window function (hash val) and
2408  // its rex index stored in the `new_rex_input_for_window_func`
2409  std::unordered_map<size_t, size_t> window_func_to_new_rex_input_idx_map;
2410  // a map between RexInput of the current window function projection node (hash val)
2411  // and its corresponding new RexInput which is pushed down to the new projection
2412  // node
2413  std::unordered_map<size_t, std::unique_ptr<const RexInput>>
2414  new_rex_input_from_child_node;
2415  RexDeepCopyVisitor copier;
2416 
2417  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2418  std::vector<std::string> dummy_fields;
2419  std::vector<std::string> new_project_field_names;
2420  // create a new project node, it will contain window function expressions
2421  auto new_project =
2422  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2423  // insert this new project node between the current window project node and its
2424  // child node
2425  node_list.insert(node_itr, new_project);
2426 
2427  // retrieve various information to replace expressions in the current window
2428  // function project node w/ considering scalar expressions in the new project node
2429  std::for_each(collected_window_func.begin(),
2430  collected_window_func.end(),
2431  [&new_project_field_names,
2432  &collected_window_func_hash,
2433  &new_rex_input_for_window_func,
2434  &new_scalar_expr_for_window_project,
2435  &copier,
2436  &new_project,
2437  &window_func_to_new_rex_input_idx_map](const auto& kv) {
2438  // compute window function expr's hash, and create a new rex_input
2439  // for it
2440  collected_window_func_hash.insert(kv.first);
2441 
2442  // map an old expression in the window function project node
2443  // to an index of the corresponding new RexInput
2444  const auto rex_idx = new_rex_input_for_window_func.size();
2445  window_func_to_new_rex_input_idx_map.emplace(kv.first, rex_idx);
2446 
2447  // create a new RexInput and make it as one of new expression of the
2448  // newly created project node
2449  new_rex_input_for_window_func.emplace_back(
2450  std::make_unique<const RexInput>(new_project.get(), rex_idx));
2451  new_scalar_expr_for_window_project.push_back(
2452  std::move(copier.visit(kv.second)));
2453  new_project_field_names.emplace_back("");
2454  });
2455  new_project->setExpressions(new_scalar_expr_for_window_project);
2456  new_project->setFields(std::move(new_project_field_names));
2457 
2458  auto window_func_scalar_exprs =
2459  window_func_project_node->getExpressionsAndRelease();
2460  RexWindowFuncReplacementVisitor replacer(collected_window_func_hash,
2461  new_rex_input_for_window_func,
2462  window_func_to_new_rex_input_idx_map,
2463  new_project.get(),
2464  new_rex_input_from_child_node);
2465  size_t rex_idx = 0;
2466  for (auto& scalar_expr : window_func_scalar_exprs) {
2467  // try to replace the old expressions in the window function project node
2468  // with expressions of the newly created project node
2469  auto new_parent_rex = replacer.visit(scalar_expr.get());
2470  window_func_scalar_exprs[rex_idx] = std::move(new_parent_rex);
2471  rex_idx++;
2472  }
2473  // Update the previous window project node
2474  window_func_project_node->setExpressions(window_func_scalar_exprs);
2475  window_func_project_node->replaceInput(prev_node, new_project);
2476  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2477  new_project->setPushedDownWindowExpr();
2478  }
2479  }
2480  nodes.assign(node_list.begin(), node_list.end());
2481 }
2482 
2483 using RexInputSet = std::unordered_set<RexInput>;
2484 
2485 class RexInputCollector : public RexVisitor<RexInputSet> {
2486  public:
2487  RexInputSet visitInput(const RexInput* input) const override {
2488  return RexInputSet{*input};
2489  }
2490 
2491  protected:
2493  const RexInputSet& next_result) const override {
2494  auto result = aggregate;
2495  result.insert(next_result.begin(), next_result.end());
2496  return result;
2497  }
2498 };
2499 
2500 namespace {
2502  bool& has_generic_expr_in_window_func) {
2503  for (auto const& partition_key : window_expr->getPartitionKeys()) {
2504  auto partition_input = dynamic_cast<RexInput const*>(partition_key.get());
2505  if (!partition_input) {
2506  return true;
2507  }
2508  }
2509  for (auto const& order_key : window_expr->getOrderKeys()) {
2510  auto order_input = dynamic_cast<RexInput const*>(order_key.get());
2511  if (!order_input) {
2512  return true;
2513  }
2514  }
2515  for (size_t k = 0; k < window_expr->size(); k++) {
2516  if (!shared::dynamic_castable_to_any<RexInput, RexLiteral>(
2517  window_expr->getOperand(k))) {
2518  has_generic_expr_in_window_func = true;
2519  return true;
2520  }
2521  }
2522  return false;
2523 }
2524 
2525 std::pair<bool, bool> need_pushdown_generic_expr(
2526  RelProject const* window_func_project_node) {
2527  bool has_generic_expr_in_window_func = false;
2528  bool res = false;
2529  for (size_t i = 0; i < window_func_project_node->size(); ++i) {
2530  auto const projected_target = window_func_project_node->getProjectAt(i);
2531  if (auto const* window_expr =
2532  dynamic_cast<RexWindowFunctionOperator const*>(projected_target)) {
2533  res =
2534  find_generic_expr_in_window_func(window_expr, has_generic_expr_in_window_func);
2535  } else if (auto const* case_expr = dynamic_cast<RexCase const*>(projected_target)) {
2536  std::unordered_map<size_t, const RexScalar*> collected_window_func;
2537  WindowFunctionCollector collector(collected_window_func, true);
2538  collector.visit(case_expr);
2539  for (auto const& kv : collected_window_func) {
2540  auto const* candidate_window_expr =
2541  dynamic_cast<RexWindowFunctionOperator const*>(kv.second);
2542  CHECK(candidate_window_expr);
2543  res = find_generic_expr_in_window_func(candidate_window_expr,
2544  has_generic_expr_in_window_func);
2545  }
2546  }
2547  }
2548  return std::make_pair(has_generic_expr_in_window_func, res);
2549 }
2550 }; // namespace
2564  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2565  const bool always_add_project_if_first_project_is_window_expr,
2566  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2567  query_hints) {
2568  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2569  size_t project_node_counter{0};
2570  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2571  const auto node = *node_itr;
2572 
2573  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2574  if (!window_func_project_node) {
2575  continue;
2576  }
2577  project_node_counter++;
2578  if (!window_func_project_node->hasWindowFunctionExpr()) {
2579  // this projection node does not have a window function
2580  // expression -- skip to the next node in the DAG.
2581  continue;
2582  }
2583 
2584  const auto prev_node_itr = std::prev(node_itr);
2585  const auto prev_node = *prev_node_itr;
2586  CHECK(prev_node);
2587 
2588  auto filter_node = std::dynamic_pointer_cast<RelFilter>(prev_node);
2589  auto join_node = std::dynamic_pointer_cast<RelJoin>(prev_node);
2590 
2591  auto scan_node = std::dynamic_pointer_cast<RelScan>(prev_node);
2592  const bool has_multi_fragment_scan_input =
2593  (scan_node &&
2594  (scan_node->getNumShards() > 0 || scan_node->getNumFragments() > 1));
2595  auto const [has_generic_expr_in_window_func, needs_expr_pushdown] =
2596  need_pushdown_generic_expr(window_func_project_node.get());
2597 
2598  // We currently add a preceding project node in one of two conditions:
2599  // 1. always_add_project_if_first_project_is_window_expr = true, which
2600  // we currently only set for distributed, but could also be set to support
2601  // multi-frag window function inputs, either if we can detect that an input table
2602  // is multi-frag up front, or using a retry mechanism like we do for join filter
2603  // push down.
2604  // TODO(todd): Investigate a viable approach for the above.
2605  // 2. Regardless of #1, if the window function project node is preceded by a
2606  // filter node. This is required both for correctness and to avoid pulling
2607  // all source input columns into memory since non-coalesced filter node
2608  // inputs are currently not pruned or eliminated via dead column elimination.
2609  // Note that we expect any filter node followed by a project node to be coalesced
2610  // into a single compound node in RelAlgDag::coalesce_nodes, and that action
2611  // prunes unused inputs.
2612  // TODO(todd): Investigate whether the shotgun filter node issue affects other
2613  // query plans, i.e. filters before joins, and whether there is a more general
2614  // approach to solving this (will still need the preceding project node for
2615  // window functions preceded by filter nodes for correctness though)
2616  // 3. Similar to the above, when the window function project node is preceded
2617  // by a join node.
2618  // 4. when partition by / order by clauses have a general expression instead of
2619  // referencing column
2620 
2621  if (!((always_add_project_if_first_project_is_window_expr &&
2622  project_node_counter == 1) ||
2623  filter_node || join_node || has_multi_fragment_scan_input ||
2624  needs_expr_pushdown)) {
2625  continue;
2626  }
2627 
2628  if (needs_expr_pushdown || join_node) {
2629  // previous logic cannot cover join_node case well, so use the newly introduced
2630  // push-down expression logic to safely add pre_project node before processing
2631  // window function
2632  std::unordered_map<size_t, size_t> expr_offset_cache;
2633  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_new_project;
2634  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_window_project;
2635  std::vector<std::string> fields_for_window_project;
2636  std::vector<std::string> fields_for_new_project;
2637 
2638  // step 0. create new project node with an empty scalar expr to rebind target exprs
2639  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2640  std::vector<std::string> dummy_fields;
2641  auto new_project =
2642  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2643 
2644  // step 1 - 2
2645  PushDownGenericExpressionInWindowFunction visitor(new_project,
2646  scalar_exprs_for_new_project,
2647  fields_for_new_project,
2648  expr_offset_cache);
2649  for (size_t i = 0; i < window_func_project_node->size(); ++i) {
2650  auto projected_target = window_func_project_node->getProjectAt(i);
2651  auto new_projection_target = visitor.visit(projected_target);
2652  scalar_exprs_for_window_project.emplace_back(
2653  std::move(new_projection_target.release()));
2654  }
2655  new_project->setExpressions(scalar_exprs_for_new_project);
2656  new_project->setFields(std::move(fields_for_new_project));
2657  bool has_groupby = false;
2658  auto aggregate = std::dynamic_pointer_cast<RelAggregate>(prev_node);
2659  if (aggregate) {
2660  has_groupby = aggregate->getGroupByCount() > 0;
2661  }
2662  // force rowwise output to prevent computing incorrect query result
2663  if (has_groupby && visitor.hasPartitionExpression()) {
2664  // we currently may compute incorrect result with columnar output when
2665  // 1) the window function has partition expression, and
2666  // 2) a parent node of the window function projection node has group by expression
2667  // todo (yoonmin) : relax this
2668  VLOG(1)
2669  << "Query output overridden to row-wise format due to presence of a window "
2670  "function with partition expression and group-by expression.";
2671  new_project->forceRowwiseOutput();
2672  } else if (has_generic_expr_in_window_func) {
2673  VLOG(1) << "Query output overridden to row-wise format due to presence of a "
2674  "generic expression as an input expression of the window "
2675  "function.";
2676  new_project->forceRowwiseOutput();
2677  } else if (visitor.hasCaseExprAsWindowOperand()) {
2678  VLOG(1)
2679  << "Query output overridden to row-wise format due to presence of a window "
2680  "function with a case statement as its operand.";
2681  new_project->forceRowwiseOutput();
2682  }
2683 
2684  // step 3. finalize
2685  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2686  new_project->setPushedDownWindowExpr();
2687  node_list.insert(node_itr, new_project);
2688  window_func_project_node->replaceInput(prev_node, new_project);
2689  window_func_project_node->setExpressions(scalar_exprs_for_window_project);
2690  } else {
2691  // only push rex_inputs listed in the window function down to a new project node
2692  RexInputSet inputs;
2693  RexInputCollector input_collector;
2694  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2695  auto new_inputs =
2696  input_collector.visit(window_func_project_node->getProjectAt(i));
2697  inputs.insert(new_inputs.begin(), new_inputs.end());
2698  }
2699 
2700  // Note: Technically not required since we are mapping old inputs to new input
2701  // indices, but makes the re-mapping of inputs easier to follow.
2702  std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2703  std::sort(sorted_inputs.begin(),
2704  sorted_inputs.end(),
2705  [](const auto& a, const auto& b) { return a.getIndex() < b.getIndex(); });
2706 
2707  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2708  std::vector<std::string> fields;
2709  std::unordered_map<unsigned, unsigned> old_index_to_new_index;
2710  for (auto& input : sorted_inputs) {
2711  CHECK_EQ(input.getSourceNode(), prev_node.get());
2712  CHECK(old_index_to_new_index
2713  .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2714  .second);
2715  scalar_exprs.emplace_back(input.deepCopy());
2716  fields.emplace_back("");
2717  }
2718 
2719  auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2720  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2721  new_project->setPushedDownWindowExpr();
2722  node_list.insert(node_itr, new_project);
2723  window_func_project_node->replaceInput(
2724  prev_node, new_project, old_index_to_new_index);
2725  }
2726  }
2727  nodes.assign(node_list.begin(), node_list.end());
2728 }
2729 
2730 int64_t get_int_literal_field(const rapidjson::Value& obj,
2731  const char field[],
2732  const int64_t default_val) noexcept {
2733  const auto it = obj.FindMember(field);
2734  if (it == obj.MemberEnd()) {
2735  return default_val;
2736  }
2737  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
2738  CHECK_EQ(kDECIMAL, lit->getType());
2739  CHECK_EQ(unsigned(0), lit->getScale());
2740  CHECK_EQ(unsigned(0), lit->getTargetScale());
2741  return lit->getVal<int64_t>();
2742 }
2743 
2744 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
2745  const auto& inputs_json = field(node, "inputs");
2746  CHECK(inputs_json.IsArray() && !inputs_json.Size());
2747 }
2748 
2750  const rapidjson::Value& scan_ra) {
2751  const auto& table_json = field(scan_ra, "table");
2752  CHECK(table_json.IsArray());
2753  CHECK_EQ(unsigned(2), table_json.Size());
2754  const auto td = cat.getMetadataForTable(table_json[1].GetString());
2755  CHECK(td);
2756  return td;
2757 }
2758 
2759 std::vector<std::string> getFieldNamesFromScanNode(const rapidjson::Value& scan_ra) {
2760  const auto& fields_json = field(scan_ra, "fieldNames");
2761  return strings_from_json_array(fields_json);
2762 }
2763 
2764 } // namespace
2765 
2767  for (const auto& expr : scalar_exprs_) {
2768  if (is_window_function_operator(expr.get())) {
2769  return true;
2770  }
2771  }
2772  return false;
2773 }
2774 namespace details {
2775 
2777  public:
2779 
2780  std::vector<std::shared_ptr<RelAlgNode>> run(const rapidjson::Value& rels,
2781  RelAlgDag& root_dag) {
2782  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
2783  const auto& crt_node = *rels_it;
2784  const auto id = node_id(crt_node);
2785  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2786  CHECK(crt_node.IsObject());
2787  std::shared_ptr<RelAlgNode> ra_node = nullptr;
2788  const auto rel_op = json_str(field(crt_node, "relOp"));
2789  if (rel_op == std::string("EnumerableTableScan") ||
2790  rel_op == std::string("LogicalTableScan")) {
2791  ra_node = dispatchTableScan(crt_node);
2792  } else if (rel_op == std::string("LogicalProject")) {
2793  ra_node = dispatchProject(crt_node, root_dag);
2794  } else if (rel_op == std::string("LogicalFilter")) {
2795  ra_node = dispatchFilter(crt_node, root_dag);
2796  } else if (rel_op == std::string("LogicalAggregate")) {
2797  ra_node = dispatchAggregate(crt_node);
2798  } else if (rel_op == std::string("LogicalJoin")) {
2799  ra_node = dispatchJoin(crt_node, root_dag);
2800  } else if (rel_op == std::string("LogicalSort")) {
2801  ra_node = dispatchSort(crt_node);
2802  } else if (rel_op == std::string("LogicalValues")) {
2803  ra_node = dispatchLogicalValues(crt_node);
2804  } else if (rel_op == std::string("LogicalTableModify")) {
2805  ra_node = dispatchModify(crt_node);
2806  } else if (rel_op == std::string("LogicalTableFunctionScan")) {
2807  ra_node = dispatchTableFunction(crt_node, root_dag);
2808  } else if (rel_op == std::string("LogicalUnion")) {
2809  ra_node = dispatchUnion(crt_node);
2810  } else {
2811  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
2812  }
2813  nodes_.push_back(ra_node);
2814  }
2815 
2816  return std::move(nodes_);
2817  }
2818 
2819  private:
2820  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
2821  check_empty_inputs_field(scan_ra);
2822  CHECK(scan_ra.IsObject());
2823  const auto td = getTableFromScanNode(cat_, scan_ra);
2824  const auto field_names = getFieldNamesFromScanNode(scan_ra);
2825  if (scan_ra.HasMember("hints")) {
2826  auto scan_node = std::make_shared<RelScan>(td, field_names);
2827  getRelAlgHints(scan_ra, scan_node);
2828  return scan_node;
2829  }
2830  return std::make_shared<RelScan>(td, field_names);
2831  }
2832 
2833  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra,
2834  RelAlgDag& root_dag) {
2835  const auto inputs = getRelAlgInputs(proj_ra);
2836  CHECK_EQ(size_t(1), inputs.size());
2837  const auto& exprs_json = field(proj_ra, "exprs");
2838  CHECK(exprs_json.IsArray());
2839  std::vector<std::unique_ptr<const RexScalar>> exprs;
2840  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
2841  ++exprs_json_it) {
2842  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, cat_, root_dag));
2843  }
2844  const auto& fields = field(proj_ra, "fields");
2845  if (proj_ra.HasMember("hints")) {
2846  auto project_node = std::make_shared<RelProject>(
2847  exprs, strings_from_json_array(fields), inputs.front());
2848  getRelAlgHints(proj_ra, project_node);
2849  return project_node;
2850  }
2851  return std::make_shared<RelProject>(
2852  exprs, strings_from_json_array(fields), inputs.front());
2853  }
2854 
2855  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra,
2856  RelAlgDag& root_dag) {
2857  const auto inputs = getRelAlgInputs(filter_ra);
2858  CHECK_EQ(size_t(1), inputs.size());
2859  const auto id = node_id(filter_ra);
2860  CHECK(id);
2861  auto condition = parse_scalar_expr(field(filter_ra, "condition"), cat_, root_dag);
2862  return std::make_shared<RelFilter>(condition, inputs.front());
2863  }
2864 
2865  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
2866  const auto inputs = getRelAlgInputs(agg_ra);
2867  CHECK_EQ(size_t(1), inputs.size());
2868  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
2869  const auto group = indices_from_json_array(field(agg_ra, "group"));
2870  for (size_t i = 0; i < group.size(); ++i) {
2871  CHECK_EQ(i, group[i]);
2872  }
2873  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
2874  throw QueryNotSupported("GROUP BY extensions not supported");
2875  }
2876  const auto& aggs_json_arr = field(agg_ra, "aggs");
2877  CHECK(aggs_json_arr.IsArray());
2878  std::vector<std::unique_ptr<const RexAgg>> aggs;
2879  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
2880  aggs_json_arr_it != aggs_json_arr.End();
2881  ++aggs_json_arr_it) {
2882  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
2883  }
2884  if (agg_ra.HasMember("hints")) {
2885  auto agg_node =
2886  std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2887  getRelAlgHints(agg_ra, agg_node);
2888  return agg_node;
2889  }
2890  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2891  }
2892 
2893  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra,
2894  RelAlgDag& root_dag) {
2895  const auto inputs = getRelAlgInputs(join_ra);
2896  CHECK_EQ(size_t(2), inputs.size());
2897  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
2898  auto filter_rex = parse_scalar_expr(field(join_ra, "condition"), cat_, root_dag);
2899  if (join_ra.HasMember("hints")) {
2900  auto join_node =
2901  std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2902  getRelAlgHints(join_ra, join_node);
2903  return join_node;
2904  }
2905  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2906  }
2907 
2908  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
2909  const auto inputs = getRelAlgInputs(sort_ra);
2910  CHECK_EQ(size_t(1), inputs.size());
2911  std::vector<SortField> collation;
2912  const auto& collation_arr = field(sort_ra, "collation");
2913  CHECK(collation_arr.IsArray());
2914  for (auto collation_arr_it = collation_arr.Begin();
2915  collation_arr_it != collation_arr.End();
2916  ++collation_arr_it) {
2917  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
2918  const auto sort_dir = parse_sort_direction(*collation_arr_it);
2919  const auto null_pos = parse_nulls_position(*collation_arr_it);
2920  collation.emplace_back(field_idx, sort_dir, null_pos);
2921  }
2922  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
2923  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
2924  auto ret = std::make_shared<RelSort>(
2925  collation, limit > 0 ? limit : 0, offset, inputs.front(), limit > 0);
2926  ret->setEmptyResult(limit == 0);
2927  return ret;
2928  }
2929 
2930  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
2931  const auto inputs = getRelAlgInputs(logical_modify_ra);
2932  CHECK_EQ(size_t(1), inputs.size());
2933 
2934  const auto table_descriptor = getTableFromScanNode(cat_, logical_modify_ra);
2935  if (table_descriptor->isView) {
2936  throw std::runtime_error("UPDATE of a view is unsupported.");
2937  }
2938 
2939  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
2940  std::string op = json_str(field(logical_modify_ra, "operation"));
2941  RelModify::TargetColumnList target_column_list;
2942 
2943  if (op == "UPDATE") {
2944  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
2945  CHECK(update_columns.IsArray());
2946 
2947  for (auto column_arr_it = update_columns.Begin();
2948  column_arr_it != update_columns.End();
2949  ++column_arr_it) {
2950  target_column_list.push_back(column_arr_it->GetString());
2951  }
2952  }
2953 
2954  auto modify_node = std::make_shared<RelModify>(
2955  cat_, table_descriptor, flattened, op, target_column_list, inputs[0]);
2956  switch (modify_node->getOperation()) {
2958  modify_node->applyDeleteModificationsToInputNode();
2959  break;
2960  }
2962  modify_node->applyUpdateModificationsToInputNode();
2963  break;
2964  }
2965  default:
2966  throw std::runtime_error("Unsupported RelModify operation: " +
2967  json_node_to_string(logical_modify_ra));
2968  }
2969 
2970  return modify_node;
2971  }
2972 
2973  std::shared_ptr<RelTableFunction> dispatchTableFunction(
2974  const rapidjson::Value& table_func_ra,
2975  RelAlgDag& root_dag) {
2976  const auto inputs = getRelAlgInputs(table_func_ra);
2977  const auto& invocation = field(table_func_ra, "invocation");
2978  CHECK(invocation.IsObject());
2979 
2980  const auto& operands = field(invocation, "operands");
2981  CHECK(operands.IsArray());
2982  CHECK_GE(operands.Size(), unsigned(0));
2983 
2984  std::vector<const Rex*> col_inputs;
2985  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
2986  std::vector<std::string> fields;
2987 
2988  for (auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
2989  ++exprs_json_it) {
2990  const auto& expr_json = *exprs_json_it;
2991  CHECK(expr_json.IsObject());
2992  if (expr_json.HasMember("op")) {
2993  const auto op_str = json_str(field(expr_json, "op"));
2994  if (op_str == "CAST" && expr_json.HasMember("type")) {
2995  const auto& expr_type = field(expr_json, "type");
2996  CHECK(expr_type.IsObject());
2997  CHECK(expr_type.HasMember("type"));
2998  const auto& expr_type_name = json_str(field(expr_type, "type"));
2999  if (expr_type_name == "CURSOR") {
3000  CHECK(expr_json.HasMember("operands"));
3001  const auto& expr_operands = field(expr_json, "operands");
3002  CHECK(expr_operands.IsArray());
3003  if (expr_operands.Size() != 1) {
3004  throw std::runtime_error(
3005  "Table functions currently only support one ResultSet input");
3006  }
3007  auto pos = field(expr_operands[0], "input").GetInt();
3008  CHECK_LT(pos, inputs.size());
3009  for (size_t i = inputs[pos]->size(); i > 0; i--) {
3010  table_func_inputs.emplace_back(
3011  std::make_unique<RexAbstractInput>(col_inputs.size()));
3012  col_inputs.emplace_back(table_func_inputs.back().get());
3013  }
3014  continue;
3015  }
3016  }
3017  }
3018  table_func_inputs.emplace_back(parse_scalar_expr(*exprs_json_it, cat_, root_dag));
3019  }
3020 
3021  const auto& op_name = field(invocation, "op");
3022  CHECK(op_name.IsString());
3023 
3024  std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
3025  const auto& row_types = field(table_func_ra, "rowType");
3026  CHECK(row_types.IsArray());
3027  CHECK_GE(row_types.Size(), unsigned(0));
3028  const auto& row_types_array = row_types.GetArray();
3029  for (size_t i = 0; i < row_types_array.Size(); i++) {
3030  // We don't care about the type information in rowType -- replace each output with
3031  // a reference to be resolved later in the translator
3032  table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
3033  fields.emplace_back("");
3034  }
3035  return std::make_shared<RelTableFunction>(op_name.GetString(),
3036  inputs,
3037  fields,
3038  col_inputs,
3039  table_func_inputs,
3040  table_function_projected_outputs);
3041  }
3042 
3043  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
3044  const rapidjson::Value& logical_values_ra) {
3045  const auto& tuple_type_arr = field(logical_values_ra, "type");
3046  CHECK(tuple_type_arr.IsArray());
3047  std::vector<TargetMetaInfo> tuple_type;
3048  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
3049  tuple_type_arr_it != tuple_type_arr.End();
3050  ++tuple_type_arr_it) {
3051  const auto component_type = parse_type(*tuple_type_arr_it);
3052  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
3053  tuple_type.emplace_back(component_name, component_type);
3054  }
3055  const auto& inputs_arr = field(logical_values_ra, "inputs");
3056  CHECK(inputs_arr.IsArray());
3057  const auto& tuples_arr = field(logical_values_ra, "tuples");
3058  CHECK(tuples_arr.IsArray());
3059 
3060  if (inputs_arr.Size()) {
3061  throw QueryNotSupported("Inputs not supported in logical values yet.");
3062  }
3063 
3064  std::vector<RelLogicalValues::RowValues> values;
3065  if (tuples_arr.Size()) {
3066  for (const auto& row : tuples_arr.GetArray()) {
3067  CHECK(row.IsArray());
3068  const auto values_json = row.GetArray();
3069  if (!values.empty()) {
3070  CHECK_EQ(values[0].size(), values_json.Size());
3071  }
3072  values.emplace_back(RelLogicalValues::RowValues{});
3073  for (const auto& value : values_json) {
3074  CHECK(value.IsObject());
3075  CHECK(value.HasMember("literal"));
3076  values.back().emplace_back(parse_literal(value));
3077  }
3078  }
3079  }
3080 
3081  return std::make_shared<RelLogicalValues>(tuple_type, values);
3082  }
3083 
3084  std::shared_ptr<RelLogicalUnion> dispatchUnion(
3085  const rapidjson::Value& logical_union_ra) {
3086  auto inputs = getRelAlgInputs(logical_union_ra);
3087  auto const& all_type_bool = field(logical_union_ra, "all");
3088  CHECK(all_type_bool.IsBool());
3089  return std::make_shared<RelLogicalUnion>(std::move(inputs), all_type_bool.GetBool());
3090  }
3091 
3092  RelAlgInputs getRelAlgInputs(const rapidjson::Value& node) {
3093  if (node.HasMember("inputs")) {
3094  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
3095  RelAlgInputs ra_inputs;
3096  for (const auto& str_id : str_input_ids) {
3097  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
3098  }
3099  return ra_inputs;
3100  }
3101  return {prev(node)};
3102  }
3103 
3104  std::pair<std::string, std::string> getKVOptionPair(std::string& str, size_t& pos) {
3105  auto option = str.substr(0, pos);
3106  std::string delim = "=";
3107  size_t delim_pos = option.find(delim);
3108  auto key = option.substr(0, delim_pos);
3109  auto val = option.substr(delim_pos + 1, option.length());
3110  str.erase(0, pos + delim.length() + 1);
3111  return {key, val};
3112  }
3113 
3114  ExplainedQueryHint parseHintString(std::string& hint_string) {
3115  std::string white_space_delim = " ";
3116  int l = hint_string.length();
3117  hint_string = hint_string.erase(0, 1).substr(0, l - 2);
3118  size_t pos = 0;
3119  auto global_hint_checker = [&](const std::string& input_hint_name) -> HintIdentifier {
3120  bool global_hint = false;
3121  std::string hint_name = input_hint_name;
3122  auto global_hint_identifier = hint_name.substr(0, 2);
3123  if (global_hint_identifier.compare("g_") == 0) {
3124  global_hint = true;
3125  hint_name = hint_name.substr(2, hint_string.length());
3126  }
3127  return {global_hint, hint_name};
3128  };
3129  auto parsed_hint =
3130  global_hint_checker(hint_string.substr(0, hint_string.find(white_space_delim)));
3131  auto hint_type = RegisteredQueryHint::translateQueryHint(parsed_hint.hint_name);
3132  if ((pos = hint_string.find("options:")) != std::string::npos) {
3133  // need to parse hint options
3134  std::vector<std::string> tokens;
3135  bool kv_list_op = false;
3136  std::string raw_options = hint_string.substr(pos + 8, hint_string.length() - 2);
3137  if (raw_options.find('{') != std::string::npos) {
3138  kv_list_op = true;
3139  } else {
3140  CHECK(raw_options.find('[') != std::string::npos);
3141  }
3142  auto t1 = raw_options.erase(0, 1);
3143  raw_options = t1.substr(0, t1.length() - 1);
3144  std::string op_delim = ", ";
3145  if (kv_list_op) {
3146  // kv options
3147  std::unordered_map<std::string, std::string> kv_options;
3148  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3149  auto kv_pair = getKVOptionPair(raw_options, pos);
3150  kv_options.emplace(kv_pair.first, kv_pair.second);
3151  }
3152  // handle the last kv pair
3153  auto kv_pair = getKVOptionPair(raw_options, pos);
3154  kv_options.emplace(kv_pair.first, kv_pair.second);
3155  return {hint_type, parsed_hint.global_hint, false, true, kv_options};
3156  } else {
3157  std::vector<std::string> list_options;
3158  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3159  list_options.emplace_back(raw_options.substr(0, pos));
3160  raw_options.erase(0, pos + white_space_delim.length() + 1);
3161  }
3162  // handle the last option
3163  list_options.emplace_back(raw_options.substr(0, pos));
3164  return {hint_type, parsed_hint.global_hint, false, false, list_options};
3165  }
3166  } else {
3167  // marker hint: no extra option for this hint
3168  return {hint_type, parsed_hint.global_hint, true, false};
3169  }
3170  }
3171 
3172  void getRelAlgHints(const rapidjson::Value& json_node,
3173  std::shared_ptr<RelAlgNode> node) {
3174  std::string hint_explained = json_str(field(json_node, "hints"));
3175  size_t pos = 0;
3176  std::string delim = "|";
3177  std::vector<std::string> hint_list;
3178  while ((pos = hint_explained.find(delim)) != std::string::npos) {
3179  hint_list.emplace_back(hint_explained.substr(0, pos));
3180  hint_explained.erase(0, pos + delim.length());
3181  }
3182  // handling the last one
3183  hint_list.emplace_back(hint_explained.substr(0, pos));
3184 
3185  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
3186  if (agg_node) {
3187  for (std::string& hint : hint_list) {
3188  auto parsed_hint = parseHintString(hint);
3189  agg_node->addHint(parsed_hint);
3190  }
3191  }
3192  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
3193  if (project_node) {
3194  for (std::string& hint : hint_list) {
3195  auto parsed_hint = parseHintString(hint);
3196  project_node->addHint(parsed_hint);
3197  }
3198  }
3199  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
3200  if (scan_node) {
3201  for (std::string& hint : hint_list) {
3202  auto parsed_hint = parseHintString(hint);
3203  scan_node->addHint(parsed_hint);
3204  }
3205  }
3206  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
3207  if (join_node) {
3208  for (std::string& hint : hint_list) {
3209  auto parsed_hint = parseHintString(hint);
3210  join_node->addHint(parsed_hint);
3211  }
3212  }
3213 
3214  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
3215  if (compound_node) {
3216  for (std::string& hint : hint_list) {
3217  auto parsed_hint = parseHintString(hint);
3218  compound_node->addHint(parsed_hint);
3219  }
3220  }
3221  }
3222 
3223  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
3224  const auto id = node_id(crt_node);
3225  CHECK(id);
3226  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
3227  return nodes_.back();
3228  }
3229 
3231  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
3232 };
3233 
3234 } // namespace details
3235 
3236 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::buildDag(
3237  const std::string& query_ra,
3239  const bool optimize_dag) {
3240  rapidjson::Document query_ast;
3241  query_ast.Parse(query_ra.c_str());
3242  VLOG(2) << "Parsing query RA JSON: " << query_ra;
3243  if (query_ast.HasParseError()) {
3244  query_ast.GetParseError();
3245  LOG(ERROR) << "Failed to parse RA tree from Calcite (offset "
3246  << query_ast.GetErrorOffset() << "):\n"
3247  << rapidjson::GetParseError_En(query_ast.GetParseError());
3248  VLOG(1) << "Failed to parse query RA: " << query_ra;
3249  throw std::runtime_error(
3250  "Failed to parse relational algebra tree. Possible query syntax error.");
3251  }
3252  CHECK(query_ast.IsObject());
3254 
3255  return build(query_ast, cat, nullptr, optimize_dag);
3256 }
3257 
3258 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::buildDagForSubquery(
3259  RelAlgDag& root_dag,
3260  const rapidjson::Value& query_ast,
3262  return build(query_ast, cat, &root_dag, true);
3263 }
3264 
3265 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::build(const rapidjson::Value& query_ast,
3267  RelAlgDag* root_dag,
3268  const bool optimize_dag) {
3269  const auto& rels = field(query_ast, "rels");
3270  CHECK(rels.IsArray());
3271 
3272  auto rel_alg_dag_ptr = std::make_unique<RelAlgDag>();
3273  auto& rel_alg_dag = *rel_alg_dag_ptr;
3274  auto& nodes = getNodes(rel_alg_dag);
3275 
3276  try {
3277  nodes = details::RelAlgDispatcher(cat).run(rels, root_dag ? *root_dag : rel_alg_dag);
3278  } catch (const QueryNotSupported&) {
3279  throw;
3280  }
3281  CHECK(!nodes.empty());
3282  bind_inputs(nodes);
3283 
3285 
3286  if (optimize_dag) {
3287  optimizeDag(rel_alg_dag);
3288  }
3289 
3290  return rel_alg_dag_ptr;
3291 }
3292 
3294  auto const build_state = rel_alg_dag.getBuildState();
3295  if (build_state == RelAlgDag::BuildState::kBuiltOptimized) {
3296  return;
3297  }
3298 
3300  << static_cast<int>(build_state);
3301 
3302  auto& nodes = getNodes(rel_alg_dag);
3303  auto& subqueries = getSubqueries(rel_alg_dag);
3304  auto& query_hints = getQueryHints(rel_alg_dag);
3305 
3306  compute_node_hash(nodes);
3307  handle_query_hint(nodes, rel_alg_dag);
3308  mark_nops(nodes);
3309  simplify_sort(nodes);
3311  eliminate_identical_copy(nodes);
3312  fold_filters(nodes);
3313  std::vector<const RelAlgNode*> filtered_left_deep_joins;
3314  std::vector<const RelAlgNode*> left_deep_joins;
3315  for (const auto& node : nodes) {
3316  const auto left_deep_join_root = get_left_deep_join_root(node);
3317  // The filter which starts a left-deep join pattern must not be coalesced
3318  // since it contains (part of) the join condition.
3319  if (left_deep_join_root) {
3320  left_deep_joins.push_back(left_deep_join_root.get());
3321  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
3322  filtered_left_deep_joins.push_back(left_deep_join_root.get());
3323  }
3324  }
3325  }
3326  if (filtered_left_deep_joins.empty()) {
3328  }
3329  eliminate_dead_columns(nodes);
3330  eliminate_dead_subqueries(subqueries, nodes.back().get());
3331  separate_window_function_expressions(nodes, query_hints);
3333  nodes,
3334  g_cluster /* always_add_project_if_first_project_is_window_expr */,
3335  query_hints);
3336  coalesce_nodes(nodes, left_deep_joins, query_hints);
3337  CHECK(nodes.back().use_count() == 1);
3338  create_left_deep_join(nodes);
3339 
3341 }
3342 
3343 void RelAlgDag::eachNode(std::function<void(RelAlgNode const*)> const& callback) const {
3344  for (auto const& node : nodes_) {
3345  if (node) {
3346  callback(node.get());
3347  }
3348  }
3349 }
3350 
3352  for (auto& node : nodes_) {
3353  if (node) {
3354  node->resetQueryExecutionState();
3355  }
3356  }
3357 }
3358 
3359 // Return tree with depth represented by indentations.
3360 std::string tree_string(const RelAlgNode* ra, const size_t depth) {
3361  std::string result = std::string(2 * depth, ' ') + ::toString(ra) + '\n';
3362  for (size_t i = 0; i < ra->inputCount(); ++i) {
3363  result += tree_string(ra->getInput(i), depth + 1);
3364  }
3365  return result;
3366 }
3367 
3368 std::string RexSubQuery::toString(RelRexToStringConfig config) const {
3369  return cat(::typeName(this), "(", ra_->toString(config), ")");
3370 }
3371 
3372 size_t RexSubQuery::toHash() const {
3373  if (!hash_) {
3374  hash_ = typeid(RexSubQuery).hash_code();
3375  boost::hash_combine(*hash_, ra_->toHash());
3376  }
3377  return *hash_;
3378 }
3379 
3380 std::string RexInput::toString(RelRexToStringConfig config) const {
3381  const auto scan_node = dynamic_cast<const RelScan*>(node_);
3382  if (scan_node) {
3383  auto field_name = scan_node->getFieldName(getIndex());
3384  auto table_name = scan_node->getTableDescriptor()->tableName;
3385  return ::typeName(this) + "(" + table_name + "." + field_name + ")";
3386  }
3387  auto node_id_in_plan = node_->getIdInPlanTree();
3388  auto node_id_str =
3389  node_id_in_plan ? std::to_string(*node_id_in_plan) : std::to_string(node_->getId());
3390  auto node_str = config.skip_input_nodes ? "(input_node_id=" + node_id_str
3391  : "(input_node=" + node_->toString(config);
3392  return cat(::typeName(this), node_str, ", in_index=", std::to_string(getIndex()), ")");
3393 }
3394 
3395 size_t RexInput::toHash() const {
3396  if (!hash_) {
3397  hash_ = typeid(RexInput).hash_code();
3398  boost::hash_combine(*hash_, node_->toHash());
3399  boost::hash_combine(*hash_, getIndex());
3400  }
3401  return *hash_;
3402 }
3403 
3404 std::string RelCompound::toString(RelRexToStringConfig config) const {
3405  auto ret = cat(::typeName(this),
3406  ", filter_expr=",
3407  (filter_expr_ ? filter_expr_->toString(config) : "null"),
3408  ", target_exprs=");
3409  for (auto& expr : target_exprs_) {
3410  ret += expr->toString(config) + " ";
3411  }
3412  ret += ", agg_exps=";
3413  for (auto& expr : agg_exprs_) {
3414  ret += expr->toString(config) + " ";
3415  }
3416  ret += ", scalar_sources=";
3417  for (auto& expr : scalar_sources_) {
3418  ret += expr->toString(config) + " ";
3419  }
3420  return cat(ret,
3421  ", ",
3423  ", ",
3424  ", fields=",
3425  ::toString(fields_),
3426  ", is_agg=",
3428 }
3429 
3430 size_t RelCompound::toHash() const {
3431  if (!hash_) {
3432  hash_ = typeid(RelCompound).hash_code();
3433  boost::hash_combine(*hash_, filter_expr_ ? filter_expr_->toHash() : HASH_N);
3434  boost::hash_combine(*hash_, is_agg_);
3435  for (auto& target_expr : target_exprs_) {
3436  if (auto rex_scalar = dynamic_cast<const RexScalar*>(target_expr)) {
3437  boost::hash_combine(*hash_, rex_scalar->toHash());
3438  }
3439  }
3440  for (auto& agg_expr : agg_exprs_) {
3441  boost::hash_combine(*hash_, agg_expr->toHash());
3442  }
3443  for (auto& scalar_source : scalar_sources_) {
3444  boost::hash_combine(*hash_, scalar_source->toHash());
3445  }
3446  boost::hash_combine(*hash_, groupby_count_);
3447  boost::hash_combine(*hash_, ::toString(fields_));
3448  }
3449  return *hash_;
3450 }
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
Definition: RelAlgDag.h:2458
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
const size_t getGroupByCount() const
Definition: RelAlgDag.h:1324
SQLTypes to_sql_type(const std::string &type_name)
void setGlobalQueryHints(const RegisteredQueryHint &global_hints)
Definition: RelAlgDag.h:2949
std::optional< size_t > is_collected_window_function(size_t rex_hash) const
Definition: RelAlgDag.cpp:2297
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1179
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
Definition: RelAlgDag.h:1532
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
Definition: RelAlgDag.cpp:1397
const RexScalar * getThen(const size_t idx) const
Definition: RelAlgDag.h:443
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
Definition: RelAlgDag.cpp:2865
#define CHECK_EQ(x, y)
Definition: Logger.h:297
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2855
size_t groupby_count_
Definition: RelAlgDag.h:1895
void * visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:112
const Catalog_Namespace::Catalog & cat_
Definition: RelAlgDag.cpp:3230
RexRebindReindexInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input, std::unordered_map< unsigned, unsigned > old_to_new_index_map)
Definition: RelAlgDag.cpp:106
void set_notnulls(std::vector< TargetMetaInfo > *tmis0, std::vector< bool > const &notnulls)
Definition: RelAlgDag.cpp:896
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1246
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
Definition: RelAlgDag.cpp:1600
std::unique_ptr< RexSubQuery > deepCopy() const
Definition: RelAlgDag.cpp:59
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.h:1205
JoinType
Definition: sqldefs.h:164
static std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > & getQueryHints(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:2994
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
Definition: RelAlgDag.h:2353
std::string cat(Ts &&...args)
std::optional< size_t > getOffsetForPushedDownExpr(WindowExprType type, size_t expr_offset) const
Definition: RelAlgDag.cpp:157
RexWindowFuncReplacementVisitor(std::unordered_set< size_t > &collected_window_func_hash, std::vector< std::unique_ptr< const RexScalar >> &new_rex_input_for_window_func, std::unordered_map< size_t, size_t > &window_func_to_new_rex_input_idx_map, RelProject *new_project, std::unordered_map< size_t, std::unique_ptr< const RexInput >> &new_rex_input_from_child_node)
Definition: RelAlgDag.cpp:2192
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< bool > get_notnulls(std::vector< TargetMetaInfo > const &tmis0)
Definition: RelAlgDag.cpp:882
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
Definition: sqltypes.h:64
std::vector< std::unique_ptr< const RexScalar > > & scalar_exprs_for_new_project_
Definition: RelAlgDag.cpp:332
size_t size() const override
Definition: RelAlgDag.h:1156
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1868
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
Definition: RelAlgDag.cpp:1899
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3404
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
size_t toHash() const override
Definition: RelAlgDag.cpp:3395
RetType visitInput(const RexInput *rex_input) const final
Definition: RelAlgDag.cpp:2215
std::vector< RexInput > RANodeOutput
Definition: RelAlgDag.h:3044
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
Definition: RelAlgDag.cpp:1432
const RexScalar * getElse() const
Definition: RelAlgDag.h:448
static thread_local unsigned crt_id_
Definition: RelAlgDag.h:961
std::unique_ptr< const RexScalar > visitOperator(const RexOperator *rex_operator) const override
Definition: RelAlgDag.cpp:297
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
Definition: RelAlgDag.cpp:1095
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2820
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
RexScalar const * copyAndRedirectSource(RexScalar const *, size_t input_idx) const
Definition: RelAlgDag.cpp:944
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:685
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1218
SQLAgg to_agg_kind(const std::string &agg_name)
std::shared_ptr< RelLogicalUnion > dispatchUnion(const rapidjson::Value &logical_union_ra)
Definition: RelAlgDag.cpp:3084
#define LOG(tag)
Definition: Logger.h:283
std::vector< std::string > TargetColumnList
Definition: RelAlgDag.h:2022
const SQLTypeInfo & getType() const
Definition: RelAlgDag.h:284
std::unique_ptr< const RexScalar > get_new_rex_input(size_t rex_idx) const
Definition: RelAlgDag.cpp:2305
size_t size() const
Definition: RelAlgDag.h:270
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1274
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2833
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const RexScalar * getOperand(const size_t idx) const
Definition: RelAlgDag.h:272
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1162
std::vector< const Rex * > col_inputs_
Definition: RelAlgDag.h:2350
std::string json_node_to_string(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:962
bool hasEquivCollationOf(const RelSort &that) const
Definition: RelAlgDag.cpp:802
JoinType to_join_type(const std::string &join_type_name)
Definition: RelAlgDag.cpp:1379
void resetQueryExecutionState()
Definition: RelAlgDag.cpp:3351
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1185
std::pair< bool, bool > need_pushdown_generic_expr(RelProject const *window_func_project_node)
Definition: RelAlgDag.cpp:2525
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.h:2961
std::string join(T const &container, std::string const &delim)
#define UNREACHABLE()
Definition: Logger.h:333
void handle_query_hint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDag &rel_alg_dag) noexcept
Definition: RelAlgDag.cpp:1548
bool hint_applied_
Definition: RelAlgDag.h:1534
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
RexInput()
Definition: RelAlgDag.h:384
#define CHECK_GE(x, y)
Definition: Logger.h:302
std::optional< size_t > getIdInPlanTree() const
Definition: RelAlgDag.h:884
SortDirection
Definition: RelAlgDag.h:530
void registerQueryHint(const RelAlgNode *node, const RegisteredQueryHint &query_hint)
Definition: RelAlgDag.h:2907
std::vector< std::string > fields_
Definition: RelAlgDag.h:1294
void pushDownExpressionInWindowFunction(const RexWindowFunctionOperator *window_expr) const
Definition: RelAlgDag.cpp:178
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1506
Definition: sqldefs.h:48
std::unique_ptr< const RexScalar > visitCase(const RexCase *rex_case) const override
Definition: RelAlgDag.cpp:278
const RexScalar * getWhen(const size_t idx) const
Definition: RelAlgDag.h:438
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
Definition: RelAlgDag.cpp:1325
const RegisteredQueryHint & getGlobalHints() const
Definition: RelAlgDag.h:2947
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
Definition: RelAlgDag.cpp:364
void propagate_hints_to_new_project(std::shared_ptr< RelProject > prev_node, std::shared_ptr< RelProject > new_node, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2325
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
Definition: RelAlgDag.cpp:469
std::unique_ptr< const RexScalar > defaultResult() const override
Definition: RelAlgDag.cpp:329
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1398
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:1338
std::unordered_map< size_t, const RexScalar * > & collected_window_func_
Definition: RelAlgDag.cpp:2186
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1355
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
Definition: RelAlgDag.h:1900
void * visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:76
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
Definition: RelAlgDag.cpp:973
static std::vector< std::shared_ptr< RexSubQuery > > & getSubqueries(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:2988
RexInputSet aggregateResult(const RexInputSet &aggregate, const RexInputSet &next_result) const override
Definition: RelAlgDag.cpp:2492
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
Definition: RelAlgDag.cpp:1453
std::unique_ptr< const RexScalar > visitLiteral(const RexLiteral *rex_literal) const override
Definition: RelAlgDag.cpp:264
bool hint_applied_
Definition: RelAlgDag.h:1903
std::string to_string(char const *&&v)
void add_window_function_pre_project(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const bool always_add_project_if_first_project_is_window_expr, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2563
const std::string getFieldName(const size_t i) const
Definition: RelAlgDag.h:990
std::unique_ptr< const RexScalar > visitSubQuery(const RexSubQuery *rex_subquery) const override
Definition: RelAlgDag.cpp:273
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, const Catalog_Namespace::Catalog &cat, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1291
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
Definition: RelAlgDag.h:2007
constexpr double a
Definition: Utm.h:32
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2893
RelLogicalValues()=default
std::vector< std::unique_ptr< const RexScalar > > & new_rex_input_for_window_func_
Definition: RelAlgDag.cpp:2316
std::unordered_set< RexInput > RexInputSet
Definition: RelAlgDag.cpp:2483
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
std::string to_string() const
Definition: sqltypes.h:544
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
virtual std::string toString(RelRexToStringConfig config) const =0
unsigned getIndex() const
Definition: RelAlgDag.h:77
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2369
void markAsNop()
Definition: RelAlgDag.h:932
bool aggregateResult(const bool &aggregate, const bool &next_result) const final
Definition: RelAlgDag.cpp:1936
static auto const HASH_N
Definition: RelAlgDag.h:44
SQLOps getOperator() const
Definition: RelAlgDag.h:282
void setExecutionResult(const ExecutionResultShPtr result)
Definition: RelAlgDag.cpp:50
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2973
std::unordered_map< size_t, std::unique_ptr< const RexInput > > & new_rex_input_from_child_node_
Definition: RelAlgDag.cpp:2321
unsigned getId() const
Definition: RelAlgDag.h:880
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
Definition: RelAlgDag.cpp:763
std::unique_ptr< const RexScalar > visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:252
static QueryHint translateQueryHint(const std::string &hint_name)
Definition: QueryHint.h:332
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
#define CHECK_NE(x, y)
Definition: Logger.h:298
bool isRenaming() const
Definition: RelAlgDag.cpp:512
void setIndex(const unsigned in_index) const
Definition: RelAlgDag.h:79
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1421
size_t toHash() const override
Definition: RelAlgDag.cpp:854
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:1999
std::vector< std::string > fields_
Definition: RelAlgDag.h:1897
static std::unique_ptr< RelAlgDag > build(const rapidjson::Value &query_ast, const Catalog_Namespace::Catalog &cat, RelAlgDag *root_dag, const bool optimize_dag)
Definition: RelAlgDag.cpp:3265
SQLOps to_sql_op(const std::string &op_str)
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1428
void set_scale(int s)
Definition: sqltypes.h:495
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1535
std::vector< std::unique_ptr< const RexScalar > > copyRexScalars(std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
Definition: RelAlgDag.cpp:626
std::vector< std::shared_ptr< const RelAlgNode >> RelAlgInputs
Definition: RelAlgDag.h:316
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
Definition: RelAlgDag.h:1293
RetType visitOperator(const RexOperator *rex_operator) const final
Definition: RelAlgDag.cpp:2239
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:54
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1251
size_t branchCount() const
Definition: RelAlgDag.h:436
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:892
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
Definition: RelAlgDag.cpp:1065
Checked json field retrieval.
void * visitCase(const RexCase *rex_case) const final
Definition: RelAlgDag.cpp:2140
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.cpp:3231
std::unique_ptr< const RexScalar > filter_
Definition: RelAlgDag.h:1727
bool isSimple() const
Definition: RelAlgDag.h:1143
std::vector< const Rex * > remapTargetPointers(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
Definition: RelAlgDag.cpp:637
std::optional< size_t > hash_
Definition: RelAlgDag.h:62
std::string toString(const ExecutorDeviceType &device_type)
std::vector< const Rex * > target_exprs_
Definition: RelAlgDag.h:1902
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
Definition: RelAlgDag.cpp:1507
std::optional< size_t > hash_
Definition: RelAlgDag.h:955
unsigned getId() const
Definition: RelAlgDag.cpp:63
const RelAlgNode * node_
Definition: RelAlgDag.h:410
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
Definition: RelAlgDag.h:915
bool find_generic_expr_in_window_func(RexWindowFunctionOperator const *window_expr, bool &has_generic_expr_in_window_func)
Definition: RelAlgDag.cpp:2501
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1479
RexInputSet visitInput(const RexInput *input) const override
Definition: RelAlgDag.cpp:2487
std::vector< TargetMetaInfo > getCompatibleMetainfoTypes() const
Definition: RelAlgDag.cpp:911
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, const Catalog_Namespace::Catalog &cat, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1084
std::string tree_string(const RelAlgNode *ra, const size_t depth)
Definition: RelAlgDag.cpp:3360
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
Definition: RelAlgDag.h:1425
void compute_node_hash(const std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: RelAlgDag.cpp:1586
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1891
void setTableFuncInputs(std::vector< std::unique_ptr< const RexScalar >> &&exprs)
Definition: RelAlgDag.cpp:729
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:536
size_t toHash() const override
Definition: RelAlgDag.h:460
PushDownGenericExpressionInWindowFunction(std::shared_ptr< RelProject > new_project, std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs_for_new_project, std::vector< std::string > &fields_for_new_project, std::unordered_map< size_t, size_t > &expr_offset_cache)
Definition: RelAlgDag.cpp:128
const RexScalar * getProjectAt(const size_t idx) const
Definition: RelAlgDag.h:1186
bool hint_applied_
Definition: RelAlgDag.h:1427
static std::unique_ptr< RelAlgDag > buildDag(const std::string &query_ra, const Catalog_Namespace::Catalog &cat, const bool optimize_dag)
Definition: RelAlgDag.cpp:3236
#define CHECK_LT(x, y)
Definition: Logger.h:299
Definition: sqltypes.h:67
Definition: sqltypes.h:68
static RegisteredQueryHint defaults()
Definition: QueryHint.h:329
int32_t countRexLiteralArgs() const
Definition: RelAlgDag.cpp:697
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1904
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
Definition: RelAlgDag.cpp:1617
const ConstRexScalarPtrVector & getPartitionKeys() const
Definition: RelAlgDag.h:627
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2780
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:300
const std::unordered_map< unsigned, unsigned > mapping_
Definition: RelAlgDag.cpp:121
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1296
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
Definition: RelAlgDag.cpp:2730
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:543
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
Definition: RelAlgDag.h:2505
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
Definition: RelAlgDag.h:1896
std::unique_ptr< const RexScalar > filter_expr_
Definition: RelAlgDag.h:1894
static std::vector< std::shared_ptr< RelAlgNode > > & getNodes(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:2984
void setSourceNode(const RelAlgNode *node) const
Definition: RelAlgDag.h:394
bool hasWindowFunctionExpr() const
Definition: RelAlgDag.cpp:2766
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
Definition: RelAlgDag.cpp:2930
std::vector< ElementType >::const_iterator Super
Definition: RelAlgDag.cpp:1818
RelFilter()=default
std::vector< std::unique_ptr< const RexAgg > > copyAggExprs(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
Definition: RelAlgDag.cpp:616
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:979
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
Definition: RelAlgDag.cpp:1313
bool hint_applied_
Definition: RelAlgDag.h:1295
std::unordered_map< QueryHint, ExplainedQueryHint > Hints
Definition: QueryHint.h:355
bool is_agg_
Definition: RelAlgDag.h:1898
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
Definition: RelAlgDag.h:389
RelProject(const TableDescriptor *td)
Definition: RelAlgDag.h:1116
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:850
size_t toHash() const override
Definition: RelAlgDag.cpp:3372
std::string typeName(const T *v)
Definition: toString.h:103
ExplainedQueryHint parseHintString(std::string &hint_string)
Definition: RelAlgDag.cpp:3114
SqlWindowFunctionKind
Definition: sqldefs.h:120
void * visitOperator(const RexOperator *rex_operator) const final
Definition: RelAlgDag.cpp:2118
Definition: sqldefs.h:52
void eachNode(std::function< void(RelAlgNode const *)> const &) const
Definition: RelAlgDag.cpp:3343
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3368
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
Definition: RelAlgDag.cpp:2908
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, const Catalog_Namespace::Catalog &cat, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1198
const std::vector< std::string > & getFields() const
Definition: RelAlgDag.h:1200
std::unique_ptr< const RexScalar > visitRef(const RexRef *rex_ref) const override
Definition: RelAlgDag.cpp:269
std::string getFieldName(const size_t i) const
Definition: RelAlgDag.cpp:862
static void optimizeDag(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.cpp:3293
bool g_enable_watchdog false
Definition: Execute.cpp:79
void set_notnull(bool n)
Definition: sqltypes.h:497
#define CHECK(condition)
Definition: Logger.h:289
const ConstRexScalarPtrVector & getOrderKeys() const
Definition: RelAlgDag.h:637
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
Definition: RelAlgDag.cpp:1637
bool g_enable_union
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints) noexcept
Definition: RelAlgDag.cpp:1660
RelCompound(const TableDescriptor *td)
Definition: RelAlgDag.h:1779
bool g_cluster
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
Definition: RelAlgDag.cpp:95
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
Definition: RelAlgDag.cpp:3223
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:527
void getRelAlgHints(const rapidjson::Value &json_node, std::shared_ptr< RelAlgNode > node)
Definition: RelAlgDag.cpp:3172
virtual size_t toHash() const =0
SortDirection parse_sort_direction(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1173
RelAlgDispatcher(const Catalog_Namespace::Catalog &cat)
Definition: RelAlgDag.cpp:2778
Common Enum definitions for SQL processing.
bool is_dict_encoded_string() const
Definition: sqltypes.h:628
Definition: sqltypes.h:60
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3380
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
Definition: RelAlgDag.cpp:71
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::vector< std::unique_ptr< const RexScalar >> RowValues
Definition: RelAlgDag.h:2364
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1493
RetType visitCase(const RexCase *rex_case) const final
Definition: RelAlgDag.cpp:2266
const size_t inputCount() const
Definition: RelAlgDag.h:890
string name
Definition: setup.in.py:72
constexpr double n
Definition: Utm.h:38
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:2744
WindowFunctionCollector(std::unordered_map< size_t, const RexScalar * > &collected_window_func, bool only_add_window_expr)
Definition: RelAlgDag.cpp:2110
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:387
unsigned node_id(const rapidjson::Value &ra_node) noexcept
Definition: RelAlgDag.cpp:957
const TableDescriptor * getTableFromScanNode(const Catalog_Namespace::Catalog &cat, const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2749
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
size_t size() const override
Definition: RelAlgDag.cpp:846
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
Definition: RelAlgDag.cpp:752
std::unordered_map< size_t, size_t > & window_func_to_new_rex_input_idx_map_
Definition: RelAlgDag.cpp:2318
RelAlgInputs getRelAlgInputs(const rapidjson::Value &node)
Definition: RelAlgDag.cpp:3092
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2759
static std::unique_ptr< RelAlgDag > buildDagForSubquery(RelAlgDag &root_dag, const rapidjson::Value &query_ast, const Catalog_Namespace::Catalog &cat)
Definition: RelAlgDag.cpp:3258
RelTableFunction()=default
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
Definition: RelAlgDag.cpp:3043
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:140
size_t toHash() const override
Definition: RelAlgDag.cpp:3430
NullSortedPosition
Definition: RelAlgDag.h:532
RANodeOutput get_node_output(const RelAlgNode *ra_node)
Definition: RelAlgDag.cpp:370
virtual size_t toHash() const =0
#define VLOG(n)
Definition: Logger.h:383
BuildState getBuildState() const
Definition: RelAlgDag.h:2482
RelAlgInputs inputs_
Definition: RelAlgDag.h:952
void reset_table_function_inputs(std::vector< const Rex * > &column_inputs, const std::vector< std::unique_ptr< const RexScalar >> &old_table_func_inputs, const std::vector< std::unique_ptr< const RexScalar >> &new_table_func_inputs)
Definition: RelAlgDag.cpp:710
void set_precision(int d)
Definition: sqltypes.h:493
std::pair< std::string, std::string > getKVOptionPair(std::string &str, size_t &pos)
Definition: RelAlgDag.cpp:3104
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool isIdentity() const
Definition: RelAlgDag.cpp:439
bool same_ignoring_notnull(SQLTypeInfo ti0, SQLTypeInfo ti1)
Definition: RelAlgDag.cpp:890
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
Definition: RelAlgDag.h:2356
static void setBuildState(RelAlgDag &rel_alg_dag, const RelAlgDag::BuildState build_state)
Definition: RelAlgDag.h:2998
static void resetRelAlgFirstId() noexcept
Definition: RelAlgDag.cpp:46