OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgDag.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgDag.h"
19 #include "Catalog/Catalog.h"
21 #include "JsonAccessors.h"
22 #include "RelAlgOptimizer.h"
23 #include "RelLeftDeepInnerJoin.h"
24 #include "RexVisitor.h"
25 #include "Shared/sqldefs.h"
26 
27 #include <rapidjson/error/en.h>
28 #include <rapidjson/error/error.h>
29 #include <rapidjson/stringbuffer.h>
30 #include <rapidjson/writer.h>
31 
32 #include <string>
33 #include <unordered_set>
34 
35 extern bool g_cluster;
36 extern bool g_enable_union;
37 
38 namespace {
39 
40 const unsigned FIRST_RA_NODE_ID = 1;
41 
42 } // namespace
43 
44 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
45 
48 }
49 
51  const std::shared_ptr<const ExecutionResult> result) {
52  auto row_set = result->getRows();
53  CHECK(row_set);
54  CHECK_EQ(size_t(1), row_set->colCount());
55  *(type_.get()) = row_set->getColType(0);
56  (*(result_.get())) = result;
57 }
58 
59 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
60  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
61 }
62 
63 unsigned RexSubQuery::getId() const {
64  return ra_->getId();
65 }
66 
67 namespace {
68 
69 class RexRebindInputsVisitor : public RexVisitor<void*> {
70  public:
71  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
72  : old_input_(old_input), new_input_(new_input) {}
73 
74  virtual ~RexRebindInputsVisitor() = default;
75 
76  void* visitInput(const RexInput* rex_input) const override {
77  const auto old_source = rex_input->getSourceNode();
78  if (old_source == old_input_) {
79  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
80  if (left_deep_join) {
81  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
82  return nullptr;
83  }
84  rex_input->setSourceNode(new_input_);
85  }
86  return nullptr;
87  };
88 
89  private:
90  const RelAlgNode* old_input_;
92 };
93 
94 // Creates an output with n columns.
95 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
96  std::vector<RexInput> outputs;
97  outputs.reserve(n);
98  for (size_t i = 0; i < n; ++i) {
99  outputs.emplace_back(node, i);
100  }
101  return outputs;
102 }
103 
105  public:
107  const RelAlgNode* old_input,
108  const RelAlgNode* new_input,
109  std::unordered_map<unsigned, unsigned> old_to_new_index_map)
110  : RexRebindInputsVisitor(old_input, new_input), mapping_(old_to_new_index_map) {}
111 
112  void* visitInput(const RexInput* rex_input) const override {
113  RexRebindInputsVisitor::visitInput(rex_input);
114  auto mapping_itr = mapping_.find(rex_input->getIndex());
115  CHECK(mapping_itr != mapping_.end());
116  rex_input->setIndex(mapping_itr->second);
117  return nullptr;
118  }
119 
120  private:
121  const std::unordered_map<unsigned, unsigned> mapping_;
122 };
123 
125  : public RexVisitorBase<std::unique_ptr<const RexScalar>> {
126  public:
127  enum class WindowExprType { PARTITION_KEY, ORDER_KEY };
129  std::shared_ptr<RelProject> new_project,
130  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs_for_new_project,
131  std::vector<std::string>& fields_for_new_project,
132  std::unordered_map<size_t, size_t>& expr_offset_cache)
133  : new_project_(new_project)
134  , scalar_exprs_for_new_project_(scalar_exprs_for_new_project)
135  , fields_for_new_project_(fields_for_new_project)
136  , expr_offset_cache_(expr_offset_cache)
137  , found_case_expr_window_operand_(false)
138  , has_partition_expr_(false) {}
139 
140  size_t pushDownExpressionImpl(const RexScalar* expr) const {
141  auto hash = expr->toHash();
142  auto it = expr_offset_cache_.find(hash);
143  auto new_offset = -1;
144  if (it == expr_offset_cache_.end()) {
145  CHECK(
146  expr_offset_cache_.emplace(hash, scalar_exprs_for_new_project_.size()).second);
147  new_offset = scalar_exprs_for_new_project_.size();
148  fields_for_new_project_.emplace_back("");
149  scalar_exprs_for_new_project_.emplace_back(deep_copier_.visit(expr));
150  } else {
151  // we already pushed down the same expression, so reuse it
152  new_offset = it->second;
153  }
154  return new_offset;
155  }
156 
158  size_t expr_offset) const {
159  // given window expr offset and inner expr's offset,
160  // return a (push-downed) expression's offset from the new projection node
161  switch (type) {
162  case WindowExprType::PARTITION_KEY: {
163  auto it = pushed_down_partition_key_offset_.find(expr_offset);
164  CHECK(it != pushed_down_partition_key_offset_.end());
165  return it->second;
166  }
167  case WindowExprType::ORDER_KEY: {
168  auto it = pushed_down_order_key_offset_.find(expr_offset);
169  CHECK(it != pushed_down_order_key_offset_.end());
170  return it->second;
171  }
172  default:
173  UNREACHABLE();
174  return std::nullopt;
175  }
176  }
177 
179  const RexWindowFunctionOperator* window_expr) const {
180  // step 1. push "all" target expressions of the window_func_project_node down to the
181  // new child projection
182  // each window expr is a separate target expression of the projection node
183  // and they have their own inner expression related to partition / order clauses
184  // so we capture their offsets to correctly rebind their input
185  pushed_down_window_operands_offset_.clear();
186  pushed_down_partition_key_offset_.clear();
187  pushed_down_order_key_offset_.clear();
188  for (size_t offset = 0; offset < window_expr->size(); ++offset) {
189  auto expr = window_expr->getOperand(offset);
190  auto literal_expr = dynamic_cast<const RexLiteral*>(expr);
191  auto case_expr = dynamic_cast<const RexCase*>(expr);
192  if (case_expr) {
193  // when columnar output is enabled, pushdown case expr can incur an issue
194  // during columnarization, so we record this and try to force rowwise-output
195  // until we fix the issue
196  // todo (yoonmin) : relax this
197  found_case_expr_window_operand_ = true;
198  }
199  if (!literal_expr) {
200  auto new_offset = pushDownExpressionImpl(expr);
201  pushed_down_window_operands_offset_.emplace(offset, new_offset);
202  }
203  }
204  size_t offset = 0;
205  for (const auto& partition_key : window_expr->getPartitionKeys()) {
206  auto new_offset = pushDownExpressionImpl(partition_key.get());
207  pushed_down_partition_key_offset_.emplace(offset, new_offset);
208  ++offset;
209  }
210  has_partition_expr_ = !window_expr->getPartitionKeys().empty();
211  offset = 0;
212  for (const auto& order_key : window_expr->getOrderKeys()) {
213  auto new_offset = pushDownExpressionImpl(order_key.get());
214  pushed_down_order_key_offset_.emplace(offset, new_offset);
215  ++offset;
216  }
217 
218  // step 2. rebind projected targets of the window_func_project_node with the new
219  // project node
220  std::vector<std::unique_ptr<const RexScalar>> window_operands;
221  auto deconst_window_expr = const_cast<RexWindowFunctionOperator*>(window_expr);
222  for (size_t idx = 0; idx < window_expr->size(); ++idx) {
223  auto it = pushed_down_window_operands_offset_.find(idx);
224  if (it != pushed_down_window_operands_offset_.end()) {
225  auto new_input = std::make_unique<const RexInput>(new_project_.get(), it->second);
226  CHECK(new_input);
227  window_operands.emplace_back(std::move(new_input));
228  } else {
229  auto copied_expr = deep_copier_.visit(window_expr->getOperand(idx));
230  window_operands.emplace_back(std::move(copied_expr));
231  }
232  }
233  deconst_window_expr->replaceOperands(std::move(window_operands));
234 
235  for (size_t idx = 0; idx < window_expr->getPartitionKeys().size(); ++idx) {
236  auto new_offset = getOffsetForPushedDownExpr(WindowExprType::PARTITION_KEY, idx);
237  CHECK(new_offset);
238  auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
239  CHECK(new_input);
240  deconst_window_expr->replacePartitionKey(idx, std::move(new_input));
241  }
242 
243  for (size_t idx = 0; idx < window_expr->getOrderKeys().size(); ++idx) {
244  auto new_offset = getOffsetForPushedDownExpr(WindowExprType::ORDER_KEY, idx);
245  CHECK(new_offset);
246  auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
247  CHECK(new_input);
248  deconst_window_expr->replaceOrderKey(idx, std::move(new_input));
249  }
250  }
251 
252  std::unique_ptr<const RexScalar> visitInput(const RexInput* rex_input) const override {
253  auto new_offset = pushDownExpressionImpl(rex_input);
254  CHECK_LT(new_offset, scalar_exprs_for_new_project_.size());
255  auto hash = rex_input->toHash();
256  auto it = expr_offset_cache_.find(hash);
257  CHECK(it != expr_offset_cache_.end());
258  CHECK_EQ(new_offset, it->second);
259  auto new_input = std::make_unique<const RexInput>(new_project_.get(), new_offset);
260  CHECK(new_input);
261  return new_input;
262  }
263 
264  std::unique_ptr<const RexScalar> visitLiteral(
265  const RexLiteral* rex_literal) const override {
266  return deep_copier_.visit(rex_literal);
267  }
268 
269  std::unique_ptr<const RexScalar> visitRef(const RexRef* rex_ref) const override {
270  return deep_copier_.visit(rex_ref);
271  }
272 
273  std::unique_ptr<const RexScalar> visitSubQuery(
274  const RexSubQuery* rex_subquery) const override {
275  return deep_copier_.visit(rex_subquery);
276  }
277 
278  std::unique_ptr<const RexScalar> visitCase(const RexCase* rex_case) const override {
279  std::vector<
280  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
281  new_expr_pair_list;
282  std::unique_ptr<const RexScalar> new_else_expr;
283  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
284  const auto when = rex_case->getWhen(i);
285  auto new_when = PushDownGenericExpressionInWindowFunction::visit(when);
286  const auto then = rex_case->getThen(i);
287  auto new_then = PushDownGenericExpressionInWindowFunction::visit(then);
288  new_expr_pair_list.emplace_back(std::move(new_when), std::move(new_then));
289  }
290  if (rex_case->getElse()) {
291  new_else_expr = deep_copier_.visit(rex_case->getElse());
292  }
293  auto new_case = std::make_unique<const RexCase>(new_expr_pair_list, new_else_expr);
294  return new_case;
295  }
296 
297  std::unique_ptr<const RexScalar> visitOperator(
298  const RexOperator* rex_operator) const override {
299  const auto rex_window_func_operator =
300  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
301  if (rex_window_func_operator) {
302  pushDownExpressionInWindowFunction(rex_window_func_operator);
303  return deep_copier_.visit(rex_operator);
304  } else {
305  std::unique_ptr<const RexOperator> new_operator{nullptr};
306  std::vector<std::unique_ptr<const RexScalar>> new_operands;
307  for (size_t i = 0; i < rex_operator->size(); ++i) {
308  const auto operand = rex_operator->getOperand(i);
309  auto new_operand = PushDownGenericExpressionInWindowFunction::visit(operand);
310  new_operands.emplace_back(std::move(new_operand));
311  }
312  if (auto function_op = dynamic_cast<const RexFunctionOperator*>(rex_operator)) {
313  new_operator = std::make_unique<const RexFunctionOperator>(
314  function_op->getName(), new_operands, rex_operator->getType());
315  } else {
316  new_operator = std::make_unique<const RexOperator>(
317  rex_operator->getOperator(), new_operands, rex_operator->getType());
318  }
319  CHECK(new_operator);
320  return new_operator;
321  }
322  }
323 
324  bool hasCaseExprAsWindowOperand() { return found_case_expr_window_operand_; }
325 
326  bool hasPartitionExpression() { return has_partition_expr_; }
327 
328  private:
329  std::unique_ptr<const RexScalar> defaultResult() const override { return nullptr; }
330 
331  std::shared_ptr<RelProject> new_project_;
332  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs_for_new_project_;
333  std::vector<std::string>& fields_for_new_project_;
334  std::unordered_map<size_t, size_t>& expr_offset_cache_;
336  mutable bool has_partition_expr_;
337  mutable std::unordered_map<size_t, size_t> pushed_down_window_operands_offset_;
338  mutable std::unordered_map<size_t, size_t> pushed_down_partition_key_offset_;
339  mutable std::unordered_map<size_t, size_t> pushed_down_order_key_offset_;
341 };
342 
343 } // namespace
344 
346  std::shared_ptr<const RelAlgNode> old_input,
347  std::shared_ptr<const RelAlgNode> input,
348  std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map) {
349  RelAlgNode::replaceInput(old_input, input);
350  std::unique_ptr<RexRebindInputsVisitor> rebind_inputs;
351  if (old_to_new_index_map) {
352  rebind_inputs = std::make_unique<RexRebindReindexInputsVisitor>(
353  old_input.get(), input.get(), *old_to_new_index_map);
354  } else {
355  rebind_inputs =
356  std::make_unique<RexRebindInputsVisitor>(old_input.get(), input.get());
357  }
358  CHECK(rebind_inputs);
359  for (const auto& scalar_expr : scalar_exprs_) {
360  rebind_inputs->visit(scalar_expr.get());
361  }
362 }
363 
364 void RelProject::appendInput(std::string new_field_name,
365  std::unique_ptr<const RexScalar> new_input) {
366  fields_.emplace_back(std::move(new_field_name));
367  scalar_exprs_.emplace_back(std::move(new_input));
368 }
369 
371  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
372  if (scan_node) {
373  // Scan node has no inputs, output contains all columns in the table.
374  CHECK_EQ(size_t(0), scan_node->inputCount());
375  return n_outputs(scan_node, scan_node->size());
376  }
377  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
378  if (project_node) {
379  // Project output count doesn't depend on the input
380  CHECK_EQ(size_t(1), project_node->inputCount());
381  return n_outputs(project_node, project_node->size());
382  }
383  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
384  if (filter_node) {
385  // Filter preserves shape
386  CHECK_EQ(size_t(1), filter_node->inputCount());
387  const auto prev_out = get_node_output(filter_node->getInput(0));
388  return n_outputs(filter_node, prev_out.size());
389  }
390  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
391  if (aggregate_node) {
392  // Aggregate output count doesn't depend on the input
393  CHECK_EQ(size_t(1), aggregate_node->inputCount());
394  return n_outputs(aggregate_node, aggregate_node->size());
395  }
396  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
397  if (compound_node) {
398  // Compound output count doesn't depend on the input
399  CHECK_EQ(size_t(1), compound_node->inputCount());
400  return n_outputs(compound_node, compound_node->size());
401  }
402  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
403  if (join_node) {
404  // Join concatenates the outputs from the inputs and the output
405  // directly references the nodes in the input.
406  CHECK_EQ(size_t(2), join_node->inputCount());
407  auto lhs_out =
408  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
409  const auto rhs_out =
410  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
411  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
412  return lhs_out;
413  }
414  const auto table_func_node = dynamic_cast<const RelTableFunction*>(ra_node);
415  if (table_func_node) {
416  // Table Function output count doesn't depend on the input
417  return n_outputs(table_func_node, table_func_node->size());
418  }
419  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
420  if (sort_node) {
421  // Sort preserves shape
422  CHECK_EQ(size_t(1), sort_node->inputCount());
423  const auto prev_out = get_node_output(sort_node->getInput(0));
424  return n_outputs(sort_node, prev_out.size());
425  }
426  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
427  if (logical_values_node) {
428  CHECK_EQ(size_t(0), logical_values_node->inputCount());
429  return n_outputs(logical_values_node, logical_values_node->size());
430  }
431  const auto logical_union_node = dynamic_cast<const RelLogicalUnion*>(ra_node);
432  if (logical_union_node) {
433  return n_outputs(logical_union_node, logical_union_node->size());
434  }
435  LOG(FATAL) << "Unhandled ra_node type: " << ::toString(ra_node);
436  return {};
437 }
438 
440  if (!isSimple()) {
441  return false;
442  }
443  CHECK_EQ(size_t(1), inputCount());
444  const auto source = getInput(0);
445  if (dynamic_cast<const RelJoin*>(source)) {
446  return false;
447  }
448  const auto source_shape = get_node_output(source);
449  if (source_shape.size() != scalar_exprs_.size()) {
450  return false;
451  }
452  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
453  const auto& scalar_expr = scalar_exprs_[i];
454  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
455  CHECK(input);
456  CHECK_EQ(source, input->getSourceNode());
457  // We should add the additional check that input->getIndex() !=
458  // source_shape[i].getIndex(), but Calcite doesn't generate the right
459  // Sort-Project-Sort sequence when joins are involved.
460  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
461  return false;
462  }
463  }
464  return true;
465 }
466 
467 namespace {
468 
469 bool isRenamedInput(const RelAlgNode* node,
470  const size_t index,
471  const std::string& new_name) {
472  CHECK_LT(index, node->size());
473  if (auto join = dynamic_cast<const RelJoin*>(node)) {
474  CHECK_EQ(size_t(2), join->inputCount());
475  const auto lhs_size = join->getInput(0)->size();
476  if (index < lhs_size) {
477  return isRenamedInput(join->getInput(0), index, new_name);
478  }
479  CHECK_GE(index, lhs_size);
480  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
481  }
482 
483  if (auto scan = dynamic_cast<const RelScan*>(node)) {
484  return new_name != scan->getFieldName(index);
485  }
486 
487  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
488  return new_name != aggregate->getFieldName(index);
489  }
490 
491  if (auto project = dynamic_cast<const RelProject*>(node)) {
492  return new_name != project->getFieldName(index);
493  }
494 
495  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
496  return new_name != table_func->getFieldName(index);
497  }
498 
499  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
500  const auto& tuple_type = logical_values->getTupleType();
501  CHECK_LT(index, tuple_type.size());
502  return new_name != tuple_type[index].get_resname();
503  }
504 
505  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
506  dynamic_cast<const RelLogicalUnion*>(node));
507  return isRenamedInput(node->getInput(0), index, new_name);
508 }
509 
510 } // namespace
511 
513  if (!isSimple()) {
514  return false;
515  }
516  CHECK_EQ(scalar_exprs_.size(), fields_.size());
517  for (size_t i = 0; i < fields_.size(); ++i) {
518  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
519  CHECK(rex_in);
520  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
521  return true;
522  }
523  }
524  return false;
525 }
526 
527 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
528  std::shared_ptr<const RelAlgNode> input) {
529  RelAlgNode::replaceInput(old_input, input);
530  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
531  if (condition_) {
532  rebind_inputs.visit(condition_.get());
533  }
534 }
535 
536 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
537  std::shared_ptr<const RelAlgNode> input) {
538  RelAlgNode::replaceInput(old_input, input);
539  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
540  rebind_inputs.visit(filter_.get());
541 }
542 
543 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
544  std::shared_ptr<const RelAlgNode> input) {
545  RelAlgNode::replaceInput(old_input, input);
546  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
547  for (const auto& scalar_source : scalar_sources_) {
548  rebind_inputs.visit(scalar_source.get());
549  }
550  if (filter_expr_) {
551  rebind_inputs.visit(filter_expr_.get());
552  }
553 }
554 
556  : RelAlgNode(rhs)
558  , fields_(rhs.fields_)
559  , hint_applied_(false)
560  , hints_(std::make_unique<Hints>())
561  , has_pushed_down_window_expr_(rhs.has_pushed_down_window_expr_) {
562  RexDeepCopyVisitor copier;
563  for (auto const& expr : rhs.scalar_exprs_) {
564  scalar_exprs_.push_back(copier.visit(expr.get()));
565  }
566  if (rhs.hint_applied_) {
567  for (auto const& kv : *rhs.hints_) {
568  addHint(kv.second);
569  }
570  }
571 }
572 
574  : RelAlgNode(rhs)
575  , tuple_type_(rhs.tuple_type_)
576  , values_(RexDeepCopyVisitor::copy(rhs.values_)) {}
577 
579  RexDeepCopyVisitor copier;
580  filter_ = copier.visit(rhs.filter_.get());
581 }
582 
584  : RelAlgNode(rhs)
585  , groupby_count_(rhs.groupby_count_)
586  , fields_(rhs.fields_)
587  , hint_applied_(false)
588  , hints_(std::make_unique<Hints>()) {
589  agg_exprs_.reserve(rhs.agg_exprs_.size());
590  for (auto const& agg : rhs.agg_exprs_) {
591  agg_exprs_.push_back(agg->deepCopy());
592  }
593  if (rhs.hint_applied_) {
594  for (auto const& kv : *rhs.hints_) {
595  addHint(kv.second);
596  }
597  }
598 }
599 
601  : RelAlgNode(rhs)
602  , join_type_(rhs.join_type_)
603  , hint_applied_(false)
604  , hints_(std::make_unique<Hints>()) {
605  RexDeepCopyVisitor copier;
606  condition_ = copier.visit(rhs.condition_.get());
607  if (rhs.hint_applied_) {
608  for (auto const& kv : *rhs.hints_) {
609  addHint(kv.second);
610  }
611  }
612 }
613 
614 namespace {
615 
616 std::vector<std::unique_ptr<const RexAgg>> copyAggExprs(
617  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs) {
618  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
619  agg_exprs_copy.reserve(agg_exprs.size());
620  for (auto const& agg_expr : agg_exprs) {
621  agg_exprs_copy.push_back(agg_expr->deepCopy());
622  }
623  return agg_exprs_copy;
624 }
625 
626 std::vector<std::unique_ptr<const RexScalar>> copyRexScalars(
627  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources) {
628  std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
629  scalar_sources_copy.reserve(scalar_sources.size());
630  RexDeepCopyVisitor copier;
631  for (auto const& scalar_source : scalar_sources) {
632  scalar_sources_copy.push_back(copier.visit(scalar_source.get()));
633  }
634  return scalar_sources_copy;
635 }
636 
637 std::vector<const Rex*> remapTargetPointers(
638  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_new,
639  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_new,
640  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_old,
641  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_old,
642  std::vector<const Rex*> const& target_exprs_old) {
643  std::vector<const Rex*> target_exprs(target_exprs_old);
644  std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
645  for (size_t i = 0; i < agg_exprs_new.size(); ++i) {
646  old_to_new_target.emplace(agg_exprs_old[i].get(), agg_exprs_new[i].get());
647  }
648  for (size_t i = 0; i < scalar_sources_new.size(); ++i) {
649  old_to_new_target.emplace(scalar_sources_old[i].get(), scalar_sources_new[i].get());
650  }
651  for (auto& target : target_exprs) {
652  auto target_it = old_to_new_target.find(target);
653  CHECK(target_it != old_to_new_target.end());
654  target = target_it->second;
655  }
656  return target_exprs;
657 }
658 
659 } // namespace
660 
662  : RelAlgNode(rhs)
664  , groupby_count_(rhs.groupby_count_)
665  , agg_exprs_(copyAggExprs(rhs.agg_exprs_))
666  , fields_(rhs.fields_)
667  , is_agg_(rhs.is_agg_)
668  , scalar_sources_(copyRexScalars(rhs.scalar_sources_))
669  , target_exprs_(remapTargetPointers(agg_exprs_,
670  scalar_sources_,
671  rhs.agg_exprs_,
672  rhs.scalar_sources_,
673  rhs.target_exprs_))
674  , hint_applied_(false)
675  , hints_(std::make_unique<Hints>()) {
676  RexDeepCopyVisitor copier;
677  filter_expr_ = rhs.filter_expr_ ? copier.visit(rhs.filter_expr_.get()) : nullptr;
678  if (rhs.hint_applied_) {
679  for (auto const& kv : *rhs.hints_) {
680  addHint(kv.second);
681  }
682  }
683 }
684 
685 void RelTableFunction::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
686  std::shared_ptr<const RelAlgNode> input) {
687  RelAlgNode::replaceInput(old_input, input);
688  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
689  for (const auto& target_expr : target_exprs_) {
690  rebind_inputs.visit(target_expr.get());
691  }
692  for (const auto& func_input : table_func_inputs_) {
693  rebind_inputs.visit(func_input.get());
694  }
695 }
696 
698  int32_t literal_args = 0;
699  for (const auto& arg : table_func_inputs_) {
700  const auto rex_literal = dynamic_cast<const RexLiteral*>(arg.get());
701  if (rex_literal) {
702  literal_args += 1;
703  }
704  }
705  return literal_args;
706 }
707 
708 namespace {
709 
711  std::vector<const Rex*>& column_inputs,
712  const std::vector<std::unique_ptr<const RexScalar>>& old_table_func_inputs,
713  const std::vector<std::unique_ptr<const RexScalar>>& new_table_func_inputs) {
714  CHECK_EQ(old_table_func_inputs.size(), new_table_func_inputs.size());
715  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
716  for (size_t i = 0; i < old_table_func_inputs.size(); ++i) {
717  old_to_new_input.emplace(old_table_func_inputs[i].get(),
718  new_table_func_inputs[i].get());
719  }
720  for (auto& target : column_inputs) {
721  auto target_it = old_to_new_input.find(target);
722  CHECK(target_it != old_to_new_input.end());
723  target = target_it->second;
724  }
725 }
726 
727 } // namespace
728 
730  std::vector<std::unique_ptr<const RexScalar>>&& exprs) {
731  // this should only be called in the event of disambiguating inputs, which means the
732  // RexScalar types in the exprs argument should be the exact same as those previously.
733  // So we can then assume all col_inputs_ would be in the same place. So just re-adjust
734  // the pointers.
736  table_func_inputs_ = std::move(exprs);
737 }
738 
740  : RelAlgNode(rhs)
741  , function_name_(rhs.function_name_)
742  , fields_(rhs.fields_)
743  , col_inputs_(rhs.col_inputs_)
744  , table_func_inputs_(copyRexScalars(rhs.table_func_inputs_))
745  , target_exprs_(copyRexScalars(rhs.target_exprs_)) {
747 }
748 
749 namespace std {
750 template <>
751 struct hash<std::pair<const RelAlgNode*, int>> {
752  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
753  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
754  auto h = static_cast<size_t>(*ptr_val);
755  boost::hash_combine(h, input_col.second);
756  return h;
757  }
758 };
759 } // namespace std
760 
761 namespace {
762 
763 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
764  const size_t which_col) {
765  std::set<std::pair<const RelAlgNode*, int>> work_set;
766  auto walker = node;
767  auto curr_col = which_col;
768  while (true) {
769  work_set.insert(std::make_pair(walker, curr_col));
770  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
771  break;
772  }
773  CHECK_EQ(size_t(1), walker->inputCount());
774  auto only_source = walker->getInput(0);
775  if (auto project = dynamic_cast<const RelProject*>(walker)) {
776  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
777  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
778  if (join_source) {
779  CHECK_EQ(size_t(2), join_source->inputCount());
780  auto lhs = join_source->getInput(0);
781  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
782  join_source->getInput(1) == input->getSourceNode());
783  } else {
784  CHECK_EQ(input->getSourceNode(), only_source);
785  }
786  curr_col = input->getIndex();
787  } else {
788  break;
789  }
790  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
791  if (curr_col >= aggregate->getGroupByCount()) {
792  break;
793  }
794  }
795  walker = only_source;
796  }
797  return work_set;
798 }
799 
800 } // namespace
801 
802 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
803  if (collation_.size() != that.collation_.size()) {
804  return false;
805  }
806 
807  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
808  auto this_sort_key = collation_[i];
809  auto that_sort_key = that.collation_[i];
810  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
811  return false;
812  }
813  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
814  return false;
815  }
816  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
817  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
818  std::vector<std::pair<const RelAlgNode*, int>> intersect;
819  std::set_intersection(this_equiv_keys.begin(),
820  this_equiv_keys.end(),
821  that_equiv_keys.begin(),
822  that_equiv_keys.end(),
823  std::back_inserter(intersect));
824  if (intersect.empty()) {
825  return false;
826  }
827  }
828  return true;
829 }
830 
831 // class RelLogicalUnion methods
832 
834  : RelAlgNode(std::move(inputs)), is_all_(is_all) {
835  if (!g_enable_union) {
836  throw QueryNotSupported(
837  "The DEPRECATED enable-union option is set to off. Please remove this option as "
838  "it may be disabled in the future.");
839  }
840  CHECK_LE(2u, inputs_.size());
841  if (!is_all_) {
842  throw QueryNotSupported("UNION without ALL is not supported yet.");
843  }
844 }
845 
846 size_t RelLogicalUnion::size() const {
847  return inputs_.front()->size();
848 }
849 
851  return cat(::typeName(this), "(is_all(", is_all_, "))");
852 }
853 
854 size_t RelLogicalUnion::toHash() const {
855  if (!hash_) {
856  hash_ = typeid(RelLogicalUnion).hash_code();
857  boost::hash_combine(*hash_, is_all_);
858  }
859  return *hash_;
860 }
861 
862 std::string RelLogicalUnion::getFieldName(const size_t i) const {
863  if (auto const* input = dynamic_cast<RelCompound const*>(inputs_[0].get())) {
864  return input->getFieldName(i);
865  } else if (auto const* input = dynamic_cast<RelProject const*>(inputs_[0].get())) {
866  return input->getFieldName(i);
867  } else if (auto const* input = dynamic_cast<RelLogicalUnion const*>(inputs_[0].get())) {
868  return input->getFieldName(i);
869  } else if (auto const* input = dynamic_cast<RelAggregate const*>(inputs_[0].get())) {
870  return input->getFieldName(i);
871  } else if (auto const* input = dynamic_cast<RelScan const*>(inputs_[0].get())) {
872  return input->getFieldName(i);
873  } else if (auto const* input =
874  dynamic_cast<RelTableFunction const*>(inputs_[0].get())) {
875  return input->getFieldName(i);
876  }
877  UNREACHABLE() << "Unhandled input type: " << ::toString(inputs_.front());
878  return {};
879 }
880 
881 namespace {
882 std::vector<bool> get_notnulls(std::vector<TargetMetaInfo> const& tmis0) {
883  std::vector<bool> notnulls(tmis0.size());
884  for (size_t j = 0; j < tmis0.size(); ++j) {
885  notnulls[j] = tmis0[j].get_type_info().get_notnull();
886  }
887  return notnulls;
888 }
889 
891  ti0.set_notnull({}); // Actual value doesn't matter
892  ti1.set_notnull({}); // as long as they are the same.
893  return ti0 == ti1;
894 }
895 
896 void set_notnulls(std::vector<TargetMetaInfo>* tmis0, std::vector<bool> const& notnulls) {
897  for (size_t j = 0; j < tmis0->size(); ++j) {
898  SQLTypeInfo ti = (*tmis0)[j].get_type_info();
899  SQLTypeInfo physical_ti = (*tmis0)[j].get_physical_type_info();
900  ti.set_notnull(notnulls[j]);
901  physical_ti.set_notnull(notnulls[j]);
902  (*tmis0)[j] = TargetMetaInfo((*tmis0)[j].get_resname(), ti, physical_ti);
903  }
904 }
905 } // namespace
906 
907 // The returned std::vector<TargetMetaInfo> is identical to
908 // inputs_[0]->getOutputMetainfo() except for its SQLTypeInfo::notnull values, which is
909 // the logical AND over each input. In other words, the returned columns are notnull iff
910 // all corresponding input columns are notnull.
911 std::vector<TargetMetaInfo> RelLogicalUnion::getCompatibleMetainfoTypes() const {
912  std::vector<TargetMetaInfo> tmis0 = inputs_[0]->getOutputMetainfo();
913  std::vector<bool> notnulls = get_notnulls(tmis0);
914  for (size_t i = 1; i < inputs_.size(); ++i) {
915  std::vector<TargetMetaInfo> const& tmisi = inputs_[i]->getOutputMetainfo();
916  if (tmis0.size() != tmisi.size()) {
917  LOG(INFO) << "tmis0.size()=" << tmis0.size() << " != " << tmisi.size()
918  << "=tmisi.size() for i=" << i;
919  throw std::runtime_error("Subqueries of a UNION must have matching data types.");
920  }
921  for (size_t j = 0; j < tmis0.size(); ++j) {
922  SQLTypeInfo const& ti0 = tmis0[j].get_type_info();
923  SQLTypeInfo const& ti1 = tmisi[j].get_type_info();
924  // Allow types of different nullability to be UNIONed.
925  if (!same_ignoring_notnull(ti0, ti1)) {
926  LOG(INFO) << "Types do not match for UNION:\n tmis0[" << j
927  << "].get_type_info().to_string() = " << ti0.to_string() << "\n tmis"
928  << i << '[' << j
929  << "].get_type_info().to_string() = " << ti1.to_string();
930  // The only permitted difference is when both columns are dictionary-encoded.
931  if (!(ti0.is_dict_encoded_string() && ti1.is_dict_encoded_string())) {
932  throw std::runtime_error(
933  "Subqueries of a UNION must have the exact same data types.");
934  }
935  }
936  notnulls[j] = notnulls[j] && ti1.get_notnull();
937  }
938  }
939  set_notnulls(&tmis0, notnulls); // Set each SQLTypeInfo::notnull to compatible values.
940  return tmis0;
941 }
942 
943 // Rest of code requires a raw pointer, but RexInput object needs to live somewhere.
945  size_t input_idx) const {
946  if (auto const* rex_input_ptr = dynamic_cast<RexInput const*>(rex_scalar)) {
947  RexInput rex_input(*rex_input_ptr);
948  rex_input.setSourceNode(getInput(input_idx));
949  scalar_exprs_.emplace_back(std::make_shared<RexInput const>(std::move(rex_input)));
950  return scalar_exprs_.back().get();
951  }
952  return rex_scalar;
953 }
954 
955 namespace {
956 
957 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
958  const auto& id = field(ra_node, "id");
959  return std::stoi(json_str(id));
960 }
961 
962 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
963  rapidjson::StringBuffer buffer;
964  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
965  node.Accept(writer);
966  return buffer.GetString();
967 }
968 
969 // The parse_* functions below de-serialize expressions as they come from Calcite.
970 // RelAlgDagBuilder will take care of making the representation easy to
971 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
972 
973 std::unique_ptr<RexAbstractInput> parse_abstract_input(
974  const rapidjson::Value& expr) noexcept {
975  const auto& input = field(expr, "input");
976  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
977 }
978 
979 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
980  CHECK(expr.IsObject());
981  const auto& literal = field(expr, "literal");
982  const auto type = to_sql_type(json_str(field(expr, "type")));
983  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
984  const auto scale = json_i64(field(expr, "scale"));
985  const auto precision = json_i64(field(expr, "precision"));
986  const auto type_scale = json_i64(field(expr, "type_scale"));
987  const auto type_precision = json_i64(field(expr, "type_precision"));
988  if (literal.IsNull()) {
989  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
990  }
991  switch (type) {
992  case kINT:
993  case kBIGINT:
994  case kDECIMAL:
995  case kINTERVAL_DAY_TIME:
997  case kTIME:
998  case kTIMESTAMP:
999  case kDATE:
1000  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
1001  type,
1002  target_type,
1003  scale,
1004  precision,
1005  type_scale,
1006  type_precision));
1007  case kDOUBLE: {
1008  if (literal.IsDouble()) {
1009  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
1010  type,
1011  target_type,
1012  scale,
1013  precision,
1014  type_scale,
1015  type_precision));
1016  } else if (literal.IsInt64()) {
1017  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetInt64()),
1018  type,
1019  target_type,
1020  scale,
1021  precision,
1022  type_scale,
1023  type_precision);
1024 
1025  } else if (literal.IsUint64()) {
1026  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetUint64()),
1027  type,
1028  target_type,
1029  scale,
1030  precision,
1031  type_scale,
1032  type_precision);
1033  }
1034  UNREACHABLE() << "Unhandled type: " << literal.GetType();
1035  }
1036  case kTEXT:
1037  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
1038  type,
1039  target_type,
1040  scale,
1041  precision,
1042  type_scale,
1043  type_precision));
1044  case kBOOLEAN:
1045  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
1046  type,
1047  target_type,
1048  scale,
1049  precision,
1050  type_scale,
1051  type_precision));
1052  case kNULLT:
1053  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
1054  default:
1055  CHECK(false);
1056  }
1057  CHECK(false);
1058  return nullptr;
1059 }
1060 
1061 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1062  RelAlgDag& root_dag);
1063 
1064 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
1065  if (type_obj.IsArray()) {
1066  throw QueryNotSupported("Composite types are not currently supported.");
1067  }
1068  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
1069  << json_node_to_string(type_obj);
1070  const auto type = to_sql_type(json_str(field(type_obj, "type")));
1071  const auto nullable = json_bool(field(type_obj, "nullable"));
1072  const auto precision_it = type_obj.FindMember("precision");
1073  const int precision =
1074  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
1075  const auto scale_it = type_obj.FindMember("scale");
1076  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
1077  SQLTypeInfo ti(type, !nullable);
1078  ti.set_precision(precision);
1079  ti.set_scale(scale);
1080  return ti;
1081 }
1082 
1083 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
1084  const rapidjson::Value& arr,
1085  RelAlgDag& root_dag) {
1086  std::vector<std::unique_ptr<const RexScalar>> exprs;
1087  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1088  exprs.emplace_back(parse_scalar_expr(*it, root_dag));
1089  }
1090  return exprs;
1091 }
1092 
1094  if (name == "ROW_NUMBER") {
1096  }
1097  if (name == "RANK") {
1099  }
1100  if (name == "DENSE_RANK") {
1102  }
1103  if (name == "PERCENT_RANK") {
1105  }
1106  if (name == "CUME_DIST") {
1108  }
1109  if (name == "NTILE") {
1111  }
1112  if (name == "LAG") {
1114  }
1115  if (name == "LAG_IN_FRAME") {
1117  }
1118  if (name == "LEAD") {
1120  }
1121  if (name == "LEAD_IN_FRAME") {
1123  }
1124  if (name == "FIRST_VALUE") {
1126  }
1127  if (name == "LAST_VALUE") {
1129  }
1130  if (name == "NTH_VALUE") {
1132  }
1133  if (name == "NTH_VALUE_IN_FRAME") {
1135  }
1136  if (name == "AVG") {
1138  }
1139  if (name == "MIN") {
1141  }
1142  if (name == "MAX") {
1144  }
1145  if (name == "SUM") {
1147  }
1148  if (name == "COUNT") {
1150  }
1151  if (name == "COUNT_IF") {
1153  }
1154  if (name == "SUM_IF") {
1156  }
1157  if (name == "$SUM0") {
1159  }
1160  throw std::runtime_error("Unsupported window function: " + name);
1161 }
1162 
1163 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
1164  const rapidjson::Value& arr,
1165  RelAlgDag& root_dag) {
1166  std::vector<std::unique_ptr<const RexScalar>> exprs;
1167  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1168  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), root_dag));
1169  }
1170  return exprs;
1171 }
1172 
1173 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
1174  return json_str(field(collation, "direction")) == std::string("DESCENDING")
1177 }
1178 
1179 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
1180  return json_str(field(collation, "nulls")) == std::string("FIRST")
1183 }
1184 
1185 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
1186  RelAlgDag& root_dag) {
1187  std::vector<SortField> collation;
1188  size_t field_idx = 0;
1189  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
1190  const auto sort_dir = parse_sort_direction(*it);
1191  const auto null_pos = parse_nulls_position(*it);
1192  collation.emplace_back(field_idx, sort_dir, null_pos);
1193  }
1194  return collation;
1195 }
1196 
1198  const rapidjson::Value& window_bound_obj,
1199  RelAlgDag& root_dag) {
1200  CHECK(window_bound_obj.IsObject());
1202  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
1203  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
1204  window_bound.following = json_bool(field(window_bound_obj, "following"));
1205  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
1206  const auto& offset_field = field(window_bound_obj, "offset");
1207  if (offset_field.IsObject()) {
1208  window_bound.bound_expr = parse_scalar_expr(offset_field, root_dag);
1209  } else {
1210  CHECK(offset_field.IsNull());
1211  }
1212  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
1213  return window_bound;
1214 }
1215 
1216 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
1217  RelAlgDag& root_dag) {
1218  const auto& operands = field(expr, "operands");
1219  CHECK(operands.IsArray());
1220  CHECK_GE(operands.Size(), unsigned(0));
1221  const auto& subquery_ast = field(expr, "subquery");
1222 
1223  auto subquery_dag = RelAlgDagBuilder::buildDagForSubquery(root_dag, subquery_ast);
1224  const auto subquery_root_node = subquery_dag->getRootNodeShPtr();
1225  auto subquery = std::make_shared<RexSubQuery>(subquery_root_node);
1226  auto query_hint = subquery_dag->getQueryHint(subquery_dag->getRootNodeShPtr().get());
1227  root_dag.registerSubquery(subquery);
1228  const auto subquery_global_hint = subquery_dag->getGlobalHints();
1229  if (subquery_global_hint.isAnyQueryHintDelivered()) {
1230  // we need to propagate global query hint found in this subquery to its parent
1231  const auto new_global_hint = root_dag.getGlobalHints() || subquery_global_hint;
1232  root_dag.setGlobalQueryHints(new_global_hint);
1233  }
1234  const auto subquery_local_hint = subquery_dag->getQueryHint(subquery_root_node.get());
1235  if (subquery_local_hint) {
1236  // register local query hint of this subquery to its parent to correctly
1237  // enables them when executing this subquery
1238  root_dag.registerQueryHint(subquery_root_node.get(), *subquery_local_hint);
1239  }
1240  return subquery->deepCopy();
1241 }
1242 
1243 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
1244  RelAlgDag& root_dag) {
1245  const auto op_name = json_str(field(expr, "op"));
1246  const bool is_quantifier =
1247  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
1248  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
1249  const auto& operators_json_arr = field(expr, "operands");
1250  CHECK(operators_json_arr.IsArray());
1251  auto operands = parse_expr_array(operators_json_arr, root_dag);
1252  const auto type_it = expr.FindMember("type");
1253  CHECK(type_it != expr.MemberEnd());
1254  auto ti = parse_type(type_it->value);
1255  if (op == kIN && expr.HasMember("subquery")) {
1256  auto subquery = parse_subquery(expr, root_dag);
1257  operands.emplace_back(std::move(subquery));
1258  }
1259  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
1260  const auto& partition_keys_arr = field(expr, "partition_keys");
1261  auto partition_keys = parse_expr_array(partition_keys_arr, root_dag);
1262  const auto& order_keys_arr = field(expr, "order_keys");
1263  auto order_keys = parse_window_order_exprs(order_keys_arr, root_dag);
1264  const auto collation = parse_window_order_collation(order_keys_arr, root_dag);
1265  const auto kind = parse_window_function_kind(op_name);
1266  const auto lower_bound = parse_window_bound(field(expr, "lower_bound"), root_dag);
1267  const auto upper_bound = parse_window_bound(field(expr, "upper_bound"), root_dag);
1268  bool is_rows = json_bool(field(expr, "is_rows"));
1269  ti.set_notnull(false);
1270  return std::make_unique<RexWindowFunctionOperator>(kind,
1271  operands,
1272  partition_keys,
1273  order_keys,
1274  collation,
1275  lower_bound,
1276  upper_bound,
1277  is_rows,
1278  ti);
1279  }
1280  return std::unique_ptr<RexOperator>(op == kFUNCTION
1281  ? new RexFunctionOperator(op_name, operands, ti)
1282  : new RexOperator(op, operands, ti));
1283 }
1284 
1285 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr, RelAlgDag& root_dag) {
1286  const auto& operands = field(expr, "operands");
1287  CHECK(operands.IsArray());
1288  CHECK_GE(operands.Size(), unsigned(2));
1289  std::unique_ptr<const RexScalar> else_expr;
1290  std::vector<
1291  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1292  expr_pair_list;
1293  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
1294  auto when_expr = parse_scalar_expr(*operands_it++, root_dag);
1295  if (operands_it == operands.End()) {
1296  else_expr = std::move(when_expr);
1297  break;
1298  }
1299  auto then_expr = parse_scalar_expr(*operands_it++, root_dag);
1300  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
1301  }
1302  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
1303 }
1304 
1305 std::vector<std::string> strings_from_json_array(
1306  const rapidjson::Value& json_str_arr) noexcept {
1307  CHECK(json_str_arr.IsArray());
1308  std::vector<std::string> fields;
1309  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
1310  ++json_str_arr_it) {
1311  CHECK(json_str_arr_it->IsString());
1312  fields.emplace_back(json_str_arr_it->GetString());
1313  }
1314  return fields;
1315 }
1316 
1317 std::vector<size_t> indices_from_json_array(
1318  const rapidjson::Value& json_idx_arr) noexcept {
1319  CHECK(json_idx_arr.IsArray());
1320  std::vector<size_t> indices;
1321  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
1322  ++json_idx_arr_it) {
1323  CHECK(json_idx_arr_it->IsInt());
1324  CHECK_GE(json_idx_arr_it->GetInt(), 0);
1325  indices.emplace_back(json_idx_arr_it->GetInt());
1326  }
1327  return indices;
1328 }
1329 
1330 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
1331  const auto agg_str = json_str(field(expr, "agg"));
1332  if (agg_str == "APPROX_QUANTILE") {
1333  LOG(INFO) << "APPROX_QUANTILE is deprecated. Please use APPROX_PERCENTILE instead.";
1334  }
1335  const auto agg = to_agg_kind(agg_str);
1336  const auto distinct = json_bool(field(expr, "distinct"));
1337  const auto agg_ti = parse_type(field(expr, "type"));
1338  const auto operands = indices_from_json_array(field(expr, "operands"));
1339  bool const allow_multiple_args =
1340  shared::is_any<kAPPROX_COUNT_DISTINCT, kAPPROX_QUANTILE, kSUM_IF>(agg);
1341  if (operands.size() > 1 && (operands.size() != 2 || !allow_multiple_args)) {
1342  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
1343  }
1344  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
1345 }
1346 
1347 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1348  RelAlgDag& root_dag) {
1349  CHECK(expr.IsObject());
1350  if (expr.IsObject() && expr.HasMember("input")) {
1351  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
1352  }
1353  if (expr.IsObject() && expr.HasMember("literal")) {
1354  return std::unique_ptr<const RexScalar>(parse_literal(expr));
1355  }
1356  if (expr.IsObject() && expr.HasMember("op")) {
1357  const auto op_str = json_str(field(expr, "op"));
1358  if (op_str == std::string("CASE")) {
1359  return std::unique_ptr<const RexScalar>(parse_case(expr, root_dag));
1360  }
1361  if (op_str == std::string("$SCALAR_QUERY")) {
1362  return std::unique_ptr<const RexScalar>(parse_subquery(expr, root_dag));
1363  }
1364  return std::unique_ptr<const RexScalar>(parse_operator(expr, root_dag));
1365  }
1366  throw QueryNotSupported("Expression node " + json_node_to_string(expr) +
1367  " not supported");
1368 }
1369 
1370 JoinType to_join_type(const std::string& join_type_name) {
1371  if (join_type_name == "inner") {
1372  return JoinType::INNER;
1373  }
1374  if (join_type_name == "left") {
1375  return JoinType::LEFT;
1376  }
1377  if (join_type_name == "semi") {
1378  return JoinType::SEMI;
1379  }
1380  if (join_type_name == "anti") {
1381  return JoinType::ANTI;
1382  }
1383  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
1384 }
1385 
1386 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
1387 
1388 std::unique_ptr<const RexOperator> disambiguate_operator(
1389  const RexOperator* rex_operator,
1390  const RANodeOutput& ra_output) noexcept {
1391  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1392  for (size_t i = 0; i < rex_operator->size(); ++i) {
1393  auto operand = rex_operator->getOperand(i);
1394  if (dynamic_cast<const RexSubQuery*>(operand)) {
1395  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1396  } else {
1397  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
1398  }
1399  }
1400  const auto rex_window_function_operator =
1401  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1402  if (rex_window_function_operator) {
1403  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
1404  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1405  for (const auto& partition_key : partition_keys) {
1406  disambiguated_partition_keys.emplace_back(
1407  disambiguate_rex(partition_key.get(), ra_output));
1408  }
1409  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1410  const auto& order_keys = rex_window_function_operator->getOrderKeys();
1411  for (const auto& order_key : order_keys) {
1412  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
1413  }
1414  return rex_window_function_operator->disambiguatedOperands(
1415  disambiguated_operands,
1416  disambiguated_partition_keys,
1417  disambiguated_order_keys,
1418  rex_window_function_operator->getCollation());
1419  }
1420  return rex_operator->getDisambiguated(disambiguated_operands);
1421 }
1422 
1423 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
1424  const RANodeOutput& ra_output) {
1425  std::vector<
1426  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1427  disambiguated_expr_pair_list;
1428  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1429  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
1430  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
1431  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1432  std::move(disambiguated_then));
1433  }
1434  std::unique_ptr<const RexScalar> disambiguated_else{
1435  disambiguate_rex(rex_case->getElse(), ra_output)};
1436  return std::unique_ptr<const RexCase>(
1437  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1438 }
1439 
1440 // The inputs used by scalar expressions are given as indices in the serialized
1441 // representation of the query. This is hard to navigate; make the relationship
1442 // explicit by creating RexInput expressions which hold a pointer to the source
1443 // relational algebra node and the index relative to the output of that node.
1444 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
1445  const RANodeOutput& ra_output) {
1446  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
1447  if (rex_abstract_input) {
1448  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1449  return std::unique_ptr<const RexInput>(
1450  new RexInput(ra_output[rex_abstract_input->getIndex()]));
1451  }
1452  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
1453  if (rex_operator) {
1454  return disambiguate_operator(rex_operator, ra_output);
1455  }
1456  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
1457  if (rex_case) {
1458  return disambiguate_case(rex_case, ra_output);
1459  }
1460  if (auto const rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar)) {
1461  return rex_literal->deepCopy();
1462  } else if (auto const rex_subquery = dynamic_cast<const RexSubQuery*>(rex_scalar)) {
1463  return rex_subquery->deepCopy();
1464  } else {
1465  throw QueryNotSupported("Unable to disambiguate expression of type " +
1466  std::string(typeid(*rex_scalar).name()));
1467  }
1468 }
1469 
1470 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
1471  CHECK_EQ(size_t(1), project_node->inputCount());
1472  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1473  for (size_t i = 0; i < project_node->size(); ++i) {
1474  const auto projected_expr = project_node->getProjectAt(i);
1475  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1476  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1477  } else {
1478  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
1479  }
1480  }
1481  project_node->setExpressions(disambiguated_exprs);
1482 }
1483 
1485  const RANodeOutput& input) noexcept {
1486  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1487  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1488  const auto target_expr = table_func_node->getTableFuncInputAt(i);
1489  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1490  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1491  } else {
1492  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
1493  }
1494  }
1495  table_func_node->setTableFuncInputs(std::move(disambiguated_exprs));
1496 }
1497 
1498 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1499  for (auto ra_node : nodes) {
1500  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
1501  if (filter_node) {
1502  CHECK_EQ(size_t(1), filter_node->inputCount());
1503  auto disambiguated_condition = disambiguate_rex(
1504  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
1505  filter_node->setCondition(disambiguated_condition);
1506  continue;
1507  }
1508  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
1509  if (join_node) {
1510  CHECK_EQ(size_t(2), join_node->inputCount());
1511  auto disambiguated_condition =
1512  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
1513  join_node->setCondition(disambiguated_condition);
1514  continue;
1515  }
1516  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
1517  if (project_node) {
1518  bind_project_to_input(project_node.get(),
1519  get_node_output(project_node->getInput(0)));
1520  continue;
1521  }
1522  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1523  if (table_func_node) {
1524  /*
1525  Collect all inputs from table function input (non-literal)
1526  arguments.
1527  */
1528  RANodeOutput input;
1529  input.reserve(table_func_node->inputCount());
1530  for (size_t i = 0; i < table_func_node->inputCount(); i++) {
1531  auto node_output = get_node_output(table_func_node->getInput(i));
1532  input.insert(input.end(), node_output.begin(), node_output.end());
1533  }
1534  bind_table_func_to_input(table_func_node.get(), input);
1535  }
1536  }
1537 }
1538 
1539 void handle_query_hint(const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1540  RelAlgDag& rel_alg_dag) noexcept {
1541  // query hint is delivered by the above three nodes
1542  // when a query block has top-sort node, a hint is registered to
1543  // one of the node which locates at the nearest from the sort node
1544  RegisteredQueryHint global_query_hint;
1545  for (auto node : nodes) {
1546  Hints* hint_delivered = nullptr;
1547  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1548  if (agg_node) {
1549  if (agg_node->hasDeliveredHint()) {
1550  hint_delivered = agg_node->getDeliveredHints();
1551  }
1552  }
1553  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
1554  if (project_node) {
1555  if (project_node->hasDeliveredHint()) {
1556  hint_delivered = project_node->getDeliveredHints();
1557  }
1558  }
1559  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
1560  if (compound_node) {
1561  if (compound_node->hasDeliveredHint()) {
1562  hint_delivered = compound_node->getDeliveredHints();
1563  }
1564  }
1565  if (hint_delivered && !hint_delivered->empty()) {
1566  rel_alg_dag.registerQueryHints(node, hint_delivered, global_query_hint);
1567  }
1568  }
1569  // the current rel_alg_dag may contain global query hints from the subquery
1570  // so we combine the current global hint we collected with the original one together
1571  // to propagate global query hints correctly
1572  const auto existing_global_query_hints = rel_alg_dag.getGlobalHints();
1573  const auto new_global_query_hints = existing_global_query_hints || global_query_hint;
1574  rel_alg_dag.setGlobalQueryHints(new_global_query_hints);
1575 }
1576 
1577 void compute_node_hash(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
1578  // compute each rel node's hash value in advance to avoid inconsistency of their hash
1579  // values depending on the toHash's caller
1580  // specifically, we manipulate our logical query plan before retrieving query step
1581  // sequence but once we compute a hash value we cached it so there is no way to update
1582  // it after the plan has been changed starting from the top node, we compute the hash
1583  // value (top-down manner)
1584  std::for_each(
1585  nodes.rbegin(), nodes.rend(), [](const std::shared_ptr<RelAlgNode>& node) {
1586  auto node_hash = node->toHash();
1587  CHECK_NE(node_hash, static_cast<size_t>(0));
1588  });
1589 }
1590 
1591 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1592  for (auto node : nodes) {
1593  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1594  if (!agg_node || agg_node->getAggExprsCount()) {
1595  continue;
1596  }
1597  CHECK_EQ(size_t(1), node->inputCount());
1598  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1599  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1600  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1601  agg_node->markAsNop();
1602  }
1603  }
1604 }
1605 
1606 namespace {
1607 
1608 std::vector<const Rex*> reproject_targets(
1609  const RelProject* simple_project,
1610  const std::vector<const Rex*>& target_exprs) noexcept {
1611  std::vector<const Rex*> result;
1612  for (size_t i = 0; i < simple_project->size(); ++i) {
1613  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
1614  CHECK(input_rex);
1615  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1616  result.push_back(target_exprs[input_rex->getIndex()]);
1617  }
1618  return result;
1619 }
1620 
1627  public:
1629  const RelAlgNode* node_to_keep,
1630  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1631  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
1632 
1633  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
1634  RetType visitInput(const RexInput* input) const final {
1635  if (input->getSourceNode() == node_to_keep_) {
1636  const auto index = input->getIndex();
1637  CHECK_LT(index, scalar_sources_.size());
1638  return visit(scalar_sources_[index].get());
1639  } else {
1640  return input->deepCopy();
1641  }
1642  }
1643 
1644  private:
1646  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
1647 };
1648 
1649 } // namespace
1650 
1652  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1653  const std::vector<size_t>& pattern,
1654  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
1655  query_hints) noexcept {
1656  CHECK_GE(pattern.size(), size_t(2));
1657  CHECK_LE(pattern.size(), size_t(4));
1658 
1659  std::unique_ptr<const RexScalar> filter_rex;
1660  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1661  size_t groupby_count{0};
1662  std::vector<std::string> fields;
1663  std::vector<const RexAgg*> agg_exprs;
1664  std::vector<const Rex*> target_exprs;
1665  bool first_project{true};
1666  bool is_agg{false};
1667  RelAlgNode* last_node{nullptr};
1668 
1669  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1670  size_t node_hash{0};
1671  unsigned node_id{0};
1672  bool hint_registered{false};
1673  RegisteredQueryHint registered_query_hint = RegisteredQueryHint::defaults();
1674  for (const auto node_idx : pattern) {
1675  const auto ra_node = nodes[node_idx];
1676  auto registered_query_hint_map_it = query_hints.find(ra_node->toHash());
1677  if (registered_query_hint_map_it != query_hints.end()) {
1678  auto& registered_query_hint_map = registered_query_hint_map_it->second;
1679  auto registered_query_hint_it = registered_query_hint_map.find(ra_node->getId());
1680  if (registered_query_hint_it != registered_query_hint_map.end()) {
1681  hint_registered = true;
1682  node_hash = registered_query_hint_map_it->first;
1683  node_id = registered_query_hint_it->first;
1684  registered_query_hint = registered_query_hint_it->second;
1685  }
1686  }
1687  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1688  if (ra_filter) {
1689  CHECK(!filter_rex);
1690  filter_rex.reset(ra_filter->getAndReleaseCondition());
1691  CHECK(filter_rex);
1692  last_node = ra_node.get();
1693  continue;
1694  }
1695  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1696  if (ra_project) {
1697  fields = ra_project->getFields();
1698  manipulation_target = ra_project;
1699 
1700  if (first_project) {
1701  CHECK_EQ(size_t(1), ra_project->inputCount());
1702  // Rebind the input of the project to the input of the filter itself
1703  // since we know that we'll evaluate the filter on the fly, with no
1704  // intermediate buffer.
1705  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1706  if (filter_input) {
1707  CHECK_EQ(size_t(1), filter_input->inputCount());
1708  bind_project_to_input(ra_project.get(),
1709  get_node_output(filter_input->getInput(0)));
1710  }
1711  scalar_sources = ra_project->getExpressionsAndRelease();
1712  for (const auto& scalar_expr : scalar_sources) {
1713  target_exprs.push_back(scalar_expr.get());
1714  }
1715  first_project = false;
1716  } else {
1717  if (ra_project->isSimple()) {
1718  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1719  } else {
1720  // TODO(adb): This is essentially a more general case of simple project, we
1721  // could likely merge the two
1722  std::vector<const Rex*> result;
1723  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1724  for (size_t i = 0; i < ra_project->size(); ++i) {
1725  const auto rex = ra_project->getProjectAt(i);
1726  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1727  const auto index = rex_input->getIndex();
1728  CHECK_LT(index, target_exprs.size());
1729  result.push_back(target_exprs[index]);
1730  } else {
1731  scalar_sources.push_back(visitor.visit(rex));
1732  result.push_back(scalar_sources.back().get());
1733  }
1734  }
1735  target_exprs = result;
1736  }
1737  }
1738  last_node = ra_node.get();
1739  continue;
1740  }
1741  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1742  if (ra_aggregate) {
1743  is_agg = true;
1744  fields = ra_aggregate->getFields();
1745  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1746  groupby_count = ra_aggregate->getGroupByCount();
1747  decltype(target_exprs){}.swap(target_exprs);
1748  CHECK_LE(groupby_count, scalar_sources.size());
1749  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1750  const auto rex_ref = new RexRef(group_idx + 1);
1751  target_exprs.push_back(rex_ref);
1752  scalar_sources.emplace_back(rex_ref);
1753  }
1754  for (const auto rex_agg : agg_exprs) {
1755  target_exprs.push_back(rex_agg);
1756  }
1757  last_node = ra_node.get();
1758  continue;
1759  }
1760  }
1761 
1762  auto compound_node =
1763  std::make_shared<RelCompound>(filter_rex,
1764  target_exprs,
1765  groupby_count,
1766  agg_exprs,
1767  fields,
1768  scalar_sources,
1769  is_agg,
1770  manipulation_target->isUpdateViaSelect(),
1771  manipulation_target->isDeleteViaSelect(),
1772  manipulation_target->isVarlenUpdateRequired(),
1773  manipulation_target->getModifiedTableDescriptor(),
1774  manipulation_target->getTargetColumns(),
1775  manipulation_target->getModifiedTableCatalog());
1776  auto old_node = nodes[pattern.back()];
1777  nodes[pattern.back()] = compound_node;
1778  auto first_node = nodes[pattern.front()];
1779  CHECK_EQ(size_t(1), first_node->inputCount());
1780  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1781  if (hint_registered) {
1782  // pass the registered hint from the origin node to newly created compound node
1783  // where it is coalesced
1784  auto registered_query_hint_map_it = query_hints.find(node_hash);
1785  CHECK(registered_query_hint_map_it != query_hints.end());
1786  auto registered_query_hint_map = registered_query_hint_map_it->second;
1787  if (registered_query_hint_map.size() > 1) {
1788  registered_query_hint_map.erase(node_id);
1789  } else {
1790  CHECK_EQ(registered_query_hint_map.size(), static_cast<size_t>(1));
1791  query_hints.erase(node_hash);
1792  }
1793  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
1794  hint_map.emplace(compound_node->getId(), registered_query_hint);
1795  query_hints.emplace(compound_node->toHash(), hint_map);
1796  }
1797  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1798  nodes[pattern[i]].reset();
1799  }
1800  for (auto node : nodes) {
1801  if (!node) {
1802  continue;
1803  }
1804  node->replaceInput(old_node, compound_node);
1805  }
1806 }
1807 
1808 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1809  using ElementType = std::shared_ptr<RelAlgNode>;
1810  using Super = std::vector<ElementType>::const_iterator;
1811  using Container = std::vector<ElementType>;
1812 
1813  public:
1814  enum class AdvancingMode { DUChain, InOrder };
1815 
1816  explicit RANodeIterator(const Container& nodes)
1817  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1818  size_t non_zero_count = 0;
1819  for (const auto& node : nodes) {
1820  if (node) {
1821  ++non_zero_count;
1822  }
1823  }
1825  }()) {}
1826 
1827  explicit operator size_t() {
1828  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1829  }
1830 
1831  RANodeIterator operator++() = delete;
1832 
1833  void advance(AdvancingMode mode) {
1834  Super& super = *this;
1835  switch (mode) {
1836  case AdvancingMode::DUChain: {
1837  size_t use_count = 0;
1838  Super only_use = owner_.end();
1839  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1840  if (!*nodeIt) {
1841  continue;
1842  }
1843  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1844  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1845  ++use_count;
1846  if (1 == use_count) {
1847  only_use = nodeIt;
1848  } else {
1849  super = owner_.end();
1850  return;
1851  }
1852  }
1853  }
1854  }
1855  super = only_use;
1856  break;
1857  }
1858  case AdvancingMode::InOrder:
1859  for (size_t i = 0; i != owner_.size(); ++i) {
1860  if (!visited_.count(i)) {
1861  super = owner_.begin();
1862  std::advance(super, i);
1863  return;
1864  }
1865  }
1866  super = owner_.end();
1867  break;
1868  default:
1869  CHECK(false);
1870  }
1871  }
1872 
1873  bool allVisited() { return visited_.size() == nodeCount_; }
1874 
1876  visited_.insert(size_t(*this));
1877  Super& super = *this;
1878  return *super;
1879  }
1880 
1881  const ElementType* operator->() { return &(operator*()); }
1882 
1883  private:
1885  const size_t nodeCount_;
1886  std::unordered_set<size_t> visited_;
1887 };
1888 
1889 namespace {
1890 
1891 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1892  const size_t index,
1893  const bool first_rex_is_input) {
1894  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1895  if (index == 0 && agg_node->getGroupByCount() > 0) {
1896  return true;
1897  } else {
1898  // Is an aggregated target, only allow the project to be elided if the aggregate
1899  // target is simply passed through (i.e. if the top level expression attached to
1900  // the project node is a RexInput expression)
1901  return first_rex_is_input;
1902  }
1903  }
1904  return first_rex_is_input;
1905 }
1906 
1912 class CoalesceSecondaryProjectVisitor : public RexVisitor<bool> {
1913  public:
1914  bool visitInput(const RexInput* input) const final {
1915  // The top level expression node is checked before we apply the visitor. If we get
1916  // here, this input rex is a child of another rex node, and we handle the can be
1917  // coalesced check slightly differently
1918  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1919  }
1920 
1921  bool visitLiteral(const RexLiteral*) const final { return false; }
1922 
1923  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1924 
1925  bool visitRef(const RexRef*) const final { return false; }
1926 
1927  protected:
1928  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1929  return aggregate && next_result;
1930  }
1931 
1932  bool defaultResult() const final { return true; }
1933 };
1934 
1935 // Detect the window function SUM pattern: CASE WHEN COUNT() > 0 THEN SUM ELSE 0
1937  const auto case_operator = dynamic_cast<const RexCase*>(rex);
1938  if (case_operator && case_operator->branchCount() == 1) {
1939  const auto then_window =
1940  dynamic_cast<const RexWindowFunctionOperator*>(case_operator->getThen(0));
1941  if (then_window && then_window->getKind() == SqlWindowFunctionKind::SUM_INTERNAL) {
1942  return true;
1943  }
1944  }
1945  return false;
1946 }
1947 
1948 // Check for Window Function AVG:
1949 // (CASE WHEN count > 0 THEN sum ELSE 0) / COUNT
1951  const RexOperator* divide_operator = dynamic_cast<const RexOperator*>(rex);
1952  if (divide_operator && divide_operator->getOperator() == kDIVIDE) {
1953  CHECK_EQ(divide_operator->size(), size_t(2));
1954  const auto case_operator =
1955  dynamic_cast<const RexCase*>(divide_operator->getOperand(0));
1956  const auto second_window =
1957  dynamic_cast<const RexWindowFunctionOperator*>(divide_operator->getOperand(1));
1958  if (case_operator && second_window &&
1959  second_window->getKind() == SqlWindowFunctionKind::COUNT) {
1960  if (is_window_function_sum(case_operator)) {
1961  return true;
1962  }
1963  }
1964  }
1965  return false;
1966 }
1967 
1968 // Detect both window function operators and window function operators embedded in case
1969 // statements (for null handling)
1971  if (dynamic_cast<const RexWindowFunctionOperator*>(rex)) {
1972  return true;
1973  }
1974 
1975  // unwrap from casts, if they exist
1976  const auto rex_cast = dynamic_cast<const RexOperator*>(rex);
1977  if (rex_cast && rex_cast->getOperator() == kCAST) {
1978  CHECK_EQ(rex_cast->size(), size_t(1));
1979  return is_window_function_operator(rex_cast->getOperand(0));
1980  }
1981 
1983  return true;
1984  }
1985 
1986  return false;
1987 }
1988 
1989 } // namespace
1990 
1992  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1993  const std::vector<const RelAlgNode*>& left_deep_joins,
1994  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
1995  query_hints) {
1996  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
1997  std::vector<size_t> crt_pattern;
1998  CoalesceState crt_state{CoalesceState::Initial};
1999 
2000  auto reset_state = [&crt_pattern, &crt_state]() {
2001  crt_state = CoalesceState::Initial;
2002  std::vector<size_t>().swap(crt_pattern);
2003  };
2004 
2005  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
2006  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
2007  switch (crt_state) {
2008  case CoalesceState::Initial: {
2009  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
2010  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
2011  left_deep_joins.end()) {
2012  crt_pattern.push_back(size_t(nodeIt));
2013  crt_state = CoalesceState::Filter;
2014  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2015  } else if (auto project_node =
2016  std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2017  if (project_node->hasWindowFunctionExpr()) {
2018  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2019  } else {
2020  crt_pattern.push_back(size_t(nodeIt));
2021  crt_state = CoalesceState::FirstProject;
2022  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2023  }
2024  } else {
2025  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2026  }
2027  break;
2028  }
2029  case CoalesceState::Filter: {
2030  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2031  // Given we now add preceding projects for all window functions following
2032  // RelFilter nodes, the following should never occur
2033  CHECK(!project_node->hasWindowFunctionExpr());
2034  crt_pattern.push_back(size_t(nodeIt));
2035  crt_state = CoalesceState::FirstProject;
2036  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2037  } else {
2038  reset_state();
2039  }
2040  break;
2041  }
2042  case CoalesceState::FirstProject: {
2043  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
2044  crt_pattern.push_back(size_t(nodeIt));
2045  crt_state = CoalesceState::Aggregate;
2046  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2047  } else {
2048  if (crt_pattern.size() >= 2) {
2049  create_compound(nodes, crt_pattern, query_hints);
2050  }
2051  reset_state();
2052  }
2053  break;
2054  }
2055  case CoalesceState::Aggregate: {
2056  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2057  if (!project_node->hasWindowFunctionExpr()) {
2058  // TODO(adb): overloading the simple project terminology again here
2059  bool is_simple_project{true};
2060  for (size_t i = 0; i < project_node->size(); i++) {
2061  const auto scalar_rex = project_node->getProjectAt(i);
2062  // If the top level scalar rex is an input node, we can bypass the visitor
2063  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
2065  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
2066  is_simple_project = false;
2067  break;
2068  }
2069  continue;
2070  }
2071  CoalesceSecondaryProjectVisitor visitor;
2072  if (!visitor.visit(project_node->getProjectAt(i))) {
2073  is_simple_project = false;
2074  break;
2075  }
2076  }
2077  if (is_simple_project) {
2078  crt_pattern.push_back(size_t(nodeIt));
2079  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2080  }
2081  }
2082  }
2083  CHECK_GE(crt_pattern.size(), size_t(2));
2084  create_compound(nodes, crt_pattern, query_hints);
2085  reset_state();
2086  break;
2087  }
2088  default:
2089  CHECK(false);
2090  }
2091  }
2092  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
2093  if (crt_pattern.size() >= 2) {
2094  create_compound(nodes, crt_pattern, query_hints);
2095  }
2096  CHECK(!crt_pattern.empty());
2097  }
2098 }
2099 
2100 class WindowFunctionCollector : public RexVisitor<void*> {
2101  public:
2103  std::unordered_map<size_t, const RexScalar*>& collected_window_func,
2104  bool only_add_window_expr)
2105  : collected_window_func_(collected_window_func)
2106  , only_add_window_expr_(only_add_window_expr) {}
2107 
2108  protected:
2109  // Detect embedded window function expressions in operators
2110  void* visitOperator(const RexOperator* rex_operator) const final {
2111  if (is_window_function_operator(rex_operator)) {
2112  tryAddWindowExpr(rex_operator);
2113  }
2114  const size_t operand_count = rex_operator->size();
2115  for (size_t i = 0; i < operand_count; ++i) {
2116  const auto operand = rex_operator->getOperand(i);
2117  if (is_window_function_operator(operand)) {
2118  // Handle both RexWindowFunctionOperators and window functions built up from
2119  // multiple RexScalar objects (e.g. AVG)
2120  tryAddWindowExpr(operand);
2121  } else {
2122  visit(operand);
2123  }
2124  }
2125  return defaultResult();
2126  }
2127 
2128  // Detect embedded window function expressions in case statements. Note that this may
2129  // manifest as a nested case statement inside a top level case statement, as some
2130  // window functions (sum, avg) are represented as a case statement. Use the
2131  // is_window_function_operator helper to detect complete window function expressions.
2132  void* visitCase(const RexCase* rex_case) const final {
2133  if (is_window_function_operator(rex_case)) {
2134  tryAddWindowExpr(rex_case);
2135  if (!only_add_window_expr_) {
2136  return nullptr;
2137  }
2138  }
2139 
2140  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
2141  const auto when = rex_case->getWhen(i);
2142  if (is_window_function_operator(when)) {
2143  tryAddWindowExpr(when);
2144  } else {
2145  visit(when);
2146  }
2147  const auto then = rex_case->getThen(i);
2148  if (is_window_function_operator(then)) {
2149  tryAddWindowExpr(then);
2150  } else {
2151  visit(then);
2152  }
2153  }
2154  if (rex_case->getElse()) {
2155  auto else_expr = rex_case->getElse();
2156  if (is_window_function_operator(else_expr)) {
2157  tryAddWindowExpr(else_expr);
2158  } else {
2159  visit(else_expr);
2160  }
2161  }
2162  return defaultResult();
2163  }
2164 
2165  void tryAddWindowExpr(RexScalar const* expr) const {
2166  if (!only_add_window_expr_) {
2167  collected_window_func_.emplace(expr->toHash(), expr);
2168  } else {
2169  if (auto window_expr = dynamic_cast<RexWindowFunctionOperator const*>(expr)) {
2170  collected_window_func_.emplace(window_expr->toHash(), window_expr);
2171  }
2172  }
2173  }
2174 
2175  void* defaultResult() const final { return nullptr; }
2176 
2177  private:
2178  std::unordered_map<size_t, const RexScalar*>& collected_window_func_;
2180 };
2181 
2183  public:
2185  std::unordered_set<size_t>& collected_window_func_hash,
2186  std::vector<std::unique_ptr<const RexScalar>>& new_rex_input_for_window_func,
2187  std::unordered_map<size_t, size_t>& window_func_to_new_rex_input_idx_map,
2188  RelProject* new_project,
2189  std::unordered_map<size_t, std::unique_ptr<const RexInput>>&
2190  new_rex_input_from_child_node)
2191  : collected_window_func_hash_(collected_window_func_hash)
2192  , new_rex_input_for_window_func_(new_rex_input_for_window_func)
2193  , window_func_to_new_rex_input_idx_map_(window_func_to_new_rex_input_idx_map)
2194  , new_project_(new_project)
2195  , new_rex_input_from_child_node_(new_rex_input_from_child_node) {
2196  CHECK_EQ(collected_window_func_hash_.size(),
2197  window_func_to_new_rex_input_idx_map_.size());
2198  for (auto hash : collected_window_func_hash_) {
2199  auto rex_it = window_func_to_new_rex_input_idx_map_.find(hash);
2200  CHECK(rex_it != window_func_to_new_rex_input_idx_map_.end());
2201  CHECK_LT(rex_it->second, new_rex_input_for_window_func_.size());
2202  }
2203  CHECK(new_project_);
2204  }
2205 
2206  protected:
2207  RetType visitInput(const RexInput* rex_input) const final {
2208  if (rex_input->getSourceNode() != new_project_) {
2209  const auto cur_index = rex_input->getIndex();
2210  auto cur_source_node = rex_input->getSourceNode();
2211  std::string field_name = "";
2212  if (auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
2213  field_name = cur_project_node->getFieldName(cur_index);
2214  }
2215  auto rex_input_hash = rex_input->toHash();
2216  auto rex_input_it = new_rex_input_from_child_node_.find(rex_input_hash);
2217  if (rex_input_it == new_rex_input_from_child_node_.end()) {
2218  auto new_rex_input =
2219  std::make_unique<RexInput>(new_project_, new_project_->size());
2220  new_project_->appendInput(field_name, rex_input->deepCopy());
2221  new_rex_input_from_child_node_.emplace(rex_input_hash, new_rex_input->deepCopy());
2222  return new_rex_input;
2223  } else {
2224  return rex_input_it->second->deepCopy();
2225  }
2226  } else {
2227  return rex_input->deepCopy();
2228  }
2229  }
2230 
2231  RetType visitOperator(const RexOperator* rex_operator) const final {
2232  auto new_rex_idx = is_collected_window_function(rex_operator->toHash());
2233  if (new_rex_idx) {
2234  return get_new_rex_input(*new_rex_idx);
2235  }
2236 
2237  const auto rex_window_function_operator =
2238  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
2239  if (rex_window_function_operator) {
2240  // Deep copy the embedded window function operator
2241  return visitWindowFunctionOperator(rex_window_function_operator);
2242  }
2243 
2244  const size_t operand_count = rex_operator->size();
2245  std::vector<RetType> new_opnds;
2246  for (size_t i = 0; i < operand_count; ++i) {
2247  const auto operand = rex_operator->getOperand(i);
2248  auto new_rex_idx_for_operand = is_collected_window_function(operand->toHash());
2249  if (new_rex_idx_for_operand) {
2250  new_opnds.push_back(get_new_rex_input(*new_rex_idx_for_operand));
2251  } else {
2252  new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
2253  }
2254  }
2255  return rex_operator->getDisambiguated(new_opnds);
2256  }
2257 
2258  RetType visitCase(const RexCase* rex_case) const final {
2259  auto new_rex_idx = is_collected_window_function(rex_case->toHash());
2260  if (new_rex_idx) {
2261  return get_new_rex_input(*new_rex_idx);
2262  }
2263 
2264  std::vector<std::pair<RetType, RetType>> new_pair_list;
2265  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
2266  auto when_operand = rex_case->getWhen(i);
2267  auto new_rex_idx_for_when_operand =
2268  is_collected_window_function(when_operand->toHash());
2269 
2270  auto then_operand = rex_case->getThen(i);
2271  auto new_rex_idx_for_then_operand =
2272  is_collected_window_function(then_operand->toHash());
2273 
2274  new_pair_list.emplace_back(
2275  new_rex_idx_for_when_operand ? get_new_rex_input(*new_rex_idx_for_when_operand)
2276  : visit(when_operand),
2277  new_rex_idx_for_then_operand ? get_new_rex_input(*new_rex_idx_for_then_operand)
2278  : visit(then_operand));
2279  }
2280  auto new_rex_idx_for_else_operand =
2281  is_collected_window_function(rex_case->getElse()->toHash());
2282  auto new_else = new_rex_idx_for_else_operand
2283  ? get_new_rex_input(*new_rex_idx_for_else_operand)
2284  : visit(rex_case->getElse());
2285  return std::make_unique<RexCase>(new_pair_list, new_else);
2286  }
2287 
2288  private:
2289  std::optional<size_t> is_collected_window_function(size_t rex_hash) const {
2290  auto rex_it = window_func_to_new_rex_input_idx_map_.find(rex_hash);
2291  if (rex_it != window_func_to_new_rex_input_idx_map_.end()) {
2292  return rex_it->second;
2293  }
2294  return std::nullopt;
2295  }
2296 
2297  std::unique_ptr<const RexScalar> get_new_rex_input(size_t rex_idx) const {
2298  CHECK_GE(rex_idx, 0UL);
2299  CHECK_LT(rex_idx, new_rex_input_for_window_func_.size());
2300  auto& new_rex_input = new_rex_input_for_window_func_.at(rex_idx);
2301  CHECK(new_rex_input);
2302  auto copied_rex_input = copier_.visit(new_rex_input.get());
2303  return copied_rex_input;
2304  }
2305 
2306  std::unordered_set<size_t>& collected_window_func_hash_;
2307  // we should have new rex_input for each window function collected
2308  std::vector<std::unique_ptr<const RexScalar>>& new_rex_input_for_window_func_;
2309  // an index to get a new rex_input for the collected window function
2310  std::unordered_map<size_t, size_t>& window_func_to_new_rex_input_idx_map_;
2312  std::unordered_map<size_t, std::unique_ptr<const RexInput>>&
2315 };
2316 
2318  std::shared_ptr<RelProject> prev_node,
2319  std::shared_ptr<RelProject> new_node,
2320  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2321  query_hints) {
2322  auto delivered_hints = prev_node->getDeliveredHints();
2323  bool needs_propagate_hints = !delivered_hints->empty();
2324  if (needs_propagate_hints) {
2325  for (auto& kv : *delivered_hints) {
2326  new_node->addHint(kv.second);
2327  }
2328  auto prev_it = query_hints.find(prev_node->toHash());
2329  // query hint for the prev projection node should be registered
2330  CHECK(prev_it != query_hints.end());
2331  auto prev_hint_it = prev_it->second.find(prev_node->getId());
2332  CHECK(prev_hint_it != prev_it->second.end());
2333  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
2334  hint_map.emplace(new_node->getId(), prev_hint_it->second);
2335  query_hints.emplace(new_node->toHash(), hint_map);
2336  }
2337 }
2338 
2362  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2363  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2364  query_hints) {
2365  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2366  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2367  const auto node = *node_itr;
2368  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2369  if (!window_func_project_node) {
2370  continue;
2371  }
2372 
2373  const auto prev_node_itr = std::prev(node_itr);
2374  const auto prev_node = *prev_node_itr;
2375  CHECK(prev_node);
2376 
2377  // map scalar expression index in the project node to window function ptr
2378  std::unordered_map<size_t, const RexScalar*> collected_window_func;
2379  WindowFunctionCollector collector(collected_window_func, false);
2380  // Iterate the target exprs of the project node and check for window function
2381  // expressions. If an embedded expression exists, collect it
2382  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2383  const auto scalar_rex = window_func_project_node->getProjectAt(i);
2384  if (is_window_function_operator(scalar_rex)) {
2385  // top level window function exprs are fine
2386  continue;
2387  }
2388  collector.visit(scalar_rex);
2389  }
2390 
2391  if (!collected_window_func.empty()) {
2392  // we have a nested window function expression
2393  std::unordered_set<size_t> collected_window_func_hash;
2394  // the current window function needs a set of new rex input which references
2395  // expressions in the newly introduced projection node
2396  std::vector<std::unique_ptr<const RexScalar>> new_rex_input_for_window_func;
2397  // a target projection expression of the newly created projection node
2398  std::vector<std::unique_ptr<const RexScalar>> new_scalar_expr_for_window_project;
2399  // a map between nested window function (hash val) and
2400  // its rex index stored in the `new_rex_input_for_window_func`
2401  std::unordered_map<size_t, size_t> window_func_to_new_rex_input_idx_map;
2402  // a map between RexInput of the current window function projection node (hash val)
2403  // and its corresponding new RexInput which is pushed down to the new projection
2404  // node
2405  std::unordered_map<size_t, std::unique_ptr<const RexInput>>
2406  new_rex_input_from_child_node;
2407  RexDeepCopyVisitor copier;
2408 
2409  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2410  std::vector<std::string> dummy_fields;
2411  std::vector<std::string> new_project_field_names;
2412  // create a new project node, it will contain window function expressions
2413  auto new_project =
2414  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2415  // insert this new project node between the current window project node and its
2416  // child node
2417  node_list.insert(node_itr, new_project);
2418 
2419  // retrieve various information to replace expressions in the current window
2420  // function project node w/ considering scalar expressions in the new project node
2421  std::for_each(collected_window_func.begin(),
2422  collected_window_func.end(),
2423  [&new_project_field_names,
2424  &collected_window_func_hash,
2425  &new_rex_input_for_window_func,
2426  &new_scalar_expr_for_window_project,
2427  &copier,
2428  &new_project,
2429  &window_func_to_new_rex_input_idx_map](const auto& kv) {
2430  // compute window function expr's hash, and create a new rex_input
2431  // for it
2432  collected_window_func_hash.insert(kv.first);
2433 
2434  // map an old expression in the window function project node
2435  // to an index of the corresponding new RexInput
2436  const auto rex_idx = new_rex_input_for_window_func.size();
2437  window_func_to_new_rex_input_idx_map.emplace(kv.first, rex_idx);
2438 
2439  // create a new RexInput and make it as one of new expression of the
2440  // newly created project node
2441  new_rex_input_for_window_func.emplace_back(
2442  std::make_unique<const RexInput>(new_project.get(), rex_idx));
2443  new_scalar_expr_for_window_project.push_back(
2444  std::move(copier.visit(kv.second)));
2445  new_project_field_names.emplace_back("");
2446  });
2447  new_project->setExpressions(new_scalar_expr_for_window_project);
2448  new_project->setFields(std::move(new_project_field_names));
2449 
2450  auto window_func_scalar_exprs =
2451  window_func_project_node->getExpressionsAndRelease();
2452  RexWindowFuncReplacementVisitor replacer(collected_window_func_hash,
2453  new_rex_input_for_window_func,
2454  window_func_to_new_rex_input_idx_map,
2455  new_project.get(),
2456  new_rex_input_from_child_node);
2457  size_t rex_idx = 0;
2458  for (auto& scalar_expr : window_func_scalar_exprs) {
2459  // try to replace the old expressions in the window function project node
2460  // with expressions of the newly created project node
2461  auto new_parent_rex = replacer.visit(scalar_expr.get());
2462  window_func_scalar_exprs[rex_idx] = std::move(new_parent_rex);
2463  rex_idx++;
2464  }
2465  // Update the previous window project node
2466  window_func_project_node->setExpressions(window_func_scalar_exprs);
2467  window_func_project_node->replaceInput(prev_node, new_project);
2468  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2469  new_project->setPushedDownWindowExpr();
2470  }
2471  }
2472  nodes.assign(node_list.begin(), node_list.end());
2473 }
2474 
2475 using RexInputSet = std::unordered_set<RexInput>;
2476 
2477 class RexInputCollector : public RexVisitor<RexInputSet> {
2478  public:
2479  RexInputSet visitInput(const RexInput* input) const override {
2480  return RexInputSet{*input};
2481  }
2482 
2483  protected:
2485  const RexInputSet& next_result) const override {
2486  auto result = aggregate;
2487  result.insert(next_result.begin(), next_result.end());
2488  return result;
2489  }
2490 };
2491 
2492 namespace {
2494  bool& has_generic_expr_in_window_func) {
2495  for (auto const& partition_key : window_expr->getPartitionKeys()) {
2496  auto partition_input = dynamic_cast<RexInput const*>(partition_key.get());
2497  if (!partition_input) {
2498  return true;
2499  }
2500  }
2501  for (auto const& order_key : window_expr->getOrderKeys()) {
2502  auto order_input = dynamic_cast<RexInput const*>(order_key.get());
2503  if (!order_input) {
2504  return true;
2505  }
2506  }
2507  for (size_t k = 0; k < window_expr->size(); k++) {
2508  if (!shared::dynamic_castable_to_any<RexInput, RexLiteral>(
2509  window_expr->getOperand(k))) {
2510  has_generic_expr_in_window_func = true;
2511  return true;
2512  }
2513  }
2514  return false;
2515 }
2516 
2517 std::pair<bool, bool> need_pushdown_generic_expr(
2518  RelProject const* window_func_project_node) {
2519  bool has_generic_expr_in_window_func = false;
2520  bool res = false;
2521  for (size_t i = 0; i < window_func_project_node->size(); ++i) {
2522  auto const projected_target = window_func_project_node->getProjectAt(i);
2523  if (auto const* window_expr =
2524  dynamic_cast<RexWindowFunctionOperator const*>(projected_target)) {
2525  res =
2526  find_generic_expr_in_window_func(window_expr, has_generic_expr_in_window_func);
2527  } else if (auto const* case_expr = dynamic_cast<RexCase const*>(projected_target)) {
2528  std::unordered_map<size_t, const RexScalar*> collected_window_func;
2529  WindowFunctionCollector collector(collected_window_func, true);
2530  collector.visit(case_expr);
2531  for (auto const& kv : collected_window_func) {
2532  auto const* candidate_window_expr =
2533  dynamic_cast<RexWindowFunctionOperator const*>(kv.second);
2534  CHECK(candidate_window_expr);
2535  res = find_generic_expr_in_window_func(candidate_window_expr,
2536  has_generic_expr_in_window_func);
2537  }
2538  }
2539  }
2540  return std::make_pair(has_generic_expr_in_window_func, res);
2541 }
2542 }; // namespace
2556  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2557  const bool always_add_project_if_first_project_is_window_expr,
2558  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2559  query_hints) {
2560  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2561  size_t project_node_counter{0};
2562  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2563  const auto node = *node_itr;
2564 
2565  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2566  if (!window_func_project_node) {
2567  continue;
2568  }
2569  project_node_counter++;
2570  if (!window_func_project_node->hasWindowFunctionExpr()) {
2571  // this projection node does not have a window function
2572  // expression -- skip to the next node in the DAG.
2573  continue;
2574  }
2575 
2576  const auto prev_node_itr = std::prev(node_itr);
2577  const auto prev_node = *prev_node_itr;
2578  CHECK(prev_node);
2579 
2580  auto filter_node = std::dynamic_pointer_cast<RelFilter>(prev_node);
2581  auto join_node = std::dynamic_pointer_cast<RelJoin>(prev_node);
2582 
2583  auto scan_node = std::dynamic_pointer_cast<RelScan>(prev_node);
2584  const bool has_multi_fragment_scan_input =
2585  (scan_node &&
2586  (scan_node->getNumShards() > 0 || scan_node->getNumFragments() > 1));
2587  auto const [has_generic_expr_in_window_func, needs_expr_pushdown] =
2588  need_pushdown_generic_expr(window_func_project_node.get());
2589 
2590  // We currently add a preceding project node in one of two conditions:
2591  // 1. always_add_project_if_first_project_is_window_expr = true, which
2592  // we currently only set for distributed, but could also be set to support
2593  // multi-frag window function inputs, either if we can detect that an input table
2594  // is multi-frag up front, or using a retry mechanism like we do for join filter
2595  // push down.
2596  // TODO(todd): Investigate a viable approach for the above.
2597  // 2. Regardless of #1, if the window function project node is preceded by a
2598  // filter node. This is required both for correctness and to avoid pulling
2599  // all source input columns into memory since non-coalesced filter node
2600  // inputs are currently not pruned or eliminated via dead column elimination.
2601  // Note that we expect any filter node followed by a project node to be coalesced
2602  // into a single compound node in RelAlgDag::coalesce_nodes, and that action
2603  // prunes unused inputs.
2604  // TODO(todd): Investigate whether the shotgun filter node issue affects other
2605  // query plans, i.e. filters before joins, and whether there is a more general
2606  // approach to solving this (will still need the preceding project node for
2607  // window functions preceded by filter nodes for correctness though)
2608  // 3. Similar to the above, when the window function project node is preceded
2609  // by a join node.
2610  // 4. when partition by / order by clauses have a general expression instead of
2611  // referencing column
2612 
2613  if (!((always_add_project_if_first_project_is_window_expr &&
2614  project_node_counter == 1) ||
2615  filter_node || join_node || has_multi_fragment_scan_input ||
2616  needs_expr_pushdown)) {
2617  continue;
2618  }
2619 
2620  if (needs_expr_pushdown || join_node) {
2621  // previous logic cannot cover join_node case well, so use the newly introduced
2622  // push-down expression logic to safely add pre_project node before processing
2623  // window function
2624  std::unordered_map<size_t, size_t> expr_offset_cache;
2625  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_new_project;
2626  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_window_project;
2627  std::vector<std::string> fields_for_window_project;
2628  std::vector<std::string> fields_for_new_project;
2629 
2630  // step 0. create new project node with an empty scalar expr to rebind target exprs
2631  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2632  std::vector<std::string> dummy_fields;
2633  auto new_project =
2634  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2635 
2636  // step 1 - 2
2637  PushDownGenericExpressionInWindowFunction visitor(new_project,
2638  scalar_exprs_for_new_project,
2639  fields_for_new_project,
2640  expr_offset_cache);
2641  for (size_t i = 0; i < window_func_project_node->size(); ++i) {
2642  auto projected_target = window_func_project_node->getProjectAt(i);
2643  auto new_projection_target = visitor.visit(projected_target);
2644  scalar_exprs_for_window_project.emplace_back(
2645  std::move(new_projection_target.release()));
2646  }
2647  new_project->setExpressions(scalar_exprs_for_new_project);
2648  new_project->setFields(std::move(fields_for_new_project));
2649  bool has_groupby = false;
2650  auto aggregate = std::dynamic_pointer_cast<RelAggregate>(prev_node);
2651  if (aggregate) {
2652  has_groupby = aggregate->getGroupByCount() > 0;
2653  }
2654  // force rowwise output to prevent computing incorrect query result
2655  if (has_groupby && visitor.hasPartitionExpression()) {
2656  // we currently may compute incorrect result with columnar output when
2657  // 1) the window function has partition expression, and
2658  // 2) a parent node of the window function projection node has group by expression
2659  // todo (yoonmin) : relax this
2660  VLOG(1)
2661  << "Query output overridden to row-wise format due to presence of a window "
2662  "function with partition expression and group-by expression.";
2663  new_project->forceRowwiseOutput();
2664  } else if (has_generic_expr_in_window_func) {
2665  VLOG(1) << "Query output overridden to row-wise format due to presence of a "
2666  "generic expression as an input expression of the window "
2667  "function.";
2668  new_project->forceRowwiseOutput();
2669  } else if (visitor.hasCaseExprAsWindowOperand()) {
2670  VLOG(1)
2671  << "Query output overridden to row-wise format due to presence of a window "
2672  "function with a case statement as its operand.";
2673  new_project->forceRowwiseOutput();
2674  }
2675 
2676  // step 3. finalize
2677  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2678  new_project->setPushedDownWindowExpr();
2679  node_list.insert(node_itr, new_project);
2680  window_func_project_node->replaceInput(prev_node, new_project);
2681  window_func_project_node->setExpressions(scalar_exprs_for_window_project);
2682  } else {
2683  // only push rex_inputs listed in the window function down to a new project node
2684  RexInputSet inputs;
2685  RexInputCollector input_collector;
2686  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2687  auto new_inputs =
2688  input_collector.visit(window_func_project_node->getProjectAt(i));
2689  inputs.insert(new_inputs.begin(), new_inputs.end());
2690  }
2691 
2692  // Note: Technically not required since we are mapping old inputs to new input
2693  // indices, but makes the re-mapping of inputs easier to follow.
2694  std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2695  std::sort(sorted_inputs.begin(),
2696  sorted_inputs.end(),
2697  [](const auto& a, const auto& b) { return a.getIndex() < b.getIndex(); });
2698 
2699  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2700  std::vector<std::string> fields;
2701  std::unordered_map<unsigned, unsigned> old_index_to_new_index;
2702  for (auto& input : sorted_inputs) {
2703  CHECK_EQ(input.getSourceNode(), prev_node.get());
2704  CHECK(old_index_to_new_index
2705  .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2706  .second);
2707  scalar_exprs.emplace_back(input.deepCopy());
2708  fields.emplace_back("");
2709  }
2710 
2711  auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2712  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2713  new_project->setPushedDownWindowExpr();
2714  node_list.insert(node_itr, new_project);
2715  window_func_project_node->replaceInput(
2716  prev_node, new_project, old_index_to_new_index);
2717  }
2718  }
2719  nodes.assign(node_list.begin(), node_list.end());
2720 }
2721 
2722 int64_t get_int_literal_field(const rapidjson::Value& obj,
2723  const char field[],
2724  const int64_t default_val) noexcept {
2725  const auto it = obj.FindMember(field);
2726  if (it == obj.MemberEnd()) {
2727  return default_val;
2728  }
2729  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
2730  CHECK_EQ(kDECIMAL, lit->getType());
2731  CHECK_EQ(unsigned(0), lit->getScale());
2732  CHECK_EQ(unsigned(0), lit->getTargetScale());
2733  return lit->getVal<int64_t>();
2734 }
2735 
2736 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
2737  const auto& inputs_json = field(node, "inputs");
2738  CHECK(inputs_json.IsArray() && !inputs_json.Size());
2739 }
2740 
2741 const std::pair<const Catalog_Namespace::Catalog*, const TableDescriptor*>
2742 getCatalogAndTableFromScanNode(const rapidjson::Value& scan_ra) {
2743  const auto& table_json = field(scan_ra, "table");
2744  CHECK(table_json.IsArray());
2745  CHECK_EQ(unsigned(2), table_json.Size());
2746  const auto cat =
2747  Catalog_Namespace::SysCatalog::instance().getCatalog(table_json[0].GetString());
2748  CHECK(cat);
2749  const auto td = cat->getMetadataForTable(table_json[1].GetString());
2750  CHECK(td);
2751  return {cat.get(), td};
2752 }
2753 
2754 std::vector<std::string> getFieldNamesFromScanNode(const rapidjson::Value& scan_ra) {
2755  const auto& fields_json = field(scan_ra, "fieldNames");
2756  return strings_from_json_array(fields_json);
2757 }
2758 
2759 } // namespace
2760 
2762  for (const auto& expr : scalar_exprs_) {
2763  if (is_window_function_operator(expr.get())) {
2764  return true;
2765  }
2766  }
2767  return false;
2768 }
2769 namespace details {
2770 
2772  public:
2774 
2775  std::vector<std::shared_ptr<RelAlgNode>> run(const rapidjson::Value& rels,
2776  RelAlgDag& root_dag) {
2777  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
2778  const auto& crt_node = *rels_it;
2779  const auto id = node_id(crt_node);
2780  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2781  CHECK(crt_node.IsObject());
2782  std::shared_ptr<RelAlgNode> ra_node = nullptr;
2783  const auto rel_op = json_str(field(crt_node, "relOp"));
2784  if (rel_op == std::string("EnumerableTableScan") ||
2785  rel_op == std::string("LogicalTableScan")) {
2786  ra_node = dispatchTableScan(crt_node);
2787  } else if (rel_op == std::string("LogicalProject")) {
2788  ra_node = dispatchProject(crt_node, root_dag);
2789  } else if (rel_op == std::string("LogicalFilter")) {
2790  ra_node = dispatchFilter(crt_node, root_dag);
2791  } else if (rel_op == std::string("LogicalAggregate")) {
2792  ra_node = dispatchAggregate(crt_node);
2793  } else if (rel_op == std::string("LogicalJoin")) {
2794  ra_node = dispatchJoin(crt_node, root_dag);
2795  } else if (rel_op == std::string("LogicalSort")) {
2796  ra_node = dispatchSort(crt_node);
2797  } else if (rel_op == std::string("LogicalValues")) {
2798  ra_node = dispatchLogicalValues(crt_node);
2799  } else if (rel_op == std::string("LogicalTableModify")) {
2800  ra_node = dispatchModify(crt_node);
2801  } else if (rel_op == std::string("LogicalTableFunctionScan")) {
2802  ra_node = dispatchTableFunction(crt_node, root_dag);
2803  } else if (rel_op == std::string("LogicalUnion")) {
2804  ra_node = dispatchUnion(crt_node);
2805  } else {
2806  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
2807  }
2808  nodes_.push_back(ra_node);
2809  }
2810 
2811  return std::move(nodes_);
2812  }
2813 
2814  private:
2815  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
2816  check_empty_inputs_field(scan_ra);
2817  CHECK(scan_ra.IsObject());
2818  const auto [cat, td] = getCatalogAndTableFromScanNode(scan_ra);
2819  const auto field_names = getFieldNamesFromScanNode(scan_ra);
2820  if (scan_ra.HasMember("hints")) {
2821  auto scan_node = std::make_shared<RelScan>(td, field_names, *cat);
2822  getRelAlgHints(scan_ra, scan_node);
2823  return scan_node;
2824  }
2825  return std::make_shared<RelScan>(td, field_names, *cat);
2826  }
2827 
2828  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra,
2829  RelAlgDag& root_dag) {
2830  const auto inputs = getRelAlgInputs(proj_ra);
2831  CHECK_EQ(size_t(1), inputs.size());
2832  const auto& exprs_json = field(proj_ra, "exprs");
2833  CHECK(exprs_json.IsArray());
2834  std::vector<std::unique_ptr<const RexScalar>> exprs;
2835  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
2836  ++exprs_json_it) {
2837  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, root_dag));
2838  }
2839  const auto& fields = field(proj_ra, "fields");
2840  if (proj_ra.HasMember("hints")) {
2841  auto project_node = std::make_shared<RelProject>(
2842  exprs, strings_from_json_array(fields), inputs.front());
2843  getRelAlgHints(proj_ra, project_node);
2844  return project_node;
2845  }
2846  return std::make_shared<RelProject>(
2847  exprs, strings_from_json_array(fields), inputs.front());
2848  }
2849 
2850  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra,
2851  RelAlgDag& root_dag) {
2852  const auto inputs = getRelAlgInputs(filter_ra);
2853  CHECK_EQ(size_t(1), inputs.size());
2854  const auto id = node_id(filter_ra);
2855  CHECK(id);
2856  auto condition = parse_scalar_expr(field(filter_ra, "condition"), root_dag);
2857  return std::make_shared<RelFilter>(condition, inputs.front());
2858  }
2859 
2860  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
2861  const auto inputs = getRelAlgInputs(agg_ra);
2862  CHECK_EQ(size_t(1), inputs.size());
2863  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
2864  const auto group = indices_from_json_array(field(agg_ra, "group"));
2865  for (size_t i = 0; i < group.size(); ++i) {
2866  CHECK_EQ(i, group[i]);
2867  }
2868  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
2869  throw QueryNotSupported("GROUP BY extensions not supported");
2870  }
2871  const auto& aggs_json_arr = field(agg_ra, "aggs");
2872  CHECK(aggs_json_arr.IsArray());
2873  std::vector<std::unique_ptr<const RexAgg>> aggs;
2874  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
2875  aggs_json_arr_it != aggs_json_arr.End();
2876  ++aggs_json_arr_it) {
2877  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
2878  }
2879  if (agg_ra.HasMember("hints")) {
2880  auto agg_node =
2881  std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2882  getRelAlgHints(agg_ra, agg_node);
2883  return agg_node;
2884  }
2885  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
2886  }
2887 
2888  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra,
2889  RelAlgDag& root_dag) {
2890  const auto inputs = getRelAlgInputs(join_ra);
2891  CHECK_EQ(size_t(2), inputs.size());
2892  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
2893  auto filter_rex = parse_scalar_expr(field(join_ra, "condition"), root_dag);
2894  if (join_ra.HasMember("hints")) {
2895  auto join_node =
2896  std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2897  getRelAlgHints(join_ra, join_node);
2898  return join_node;
2899  }
2900  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
2901  }
2902 
2903  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
2904  const auto inputs = getRelAlgInputs(sort_ra);
2905  CHECK_EQ(size_t(1), inputs.size());
2906  std::vector<SortField> collation;
2907  const auto& collation_arr = field(sort_ra, "collation");
2908  CHECK(collation_arr.IsArray());
2909  for (auto collation_arr_it = collation_arr.Begin();
2910  collation_arr_it != collation_arr.End();
2911  ++collation_arr_it) {
2912  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
2913  const auto sort_dir = parse_sort_direction(*collation_arr_it);
2914  const auto null_pos = parse_nulls_position(*collation_arr_it);
2915  collation.emplace_back(field_idx, sort_dir, null_pos);
2916  }
2917  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
2918  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
2919  auto ret = std::make_shared<RelSort>(
2920  collation, limit > 0 ? limit : 0, offset, inputs.front(), limit > 0);
2921  ret->setEmptyResult(limit == 0);
2922  return ret;
2923  }
2924 
2925  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
2926  const auto inputs = getRelAlgInputs(logical_modify_ra);
2927  CHECK_EQ(size_t(1), inputs.size());
2928 
2929  const auto [cat, table_descriptor] =
2930  getCatalogAndTableFromScanNode(logical_modify_ra);
2931  if (table_descriptor->isView) {
2932  throw std::runtime_error("UPDATE of a view is unsupported.");
2933  }
2934 
2935  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
2936  std::string op = json_str(field(logical_modify_ra, "operation"));
2937  RelModify::TargetColumnList target_column_list;
2938 
2939  if (op == "UPDATE") {
2940  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
2941  CHECK(update_columns.IsArray());
2942 
2943  for (auto column_arr_it = update_columns.Begin();
2944  column_arr_it != update_columns.End();
2945  ++column_arr_it) {
2946  target_column_list.push_back(column_arr_it->GetString());
2947  }
2948  }
2949 
2950  auto modify_node = std::make_shared<RelModify>(
2951  *cat, table_descriptor, flattened, op, target_column_list, inputs[0]);
2952  switch (modify_node->getOperation()) {
2954  modify_node->applyDeleteModificationsToInputNode();
2955  break;
2956  }
2958  modify_node->applyUpdateModificationsToInputNode();
2959  break;
2960  }
2961  default:
2962  throw std::runtime_error("Unsupported RelModify operation: " +
2963  json_node_to_string(logical_modify_ra));
2964  }
2965 
2966  return modify_node;
2967  }
2968 
2969  std::shared_ptr<RelTableFunction> dispatchTableFunction(
2970  const rapidjson::Value& table_func_ra,
2971  RelAlgDag& root_dag) {
2972  const auto inputs = getRelAlgInputs(table_func_ra);
2973  const auto& invocation = field(table_func_ra, "invocation");
2974  CHECK(invocation.IsObject());
2975 
2976  const auto& operands = field(invocation, "operands");
2977  CHECK(operands.IsArray());
2978  CHECK_GE(operands.Size(), unsigned(0));
2979 
2980  std::vector<const Rex*> col_inputs;
2981  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
2982  std::vector<std::string> fields;
2983 
2984  for (auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
2985  ++exprs_json_it) {
2986  const auto& expr_json = *exprs_json_it;
2987  CHECK(expr_json.IsObject());
2988  if (expr_json.HasMember("op")) {
2989  const auto op_str = json_str(field(expr_json, "op"));
2990  if (op_str == "CAST" && expr_json.HasMember("type")) {
2991  const auto& expr_type = field(expr_json, "type");
2992  CHECK(expr_type.IsObject());
2993  CHECK(expr_type.HasMember("type"));
2994  const auto& expr_type_name = json_str(field(expr_type, "type"));
2995  if (expr_type_name == "CURSOR") {
2996  CHECK(expr_json.HasMember("operands"));
2997  const auto& expr_operands = field(expr_json, "operands");
2998  CHECK(expr_operands.IsArray());
2999  if (expr_operands.Size() != 1) {
3000  throw std::runtime_error(
3001  "Table functions currently only support one ResultSet input");
3002  }
3003  auto pos = field(expr_operands[0], "input").GetInt();
3004  CHECK_LT(pos, inputs.size());
3005  for (size_t i = inputs[pos]->size(); i > 0; i--) {
3006  table_func_inputs.emplace_back(
3007  std::make_unique<RexAbstractInput>(col_inputs.size()));
3008  col_inputs.emplace_back(table_func_inputs.back().get());
3009  }
3010  continue;
3011  }
3012  }
3013  }
3014  table_func_inputs.emplace_back(parse_scalar_expr(*exprs_json_it, root_dag));
3015  }
3016 
3017  const auto& op_name = field(invocation, "op");
3018  CHECK(op_name.IsString());
3019 
3020  std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
3021  const auto& row_types = field(table_func_ra, "rowType");
3022  CHECK(row_types.IsArray());
3023  CHECK_GE(row_types.Size(), unsigned(0));
3024  const auto& row_types_array = row_types.GetArray();
3025  for (size_t i = 0; i < row_types_array.Size(); i++) {
3026  // We don't care about the type information in rowType -- replace each output with
3027  // a reference to be resolved later in the translator
3028  table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
3029  fields.emplace_back("");
3030  }
3031  return std::make_shared<RelTableFunction>(op_name.GetString(),
3032  inputs,
3033  fields,
3034  col_inputs,
3035  table_func_inputs,
3036  table_function_projected_outputs);
3037  }
3038 
3039  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
3040  const rapidjson::Value& logical_values_ra) {
3041  const auto& tuple_type_arr = field(logical_values_ra, "type");
3042  CHECK(tuple_type_arr.IsArray());
3043  std::vector<TargetMetaInfo> tuple_type;
3044  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
3045  tuple_type_arr_it != tuple_type_arr.End();
3046  ++tuple_type_arr_it) {
3047  auto component_type = parse_type(*tuple_type_arr_it);
3048  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
3049  if (component_type.is_none_encoded_string()) {
3050  component_type.set_compression(kENCODING_DICT);
3051  component_type.set_comp_param(TRANSIENT_DICT_ID);
3052  component_type.setStringDictKey({TRANSIENT_DICT_DB_ID, TRANSIENT_DICT_ID});
3053  component_type.set_size(4);
3054  }
3055  tuple_type.emplace_back(component_name, component_type);
3056  }
3057  const auto& inputs_arr = field(logical_values_ra, "inputs");
3058  CHECK(inputs_arr.IsArray());
3059  const auto& tuples_arr = field(logical_values_ra, "tuples");
3060  CHECK(tuples_arr.IsArray());
3061 
3062  if (inputs_arr.Size()) {
3063  throw QueryNotSupported("Inputs not supported in logical values yet.");
3064  }
3065 
3066  std::vector<RelLogicalValues::RowValues> values;
3067  if (tuples_arr.Size()) {
3068  for (const auto& row : tuples_arr.GetArray()) {
3069  CHECK(row.IsArray());
3070  const auto values_json = row.GetArray();
3071  if (!values.empty()) {
3072  CHECK_EQ(values[0].size(), values_json.Size());
3073  }
3074  values.emplace_back(RelLogicalValues::RowValues{});
3075  for (const auto& value : values_json) {
3076  CHECK(value.IsObject());
3077  CHECK(value.HasMember("literal"));
3078  values.back().emplace_back(parse_literal(value));
3079  }
3080  }
3081  }
3082 
3083  return std::make_shared<RelLogicalValues>(tuple_type, values);
3084  }
3085 
3086  std::shared_ptr<RelLogicalUnion> dispatchUnion(
3087  const rapidjson::Value& logical_union_ra) {
3088  auto inputs = getRelAlgInputs(logical_union_ra);
3089  auto const& all_type_bool = field(logical_union_ra, "all");
3090  CHECK(all_type_bool.IsBool());
3091  return std::make_shared<RelLogicalUnion>(std::move(inputs), all_type_bool.GetBool());
3092  }
3093 
3094  RelAlgInputs getRelAlgInputs(const rapidjson::Value& node) {
3095  if (node.HasMember("inputs")) {
3096  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
3097  RelAlgInputs ra_inputs;
3098  for (const auto& str_id : str_input_ids) {
3099  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
3100  }
3101  return ra_inputs;
3102  }
3103  return {prev(node)};
3104  }
3105 
3106  std::pair<std::string, std::string> getKVOptionPair(std::string& str, size_t& pos) {
3107  auto option = str.substr(0, pos);
3108  std::string delim = "=";
3109  size_t delim_pos = option.find(delim);
3110  auto key = option.substr(0, delim_pos);
3111  auto val = option.substr(delim_pos + 1, option.length());
3112  str.erase(0, pos + delim.length() + 1);
3113  return {key, val};
3114  }
3115 
3116  ExplainedQueryHint parseHintString(std::string& hint_string) {
3117  std::string white_space_delim = " ";
3118  int l = hint_string.length();
3119  hint_string = hint_string.erase(0, 1).substr(0, l - 2);
3120  size_t pos = 0;
3121  auto global_hint_checker = [&](const std::string& input_hint_name) -> HintIdentifier {
3122  bool global_hint = false;
3123  std::string hint_name = input_hint_name;
3124  auto global_hint_identifier = hint_name.substr(0, 2);
3125  if (global_hint_identifier.compare("g_") == 0) {
3126  global_hint = true;
3127  hint_name = hint_name.substr(2, hint_string.length());
3128  }
3129  return {global_hint, hint_name};
3130  };
3131  auto parsed_hint =
3132  global_hint_checker(hint_string.substr(0, hint_string.find(white_space_delim)));
3133  auto hint_type = RegisteredQueryHint::translateQueryHint(parsed_hint.hint_name);
3134  if ((pos = hint_string.find("options:")) != std::string::npos) {
3135  // need to parse hint options
3136  std::vector<std::string> tokens;
3137  bool kv_list_op = false;
3138  std::string raw_options = hint_string.substr(pos + 8, hint_string.length() - 2);
3139  if (raw_options.find('{') != std::string::npos) {
3140  kv_list_op = true;
3141  } else {
3142  CHECK(raw_options.find('[') != std::string::npos);
3143  }
3144  auto t1 = raw_options.erase(0, 1);
3145  raw_options = t1.substr(0, t1.length() - 1);
3146  std::string op_delim = ", ";
3147  if (kv_list_op) {
3148  // kv options
3149  std::unordered_map<std::string, std::string> kv_options;
3150  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3151  auto kv_pair = getKVOptionPair(raw_options, pos);
3152  kv_options.emplace(kv_pair.first, kv_pair.second);
3153  }
3154  // handle the last kv pair
3155  auto kv_pair = getKVOptionPair(raw_options, pos);
3156  kv_options.emplace(kv_pair.first, kv_pair.second);
3157  return {hint_type, parsed_hint.global_hint, false, true, kv_options};
3158  } else {
3159  std::vector<std::string> list_options;
3160  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3161  list_options.emplace_back(raw_options.substr(0, pos));
3162  raw_options.erase(0, pos + white_space_delim.length() + 1);
3163  }
3164  // handle the last option
3165  list_options.emplace_back(raw_options.substr(0, pos));
3166  return {hint_type, parsed_hint.global_hint, false, false, list_options};
3167  }
3168  } else {
3169  // marker hint: no extra option for this hint
3170  return {hint_type, parsed_hint.global_hint, true, false};
3171  }
3172  }
3173 
3174  void getRelAlgHints(const rapidjson::Value& json_node,
3175  std::shared_ptr<RelAlgNode> node) {
3176  std::string hint_explained = json_str(field(json_node, "hints"));
3177  size_t pos = 0;
3178  std::string delim = "|";
3179  std::vector<std::string> hint_list;
3180  while ((pos = hint_explained.find(delim)) != std::string::npos) {
3181  hint_list.emplace_back(hint_explained.substr(0, pos));
3182  hint_explained.erase(0, pos + delim.length());
3183  }
3184  // handling the last one
3185  hint_list.emplace_back(hint_explained.substr(0, pos));
3186 
3187  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
3188  if (agg_node) {
3189  for (std::string& hint : hint_list) {
3190  auto parsed_hint = parseHintString(hint);
3191  agg_node->addHint(parsed_hint);
3192  }
3193  }
3194  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
3195  if (project_node) {
3196  for (std::string& hint : hint_list) {
3197  auto parsed_hint = parseHintString(hint);
3198  project_node->addHint(parsed_hint);
3199  }
3200  }
3201  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
3202  if (scan_node) {
3203  for (std::string& hint : hint_list) {
3204  auto parsed_hint = parseHintString(hint);
3205  scan_node->addHint(parsed_hint);
3206  }
3207  }
3208  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
3209  if (join_node) {
3210  for (std::string& hint : hint_list) {
3211  auto parsed_hint = parseHintString(hint);
3212  join_node->addHint(parsed_hint);
3213  }
3214  }
3215 
3216  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
3217  if (compound_node) {
3218  for (std::string& hint : hint_list) {
3219  auto parsed_hint = parseHintString(hint);
3220  compound_node->addHint(parsed_hint);
3221  }
3222  }
3223  }
3224 
3225  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
3226  const auto id = node_id(crt_node);
3227  CHECK(id);
3228  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
3229  return nodes_.back();
3230  }
3231 
3232  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
3233 };
3234 
3235 } // namespace details
3236 
3237 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::buildDag(const std::string& query_ra,
3238  const bool optimize_dag) {
3239  rapidjson::Document query_ast;
3240  query_ast.Parse(query_ra.c_str());
3241  VLOG(2) << "Parsing query RA JSON: " << query_ra;
3242  if (query_ast.HasParseError()) {
3243  query_ast.GetParseError();
3244  LOG(ERROR) << "Failed to parse RA tree from Calcite (offset "
3245  << query_ast.GetErrorOffset() << "):\n"
3246  << rapidjson::GetParseError_En(query_ast.GetParseError());
3247  VLOG(1) << "Failed to parse query RA: " << query_ra;
3248  throw std::runtime_error(
3249  "Failed to parse relational algebra tree. Possible query syntax error.");
3250  }
3251  CHECK(query_ast.IsObject());
3253 
3254  return build(query_ast, nullptr, optimize_dag);
3255 }
3256 
3257 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::buildDagForSubquery(
3258  RelAlgDag& root_dag,
3259  const rapidjson::Value& query_ast) {
3260  return build(query_ast, &root_dag, true);
3261 }
3262 
3263 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::build(const rapidjson::Value& query_ast,
3264  RelAlgDag* root_dag,
3265  const bool optimize_dag) {
3266  const auto& rels = field(query_ast, "rels");
3267  CHECK(rels.IsArray());
3268 
3269  auto rel_alg_dag_ptr = std::make_unique<RelAlgDag>();
3270  auto& rel_alg_dag = *rel_alg_dag_ptr;
3271  auto& nodes = getNodes(rel_alg_dag);
3272 
3273  try {
3274  nodes = details::RelAlgDispatcher().run(rels, root_dag ? *root_dag : rel_alg_dag);
3275  } catch (const QueryNotSupported&) {
3276  throw;
3277  }
3278  CHECK(!nodes.empty());
3279  bind_inputs(nodes);
3280 
3282 
3283  if (optimize_dag) {
3284  optimizeDag(rel_alg_dag);
3285  }
3286 
3287  return rel_alg_dag_ptr;
3288 }
3289 
3291  auto const build_state = rel_alg_dag.getBuildState();
3292  if (build_state == RelAlgDag::BuildState::kBuiltOptimized) {
3293  return;
3294  }
3295 
3297  << static_cast<int>(build_state);
3298 
3299  auto& nodes = getNodes(rel_alg_dag);
3300  auto& subqueries = getSubqueries(rel_alg_dag);
3301  auto& query_hints = getQueryHints(rel_alg_dag);
3302 
3303  compute_node_hash(nodes);
3304  handle_query_hint(nodes, rel_alg_dag);
3305  mark_nops(nodes);
3306  simplify_sort(nodes);
3308  eliminate_identical_copy(nodes);
3309  fold_filters(nodes);
3310  std::vector<const RelAlgNode*> filtered_left_deep_joins;
3311  std::vector<const RelAlgNode*> left_deep_joins;
3312  for (const auto& node : nodes) {
3313  const auto left_deep_join_root = get_left_deep_join_root(node);
3314  // The filter which starts a left-deep join pattern must not be coalesced
3315  // since it contains (part of) the join condition.
3316  if (left_deep_join_root) {
3317  left_deep_joins.push_back(left_deep_join_root.get());
3318  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
3319  filtered_left_deep_joins.push_back(left_deep_join_root.get());
3320  }
3321  }
3322  }
3323  if (filtered_left_deep_joins.empty()) {
3325  }
3326  eliminate_dead_columns(nodes);
3327  eliminate_dead_subqueries(subqueries, nodes.back().get());
3328  separate_window_function_expressions(nodes, query_hints);
3330  nodes,
3331  g_cluster /* always_add_project_if_first_project_is_window_expr */,
3332  query_hints);
3333  coalesce_nodes(nodes, left_deep_joins, query_hints);
3334  CHECK(nodes.back().use_count() == 1);
3335  create_left_deep_join(nodes);
3336 
3338 }
3339 
3340 void RelAlgDag::eachNode(std::function<void(RelAlgNode const*)> const& callback) const {
3341  for (auto const& node : nodes_) {
3342  if (node) {
3343  callback(node.get());
3344  }
3345  }
3346 }
3347 
3349  for (auto& node : nodes_) {
3350  if (node) {
3351  node->resetQueryExecutionState();
3352  }
3353  }
3354 }
3355 
3356 // Return tree with depth represented by indentations.
3357 std::string tree_string(const RelAlgNode* ra, const size_t depth) {
3358  std::string result = std::string(2 * depth, ' ') + ::toString(ra) + '\n';
3359  for (size_t i = 0; i < ra->inputCount(); ++i) {
3360  result += tree_string(ra->getInput(i), depth + 1);
3361  }
3362  return result;
3363 }
3364 
3365 std::string RexSubQuery::toString(RelRexToStringConfig config) const {
3366  return cat(::typeName(this), "(", ra_->toString(config), ")");
3367 }
3368 
3369 size_t RexSubQuery::toHash() const {
3370  if (!hash_) {
3371  hash_ = typeid(RexSubQuery).hash_code();
3372  boost::hash_combine(*hash_, ra_->toHash());
3373  }
3374  return *hash_;
3375 }
3376 
3377 std::string RexInput::toString(RelRexToStringConfig config) const {
3378  const auto scan_node = dynamic_cast<const RelScan*>(node_);
3379  if (scan_node) {
3380  auto field_name = scan_node->getFieldName(getIndex());
3381  auto table_name = scan_node->getTableDescriptor()->tableName;
3382  return ::typeName(this) + "(" + table_name + "." + field_name + ")";
3383  }
3384  auto node_id_in_plan = node_->getIdInPlanTree();
3385  auto node_id_str =
3386  node_id_in_plan ? std::to_string(*node_id_in_plan) : std::to_string(node_->getId());
3387  auto node_str = config.skip_input_nodes ? "(input_node_id=" + node_id_str
3388  : "(input_node=" + node_->toString(config);
3389  return cat(::typeName(this), node_str, ", in_index=", std::to_string(getIndex()), ")");
3390 }
3391 
3392 size_t RexInput::toHash() const {
3393  if (!hash_) {
3394  hash_ = typeid(RexInput).hash_code();
3395  boost::hash_combine(*hash_, node_->toHash());
3396  boost::hash_combine(*hash_, getIndex());
3397  }
3398  return *hash_;
3399 }
3400 
3401 std::string RelCompound::toString(RelRexToStringConfig config) const {
3402  auto ret = cat(::typeName(this),
3403  ", filter_expr=",
3404  (filter_expr_ ? filter_expr_->toString(config) : "null"),
3405  ", target_exprs=");
3406  for (auto& expr : target_exprs_) {
3407  ret += expr->toString(config) + " ";
3408  }
3409  ret += ", agg_exps=";
3410  for (auto& expr : agg_exprs_) {
3411  ret += expr->toString(config) + " ";
3412  }
3413  ret += ", scalar_sources=";
3414  for (auto& expr : scalar_sources_) {
3415  ret += expr->toString(config) + " ";
3416  }
3417  return cat(ret,
3418  ", ",
3420  ", ",
3421  ", fields=",
3422  ::toString(fields_),
3423  ", is_agg=",
3425 }
3426 
3427 size_t RelCompound::toHash() const {
3428  if (!hash_) {
3429  hash_ = typeid(RelCompound).hash_code();
3430  boost::hash_combine(*hash_, filter_expr_ ? filter_expr_->toHash() : HASH_N);
3431  boost::hash_combine(*hash_, is_agg_);
3432  for (auto& target_expr : target_exprs_) {
3433  if (auto rex_scalar = dynamic_cast<const RexScalar*>(target_expr)) {
3434  boost::hash_combine(*hash_, rex_scalar->toHash());
3435  }
3436  }
3437  for (auto& agg_expr : agg_exprs_) {
3438  boost::hash_combine(*hash_, agg_expr->toHash());
3439  }
3440  for (auto& scalar_source : scalar_sources_) {
3441  boost::hash_combine(*hash_, scalar_source->toHash());
3442  }
3443  boost::hash_combine(*hash_, groupby_count_);
3444  boost::hash_combine(*hash_, ::toString(fields_));
3445  }
3446  return *hash_;
3447 }
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
Definition: RelAlgDag.h:2483
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
const size_t getGroupByCount() const
Definition: RelAlgDag.h:1342
SQLTypes to_sql_type(const std::string &type_name)
void setGlobalQueryHints(const RegisteredQueryHint &global_hints)
Definition: RelAlgDag.h:2974
std::optional< size_t > is_collected_window_function(size_t rex_hash) const
Definition: RelAlgDag.cpp:2289
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1179
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
Definition: RelAlgDag.h:1550
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
Definition: RelAlgDag.cpp:1388
RelCompound(const TableDescriptor *td, const Catalog_Namespace::Catalog *catalog)
Definition: RelAlgDag.h:1797
const RexScalar * getThen(const size_t idx) const
Definition: RelAlgDag.h:443
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
Definition: RelAlgDag.cpp:2860
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2850
void * visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:112
RexRebindReindexInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input, std::unordered_map< unsigned, unsigned > old_to_new_index_map)
Definition: RelAlgDag.cpp:106
void set_notnulls(std::vector< TargetMetaInfo > *tmis0, std::vector< bool > const &notnulls)
Definition: RelAlgDag.cpp:896
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
Definition: RelAlgDag.cpp:1591
std::unique_ptr< RexSubQuery > deepCopy() const
Definition: RelAlgDag.cpp:59
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.h:1223
JoinType
Definition: sqldefs.h:165
static std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > & getQueryHints(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:3019
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
Definition: RelAlgDag.h:2378
std::string cat(Ts &&...args)
std::optional< size_t > getOffsetForPushedDownExpr(WindowExprType type, size_t expr_offset) const
Definition: RelAlgDag.cpp:157
RexWindowFuncReplacementVisitor(std::unordered_set< size_t > &collected_window_func_hash, std::vector< std::unique_ptr< const RexScalar >> &new_rex_input_for_window_func, std::unordered_map< size_t, size_t > &window_func_to_new_rex_input_idx_map, RelProject *new_project, std::unordered_map< size_t, std::unique_ptr< const RexInput >> &new_rex_input_from_child_node)
Definition: RelAlgDag.cpp:2184
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1163
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< bool > get_notnulls(std::vector< TargetMetaInfo > const &tmis0)
Definition: RelAlgDag.cpp:882
Definition: sqltypes.h:66
std::vector< std::unique_ptr< const RexScalar > > & scalar_exprs_for_new_project_
Definition: RelAlgDag.cpp:332
size_t size() const override
Definition: RelAlgDag.h:1172
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1888
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
Definition: RelAlgDag.cpp:1891
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3401
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
size_t toHash() const override
Definition: RelAlgDag.cpp:3392
RetType visitInput(const RexInput *rex_input) const final
Definition: RelAlgDag.cpp:2207
std::vector< RexInput > RANodeOutput
Definition: RelAlgDag.h:3066
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
Definition: RelAlgDag.cpp:1423
const RexScalar * getElse() const
Definition: RelAlgDag.h:448
static thread_local unsigned crt_id_
Definition: RelAlgDag.h:961
std::unique_ptr< const RexScalar > visitOperator(const RexOperator *rex_operator) const override
Definition: RelAlgDag.cpp:297
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
Definition: RelAlgDag.cpp:1093
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2815
RelProject(const TableDescriptor *td, const Catalog_Namespace::Catalog *catalog)
Definition: RelAlgDag.h:1132
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
RexScalar const * copyAndRedirectSource(RexScalar const *, size_t input_idx) const
Definition: RelAlgDag.cpp:944
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:685
SQLAgg to_agg_kind(const std::string &agg_name)
std::shared_ptr< RelLogicalUnion > dispatchUnion(const rapidjson::Value &logical_union_ra)
Definition: RelAlgDag.cpp:3086
#define LOG(tag)
Definition: Logger.h:285
std::vector< std::string > TargetColumnList
Definition: RelAlgDag.h:2042
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1285
const SQLTypeInfo & getType() const
Definition: RelAlgDag.h:284
std::unique_ptr< const RexScalar > get_new_rex_input(size_t rex_idx) const
Definition: RelAlgDag.cpp:2297
size_t size() const
Definition: RelAlgDag.h:270
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1292
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2828
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
const RexScalar * getOperand(const size_t idx) const
Definition: RelAlgDag.h:272
std::vector< const Rex * > col_inputs_
Definition: RelAlgDag.h:2375
std::string json_node_to_string(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:962
bool hasEquivCollationOf(const RelSort &that) const
Definition: RelAlgDag.cpp:802
JoinType to_join_type(const std::string &join_type_name)
Definition: RelAlgDag.cpp:1370
void resetQueryExecutionState()
Definition: RelAlgDag.cpp:3348
std::pair< bool, bool > need_pushdown_generic_expr(RelProject const *window_func_project_node)
Definition: RelAlgDag.cpp:2517
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.h:2986
std::string join(T const &container, std::string const &delim)
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1216
#define UNREACHABLE()
Definition: Logger.h:337
void handle_query_hint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDag &rel_alg_dag) noexcept
Definition: RelAlgDag.cpp:1539
bool hint_applied_
Definition: RelAlgDag.h:1552
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
RexInput()
Definition: RelAlgDag.h:384
#define CHECK_GE(x, y)
Definition: Logger.h:306
std::optional< size_t > getIdInPlanTree() const
Definition: RelAlgDag.h:884
SortDirection
Definition: RelAlgDag.h:530
void registerQueryHint(const RelAlgNode *node, const RegisteredQueryHint &query_hint)
Definition: RelAlgDag.h:2932
std::vector< std::string > fields_
Definition: RelAlgDag.h:1312
const std::pair< const Catalog_Namespace::Catalog *, const TableDescriptor * > getCatalogAndTableFromScanNode(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2742
void pushDownExpressionInWindowFunction(const RexWindowFunctionOperator *window_expr) const
Definition: RelAlgDag.cpp:178
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1524
Definition: sqldefs.h:48
std::unique_ptr< const RexScalar > visitCase(const RexCase *rex_case) const override
Definition: RelAlgDag.cpp:278
const RexScalar * getWhen(const size_t idx) const
Definition: RelAlgDag.h:438
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
Definition: RelAlgDag.cpp:1317
const RegisteredQueryHint & getGlobalHints() const
Definition: RelAlgDag.h:2972
#define TRANSIENT_DICT_DB_ID
Definition: DbObjectKeys.h:25
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
Definition: RelAlgDag.cpp:364
void propagate_hints_to_new_project(std::shared_ptr< RelProject > prev_node, std::shared_ptr< RelProject > new_node, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2317
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
Definition: RelAlgDag.cpp:469
std::unique_ptr< const RexScalar > defaultResult() const override
Definition: RelAlgDag.cpp:329
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1416
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:1330
std::unordered_map< size_t, const RexScalar * > & collected_window_func_
Definition: RelAlgDag.cpp:2178
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
Definition: RelAlgDag.h:1920
void * visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:76
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
Definition: RelAlgDag.cpp:973
static std::vector< std::shared_ptr< RexSubQuery > > & getSubqueries(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:3013
RexInputSet aggregateResult(const RexInputSet &aggregate, const RexInputSet &next_result) const override
Definition: RelAlgDag.cpp:2484
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
Definition: RelAlgDag.cpp:1444
std::unique_ptr< const RexScalar > visitLiteral(const RexLiteral *rex_literal) const override
Definition: RelAlgDag.cpp:264
std::string to_string(char const *&&v)
void add_window_function_pre_project(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const bool always_add_project_if_first_project_is_window_expr, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2555
const std::string getFieldName(const size_t i) const
Definition: RelAlgDag.h:996
std::unique_ptr< const RexScalar > visitSubQuery(const RexSubQuery *rex_subquery) const override
Definition: RelAlgDag.cpp:273
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
Definition: RelAlgDag.h:2027
constexpr double a
Definition: Utm.h:32
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2888
RelLogicalValues()=default
std::vector< std::unique_ptr< const RexScalar > > & new_rex_input_for_window_func_
Definition: RelAlgDag.cpp:2308
std::unordered_set< RexInput > RexInputSet
Definition: RelAlgDag.cpp:2475
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
std::string to_string() const
Definition: sqltypes.h:547
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
virtual std::string toString(RelRexToStringConfig config) const =0
unsigned getIndex() const
Definition: RelAlgDag.h:77
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2361
void markAsNop()
Definition: RelAlgDag.h:932
static SysCatalog & instance()
Definition: SysCatalog.h:343
bool aggregateResult(const bool &aggregate, const bool &next_result) const final
Definition: RelAlgDag.cpp:1928
static auto const HASH_N
Definition: RelAlgDag.h:44
SQLOps getOperator() const
Definition: RelAlgDag.h:282
void setExecutionResult(const ExecutionResultShPtr result)
Definition: RelAlgDag.cpp:50
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2969
std::unordered_map< size_t, std::unique_ptr< const RexInput > > & new_rex_input_from_child_node_
Definition: RelAlgDag.cpp:2313
unsigned getId() const
Definition: RelAlgDag.h:880
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
Definition: RelAlgDag.cpp:763
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1243
static std::unique_ptr< RelAlgDag > buildDagForSubquery(RelAlgDag &root_dag, const rapidjson::Value &query_ast)
Definition: RelAlgDag.cpp:3257
std::unique_ptr< const RexScalar > visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:252
static QueryHint translateQueryHint(const std::string &hint_name)
Definition: QueryHint.h:332
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
#define CHECK_NE(x, y)
Definition: Logger.h:302
bool isRenaming() const
Definition: RelAlgDag.cpp:512
void setIndex(const unsigned in_index) const
Definition: RelAlgDag.h:79
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1439
size_t toHash() const override
Definition: RelAlgDag.cpp:854
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:1991
std::vector< std::string > fields_
Definition: RelAlgDag.h:1917
SQLOps to_sql_op(const std::string &op_str)
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1446
void set_scale(int s)
Definition: sqltypes.h:498
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:39
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1553
std::vector< std::unique_ptr< const RexScalar > > copyRexScalars(std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
Definition: RelAlgDag.cpp:626
std::vector< std::shared_ptr< const RelAlgNode >> RelAlgInputs
Definition: RelAlgDag.h:316
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
Definition: RelAlgDag.h:1311
RetType visitOperator(const RexOperator *rex_operator) const final
Definition: RelAlgDag.cpp:2231
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:54
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1269
size_t branchCount() const
Definition: RelAlgDag.h:436
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:892
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
Definition: RelAlgDag.cpp:1064
Checked json field retrieval.
void * visitCase(const RexCase *rex_case) const final
Definition: RelAlgDag.cpp:2132
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.cpp:3232
std::unique_ptr< const RexScalar > filter_
Definition: RelAlgDag.h:1745
bool isSimple() const
Definition: RelAlgDag.h:1159
std::vector< const Rex * > remapTargetPointers(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
Definition: RelAlgDag.cpp:637
std::optional< size_t > hash_
Definition: RelAlgDag.h:62
std::string toString(const ExecutorDeviceType &device_type)
std::vector< const Rex * > target_exprs_
Definition: RelAlgDag.h:1922
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
Definition: RelAlgDag.cpp:1498
std::optional< size_t > hash_
Definition: RelAlgDag.h:955
unsigned getId() const
Definition: RelAlgDag.cpp:63
const RelAlgNode * node_
Definition: RelAlgDag.h:410
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
Definition: RelAlgDag.h:915
bool find_generic_expr_in_window_func(RexWindowFunctionOperator const *window_expr, bool &has_generic_expr_in_window_func)
Definition: RelAlgDag.cpp:2493
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1470
RexInputSet visitInput(const RexInput *input) const override
Definition: RelAlgDag.cpp:2479
std::vector< TargetMetaInfo > getCompatibleMetainfoTypes() const
Definition: RelAlgDag.cpp:911
static std::unique_ptr< RelAlgDag > buildDag(const std::string &query_ra, const bool optimize_dag)
Definition: RelAlgDag.cpp:3237
std::string tree_string(const RelAlgNode *ra, const size_t depth)
Definition: RelAlgDag.cpp:3357
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
Definition: RelAlgDag.h:1443
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1185
void compute_node_hash(const std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: RelAlgDag.cpp:1577
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1911
void setTableFuncInputs(std::vector< std::unique_ptr< const RexScalar >> &&exprs)
Definition: RelAlgDag.cpp:729
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:536
size_t toHash() const override
Definition: RelAlgDag.h:460
PushDownGenericExpressionInWindowFunction(std::shared_ptr< RelProject > new_project, std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs_for_new_project, std::vector< std::string > &fields_for_new_project, std::unordered_map< size_t, size_t > &expr_offset_cache)
Definition: RelAlgDag.cpp:128
const RexScalar * getProjectAt(const size_t idx) const
Definition: RelAlgDag.h:1204
bool hint_applied_
Definition: RelAlgDag.h:1445
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:69
Definition: sqltypes.h:70
static RegisteredQueryHint defaults()
Definition: QueryHint.h:329
static std::unique_ptr< RelAlgDag > build(const rapidjson::Value &query_ast, RelAlgDag *root_dag, const bool optimize_dag)
Definition: RelAlgDag.cpp:3263
int32_t countRexLiteralArgs() const
Definition: RelAlgDag.cpp:697
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1924
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
Definition: RelAlgDag.cpp:1608
const ConstRexScalarPtrVector & getPartitionKeys() const
Definition: RelAlgDag.h:627
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2775
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:304
const std::unordered_map< unsigned, unsigned > mapping_
Definition: RelAlgDag.cpp:121
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1314
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
Definition: RelAlgDag.cpp:2722
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:543
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
Definition: RelAlgDag.h:2530
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
Definition: RelAlgDag.h:1916
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1083
std::unique_ptr< const RexScalar > filter_expr_
Definition: RelAlgDag.h:1914
static std::vector< std::shared_ptr< RelAlgNode > > & getNodes(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:3009
void setSourceNode(const RelAlgNode *node) const
Definition: RelAlgDag.h:394
bool hasWindowFunctionExpr() const
Definition: RelAlgDag.cpp:2761
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
Definition: RelAlgDag.cpp:2925
std::vector< ElementType >::const_iterator Super
Definition: RelAlgDag.cpp:1810
RelFilter()=default
std::vector< std::unique_ptr< const RexAgg > > copyAggExprs(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
Definition: RelAlgDag.cpp:616
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:979
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
Definition: RelAlgDag.cpp:1305
std::unordered_map< QueryHint, ExplainedQueryHint > Hints
Definition: QueryHint.h:355
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
Definition: RelAlgDag.h:389
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:850
size_t toHash() const override
Definition: RelAlgDag.cpp:3369
std::string typeName(const T *v)
Definition: toString.h:103
ExplainedQueryHint parseHintString(std::string &hint_string)
Definition: RelAlgDag.cpp:3116
SqlWindowFunctionKind
Definition: sqldefs.h:120
void * visitOperator(const RexOperator *rex_operator) const final
Definition: RelAlgDag.cpp:2110
Definition: sqldefs.h:52
void eachNode(std::function< void(RelAlgNode const *)> const &) const
Definition: RelAlgDag.cpp:3340
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3365
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
Definition: RelAlgDag.cpp:2903
const std::vector< std::string > & getFields() const
Definition: RelAlgDag.h:1218
std::unique_ptr< const RexScalar > visitRef(const RexRef *rex_ref) const override
Definition: RelAlgDag.cpp:269
std::string getFieldName(const size_t i) const
Definition: RelAlgDag.cpp:862
static void optimizeDag(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.cpp:3290
bool g_enable_watchdog false
Definition: Execute.cpp:79
void set_notnull(bool n)
Definition: sqltypes.h:500
#define CHECK(condition)
Definition: Logger.h:291
const ConstRexScalarPtrVector & getOrderKeys() const
Definition: RelAlgDag.h:637
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1347
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
Definition: RelAlgDag.cpp:1628
bool g_enable_union
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints) noexcept
Definition: RelAlgDag.cpp:1651
bool g_cluster
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
Definition: RelAlgDag.cpp:95
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
Definition: RelAlgDag.cpp:3225
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:527
void getRelAlgHints(const rapidjson::Value &json_node, std::shared_ptr< RelAlgNode > node)
Definition: RelAlgDag.cpp:3174
virtual size_t toHash() const =0
SortDirection parse_sort_direction(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1173
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1197
Common Enum definitions for SQL processing.
bool is_dict_encoded_string() const
Definition: sqltypes.h:632
Definition: sqltypes.h:62
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3377
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
Definition: RelAlgDag.cpp:71
std::vector< std::unique_ptr< const RexScalar >> RowValues
Definition: RelAlgDag.h:2389
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1484
RetType visitCase(const RexCase *rex_case) const final
Definition: RelAlgDag.cpp:2258
const size_t inputCount() const
Definition: RelAlgDag.h:890
string name
Definition: setup.in.py:72
constexpr double n
Definition: Utm.h:38
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:2736
WindowFunctionCollector(std::unordered_map< size_t, const RexScalar * > &collected_window_func, bool only_add_window_expr)
Definition: RelAlgDag.cpp:2102
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:388
unsigned node_id(const rapidjson::Value &ra_node) noexcept
Definition: RelAlgDag.cpp:957
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
size_t size() const override
Definition: RelAlgDag.cpp:846
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
Definition: RelAlgDag.cpp:752
std::unordered_map< size_t, size_t > & window_func_to_new_rex_input_idx_map_
Definition: RelAlgDag.cpp:2310
RelAlgInputs getRelAlgInputs(const rapidjson::Value &node)
Definition: RelAlgDag.cpp:3094
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2754
RelTableFunction()=default
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
Definition: RelAlgDag.cpp:3039
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:140
size_t toHash() const override
Definition: RelAlgDag.cpp:3427
NullSortedPosition
Definition: RelAlgDag.h:532
RANodeOutput get_node_output(const RelAlgNode *ra_node)
Definition: RelAlgDag.cpp:370
virtual size_t toHash() const =0
#define VLOG(n)
Definition: Logger.h:387
BuildState getBuildState() const
Definition: RelAlgDag.h:2507
RelAlgInputs inputs_
Definition: RelAlgDag.h:952
void reset_table_function_inputs(std::vector< const Rex * > &column_inputs, const std::vector< std::unique_ptr< const RexScalar >> &old_table_func_inputs, const std::vector< std::unique_ptr< const RexScalar >> &new_table_func_inputs)
Definition: RelAlgDag.cpp:710
void set_precision(int d)
Definition: sqltypes.h:496
std::pair< std::string, std::string > getKVOptionPair(std::string &str, size_t &pos)
Definition: RelAlgDag.cpp:3106
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool isIdentity() const
Definition: RelAlgDag.cpp:439
bool same_ignoring_notnull(SQLTypeInfo ti0, SQLTypeInfo ti1)
Definition: RelAlgDag.cpp:890
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
Definition: RelAlgDag.h:2381
static void setBuildState(RelAlgDag &rel_alg_dag, const RelAlgDag::BuildState build_state)
Definition: RelAlgDag.h:3023
static void resetRelAlgFirstId() noexcept
Definition: RelAlgDag.cpp:46