OmniSciDB  ab4938a6a3
RelAlgDagBuilder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <iterator>
20 #include <memory>
21 #include <unordered_map>
22 
23 #include <rapidjson/document.h>
24 #include <boost/core/noncopyable.hpp>
25 
26 #include "Catalog/Catalog.h"
31 #include "Utils/FsiUtils.h"
32 
33 using ColumnNameList = std::vector<std::string>;
34 
// Abstract root of the expression hierarchy declared in this header.
// Every concrete expression must be able to render itself as text.
class Rex {
 public:
  virtual ~Rex() = default;

  // Returns a textual rendering of the expression.
  virtual std::string toString() const = 0;
};
41 
42 class RexScalar : public Rex {};
43 
44 // For internal use of the abstract interpreter only. The result after abstract
45 // interpretation will not have any references to 'RexAbstractInput' objects.
46 class RexAbstractInput : public RexScalar {
47  public:
48  RexAbstractInput(const unsigned in_index) : in_index_(in_index) {}
49 
50  unsigned getIndex() const { return in_index_; }
51 
52  void setIndex(const unsigned in_index) const { in_index_ = in_index; }
53 
54  std::string toString() const override {
55  return "(RexAbstractInput " + std::to_string(in_index_) + ")";
56  }
57 
58  private:
59  mutable unsigned in_index_;
60 };
61 
62 class RexLiteral : public RexScalar {
63  public:
64  RexLiteral(const int64_t val,
65  const SQLTypes type,
66  const SQLTypes target_type,
67  const unsigned scale,
68  const unsigned precision,
69  const unsigned type_scale,
70  const unsigned type_precision)
71  : literal_(val)
72  , type_(type)
73  , target_type_(target_type)
74  , scale_(scale)
75  , precision_(precision)
76  , type_scale_(type_scale)
77  , type_precision_(type_precision) {
78  CHECK(type == kDECIMAL || type == kINTERVAL_DAY_TIME ||
79  type == kINTERVAL_YEAR_MONTH || is_datetime(type));
80  }
81 
82  RexLiteral(const double val,
83  const SQLTypes type,
84  const SQLTypes target_type,
85  const unsigned scale,
86  const unsigned precision,
87  const unsigned type_scale,
88  const unsigned type_precision)
89  : literal_(val)
90  , type_(type)
91  , target_type_(target_type)
92  , scale_(scale)
93  , precision_(precision)
94  , type_scale_(type_scale)
95  , type_precision_(type_precision) {
96  CHECK_EQ(kDOUBLE, type);
97  }
98 
99  RexLiteral(const std::string& val,
100  const SQLTypes type,
101  const SQLTypes target_type,
102  const unsigned scale,
103  const unsigned precision,
104  const unsigned type_scale,
105  const unsigned type_precision)
106  : literal_(val)
107  , type_(type)
108  , target_type_(target_type)
109  , scale_(scale)
110  , precision_(precision)
111  , type_scale_(type_scale)
112  , type_precision_(type_precision) {
113  CHECK_EQ(kTEXT, type);
114  }
115 
116  RexLiteral(const bool val,
117  const SQLTypes type,
118  const SQLTypes target_type,
119  const unsigned scale,
120  const unsigned precision,
121  const unsigned type_scale,
122  const unsigned type_precision)
123  : literal_(val)
124  , type_(type)
125  , target_type_(target_type)
126  , scale_(scale)
127  , precision_(precision)
128  , type_scale_(type_scale)
129  , type_precision_(type_precision) {
130  CHECK_EQ(kBOOLEAN, type);
131  }
132 
133  RexLiteral(const SQLTypes target_type)
134  : literal_(nullptr)
135  , type_(kNULLT)
136  , target_type_(target_type)
137  , scale_(0)
138  , precision_(0)
139  , type_scale_(0)
140  , type_precision_(0) {}
141 
142  template <class T>
143  T getVal() const {
144  const auto ptr = boost::get<T>(&literal_);
145  CHECK(ptr);
146  return *ptr;
147  }
148 
149  SQLTypes getType() const { return type_; }
150 
151  SQLTypes getTargetType() const { return target_type_; }
152 
153  unsigned getScale() const { return scale_; }
154 
155  unsigned getPrecision() const { return precision_; }
156 
157  unsigned getTypeScale() const { return type_scale_; }
158 
159  unsigned getTypePrecision() const { return type_precision_; }
160 
161  std::string toString() const override {
162  return "(RexLiteral " + boost::lexical_cast<std::string>(literal_) + ")";
163  }
164 
165  std::unique_ptr<RexLiteral> deepCopy() const {
166  switch (literal_.which()) {
167  case 0: {
168  int64_t val = getVal<int64_t>();
169  return std::make_unique<RexLiteral>(
170  val, type_, target_type_, scale_, precision_, type_scale_, type_precision_);
171  }
172  case 1: {
173  double val = getVal<double>();
174  return std::make_unique<RexLiteral>(
175  val, type_, target_type_, scale_, precision_, type_scale_, type_precision_);
176  }
177  case 2: {
178  auto val = getVal<std::string>();
179  return std::make_unique<RexLiteral>(
180  val, type_, target_type_, scale_, precision_, type_scale_, type_precision_);
181  }
182  case 3: {
183  bool val = getVal<bool>();
184  return std::make_unique<RexLiteral>(
185  val, type_, target_type_, scale_, precision_, type_scale_, type_precision_);
186  }
187  case 4: {
188  return std::make_unique<RexLiteral>(target_type_);
189  }
190  default:
191  CHECK(false);
192  }
193  return nullptr;
194  }
195 
196  private:
197  const boost::variant<int64_t, double, std::string, bool, void*> literal_;
200  const unsigned scale_;
201  const unsigned precision_;
202  const unsigned type_scale_;
203  const unsigned type_precision_;
204 };
205 
206 using RexLiteralArray = std::vector<RexLiteral>;
207 using TupleContentsArray = std::vector<RexLiteralArray>;
208 
209 class RexOperator : public RexScalar {
210  public:
211  RexOperator(const SQLOps op,
212  std::vector<std::unique_ptr<const RexScalar>>& operands,
213  const SQLTypeInfo& type)
214  : op_(op), operands_(std::move(operands)), type_(type) {}
215 
216  virtual std::unique_ptr<const RexOperator> getDisambiguated(
217  std::vector<std::unique_ptr<const RexScalar>>& operands) const {
218  return std::unique_ptr<const RexOperator>(new RexOperator(op_, operands, type_));
219  }
220 
221  size_t size() const { return operands_.size(); }
222 
223  const RexScalar* getOperand(const size_t idx) const {
224  CHECK(idx < operands_.size());
225  return operands_[idx].get();
226  }
227 
228  const RexScalar* getOperandAndRelease(const size_t idx) const {
229  CHECK(idx < operands_.size());
230  return operands_[idx].release();
231  }
232 
233  SQLOps getOperator() const { return op_; }
234 
235  const SQLTypeInfo& getType() const { return type_; }
236 
237  std::string toString() const override {
238  std::string result = "(RexOperator " + std::to_string(op_);
239  for (const auto& operand : operands_) {
240  result += " " + operand->toString();
241  }
242  return result + ")";
243  };
244 
245  protected:
246  const SQLOps op_;
247  mutable std::vector<std::unique_ptr<const RexScalar>> operands_;
249 };
250 
251 class RelAlgNode;
252 using RelAlgInputs = std::vector<std::shared_ptr<const RelAlgNode>>;
253 
254 class ExecutionResult;
255 
256 class RexSubQuery : public RexScalar {
257  public:
258  RexSubQuery(const std::shared_ptr<const RelAlgNode> ra)
259  : type_(new SQLTypeInfo(kNULLT, false))
260  , result_(new std::shared_ptr<const ExecutionResult>(nullptr))
261  , ra_(ra) {}
262 
263  // for deep copy
264  RexSubQuery(std::shared_ptr<SQLTypeInfo> type,
265  std::shared_ptr<std::shared_ptr<const ExecutionResult>> result,
266  const std::shared_ptr<const RelAlgNode> ra)
267  : type_(type), result_(result), ra_(ra) {}
268 
269  RexSubQuery(const RexSubQuery&) = delete;
270 
271  RexSubQuery& operator=(const RexSubQuery&) = delete;
272 
273  RexSubQuery(RexSubQuery&&) = delete;
274 
275  RexSubQuery& operator=(RexSubQuery&&) = delete;
276 
277  const SQLTypeInfo& getType() const {
278  CHECK_NE(kNULLT, type_->get_type());
279  return *(type_.get());
280  }
281 
282  std::shared_ptr<const ExecutionResult> getExecutionResult() const {
283  CHECK(result_);
284  CHECK(result_.get());
285  return *(result_.get());
286  }
287 
288  const RelAlgNode* getRelAlg() const { return ra_.get(); }
289 
290  std::string toString() const override {
291  return "(RexSubQuery " + std::to_string(reinterpret_cast<const uint64_t>(this)) + ")";
292  }
293 
294  std::unique_ptr<RexSubQuery> deepCopy() const;
295 
296  void setExecutionResult(const std::shared_ptr<const ExecutionResult> result);
297 
298  private:
299  std::shared_ptr<SQLTypeInfo> type_;
300  std::shared_ptr<std::shared_ptr<const ExecutionResult>> result_;
301  const std::shared_ptr<const RelAlgNode> ra_;
302 };
303 
304 // The actual input node understood by the Executor.
305 // The in_index_ is relative to the output of node_.
306 class RexInput : public RexAbstractInput {
307  public:
308  RexInput(const RelAlgNode* node, const unsigned in_index)
309  : RexAbstractInput(in_index), node_(node) {}
310 
311  const RelAlgNode* getSourceNode() const { return node_; }
312 
313  // This isn't great, but we need it for coalescing nodes to Compound since
314  // RexInput in descendents need to be rebound to the newly created Compound.
315  // Maybe create a fresh RA tree with the required changes after each coalescing?
316  void setSourceNode(const RelAlgNode* node) const { node_ = node; }
317 
318  bool operator==(const RexInput& that) const {
319  return getSourceNode() == that.getSourceNode() && getIndex() == that.getIndex();
320  }
321 
322  std::string toString() const override {
323  return "(RexInput " + std::to_string(getIndex()) + " " +
324  std::to_string(reinterpret_cast<const uint64_t>(node_)) + ")";
325  }
326 
327  std::unique_ptr<RexInput> deepCopy() const {
328  return std::make_unique<RexInput>(node_, getIndex());
329  }
330 
331  private:
332  mutable const RelAlgNode* node_;
333 };
334 
335 namespace std {
336 
337 template <>
338 struct hash<RexInput> {
339  size_t operator()(const RexInput& rex_in) const {
340  auto addr = rex_in.getSourceNode();
341  return *reinterpret_cast<const size_t*>(may_alias_ptr(&addr)) ^ rex_in.getIndex();
342  }
343 };
344 
345 } // namespace std
346 
347 // Not a real node created by Calcite. Created by us because CaseExpr is a node in our
348 // Analyzer.
349 class RexCase : public RexScalar {
350  public:
351  RexCase(std::vector<std::pair<std::unique_ptr<const RexScalar>,
352  std::unique_ptr<const RexScalar>>>& expr_pair_list,
353  std::unique_ptr<const RexScalar>& else_expr)
354  : expr_pair_list_(std::move(expr_pair_list)), else_expr_(std::move(else_expr)) {}
355 
356  size_t branchCount() const { return expr_pair_list_.size(); }
357 
358  const RexScalar* getWhen(const size_t idx) const {
359  CHECK(idx < expr_pair_list_.size());
360  return expr_pair_list_[idx].first.get();
361  }
362 
363  const RexScalar* getThen(const size_t idx) const {
364  CHECK(idx < expr_pair_list_.size());
365  return expr_pair_list_[idx].second.get();
366  }
367 
368  const RexScalar* getElse() const { return else_expr_.get(); }
369 
370  std::string toString() const override {
371  std::string ret = "(RexCase";
372  for (const auto& expr_pair : expr_pair_list_) {
373  ret += " " + expr_pair.first->toString() + " -> " + expr_pair.second->toString();
374  }
375  if (else_expr_) {
376  ret += " else " + else_expr_->toString();
377  }
378  ret += ")";
379  return ret;
380  }
381 
382  private:
383  std::vector<
384  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
386  std::unique_ptr<const RexScalar> else_expr_;
387 };
388 
390  public:
391  using ConstRexScalarPtr = std::unique_ptr<const RexScalar>;
392  using ConstRexScalarPtrVector = std::vector<ConstRexScalarPtr>;
393 
394  RexFunctionOperator(const std::string& name,
395  ConstRexScalarPtrVector& operands,
396  const SQLTypeInfo& ti)
397  : RexOperator(kFUNCTION, operands, ti), name_(name) {}
398 
399  std::unique_ptr<const RexOperator> getDisambiguated(
400  std::vector<std::unique_ptr<const RexScalar>>& operands) const override {
401  return std::unique_ptr<const RexOperator>(
402  new RexFunctionOperator(name_, operands, getType()));
403  }
404 
405  const std::string& getName() const { return name_; }
406 
407  std::string toString() const override {
408  auto result = "(RexFunctionOperator " + name_;
409  for (const auto& operand : operands_) {
410  result += (" " + operand->toString());
411  }
412  return result + ")";
413  }
414 
415  private:
416  const std::string name_;
417 };
418 
420 
// Position of NULL values within a sorted sequence.
enum class NullSortedPosition {
  First,
  Last,
};
422 
423 class SortField {
424  public:
425  SortField(const size_t field,
426  const SortDirection sort_dir,
427  const NullSortedPosition nulls_pos)
428  : field_(field), sort_dir_(sort_dir), nulls_pos_(nulls_pos) {}
429 
430  bool operator==(const SortField& that) const {
431  return field_ == that.field_ && sort_dir_ == that.sort_dir_ &&
432  nulls_pos_ == that.nulls_pos_;
433  }
434 
435  size_t getField() const { return field_; }
436 
437  SortDirection getSortDir() const { return sort_dir_; }
438 
439  NullSortedPosition getNullsPosition() const { return nulls_pos_; }
440 
441  std::string toString() const {
442  return "(" + std::to_string(field_) + " " +
443  (sort_dir_ == SortDirection::Ascending ? "asc" : "desc") + " " +
444  (nulls_pos_ == NullSortedPosition::First ? "nulls_first" : "nulls_last") + ")";
445  }
446 
447  private:
448  const size_t field_;
451 };
452 
454  public:
455  struct RexWindowBound {
456  bool unbounded;
457  bool preceding;
458  bool following;
460  std::shared_ptr<const RexScalar> offset;
462  };
463 
465  ConstRexScalarPtrVector& operands,
466  ConstRexScalarPtrVector& partition_keys,
467  ConstRexScalarPtrVector& order_keys,
468  const std::vector<SortField> collation,
469  const RexWindowBound& lower_bound,
470  const RexWindowBound& upper_bound,
471  const bool is_rows,
472  const SQLTypeInfo& ti)
473  : RexFunctionOperator(sql_window_function_to_str(kind), operands, ti)
474  , kind_(kind)
475  , partition_keys_(std::move(partition_keys))
476  , order_keys_(std::move(order_keys))
477  , collation_(collation)
478  , lower_bound_(lower_bound)
479  , upper_bound_(upper_bound)
480  , is_rows_(is_rows) {}
481 
482  SqlWindowFunctionKind getKind() const { return kind_; }
483 
484  const ConstRexScalarPtrVector& getPartitionKeys() const { return partition_keys_; }
485 
486  ConstRexScalarPtrVector getPartitionKeysAndRelease() const {
487  return std::move(partition_keys_);
488  }
489 
490  ConstRexScalarPtrVector getOrderKeysAndRelease() const {
491  return std::move(order_keys_);
492  }
493 
494  const ConstRexScalarPtrVector& getOrderKeys() const { return order_keys_; }
495 
496  const std::vector<SortField>& getCollation() const { return collation_; }
497 
498  const RexWindowBound& getLowerBound() const { return lower_bound_; }
499 
500  const RexWindowBound& getUpperBound() const { return upper_bound_; }
501 
502  bool isRows() const { return is_rows_; }
503 
504  std::unique_ptr<const RexOperator> disambiguatedOperands(
505  ConstRexScalarPtrVector& operands,
506  ConstRexScalarPtrVector& partition_keys,
507  ConstRexScalarPtrVector& order_keys,
508  const std::vector<SortField>& collation) const {
509  return std::unique_ptr<const RexOperator>(
510  new RexWindowFunctionOperator(kind_,
511  operands,
512  partition_keys,
513  order_keys,
514  collation,
515  getLowerBound(),
516  getUpperBound(),
517  isRows(),
518  getType()));
519  }
520 
521  std::string toString() const override {
522  auto result = "(RexWindowFunctionOperator " + getName();
523  for (const auto& operand : operands_) {
524  result += (" " + operand->toString());
525  }
526  result += " partition[";
527  for (const auto& partition_key : partition_keys_) {
528  result += (" " + partition_key->toString());
529  }
530  result += "]";
531  result += " order[";
532  for (const auto& order_key : order_keys_) {
533  result += (" " + order_key->toString());
534  }
535  result += "]";
536  return result + ")";
537  }
538 
539  private:
541  mutable ConstRexScalarPtrVector partition_keys_;
542  mutable ConstRexScalarPtrVector order_keys_;
543  const std::vector<SortField> collation_;
546  const bool is_rows_;
547 };
548 
549 // Not a real node created by Calcite. Created by us because the targets of a query
550 // should reference the group by expressions instead of creating completely new ones.
551 class RexRef : public RexScalar {
552  public:
553  RexRef(const size_t index) : index_(index) {}
554 
555  size_t getIndex() const { return index_; }
556 
557  std::string toString() const override {
558  return "(RexRef " + std::to_string(index_) + ")";
559  }
560 
561  std::unique_ptr<RexRef> deepCopy() const { return std::make_unique<RexRef>(index_); }
562 
563  private:
564  const size_t index_;
565 };
566 
567 class RexAgg : public Rex {
568  public:
569  RexAgg(const SQLAgg agg,
570  const bool distinct,
571  const SQLTypeInfo& type,
572  const std::vector<size_t>& operands)
573  : agg_(agg), distinct_(distinct), type_(type), operands_(operands) {}
574 
575  std::string toString() const override {
576  auto result = "(RexAgg " + std::to_string(agg_) + " " + std::to_string(distinct_) +
577  " " + type_.get_type_name() + " " + type_.get_compression_name();
578  for (auto operand : operands_) {
579  result += " " + std::to_string(operand);
580  }
581  return result + ")";
582  }
583 
584  SQLAgg getKind() const { return agg_; }
585 
586  bool isDistinct() const { return distinct_; }
587 
588  size_t size() const { return operands_.size(); }
589 
590  size_t getOperand(size_t idx) const { return operands_[idx]; }
591 
592  const SQLTypeInfo& getType() const { return type_; }
593 
594  std::unique_ptr<RexAgg> deepCopy() const {
595  return std::make_unique<RexAgg>(agg_, distinct_, type_, operands_);
596  }
597 
598  private:
599  const SQLAgg agg_;
600  const bool distinct_;
602  const std::vector<size_t> operands_;
603 };
604 
605 class RelAlgNode {
606  public:
608  : inputs_(std::move(inputs))
609  , id_(crt_id_++)
610  , context_data_(nullptr)
611  , is_nop_(false) {}
612 
613  virtual ~RelAlgNode() {}
614 
616  context_data_ = nullptr;
617  targets_metainfo_ = {};
618  }
619 
620  void setContextData(const void* context_data) const {
621  CHECK(!context_data_);
622  context_data_ = context_data;
623  }
624 
625  void setOutputMetainfo(const std::vector<TargetMetaInfo>& targets_metainfo) const {
626  targets_metainfo_ = targets_metainfo;
627  }
628 
629  const std::vector<TargetMetaInfo>& getOutputMetainfo() const {
630  return targets_metainfo_;
631  }
632 
633  unsigned getId() const { return id_; }
634 
635  bool hasContextData() const { return !(context_data_ == nullptr); }
636 
637  const void* getContextData() const {
638  CHECK(context_data_);
639  return context_data_;
640  }
641 
642  const size_t inputCount() const { return inputs_.size(); }
643 
644  const RelAlgNode* getInput(const size_t idx) const {
645  CHECK_LT(idx, inputs_.size());
646  return inputs_[idx].get();
647  }
648 
649  std::shared_ptr<const RelAlgNode> getAndOwnInput(const size_t idx) const {
650  CHECK_LT(idx, inputs_.size());
651  return inputs_[idx];
652  }
653 
654  void addManagedInput(std::shared_ptr<const RelAlgNode> input) {
655  inputs_.push_back(input);
656  }
657 
658  bool hasInput(const RelAlgNode* needle) const {
659  for (auto& input_ptr : inputs_) {
660  if (input_ptr.get() == needle) {
661  return true;
662  }
663  }
664  return false;
665  }
666 
667  virtual void replaceInput(std::shared_ptr<const RelAlgNode> old_input,
668  std::shared_ptr<const RelAlgNode> input) {
669  for (auto& input_ptr : inputs_) {
670  if (input_ptr == old_input) {
671  input_ptr = input;
672  break;
673  }
674  }
675  }
676 
677  bool isNop() const { return is_nop_; }
678 
679  void markAsNop() { is_nop_ = true; }
680 
681  virtual std::string toString() const = 0;
682 
683  virtual size_t size() const = 0;
684 
685  virtual std::shared_ptr<RelAlgNode> deepCopy() const = 0;
686 
687  static void resetRelAlgFirstId() noexcept;
688 
689  protected:
691  const unsigned id_;
692 
693  private:
694  mutable const void* context_data_;
695  bool is_nop_;
696  mutable std::vector<TargetMetaInfo> targets_metainfo_;
697  static thread_local unsigned crt_id_;
698 };
699 
700 class RelScan : public RelAlgNode {
701  public:
702  RelScan(const TableDescriptor* td, const std::vector<std::string>& field_names)
703  : td_(td), field_names_(field_names) {}
704 
705  size_t size() const override { return field_names_.size(); }
706 
707  const TableDescriptor* getTableDescriptor() const { return td_; }
708 
709  const std::vector<std::string>& getFieldNames() const { return field_names_; }
710 
711  const std::string getFieldName(const size_t i) const { return field_names_[i]; }
712 
713  std::string toString() const override {
714  return "(RelScan<" + std::to_string(reinterpret_cast<uint64_t>(this)) + "> " +
715  td_->tableName + ")";
716  }
717 
718  std::shared_ptr<RelAlgNode> deepCopy() const override {
719  CHECK(false);
720  return nullptr;
721  };
722 
723  private:
724  const TableDescriptor* td_;
725  const std::vector<std::string> field_names_;
726 };
727 
729  public:
730  ModifyManipulationTarget(bool const update_via_select = false,
731  bool const delete_via_select = false,
732  bool const varlen_update_required = false,
733  TableDescriptor const* table_descriptor = nullptr,
734  ColumnNameList target_columns = ColumnNameList())
735  : is_update_via_select_(update_via_select)
736  , is_delete_via_select_(delete_via_select)
737  , varlen_update_required_(varlen_update_required)
738  , table_descriptor_(table_descriptor)
739  , target_columns_(target_columns) {}
740 
741  void setUpdateViaSelectFlag() const { is_update_via_select_ = true; }
742  void setDeleteViaSelectFlag() const { is_delete_via_select_ = true; }
744  varlen_update_required_ = required;
745  }
746 
747  TableDescriptor const* getModifiedTableDescriptor() const { return table_descriptor_; }
749  table_descriptor_ = td;
750  }
751 
752  auto const isUpdateViaSelect() const { return is_update_via_select_; }
753  auto const isDeleteViaSelect() const { return is_delete_via_select_; }
754  auto const isVarlenUpdateRequired() const { return varlen_update_required_; }
755 
756  void setTargetColumns(ColumnNameList const& target_columns) const {
757  target_columns_ = target_columns;
758  }
759  ColumnNameList const& getTargetColumns() const { return target_columns_; }
760 
761  template <typename VALIDATION_FUNCTOR>
762  bool validateTargetColumns(VALIDATION_FUNCTOR validator) const {
763  for (auto const& column_name : target_columns_) {
764  if (validator(column_name) == false) {
765  return false;
766  }
767  }
768  return true;
769  }
770 
771  private:
772  mutable bool is_update_via_select_ = false;
773  mutable bool is_delete_via_select_ = false;
774  mutable bool varlen_update_required_ = false;
775  mutable TableDescriptor const* table_descriptor_ = nullptr;
777 };
778 
780  public:
781  friend class RelModify;
782  using ConstRexScalarPtr = std::unique_ptr<const RexScalar>;
783  using ConstRexScalarPtrVector = std::vector<ConstRexScalarPtr>;
784 
785  // Takes memory ownership of the expressions.
786  RelProject(std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs,
787  const std::vector<std::string>& fields,
788  std::shared_ptr<const RelAlgNode> input)
789  : ModifyManipulationTarget(false, false, false, nullptr)
790  , scalar_exprs_(std::move(scalar_exprs))
791  , fields_(fields) {
792  inputs_.push_back(input);
793  }
794 
795  void setExpressions(std::vector<std::unique_ptr<const RexScalar>>& exprs) const {
796  scalar_exprs_ = std::move(exprs);
797  }
798 
799  // True iff all the projected expressions are inputs. If true,
800  // this node can be elided and merged into the previous node
801  // since it's just a subset and / or permutation of its outputs.
802  bool isSimple() const {
803  for (const auto& expr : scalar_exprs_) {
804  if (!dynamic_cast<const RexInput*>(expr.get())) {
805  return false;
806  }
807  }
808  return true;
809  }
810 
811  bool isIdentity() const;
812 
813  bool isRenaming() const;
814 
815  size_t size() const override { return scalar_exprs_.size(); }
816 
817  const RexScalar* getProjectAt(const size_t idx) const {
818  CHECK(idx < scalar_exprs_.size());
819  return scalar_exprs_[idx].get();
820  }
821 
822  const RexScalar* getProjectAtAndRelease(const size_t idx) const {
823  CHECK(idx < scalar_exprs_.size());
824  return scalar_exprs_[idx].release();
825  }
826 
827  std::vector<std::unique_ptr<const RexScalar>> getExpressionsAndRelease() {
828  return std::move(scalar_exprs_);
829  }
830 
831  const std::vector<std::string>& getFields() const { return fields_; }
832  void setFields(std::vector<std::string>& fields) { fields_ = std::move(fields); }
833 
834  const std::string getFieldName(const size_t i) const { return fields_[i]; }
835 
836  void replaceInput(std::shared_ptr<const RelAlgNode> old_input,
837  std::shared_ptr<const RelAlgNode> input) override {
838  replaceInput(old_input, input, std::nullopt);
839  }
840 
841  void replaceInput(
842  std::shared_ptr<const RelAlgNode> old_input,
843  std::shared_ptr<const RelAlgNode> input,
844  std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map);
845 
846  void appendInput(std::string new_field_name,
847  std::unique_ptr<const RexScalar> new_input);
848 
849  std::string toString() const override {
850  std::string result =
851  "(RelProject<" + std::to_string(reinterpret_cast<uint64_t>(this)) + ">";
852  for (const auto& scalar_expr : scalar_exprs_) {
853  result += " " + scalar_expr->toString();
854  }
855  return result + ")";
856  }
857 
858  std::shared_ptr<RelAlgNode> deepCopy() const override;
859 
860  bool hasWindowFunctionExpr() const;
861 
862  private:
863  template <typename EXPR_VISITOR_FUNCTOR>
864  void visitScalarExprs(EXPR_VISITOR_FUNCTOR visitor_functor) const {
865  for (int i = 0; i < static_cast<int>(scalar_exprs_.size()); i++) {
866  visitor_functor(i);
867  }
868  }
869 
872  scalar_exprs_.emplace_back(std::make_unique<RexFunctionOperator const>(
873  std::string("OFFSET_IN_FRAGMENT"), transient_vector, SQLTypeInfo(kINT, false)));
874  fields_.emplace_back("EXPR$DELETE_OFFSET_IN_FRAGMENT");
875  }
876 
877  mutable std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_;
878  mutable std::vector<std::string> fields_;
879 };
880 
881 class RelAggregate : public RelAlgNode {
882  public:
883  // Takes ownership of the aggregate expressions.
884  RelAggregate(const size_t groupby_count,
885  std::vector<std::unique_ptr<const RexAgg>>& agg_exprs,
886  const std::vector<std::string>& fields,
887  std::shared_ptr<const RelAlgNode> input)
888  : groupby_count_(groupby_count), agg_exprs_(std::move(agg_exprs)), fields_(fields) {
889  inputs_.push_back(input);
890  }
891 
892  size_t size() const override { return groupby_count_ + agg_exprs_.size(); }
893 
894  const size_t getGroupByCount() const { return groupby_count_; }
895 
896  const size_t getAggExprsCount() const { return agg_exprs_.size(); }
897 
898  const std::vector<std::string>& getFields() const { return fields_; }
899  void setFields(std::vector<std::string>& new_fields) {
900  fields_ = std::move(new_fields);
901  }
902 
903  const std::string getFieldName(const size_t i) const { return fields_[i]; }
904 
905  std::vector<const RexAgg*> getAggregatesAndRelease() {
906  std::vector<const RexAgg*> result;
907  for (auto& agg_expr : agg_exprs_) {
908  result.push_back(agg_expr.release());
909  }
910  return result;
911  }
912 
913  std::vector<std::unique_ptr<const RexAgg>> getAggExprsAndRelease() {
914  return std::move(agg_exprs_);
915  }
916 
917  const std::vector<std::unique_ptr<const RexAgg>>& getAggExprs() const {
918  return agg_exprs_;
919  }
920 
921  void setAggExprs(std::vector<std::unique_ptr<const RexAgg>>& agg_exprs) {
922  agg_exprs_ = std::move(agg_exprs);
923  }
924 
925  std::string toString() const override {
926  std::string result = "(RelAggregate<" +
927  std::to_string(reinterpret_cast<uint64_t>(this)) + ">(groups: [";
928  for (size_t group_index = 0; group_index < groupby_count_; ++group_index) {
929  result += " " + std::to_string(group_index);
930  }
931  result += " ] aggs: [";
932  for (const auto& agg_expr : agg_exprs_) {
933  result += " " + agg_expr->toString();
934  }
935  return result + " ]))";
936  }
937 
938  std::shared_ptr<RelAlgNode> deepCopy() const override;
939 
940  private:
941  const size_t groupby_count_;
942  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_;
943  std::vector<std::string> fields_;
944 };
945 
946 class RelJoin : public RelAlgNode {
947  public:
948  RelJoin(std::shared_ptr<const RelAlgNode> lhs,
949  std::shared_ptr<const RelAlgNode> rhs,
950  std::unique_ptr<const RexScalar>& condition,
951  const JoinType join_type)
952  : condition_(std::move(condition)), join_type_(join_type) {
953  inputs_.push_back(lhs);
954  inputs_.push_back(rhs);
955  }
956 
957  JoinType getJoinType() const { return join_type_; }
958 
959  const RexScalar* getCondition() const { return condition_.get(); }
960 
961  const RexScalar* getAndReleaseCondition() const { return condition_.release(); }
962 
963  void setCondition(std::unique_ptr<const RexScalar>& condition) {
964  CHECK(condition);
965  condition_ = std::move(condition);
966  }
967 
968  void replaceInput(std::shared_ptr<const RelAlgNode> old_input,
969  std::shared_ptr<const RelAlgNode> input) override;
970 
971  std::string toString() const override {
972  std::string result =
973  "(RelJoin<" + std::to_string(reinterpret_cast<uint64_t>(this)) + ">(";
974  result += condition_ ? condition_->toString() : "null";
975  result += " " + std::to_string(static_cast<int>(join_type_));
976  return result + "))";
977  }
978 
979  size_t size() const override { return inputs_[0]->size() + inputs_[1]->size(); }
980 
981  std::shared_ptr<RelAlgNode> deepCopy() const override;
982 
983  private:
984  mutable std::unique_ptr<const RexScalar> condition_;
986 };
987 
988 class RelFilter : public RelAlgNode {
989  public:
990  RelFilter(std::unique_ptr<const RexScalar>& filter,
991  std::shared_ptr<const RelAlgNode> input)
992  : filter_(std::move(filter)) {
993  CHECK(filter_);
994  inputs_.push_back(input);
995  }
996 
997  const RexScalar* getCondition() const { return filter_.get(); }
998 
999  const RexScalar* getAndReleaseCondition() { return filter_.release(); }
1000 
1001  void setCondition(std::unique_ptr<const RexScalar>& condition) {
1002  CHECK(condition);
1003  filter_ = std::move(condition);
1004  }
1005 
1006  size_t size() const override { return inputs_[0]->size(); }
1007 
1008  void replaceInput(std::shared_ptr<const RelAlgNode> old_input,
1009  std::shared_ptr<const RelAlgNode> input) override;
1010 
1011  std::string toString() const override {
1012  std::string result =
1013  "(RelFilter<" + std::to_string(reinterpret_cast<uint64_t>(this)) + ">(";
1014  result += filter_ ? filter_->toString() : "null";
1015  return result + "))";
1016  }
1017 
1018  std::shared_ptr<RelAlgNode> deepCopy() const override;
1019 
1020  private:
1021  std::unique_ptr<const RexScalar> filter_;
1022 };
1023 
1024 // Synthetic node to assist execution of left-deep join relational algebra.
1026  public:
1027  RelLeftDeepInnerJoin(const std::shared_ptr<RelFilter>& filter,
1028  RelAlgInputs inputs,
1029  std::vector<std::shared_ptr<const RelJoin>>& original_joins);
1030 
1031  const RexScalar* getInnerCondition() const;
1032 
1033  const RexScalar* getOuterCondition(const size_t nesting_level) const;
1034 
1035  std::string toString() const override;
1036 
1037  size_t size() const override;
1038 
1039  std::shared_ptr<RelAlgNode> deepCopy() const override;
1040 
1041  bool coversOriginalNode(const RelAlgNode* node) const;
1042 
1043  private:
1044  std::unique_ptr<const RexScalar> condition_;
1045  std::vector<std::unique_ptr<const RexScalar>> outer_conditions_per_level_;
1046  const std::shared_ptr<RelFilter> original_filter_;
1047  const std::vector<std::shared_ptr<const RelJoin>> original_joins_;
1048 };
1049 
1050 // The 'RelCompound' node combines filter and on the fly aggregate computation.
1051 // It's the result of combining a sequence of 'RelFilter' (optional), 'RelProject',
1052 // 'RelAggregate' (optional) and a simple 'RelProject' (optional) into a single node
1053 // which can be efficiently executed with no intermediate buffers.
1055  public:
1056  // 'target_exprs_' are either scalar expressions owned by 'scalar_sources_'
1057  // or aggregate expressions owned by 'agg_exprs_', with the arguments
1058  // owned by 'scalar_sources_'.
1059  RelCompound(std::unique_ptr<const RexScalar>& filter_expr,
1060  const std::vector<const Rex*>& target_exprs,
1061  const size_t groupby_count,
1062  const std::vector<const RexAgg*>& agg_exprs,
1063  const std::vector<std::string>& fields,
1064  std::vector<std::unique_ptr<const RexScalar>>& scalar_sources,
1065  const bool is_agg,
1066  bool update_disguised_as_select = false,
1067  bool delete_disguised_as_select = false,
1068  bool varlen_update_required = false,
1069  TableDescriptor const* manipulation_target_table = nullptr,
1070  ColumnNameList target_columns = ColumnNameList())
1071  : ModifyManipulationTarget(update_disguised_as_select,
1072  delete_disguised_as_select,
1073  varlen_update_required,
1074  manipulation_target_table,
1075  target_columns)
1076  , filter_expr_(std::move(filter_expr))
1077  , target_exprs_(target_exprs)
1078  , groupby_count_(groupby_count)
1079  , fields_(fields)
1080  , is_agg_(is_agg)
1081  , scalar_sources_(std::move(scalar_sources)) {
1082  CHECK_EQ(fields.size(), target_exprs.size());
1083  for (auto agg_expr : agg_exprs) {
1084  agg_exprs_.emplace_back(agg_expr);
1085  }
1086  }
1087 
1088  void replaceInput(std::shared_ptr<const RelAlgNode> old_input,
1089  std::shared_ptr<const RelAlgNode> input) override;
1090 
1091  size_t size() const override { return target_exprs_.size(); }
1092 
1093  const RexScalar* getFilterExpr() const { return filter_expr_.get(); }
1094 
1095  void setFilterExpr(std::unique_ptr<const RexScalar>& new_expr) {
1096  filter_expr_ = std::move(new_expr);
1097  }
1098 
1099  const Rex* getTargetExpr(const size_t i) const { return target_exprs_[i]; }
1100 
1101  const std::vector<std::string>& getFields() const { return fields_; }
1102 
1103  const std::string getFieldName(const size_t i) const { return fields_[i]; }
1104 
1105  const size_t getScalarSourcesSize() const { return scalar_sources_.size(); }
1106 
1107  const RexScalar* getScalarSource(const size_t i) const {
1108  return scalar_sources_[i].get();
1109  }
1110 
1111  void setScalarSources(std::vector<std::unique_ptr<const RexScalar>>& new_sources) {
1112  CHECK_EQ(new_sources.size(), scalar_sources_.size());
1113  scalar_sources_ = std::move(new_sources);
1114  }
1115 
1116  const size_t getGroupByCount() const { return groupby_count_; }
1117 
1118  bool isAggregate() const { return is_agg_; }
1119 
1120  std::string toString() const override {
1121  std::string result =
1122  "(RelCompound<" + std::to_string(reinterpret_cast<uint64_t>(this)) + ">(";
1123  result += (filter_expr_ ? filter_expr_->toString() : "null") + " ";
1124  for (const auto target_expr : target_exprs_) {
1125  result += target_expr->toString() + " ";
1126  }
1127  result += "groups: [";
1128  for (size_t group_index = 0; group_index < groupby_count_; ++group_index) {
1129  result += " " + std::to_string(group_index);
1130  }
1131  result += " ] sources: [";
1132  for (const auto& scalar_source : scalar_sources_) {
1133  result += " " + scalar_source->toString();
1134  }
1135  return result + " ]))";
1136  }
1137 
1138  std::shared_ptr<RelAlgNode> deepCopy() const override;
1139 
1140  private:
1141  std::unique_ptr<const RexScalar> filter_expr_;
1142  const std::vector<const Rex*> target_exprs_;
1143  const size_t groupby_count_;
1144  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_;
1145  const std::vector<std::string> fields_;
1146  const bool is_agg_;
1147  std::vector<std::unique_ptr<const RexScalar>>
1148  scalar_sources_; // building blocks for group_indices_ and agg_exprs_; not actually
1149  // projected, just owned
1150 };
1151 
1152 class RelSort : public RelAlgNode {
1153  public:
1154  RelSort(const std::vector<SortField>& collation,
1155  const size_t limit,
1156  const size_t offset,
1157  std::shared_ptr<const RelAlgNode> input)
1158  : collation_(collation), limit_(limit), offset_(offset) {
1159  inputs_.push_back(input);
1160  }
1161 
1162  bool operator==(const RelSort& that) const {
1163  return limit_ == that.limit_ && offset_ == that.offset_ &&
1164  empty_result_ == that.empty_result_ && hasEquivCollationOf(that);
1165  }
1166 
1167  size_t collationCount() const { return collation_.size(); }
1168 
1169  SortField getCollation(const size_t i) const {
1170  CHECK_LT(i, collation_.size());
1171  return collation_[i];
1172  }
1173 
1174  void setCollation(std::vector<SortField>&& collation) {
1175  collation_ = std::move(collation);
1176  }
1177 
1178  void setEmptyResult(bool emptyResult) { empty_result_ = emptyResult; }
1179 
1180  bool isEmptyResult() const { return empty_result_; }
1181 
1182  size_t getLimit() const { return limit_; }
1183 
1184  size_t getOffset() const { return offset_; }
1185 
1186  std::string toString() const override {
1187  std::string result =
1188  "(RelSort<" + std::to_string(reinterpret_cast<uint64_t>(this)) + ">(";
1189  result += "limit: " + std::to_string(limit_) + " ";
1190  result += "offset: " + std::to_string(offset_) + " ";
1191  result += "empty_result: " + std::to_string(empty_result_) + " ";
1192  result += "collation: [ ";
1193  for (const auto& sort_field : collation_) {
1194  result += sort_field.toString() + " ";
1195  }
1196  result += "]";
1197  return result + "))";
1198  }
1199 
1200  size_t size() const override { return inputs_[0]->size(); }
1201 
1202  std::shared_ptr<RelAlgNode> deepCopy() const override;
1203 
1204  private:
1205  std::vector<SortField> collation_;
1206  const size_t limit_;
1207  const size_t offset_;
1209 
1210  bool hasEquivCollationOf(const RelSort& that) const;
1211 };
1212 
// Node describing a table modification (insert, delete or update) applied on top
// of its input node.
1213 class RelModify : public RelAlgNode {
1214  public:
// The three modification kinds this node can represent.
1215  enum class ModifyOperation { Insert, Delete, Update };
// Shared, read-only handle to the node feeding this modification.
1216  using RelAlgNodeInputPtr = std::shared_ptr<const RelAlgNode>;
// Names of the columns targeted by the modification.
1217  using TargetColumnList = std::vector<std::string>;
1218 
1219  static std::string yieldModifyOperationString(ModifyOperation const op) {
1220  switch (op) {
1221  case ModifyOperation::Delete:
1222  return "DELETE";
1223  case ModifyOperation::Insert:
1224  return "INSERT";
1225  case ModifyOperation::Update:
1226  return "UPDATE";
1227  default:
1228  break;
1229  }
1230  throw std::runtime_error("Unexpected ModifyOperation enum encountered.");
1231  }
1232 
1233  static ModifyOperation yieldModifyOperationEnum(std::string const& op_string) {
1234  if (op_string == "INSERT") {
1235  return ModifyOperation::Insert;
1236  } else if (op_string == "DELETE") {
1237  return ModifyOperation::Delete;
1238  } else if (op_string == "UPDATE") {
1239  return ModifyOperation::Update;
1240  }
1241 
1242  throw std::runtime_error(
1243  std::string("Unsupported logical modify operation encountered " + op_string));
1244  }
1245 
// Constructor overload that receives the operation as text ('op_string') and
// converts it with yieldModifyOperationEnum(), which throws std::runtime_error for
// unrecognised text. 'input' becomes the node's only input.
// NOTE(review): the first line of this signature (the one declaring 'cat') and
// listing line 1257 inside the body are absent from this listing -- confirm against
// the original header before editing.
1247  TableDescriptor const* const td,
1248  bool flattened,
1249  std::string const& op_string,
1250  TargetColumnList const& target_column_list,
1251  RelAlgNodeInputPtr input)
1252  : catalog_(cat)
1253  , table_descriptor_(td)
1254  , flattened_(flattened)
1255  , operation_(yieldModifyOperationEnum(op_string))
1256  , target_column_list_(target_column_list) {
1258  inputs_.push_back(input);
1259  }
1260 
// Constructor overload that receives the operation directly as a ModifyOperation
// value; used by deepCopy(). 'input' becomes the node's only input.
// NOTE(review): the first line of this signature (the one declaring 'cat') and
// listing line 1272 inside the body are absent from this listing -- confirm against
// the original header before editing.
1262  TableDescriptor const* const td,
1263  bool flattened,
1264  ModifyOperation op,
1265  TargetColumnList const& target_column_list,
1266  RelAlgNodeInputPtr input)
1267  : catalog_(cat)
1268  , table_descriptor_(td)
1269  , flattened_(flattened)
1270  , operation_(op)
1271  , target_column_list_(target_column_list) {
1273  inputs_.push_back(input);
1274  }
1275 
1276  TableDescriptor const* const getTableDescriptor() const { return table_descriptor_; }
1277  bool const isFlattened() const { return flattened_; }
1278  ModifyOperation getOperation() const { return operation_; }
1279  TargetColumnList const& getUpdateColumnNames() { return target_column_list_; }
1280  int getUpdateColumnCount() const { return target_column_list_.size(); }
1281 
1282  size_t size() const override { return 0; }
1283  std::shared_ptr<RelAlgNode> deepCopy() const override {
1284  return std::make_shared<RelModify>(catalog_,
1285  table_descriptor_,
1286  flattened_,
1287  operation_,
1288  target_column_list_,
1289  inputs_[0]);
1290  }
1291 
1292  std::string toString() const override {
1293  std::ostringstream result_stream;
1294  result_stream << std::boolalpha
1295  << "(RelModify<" + std::to_string(reinterpret_cast<uint64_t>(this)) +
1296  "> "
1297  << table_descriptor_->tableName << " flattened= " << flattened_
1298  << " operation= " << yieldModifyOperationString(operation_) << ")";
1299 
1300  return result_stream.str();
1301  }
1302 
// Body of the update-preparation method: marks the RelProject feeding this node
// as an update-via-select, passes it the target table and target columns, and
// records whether any target column is variable-length. Throws std::runtime_error
// when a target column is the table's shard key or a geometry column.
// NOTE(review): the signature line (listing line 1303) is absent from this listing;
// presumably `void applyUpdateModificationsToInputNode() {` -- confirm.
1304  RelProject const* previous_project_node =
1305  dynamic_cast<RelProject const*>(inputs_[0].get());
// The input must be a RelProject; anything else is treated as a fatal error.
1306  CHECK(previous_project_node != nullptr);
1307 
1308  previous_project_node->setUpdateViaSelectFlag();
1309  // remove the offset column in the projection for update handling
// NOTE(review): pop_back() requires a non-empty list; nothing checks that before
// this call (the CHECKs below run afterwards).
1310  target_column_list_.pop_back();
1311 
1312  previous_project_node->setModifiedTableDescriptor(table_descriptor_);
1313  previous_project_node->setTargetColumns(target_column_list_);
1314 
// Inclusive index range [0, column count - 1] that maps onto target_column_list_.
1315  int target_update_column_expr_start = 0;
1316  int target_update_column_expr_end = (int)(target_column_list_.size() - 1);
1317  CHECK(target_update_column_expr_start >= 0);
// Fails when no target column is left after dropping the offset column.
1318  CHECK(target_update_column_expr_end >= 0);
1319 
1320  bool varlen_update_required = false;
1321 
// Visitor invoked with an integer index by visitScalarExprs(); indices outside the
// range above are ignored. NOTE(review): presumably the index is the position of a
// scalar expression in the projection -- confirm against RelProject.
1322  auto varlen_scan_visitor = [this,
1323  &varlen_update_required,
1324  target_update_column_expr_start,
1325  target_update_column_expr_end](int index) {
1326  if (index >= target_update_column_expr_start &&
1327  index <= target_update_column_expr_end) {
1328  auto target_index = index - target_update_column_expr_start;
1329 
// Look the column up by name in the catalog; it must exist.
1330  auto* column_desc = catalog_.getMetadataForColumn(
1331  table_descriptor_->tableId, target_column_list_[target_index]);
1332  CHECK(column_desc);
1333 
// For sharded tables, refuse to update the shard key column.
1334  if (table_descriptor_->nShards) {
1335  const auto shard_cd =
1336  catalog_.getShardColumnMetadataForTable(table_descriptor_);
1337  CHECK(shard_cd);
1338  if ((column_desc->columnName == shard_cd->columnName)) {
1339  throw std::runtime_error("UPDATE of a shard key is currently unsupported.");
1340  }
1341  }
1342 
1343  // Check for valid types
1344  if (column_desc->columnType.is_varlen()) {
1345  varlen_update_required = true;
1346  }
1347  if (column_desc->columnType.is_geometry()) {
1348  throw std::runtime_error("UPDATE of a geo column is unsupported.");
1349  }
1350  }
1351  };
1352 
// Run the checks, then publish the variable-length result on the projection.
1353  previous_project_node->visitScalarExprs(varlen_scan_visitor);
1354  previous_project_node->setVarlenUpdateRequired(varlen_update_required);
1355  }
1356 
// Body of the delete-preparation method: marks the RelProject feeding this node
// as a delete-via-select and passes it the target table descriptor.
// NOTE(review): the signature line (listing line 1357) is absent from this listing;
// presumably `void applyDeleteModificationsToInputNode() {` -- confirm.
1358  RelProject const* previous_project_node =
1359  dynamic_cast<RelProject const*>(inputs_[0].get());
// The input must be a RelProject; anything else is treated as a fatal error.
1360  CHECK(previous_project_node != nullptr);
1361  previous_project_node->setDeleteViaSelectFlag();
1362  previous_project_node->setModifiedTableDescriptor(table_descriptor_);
1363  }
1364 
1365  private:
1371 };
1372 
1374  public:
1375  RelTableFunction(const std::string& function_name,
1376  std::shared_ptr<const RelAlgNode> input,
1377  std::vector<std::string>& fields,
1378  std::vector<const Rex*> col_inputs,
1379  std::vector<std::unique_ptr<const RexScalar>>& table_func_inputs,
1380  std::vector<std::unique_ptr<const RexScalar>>& target_exprs)
1381  : function_name_(function_name)
1382  , fields_(fields)
1383  , col_inputs_(col_inputs)
1384  , table_func_inputs_(std::move(table_func_inputs))
1385  , target_exprs_(std::move(target_exprs)) {
1386  inputs_.emplace_back(input);
1387  }
1388 
1389  void replaceInput(std::shared_ptr<const RelAlgNode> old_input,
1390  std::shared_ptr<const RelAlgNode> input) override;
1391 
1392  std::string getFunctionName() const { return function_name_; }
1393 
1394  size_t size() const override { return target_exprs_.size(); }
1395 
1396  size_t getTableFuncInputsSize() const { return table_func_inputs_.size(); }
1397 
1398  size_t getColInputsSize() const { return col_inputs_.size(); }
1399 
1400  const RexScalar* getTableFuncInputAt(const size_t idx) const {
1401  CHECK_LT(idx, table_func_inputs_.size());
1402  return table_func_inputs_[idx].get();
1403  }
1404 
1405  const RexScalar* getTableFuncInputAtAndRelease(const size_t idx) {
1406  CHECK_LT(idx, table_func_inputs_.size());
1407  return table_func_inputs_[idx].release();
1408  }
1409 
1410  void setTableFuncInputs(std::vector<std::unique_ptr<const RexScalar>>& exprs) {
1411  table_func_inputs_ = std::move(exprs);
1412  }
1413 
1414  std::string getFieldName(const size_t idx) const {
1415  CHECK_LT(idx, fields_.size());
1416  return fields_[idx];
1417  }
1418 
1419  std::shared_ptr<RelAlgNode> deepCopy() const override;
1420 
1421  std::string toString() const override {
1422  std::string result = "RelTableFunction<" +
1423  std::to_string(reinterpret_cast<uint64_t>(this)) + ">(" +
1424  function_name_ + " ";
1425 
1426  result += "targets: " + std::to_string(target_exprs_.size());
1427  result += "inputs: [";
1428  for (size_t i = 0; i < target_exprs_.size(); ++i) {
1429  result += target_exprs_[i]->toString();
1430  if (i < target_exprs_.size() - 1) {
1431  result += ", ";
1432  }
1433  }
1434  result += "])";
1435 
1436  return result;
1437  }
1438 
1439  private:
1440  std::string function_name_;
1441  std::vector<std::string> fields_;
1442 
1443  std::vector<const Rex*>
1444  col_inputs_; // owned by `table_func_inputs_`, but allows picking out the specific
1445  // input columns vs other table function inputs (e.g. literals)
1446  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs_;
1447 
1448  std::vector<std::unique_ptr<const RexScalar>>
1449  target_exprs_; // Note: these should all be RexRef but are stored as RexScalar for
1450  // consistency
1451 };
1452 
1454  public:
1455  using RowValues = std::vector<std::unique_ptr<const RexScalar>>;
1456 
1457  RelLogicalValues(const std::vector<TargetMetaInfo>& tuple_type,
1458  std::vector<RowValues>& values)
1459  : tuple_type_(tuple_type), values_(std::move(values)) {}
1460 
1461  const std::vector<TargetMetaInfo> getTupleType() const { return tuple_type_; }
1462 
1463  std::string toString() const override {
1464  std::string ret =
1465  "(RelLogicalValues<" + std::to_string(reinterpret_cast<uint64_t>(this)) + ">";
1466  for (const auto& target_meta_info : tuple_type_) {
1467  ret += " (" + target_meta_info.get_resname() + " " +
1468  target_meta_info.get_type_info().get_type_name() + ")";
1469  }
1470  ret += " )";
1471  return ret;
1472  }
1473 
1474  const RexScalar* getValueAt(const size_t row_idx, const size_t col_idx) const {
1475  CHECK_LT(row_idx, values_.size());
1476  const auto& row = values_[row_idx];
1477  CHECK_LT(col_idx, row.size());
1478  return row[col_idx].get();
1479  }
1480 
1481  size_t getRowsSize() const {
1482  if (values_.empty()) {
1483  return 0;
1484  } else {
1485  return values_.front().size();
1486  }
1487  }
1488 
1489  size_t getNumRows() const { return values_.size(); }
1490 
1491  size_t size() const override { return tuple_type_.size(); }
1492 
1493  bool hasRows() const { return !values_.empty(); }
1494 
1495  std::shared_ptr<RelAlgNode> deepCopy() const override;
1496 
1497  private:
1498  const std::vector<TargetMetaInfo> tuple_type_;
1499  const std::vector<RowValues> values_;
1500 };
1501 
1502 class RelLogicalUnion : public RelAlgNode {
1503  public:
1504  RelLogicalUnion(RelAlgInputs, bool is_all);
1505  std::shared_ptr<RelAlgNode> deepCopy() const override;
1506  size_t size() const override;
1507  std::string toString() const override;
1508 
1509  std::string getFieldName(const size_t i) const;
1510 
1511  inline bool isAll() const { return is_all_; }
1512  bool inputMetainfoTypesMatch() const;
1513  RexScalar const* copyAndRedirectSource(RexScalar const*, size_t input_idx) const;
1514 
1515  // Not unique_ptr to allow for an easy deepCopy() implementation.
1516  mutable std::vector<std::shared_ptr<const RexScalar>> scalar_exprs_;
1517 
1518  private:
1519  bool const is_all_;
1520 };
1521 
// Exception type for queries that cannot be handled; 'reason' becomes the text
// returned by what().
class QueryNotSupported : public std::runtime_error {
 public:
  QueryNotSupported(const std::string& reason)
      : std::runtime_error(reason) {
  }
};
1526 
// Owns the relational algebra nodes built for a query and the subqueries
// registered while building them. Non-copyable and not default-constructible.
// NOTE(review): several listing lines are absent from this class (the numbering
// skips e.g. 1547, 1604 and 1607), so some parameters and members are not shown.
1536 class RelAlgDagBuilder : public boost::noncopyable {
1537  public:
1538  RelAlgDagBuilder() = delete;
1539 
// Constructor taking the query as a string ('query_ra') plus render information.
// NOTE(review): at least one parameter line is missing between these two lines.
1546  RelAlgDagBuilder(const std::string& query_ra,
1548  const RenderInfo* render_info);
1549 
// Constructor taking an already parsed JSON value together with the builder it
// belongs to. NOTE(review): presumably used for subqueries, which are registered
// on 'root_dag_builder' -- confirm against the implementation.
1559  RelAlgDagBuilder(RelAlgDagBuilder& root_dag_builder,
1560  const rapidjson::Value& query_ast,
1561  const Catalog_Namespace::Catalog& cat,
1562  const RenderInfo* render_opts);
1563 
// Takes a callback that receives a RelAlgNode pointer (defined out of line).
1564  void eachNode(std::function<void(RelAlgNode const*)> const&) const;
1565 
// Returns the last node in nodes_ by reference. The node list must be non-empty
// and its last entry non-null; both are enforced with CHECK.
1569  const RelAlgNode& getRootNode() const {
1570  CHECK(nodes_.size());
1571  const auto& last_ptr = nodes_.back();
1572  CHECK(last_ptr);
1573  return *last_ptr;
1574  }
1575 
// Same node as getRootNode(), returned as a shared pointer. Only emptiness of the
// node list is checked here, not whether the last entry is null.
1576  std::shared_ptr<const RelAlgNode> getRootNodeShPtr() const {
1577  CHECK(nodes_.size());
1578  return nodes_.back();
1579  }
1580 
// Appends 'subquery' to the list returned by getSubqueries().
1585  void registerSubquery(std::shared_ptr<RexSubQuery> subquery) {
1586  subqueries_.push_back(subquery);
1587  }
1588 
// Read-only view of the registered subqueries, in registration order.
1592  const std::vector<std::shared_ptr<RexSubQuery>>& getSubqueries() const {
1593  return subqueries_;
1594  }
1595 
// Defined out of line; its effect is not visible in this header.
1599  void resetQueryExecutionState();
1600 
1601  private:
// Defined out of line; receives the parsed query and the root builder.
1602  void build(const rapidjson::Value& query_ast, RelAlgDagBuilder& root_dag_builder);
1603 
// Nodes owned by this builder; the last element is what getRootNode() returns.
1605  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
// Subqueries added through registerSubquery().
1606  std::vector<std::shared_ptr<RexSubQuery>> subqueries_;
1608 };
1609 
// Output of a node expressed as a list of RexInput objects.
1610 using RANodeOutput = std::vector<RexInput>;
1611 
// Returns the RANodeOutput for 'ra_node' (defined out of line).
1612 RANodeOutput get_node_output(const RelAlgNode* ra_node);
1613 
// Returns a string for the given node; 'depth' defaults to 0.
// NOTE(review): presumably renders the node and its inputs as an indented tree,
// with 'depth' as the starting indentation level -- confirm.
1614 std::string tree_string(const RelAlgNode*, const size_t depth = 0);
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
std::string toString() const override
std::string toString() const override
std::string getFunctionName() const
bool is_agg(const Analyzer::Expr *expr)
void validate_non_foreign_table_write(const TableDescriptor *table_descriptor)
Definition: FsiUtils.h:22
std::unique_ptr< const RexOperator > disambiguatedOperands(ConstRexScalarPtrVector &operands, ConstRexScalarPtrVector &partition_keys, ConstRexScalarPtrVector &order_keys, const std::vector< SortField > &collation) const
std::unique_ptr< const RexScalar > condition_
const std::vector< TargetMetaInfo > getTupleType() const
SQLOps getOperator() const
SQLAgg
Definition: sqldefs.h:71
size_t size() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::unique_ptr< const RexScalar > ConstRexScalarPtr
std::vector< std::unique_ptr< const RexScalar > > getExpressionsAndRelease()
const std::vector< SortField > & getCollation() const
const ConstRexScalarPtrVector & getPartitionKeys() const
void setFields(std::vector< std::string > &fields)
std::vector< std::unique_ptr< const RexScalar > > outer_conditions_per_level_
const size_t limit_
const std::vector< std::string > & getFields() const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
const size_t getGroupByCount() const
JoinType
Definition: sqldefs.h:107
std::string toString() const override
void setSourceNode(const RelAlgNode *node) const
const RelAlgNode * getRelAlg() const
ColumnNameList target_columns_
void setIndex(const unsigned in_index) const
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
const TableDescriptor * getTableDescriptor() const
std::string toString() const override
RexLiteral(const std::string &val, const SQLTypes type, const SQLTypes target_type, const unsigned scale, const unsigned precision, const unsigned type_scale, const unsigned type_precision)
const std::shared_ptr< const RelAlgNode > ra_
RelAlgNode(RelAlgInputs inputs={})
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:86
size_t collationCount() const
SqlWindowFunctionKind getKind() const
size_t size() const override
const size_t index_
SQLTypes
Definition: sqltypes.h:39
std::shared_ptr< RelAlgNode > deepCopy() const override
const std::string name_
size_t size() const override
unsigned getId() const
const unsigned type_scale_
std::string toString() const override
std::string toString() const override
std::string tree_string(const RelAlgNode *, const size_t depth=0)
RexOperator(const SQLOps op, std::vector< std::unique_ptr< const RexScalar >> &operands, const SQLTypeInfo &type)
static thread_local unsigned crt_id_
const RelAlgNode & getRootNode() const
void setCondition(std::unique_ptr< const RexScalar > &condition)
std::string function_name_
T getVal() const
void setEmptyResult(bool emptyResult)
std::unique_ptr< const RexScalar > ConstRexScalarPtr
const RexWindowBound & getLowerBound() const
const JoinType join_type_
size_t getColInputsSize() const
SortDirection getSortDir() const
NullSortedPosition
void applyDeleteModificationsToInputNode()
std::vector< std::string > TargetColumnList
size_t size() const override
void setTargetColumns(ColumnNameList const &target_columns) const
unsigned getPrecision() const
const RexWindowBound upper_bound_
auto const isDeleteViaSelect() const
const SqlWindowFunctionKind kind_
std::unique_ptr< RexLiteral > deepCopy() const
std::string toString() const override
size_t size() const override
std::vector< const Rex * > col_inputs_
SQLOps
Definition: sqldefs.h:29
const RexScalar * getProjectAt(const size_t idx) const
const std::vector< std::string > fields_
const ConstRexScalarPtrVector & getOrderKeys() const
bool isAll() const
const Rex * getTargetExpr(const size_t i) const
size_t getOperand(size_t idx) const
std::string toString() const override
void applyUpdateModificationsToInputNode()
bool operator==(const RelSort &that) const
std::shared_ptr< const RelAlgNode > getAndOwnInput(const size_t idx) const
std::string toString() const override
const boost::variant< int64_t, double, std::string, bool, void * > literal_
size_t branchCount() const
std::shared_ptr< std::shared_ptr< const ExecutionResult > > result_
std::unique_ptr< const RexScalar > else_expr_
void addManagedInput(std::shared_ptr< const RelAlgNode > input)
const std::vector< TargetMetaInfo > tuple_type_
RexSubQuery(std::shared_ptr< SQLTypeInfo > type, std::shared_ptr< std::shared_ptr< const ExecutionResult >> result, const std::shared_ptr< const RelAlgNode > ra)
std::vector< std::string > fields_
const RexScalar * getTableFuncInputAt(const size_t idx) const
std::vector< std::unique_ptr< const RexAgg > > getAggExprsAndRelease()
RexInput(const RelAlgNode *node, const unsigned in_index)
void setModifiedTableDescriptor(TableDescriptor const *td) const
const void * context_data_
void setFilterExpr(std::unique_ptr< const RexScalar > &new_expr)
std::shared_ptr< const RelAlgNode > getRootNodeShPtr() const
std::shared_ptr< RelAlgNode > deepCopy() const override
std::string getFieldName(const size_t idx) const
bool validateTargetColumns(VALIDATION_FUNCTOR validator) const
bool empty_result_
virtual ~Rex()
std::vector< ConstRexScalarPtr > ConstRexScalarPtrVector
TableDescriptor const * getModifiedTableDescriptor() const
const TableDescriptor * td_
const Catalog_Namespace::Catalog & cat_
void setUpdateViaSelectFlag() const
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
const std::vector< std::string > & getFields() const
const SQLOps op_
std::string to_string(char const *&&v)
const SQLAgg agg_
const size_t groupby_count_
const RexScalar * getCondition() const
auto const isUpdateViaSelect() const
std::unique_ptr< RexInput > deepCopy() const
RelTableFunction(const std::string &function_name, std::shared_ptr< const RelAlgNode > input, std::vector< std::string > &fields, std::vector< const Rex *> col_inputs, std::vector< std::unique_ptr< const RexScalar >> &table_func_inputs, std::vector< std::unique_ptr< const RexScalar >> &target_exprs)
const std::string getFieldName(const size_t i) const
std::vector< SortField > collation_
SortField getCollation(const size_t i) const
const RexScalar * getWhen(const size_t idx) const
std::string toString() const override
std::string toString() const override
size_t getIndex() const
const std::string getFieldName(const size_t i) const
const RexWindowBound & getUpperBound() const
std::vector< RexLiteral > RexLiteralArray
This file contains the class specification and related data structures for Catalog.
std::vector< std::shared_ptr< RexSubQuery > > subqueries_
TargetColumnList const & getUpdateColumnNames()
const RenderInfo * render_info_
RexLiteral(const double val, const SQLTypes type, const SQLTypes target_type, const unsigned scale, const unsigned precision, const unsigned type_scale, const unsigned type_precision)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
const unsigned precision_
auto const isVarlenUpdateRequired() const
std::string toString() const override
TableDescriptor const *const getTableDescriptor() const
bool operator==(const RexInput &that) const
bool hasInput(const RelAlgNode *needle) const
virtual ~RelAlgNode()
std::string cat(Ts &&... args)
void setOutputMetainfo(const std::vector< TargetMetaInfo > &targets_metainfo) const
std::vector< std::shared_ptr< RelAlgNode > > nodes_
std::shared_ptr< const RelAlgNode > RelAlgNodeInputPtr
const RexScalar * getThen(const size_t idx) const
bool isSimple() const
ConstRexScalarPtrVector getPartitionKeysAndRelease() const
const RelAlgNode * getSourceNode() const
const std::vector< std::shared_ptr< RexSubQuery > > & getSubqueries() const
const RexScalar * getTableFuncInputAtAndRelease(const size_t idx)
bool hasContextData() const
const bool distinct_
const NullSortedPosition nulls_pos_
const size_t offset_
#define CHECK_NE(x, y)
Definition: Logger.h:206
const std::vector< RowValues > values_
const RexScalar * getProjectAtAndRelease(const size_t idx) const
ColumnNameList const & getTargetColumns() const
RexCase(std::vector< std::pair< std::unique_ptr< const RexScalar >, std::unique_ptr< const RexScalar >>> &expr_pair_list, std::unique_ptr< const RexScalar > &else_expr)
bool hasRows() const
void setDeleteViaSelectFlag() const
const SQLTypeInfo type_
const TableDescriptor * table_descriptor_
const std::vector< std::shared_ptr< const RelJoin > > original_joins_
std::shared_ptr< const RexScalar > offset
const size_t getAggExprsCount() const
const SQLTypeInfo & getType() const
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
const size_t getScalarSourcesSize() const
size_t size() const override
std::shared_ptr< SQLTypeInfo > type_
JoinType getJoinType() const
size_t size() const override
const RexScalar * getAndReleaseCondition()
bool isNop() const
std::string sql_window_function_to_str(const SqlWindowFunctionKind kind)
RexSubQuery(const std::shared_ptr< const RelAlgNode > ra)
ModifyOperation getOperation() const
RexAbstractInput(const unsigned in_index)
RelFilter(std::unique_ptr< const RexScalar > &filter, std::shared_ptr< const RelAlgNode > input)
Catalog_Namespace::Catalog const & catalog_
const unsigned type_precision_
std::string toString() const override
RelAggregate(const size_t groupby_count, std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
std::unique_ptr< const RexScalar > filter_
void setCondition(std::unique_ptr< const RexScalar > &condition)
std::vector< std::unique_ptr< const RexScalar > > operands_
const size_t groupby_count_
std::vector< std::string > fields_
virtual std::unique_ptr< const RexOperator > getDisambiguated(std::vector< std::unique_ptr< const RexScalar >> &operands) const
RelSort(const std::vector< SortField > &collation, const size_t limit, const size_t offset, std::shared_ptr< const RelAlgNode > input)
RANodeOutput get_node_output(const RelAlgNode *ra_node)
unsigned getTypeScale() const
const RexScalar * getOperand(const size_t idx) const
const RelAlgNode * node_
SQLTypes getType() const
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
static std::string yieldModifyOperationString(ModifyOperation const op)
size_t getOffset() const
size_t size() const override
const SQLTypes type_
static ModifyOperation yieldModifyOperationEnum(std::string const &op_string)
RelCompound(std::unique_ptr< const RexScalar > &filter_expr, const std::vector< const Rex *> &target_exprs, const size_t groupby_count, const std::vector< const RexAgg *> &agg_exprs, const std::vector< std::string > &fields, std::vector< std::unique_ptr< const RexScalar >> &scalar_sources, const bool is_agg, bool update_disguised_as_select=false, bool delete_disguised_as_select=false, bool varlen_update_required=false, TableDescriptor const *manipulation_target_table=nullptr, ColumnNameList target_columns=ColumnNameList())
void setScalarSources(std::vector< std::unique_ptr< const RexScalar >> &new_sources)
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::vector< TargetMetaInfo > targets_metainfo_
const std::string getFieldName(const size_t i) const
std::unique_ptr< RexAgg > deepCopy() const
void setContextData(const void *context_data) const
const void * getContextData() const
size_t size() const override
SortDirection
RexWindowFunctionOperator(const SqlWindowFunctionKind kind, ConstRexScalarPtrVector &operands, ConstRexScalarPtrVector &partition_keys, ConstRexScalarPtrVector &order_keys, const std::vector< SortField > collation, const RexWindowBound &lower_bound, const RexWindowBound &upper_bound, const bool is_rows, const SQLTypeInfo &ti)
std::vector< std::shared_ptr< const RelAlgNode > > RelAlgInputs
const std::vector< SortField > collation_
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:53
NullSortedPosition getNullsPosition() const
void setCollation(std::vector< SortField > &&collation)
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
const RexScalar * getCondition() const
const size_t getGroupByCount() const
std::string toString() const override
const std::string getFieldName(const size_t i) const
const std::vector< const Rex * > target_exprs_
unsigned getIndex() const
void setTableFuncInputs(std::vector< std::unique_ptr< const RexScalar >> &exprs)
size_t getLimit() const
std::vector< std::unique_ptr< const RexScalar > > RowValues
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
const RexScalar * getScalarSource(const size_t i) const
void injectOffsetInFragmentExpr() const
const RexScalar * getAndReleaseCondition() const
SQLAgg getKind() const
const size_t inputCount() const
SortField(const size_t field, const SortDirection sort_dir, const NullSortedPosition nulls_pos)
std::unique_ptr< const RexScalar > filter_expr_
const RexScalar * getOperandAndRelease(const size_t idx) const
void resetQueryExecutionState()
std::vector< ConstRexScalarPtr > ConstRexScalarPtrVector
bool isAggregate() const
ConstRexScalarPtrVector order_keys_
bool operator==(const SortField &that) const
RelModify(Catalog_Namespace::Catalog const &cat, TableDescriptor const *const td, bool flattened, ModifyOperation op, TargetColumnList const &target_column_list, RelAlgNodeInputPtr input)
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
RelLogicalValues(const std::vector< TargetMetaInfo > &tuple_type, std::vector< RowValues > &values)
std::string toString() const override
std::unique_ptr< const RexOperator > getDisambiguated(std::vector< std::unique_ptr< const RexScalar >> &operands) const override
RexLiteral(const bool val, const SQLTypes type, const SQLTypes target_type, const unsigned scale, const unsigned precision, const unsigned type_scale, const unsigned type_precision)
std::vector< std::pair< std::unique_ptr< const RexScalar >, std::unique_ptr< const RexScalar > > > expr_pair_list_
SqlWindowFunctionKind
Definition: sqldefs.h:82
RexFunctionOperator(const std::string &name, ConstRexScalarPtrVector &operands, const SQLTypeInfo &ti)
const SQLTypes target_type_
std::unique_ptr< const RexScalar > condition_
const std::vector< std::string > field_names_
const std::vector< std::string > & getFieldNames() const
std::string toString() const override
virtual std::string toString() const =0
const SortDirection sort_dir_
std::vector< const RexAgg * > getAggregatesAndRelease()
const RexScalar * getValueAt(const size_t row_idx, const size_t col_idx) const
const RexScalar * getFilterExpr() const
ModifyOperation operation_
#define CHECK(condition)
Definition: Logger.h:197
RelProject(std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs, const std::vector< std::string > &fields, std::shared_ptr< const RelAlgNode > input)
bool const isFlattened() const
std::string toString() const override
ConstRexScalarPtrVector partition_keys_
std::unique_ptr< RexRef > deepCopy() const
const RelAlgNode * getInput(const size_t idx) const
const SQLTypeInfo & getType() const
std::vector< RexLiteralArray > TupleContentsArray
const std::vector< std::string > & getFields() const
const SQLTypeInfo type_
const std::vector< size_t > operands_
std::string toString() const override
void setExpressions(std::vector< std::unique_ptr< const RexScalar >> &exprs) const
const std::shared_ptr< RelFilter > original_filter_
unsigned getScale() const
std::vector< std::string > ColumnNameList
size_t getRowsSize() const
ModifyManipulationTarget(bool const update_via_select=false, bool const delete_via_select=false, bool const varlen_update_required=false, TableDescriptor const *table_descriptor=nullptr, ColumnNameList target_columns=ColumnNameList())
RexLiteral(const SQLTypes target_type)
QueryNotSupported(const std::string &reason)
Definition: sqltypes.h:46
void setVarlenUpdateRequired(bool required) const
int getUpdateColumnCount() const
size_t size() const override
std::vector< RexInput > RANodeOutput
std::shared_ptr< const ExecutionResult > getExecutionResult() const
Specifies the in-memory content of a row in the table metadata table.
const std::string & getName() const
RexAgg(const SQLAgg agg, const bool distinct, const SQLTypeInfo &type, const std::vector< size_t > &operands)
const std::vector< std::unique_ptr< const RexAgg > > & getAggExprs() const
std::string toString() const
const size_t field_
std::string toString() const override
size_t operator()(const RexInput &rex_in) const
const RexWindowBound lower_bound_
SQLTypes getTargetType() const
RexLiteral(const int64_t val, const SQLTypes type, const SQLTypes target_type, const unsigned scale, const unsigned precision, const unsigned type_scale, const unsigned type_precision)
RelScan(const TableDescriptor *td, const std::vector< std::string > &field_names)
size_t size() const override
bool isEmptyResult() const
const unsigned scale_
size_t getTableFuncInputsSize() const
TargetColumnList target_column_list_
void setAggExprs(std::vector< std::unique_ptr< const RexAgg >> &agg_exprs)
std::vector< std::string > fields_
size_t getNumRows() const
void visitScalarExprs(EXPR_VISITOR_FUNCTOR visitor_functor) const
ConstRexScalarPtrVector getOrderKeysAndRelease() const
unsigned getTypePrecision() const
size_t size() const
RelJoin(std::shared_ptr< const RelAlgNode > lhs, std::shared_ptr< const RelAlgNode > rhs, std::unique_ptr< const RexScalar > &condition, const JoinType join_type)
RelModify(Catalog_Namespace::Catalog const &cat, TableDescriptor const *const td, bool flattened, std::string const &op_string, TargetColumnList const &target_column_list, RelAlgNodeInputPtr input)
RelAlgInputs inputs_
void setFields(std::vector< std::string > &new_fields)
RexRef(const size_t index)
const SQLTypeInfo & getType() const
size_t getField() const
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:201
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
const bool is_agg_
bool isDistinct() const
std::string toString() const override
const unsigned id_
const RexScalar * getElse() const