OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TransientStringLiteralsVisitor.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "Logger/Logger.h"
21 #include "StringOps/StringOps.h"
22 
24  public:
26  : sdp_(sdp), executor_(executor) {
// The visit methods dereference sdp_ unconditionally (getDictId,
// getOrAddTransient), so the proxy pointer is CHECKed once here at construction.
27  CHECK(sdp);
28  }
29 
// Visits a literal constant. For a non-null constant of string type it CHECKs
// that the constant's stringval pointer is set. Always returns defaultResult().
30  void* visitConstant(const Analyzer::Constant* constant) const override {
31  if (constant->get_type_info().is_string() && !constant->get_is_null()) {
32  CHECK(constant->get_constval().stringval);
// NOTE(review): listing line 33 is missing from this extract (the numbering
// jumps from 32 to 34). Presumably it hands *stringval to
// sdp_->getOrAddTransient() - confirm against the original header.
34  }
35  return defaultResult();
36  }
37 
38  // visitUOper is for handling casts between dictionary encoded text
39  // columns that do not share string dictionaries. For these
40  // we need to run the translation again on the aggregator
41  // so that we know how to interpret the transient literals added
42  // by the leaves via string-to-string casts
43 
44  // Todo(todd): It is inefficient to do the same translation on
45  // the aggregator and each of the leaves, explore storing these
46  // translations/literals on the remote dictionary server instead
47  // so the translation happens once and only once
48 
// Visits a unary operator; only CASTs whose result type is a dictionary-encoded
// string are of interest. Returns defaultResult() on every path.
//
// The cast is skipped when neither it nor an enclosing expression
// (parent_feeds_sdp_) targets sdp_'s dictionary, and when the result type is a
// dictionary intersection. Otherwise the operand is visited with
// parent_feeds_sdp_ raised, and a translation call is issued when the operand
// is dictionary-encoded with a comp_param different from the result's.
49  void* visitUOper(const Analyzer::UOper* uoper) const override {
50  const auto& uoper_ti = uoper->get_type_info();
51  const auto& operand_ti = uoper->get_operand()->get_type_info();
52  if (!(uoper->get_optype() == kCAST && uoper_ti.is_dict_encoded_string())) {
53  return defaultResult();
54  }
// True when this cast's result comp_param equals the id of the dictionary
// behind sdp_.
55  const bool outputs_target_sdp = uoper_ti.get_comp_param() == sdp_->getDictId();
56 
57  if (!parent_feeds_sdp_ && !outputs_target_sdp) {
58  // If we are not casting to our dictionary (sdp_)
59  return defaultResult();
60  }
61  if (uoper_ti.is_dict_intersection()) {
62  // Intersection translations don't add transients to the dest proxy,
63  // and hence can be ignored for the purposes of populating transients
64  return defaultResult();
65  }
// Remember whether an enclosing visit already raised the flag, so that only
// the visit that raised it lowers it again after the operand is processed.
66  const bool parent_feeds_sdp_already_set = parent_feeds_sdp_;
67  parent_feeds_sdp_ = true;
68 
69  visit(uoper->get_operand());
70 
71  if (!parent_feeds_sdp_already_set) {
72  parent_feeds_sdp_ = false;
73  }
74 
75  if (operand_ti.is_dict_encoded_string() &&
76  uoper_ti.get_comp_param() != operand_ti.get_comp_param()) {
// NOTE(review): listing lines 77, 80 and 82 are missing from this extract.
// The surviving arguments (source comp_param, destination comp_param, an empty
// string-op list, with_generation = true) look like a call to
// executor_->getStringProxyTranslationMap(...); the translation type and the
// row-set memory owner arguments are not visible - confirm against the
// original header.
78  operand_ti.get_comp_param(),
79  uoper_ti.get_comp_param(),
81  {},
83  true); // with_generation
84  }
85  return defaultResult();
86  }
87 
// Visits a string operator. Returns defaultResult() on every path.
//
// Nothing is done unless both the operator's result type and its first
// argument's type are strings, and fewer than two of its arguments are
// non-literal. Two cases are then handled, each only when this operator or an
// enclosing expression (parent_feeds_sdp_) targets sdp_'s dictionary:
//  * result and first argument are both dictionary-encoded: the first argument
//    is visited with parent_feeds_sdp_ raised, a StringOpInfo is collected for
//    each chained string op, and a translation call is issued;
//  * every argument is a literal: the op is evaluated on the literals and a
//    non-null, non-empty result is added to sdp_ as a transient.
88  void* visitStringOper(const Analyzer::StringOper* string_oper) const override {
// getArg(0) below requires at least one argument.
89  CHECK_GE(string_oper->getArity(), 1UL);
90  const auto str_operand = string_oper->getArg(0);
91  const auto& string_oper_ti = string_oper->get_type_info();
92  const auto& str_operand_ti = str_operand->get_type_info();
93  const auto string_oper_kind = string_oper->get_kind();
94  if (!string_oper_ti.is_string() || !str_operand_ti.is_string()) {
95  return defaultResult();
96  }
97  if (string_oper->getNonLiteralsArity() >= 2UL) {
98  return defaultResult();
99  }
// Snapshot the flag so that only the visit that raised it lowers it again.
100  const bool parent_feeds_sdp_already_set = parent_feeds_sdp_;
// True when the result comp_param equals the id of the dictionary behind sdp_.
101  const bool outputs_target_sdp = string_oper_ti.get_comp_param() == sdp_->getDictId();
102  if (string_oper_ti.is_dict_encoded_string() &&
103  str_operand_ti.is_dict_encoded_string() &&
104  (parent_feeds_sdp_ || outputs_target_sdp)) {
105  parent_feeds_sdp_ = true;
106  visit(str_operand);
107  if (!parent_feeds_sdp_already_set) {
108  parent_feeds_sdp_ = false;
109  }
110  // Todo(todd): Dedup the code to get string_op_infos from the same
111  // in StringOpsIR.cpp (needs thought as Analyzer and StringOps
112  // deliberately are oblivious to each other)
113 
// Build one StringOpInfo (kind, type info, literal args) per chained string op.
114  std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
115  const auto chained_string_op_exprs = string_oper->getChainedStringOpExprs();
116  for (const auto& chained_string_op_expr : chained_string_op_exprs) {
117  auto chained_string_op =
118  dynamic_cast<const Analyzer::StringOper*>(chained_string_op_expr.get());
// Every chained expression is required to be a StringOper.
119  CHECK(chained_string_op);
120  StringOps_Namespace::StringOpInfo string_op_info(
121  chained_string_op->get_kind(),
122  chained_string_op->get_type_info(),
123  chained_string_op->getLiteralArgs());
124  string_op_infos.emplace_back(string_op_info);
125  }
126 
// NOTE(review): listing lines 127, 130 and 132 are missing from this extract.
// The surviving arguments (source comp_param, destination comp_param,
// string_op_infos, with_generation = true) look like a call to
// executor_->getStringProxyTranslationMap(...); the translation type and the
// row-set memory owner arguments are not visible - confirm against the
// original header.
128  str_operand_ti.get_comp_param(),
129  string_oper_ti.get_comp_param(),
131  string_op_infos,
133  true); // with_generation
134  } else if ((parent_feeds_sdp_ || outputs_target_sdp) &&
135  (string_oper->getLiteralsArity() == string_oper->getArity())) {
136  // This is likely dead code due to ExpressionRewrite of all-literal string ops
137  // (meaning when this visitor gets to a string op with all literal args it
138  // would have already been rewritten as a literal string)
139  // Todo(todd): Verify and remove if so
140  const StringOps_Namespace::StringOpInfo string_op_info(
141  string_oper_kind, string_oper->get_type_info(), string_oper->getLiteralArgs());
142  CHECK_EQ(string_op_info.numLiterals(), string_oper->getArity());
143  const auto str_result_and_null_status =
// NOTE(review): listing line 144 (the initializer of str_result_and_null_status)
// is missing from this extract. Presumably it is
// StringOps_Namespace::apply_string_op_to_literals(string_op_info), yielding a
// (result string, is-null) pair - confirm against the original header.
145  if (string_oper->get_type_info().is_string() &&
146  !str_result_and_null_status.second &&
147  !str_result_and_null_status.first
148  .empty()) { // Todo(todd): Is there a central/non-magic function/constant
149  // to determine if a none-encoded string is null
150  sdp_->getOrAddTransient(str_result_and_null_status.first);
151  }
152  }
153  return defaultResult();
154  }
155 
156  protected:
157  void* defaultResult() const override { return nullptr; }
158 
159  private:
// NOTE(review): listing line 160 is missing from this extract; presumably it
// declares sdp_, which the constructor initializes and the visit methods
// dereference - confirm against the original header.
// Executor handed to the constructor.
161  mutable Executor* executor_;
// Raised while visiting the operand of a cast or string op that targets sdp_'s
// dictionary, so nested casts / string ops are processed as well. Declared
// mutable because the const visit methods toggle it.
162  mutable bool parent_feeds_sdp_{false};
163 };
164 
166  public:
167  int visitUOper(const Analyzer::UOper* uoper) const override {
168  const auto& expr_ti = uoper->get_type_info();
169  if (uoper->get_optype() == kCAST && expr_ti.is_string() &&
170  expr_ti.get_compression() == kENCODING_DICT) {
171  return expr_ti.get_comp_param();
172  }
173  return defaultResult();
174  }
175 
176  int visitCaseExpr(const Analyzer::CaseExpr* case_expr) const override {
177  const auto& expr_ti = case_expr->get_type_info();
178  if (expr_ti.is_string() && expr_ti.get_compression() == kENCODING_DICT) {
179  return expr_ti.get_comp_param();
180  }
181  return defaultResult();
182  }
183 
184  int visitStringOper(const Analyzer::StringOper* string_oper) const override {
185  const auto& expr_ti = string_oper->get_type_info();
186  if (expr_ti.is_string() && expr_ti.get_compression() == kENCODING_DICT) {
187  return expr_ti.get_comp_param();
188  }
189  return defaultResult();
190  }
191 
192  protected:
193  int defaultResult() const override { return -1; }
194 };
#define CHECK_EQ(x, y)
Definition: Logger.h:297
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:682
void * visitStringOper(const Analyzer::StringOper *string_oper) const override
void * visitUOper(const Analyzer::UOper *uoper) const override
int visitCaseExpr(const Analyzer::CaseExpr *case_expr) const override
bool get_is_null() const
Definition: Analyzer.h:343
#define CHECK_GE(x, y)
Definition: Logger.h:302
int visitUOper(const Analyzer::UOper *uoper) const override
Definition: sqldefs.h:48
void * visitConstant(const Analyzer::Constant *constant) const override
size_t getArity() const
Definition: Analyzer.h:1544
void * visit(const Analyzer::Expr *expr) const
size_t getLiteralsArity() const
Definition: Analyzer.h:1546
const StringDictionaryProxy::IdMap * getStringProxyTranslationMap(const int source_dict_id, const int dest_dict_id, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
Definition: Execute.cpp:565
LiteralArgMap getLiteralArgs() const
Definition: Analyzer.cpp:4062
TransientStringLiteralsVisitor(StringDictionaryProxy *sdp, Executor *executor)
std::pair< std::string, bool > apply_string_op_to_literals(const StringOpInfo &string_op_info)
Definition: StringOps.cpp:905
int32_t getOrAddTransient(const std::string &)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:83
std::string * stringval
Definition: Datum.h:77
Expression class for string functions The &quot;arg&quot; constructor parameter must be an expression that reso...
Definition: Analyzer.h:1475
SqlStringOpKind get_kind() const
Definition: Analyzer.h:1542
const Expr * get_operand() const
Definition: Analyzer.h:380
Datum get_constval() const
Definition: Analyzer.h:344
int visitStringOper(const Analyzer::StringOper *string_oper) const override
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:389
int32_t getDictId() const noexcept
#define CHECK(condition)
Definition: Logger.h:289
bool is_string() const
Definition: sqltypes.h:576
size_t getNonLiteralsArity() const
Definition: Analyzer.h:1556
std::vector< std::shared_ptr< Analyzer::Expr > > getChainedStringOpExprs() const
Definition: Analyzer.h:1570
const Expr * getArg(const size_t i) const
Definition: Analyzer.h:1558
SQLOps get_optype() const
Definition: Analyzer.h:379