OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TransientStringLiteralsVisitor.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "Logger/Logger.h"
21 #include "Shared/DbObjectKeys.h"
22 #include "StringOps/StringOps.h"
23 
25  public:
// Constructor: stores the string dictionary proxy and the executor for later use.
// NOTE(review): the signature line (listing line 26) is absent from this extract;
// the index at the end of this page gives it as
// TransientStringLiteralsVisitor(StringDictionaryProxy* sdp, Executor* executor).
27  : sdp_(sdp), executor_(executor) {
    // A proxy must be supplied: the visit methods dereference sdp_ without a null
    // test (e.g. sdp_->getDictKey()). executor is not checked here.
28  CHECK(sdp);
29  }
30 
// Visits a constant expression. For a constant that has a string type and is not
// null, asserts that its stringval payload pointer is set. The visible code then
// falls through to return defaultResult().
// NOTE(review): listing line 34 is absent from this extract; presumably it is the
// statement that registers *stringval with sdp_ as a transient -- confirm against
// the real header.
31  void* visitConstant(const Analyzer::Constant* constant) const override {
32  if (constant->get_type_info().is_string() && !constant->get_is_null()) {
33  CHECK(constant->get_constval().stringval);
35  }
36  return defaultResult();
37  }
38 
39  // visitUOper is for handling casts between dictionary encoded text
40  // columns that do not share string dictionaries. For these
41  // we need to run the translation again on the aggregator
42  // so that we know how to interpret the transient literals added
43  // by the leaves via string-to-string casts
44 
45  // Todo(todd): It is inefficient to do the same translation on
46  // the aggregator and each of the leaves, explore storing these
47  // translations/literals on the remote dictionary server instead
48  // so the translation happens once and only once
49 
// Visits a unary operator. Only CAST operators whose result type is a dictionary
// encoded string are of interest; everything else returns defaultResult() at once.
// Always returns defaultResult(); the method is run for its side effects.
50  void* visitUOper(const Analyzer::UOper* uoper) const override {
51  const auto& uoper_ti = uoper->get_type_info();
52  const auto& operand_ti = uoper->get_operand()->get_type_info();
53  if (!(uoper->get_optype() == kCAST && uoper_ti.is_dict_encoded_string())) {
54  return defaultResult();
55  }
    // True when the cast's result dictionary is the one sdp_ stands for.
56  const bool outputs_target_sdp = uoper_ti.getStringDictKey() == sdp_->getDictKey();
57 
    // Continue only if this cast targets sdp_ itself, or an enclosing expression
    // that feeds sdp_ is currently being visited (parent_feeds_sdp_).
58  if (!parent_feeds_sdp_ && !outputs_target_sdp) {
59  // If we are not casting to our dictionary (sdp_)
60  return defaultResult();
61  }
62  if (uoper_ti.is_dict_intersection()) {
63  // Intersection translations don't add transients to the dest proxy,
64  // and hence can be ignored for the purposes of populating transients
65  return defaultResult();
66  }
    // Raise the flag while the operand is visited so nested casts/string ops are
    // processed even when they do not target sdp_ directly. Remember the previous
    // value so that only the outermost feeding expression clears the flag again.
67  const bool parent_feeds_sdp_already_set = parent_feeds_sdp_;
68  parent_feeds_sdp_ = true;
69 
70  visit(uoper->get_operand());
71 
72  if (!parent_feeds_sdp_already_set) {
73  parent_feeds_sdp_ = false;
74  }
75 
    // A translation is requested only when the operand is itself dictionary
    // encoded and uses a different dictionary than the cast result.
    // NOTE(review): listing lines 78, 81 and 83 are absent from this extract, so the
    // callee and two of its arguments are not visible. The visible arguments are the
    // operand's dictionary key, the result's dictionary key, an empty braced
    // initializer and true (with_generation); presumably this is a call to
    // executor_->getStringProxyTranslationMap(...) -- confirm against the real header.
76  if (operand_ti.is_dict_encoded_string() &&
77  uoper_ti.getStringDictKey() != operand_ti.getStringDictKey()) {
79  operand_ti.getStringDictKey(),
80  uoper_ti.getStringDictKey(),
82  {},
84  true); // with_generation
85  }
86  return defaultResult();
87  }
88 
// Visits a string operator expression. Requires at least one argument (CHECK_GE)
// and treats argument 0 as the string operand. Two cases are handled, both only
// when the result feeds sdp_ (directly, or through an enclosing expression):
//   1. result and operand are both dictionary encoded strings;
//   2. every argument of the operator is a literal.
// Always returns defaultResult(); the method is run for its side effects.
89  void* visitStringOper(const Analyzer::StringOper* string_oper) const override {
90  CHECK_GE(string_oper->getArity(), 1UL);
91  const auto str_operand = string_oper->getArg(0);
92  const auto& string_oper_ti = string_oper->get_type_info();
93  const auto& str_operand_ti = str_operand->get_type_info();
94  const auto string_oper_kind = string_oper->get_kind();
    // Nothing to do unless both the result and the first argument are strings.
95  if (!string_oper_ti.is_string() || !str_operand_ti.is_string()) {
96  return defaultResult();
97  }
    // Operators with two or more non-literal arguments are skipped.
98  if (string_oper->getNonLiteralsArity() >= 2UL) {
99  return defaultResult();
100  }
101  const bool parent_feeds_sdp_already_set = parent_feeds_sdp_;
    // True when the operator's result dictionary is the one sdp_ stands for.
102  const bool outputs_target_sdp =
103  string_oper_ti.getStringDictKey() == sdp_->getDictKey();
104  if (string_oper_ti.is_dict_encoded_string() &&
105  str_operand_ti.is_dict_encoded_string() &&
106  (parent_feeds_sdp_ || outputs_target_sdp)) {
    // Visit the operand with the flag raised so nested expressions are processed;
    // only the outermost feeding expression clears the flag afterwards.
107  parent_feeds_sdp_ = true;
108  visit(str_operand);
109  if (!parent_feeds_sdp_already_set) {
110  parent_feeds_sdp_ = false;
111  }
112  // Todo(todd): Dedup the code to get string_op_infos from the same
113  // in StringOpsIR.cpp (needs thought as Analyzer and StringOps
114  // deliberately are oblivious to each other)
115 
    // Build one StringOpInfo (kind, result type, literal arguments) per chained
    // string op expression; each element is required to be a StringOper.
116  std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
117  const auto chained_string_op_exprs = string_oper->getChainedStringOpExprs();
118  for (const auto& chained_string_op_expr : chained_string_op_exprs) {
119  auto chained_string_op =
120  dynamic_cast<const Analyzer::StringOper*>(chained_string_op_expr.get());
121  CHECK(chained_string_op);
122  StringOps_Namespace::StringOpInfo string_op_info(
123  chained_string_op->get_kind(),
124  chained_string_op->get_type_info(),
125  chained_string_op->getLiteralArgs());
126  string_op_infos.emplace_back(string_op_info);
127  }
128 
    // NOTE(review): listing lines 129, 132 and 134 are absent from this extract, so
    // the callee and two of its arguments are not visible. The visible arguments are
    // the operand's dictionary key, the result's dictionary key, string_op_infos and
    // true (with_generation); presumably this is a call to
    // executor_->getStringProxyTranslationMap(...) -- confirm against the real header.
130  str_operand_ti.getStringDictKey(),
131  string_oper_ti.getStringDictKey(),
133  string_op_infos,
135  true); // with_generation
136  } else if ((parent_feeds_sdp_ || outputs_target_sdp) &&
137  (string_oper->getLiteralsArity() == string_oper->getArity())) {
138  // This is likely dead code due to ExpressionRewrite of all-literal string ops
139  // (meaning when this visitor gets to a string op with all literal args it
140  // would have already been rewritten as a literal string)
141  // Todo(todd): Verify and remove if so
142  const StringOps_Namespace::StringOpInfo string_op_info(
143  string_oper_kind, string_oper->get_type_info(), string_oper->getLiteralArgs());
144  CHECK_EQ(string_op_info.numLiterals(), string_oper->getArity());
    // NOTE(review): listing line 146 (the initializer of this variable) is absent
    // from this extract; presumably it evaluates the operator on its literal
    // arguments and yields a (result string, is-null flag) pair -- confirm.
145  const auto str_result_and_null_status =
    // Register the result as a transient in sdp_ only when .second is false and
    // the result string is non-empty.
147  if (string_oper->get_type_info().is_string() &&
148  !str_result_and_null_status.second &&
149  !str_result_and_null_status.first
150  .empty()) { // Todo(todd): Is there a central/non-magic function/constant
151  // to determine if a none-encoded string is null
152  sdp_->getOrAddTransient(str_result_and_null_status.first);
153  }
154  }
155  return defaultResult();
156  }
157 
158  protected:
// Default visit result: a null pointer. Every visit override visible in this class
// returns this value.
159  void* defaultResult() const override { return nullptr; }
160 
161  private:
// NOTE(review): listing line 162 is absent from this extract; presumably it
// declares sdp_, which the constructor initializes and the visit methods use.
// Executor handed to the constructor. No use of it is visible in this extract
// (the lines that would show one are among the missing listing lines).
163  mutable Executor* executor_;
// True while the operand of an expression whose result feeds sdp_ is being visited.
// Declared mutable because the const visit methods set and clear it.
164  mutable bool parent_feeds_sdp_{false};
165 };
166 
// Visitor whose result type is shared::StringDictKey. For a CAST, a CASE
// expression or a string operator whose own result type is a dictionary encoded
// string, the override returns that type's string dictionary key; otherwise it
// returns the default-constructed key from defaultResult().
167 class TransientDictIdVisitor : public ScalarExprVisitor<shared::StringDictKey> {
168  public:
    // Returns the result type's dictionary key when uoper is a CAST to a dictionary
    // encoded string; other unary operators yield defaultResult().
169  shared::StringDictKey visitUOper(const Analyzer::UOper* uoper) const override {
170  const auto& expr_ti = uoper->get_type_info();
171  if (uoper->get_optype() == kCAST && expr_ti.is_string() &&
172  expr_ti.get_compression() == kENCODING_DICT) {
173  return expr_ti.getStringDictKey();
174  }
175  return defaultResult();
176  }
177 
    // CASE expression: returns the dictionary key of a dictionary encoded string
    // result type, else defaultResult().
    // NOTE(review): listing line 178 (return type and name; per the index on this
    // page, shared::StringDictKey visitCaseExpr) is absent from this extract.
179  const Analyzer::CaseExpr* case_expr) const override {
180  const auto& expr_ti = case_expr->get_type_info();
181  if (expr_ti.is_string() && expr_ti.get_compression() == kENCODING_DICT) {
182  return expr_ti.getStringDictKey();
183  }
184  return defaultResult();
185  }
186 
    // String operator: returns the dictionary key of a dictionary encoded string
    // result type, else defaultResult().
    // NOTE(review): listing line 187 (return type and name; per the index on this
    // page, shared::StringDictKey visitStringOper) is absent from this extract.
188  const Analyzer::StringOper* string_oper) const override {
189  const auto& expr_ti = string_oper->get_type_info();
190  if (expr_ti.is_string() && expr_ti.get_compression() == kENCODING_DICT) {
191  return expr_ti.getStringDictKey();
192  }
193  return defaultResult();
194  }
195 
196  protected:
    // Default result: a value-initialized shared::StringDictKey.
197  shared::StringDictKey defaultResult() const override { return {}; }
198 };
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:703
void * visitStringOper(const Analyzer::StringOper *string_oper) const override
void * visitUOper(const Analyzer::UOper *uoper) const override
bool get_is_null() const
Definition: Analyzer.h:347
#define CHECK_GE(x, y)
Definition: Logger.h:306
Definition: sqldefs.h:48
void * visitConstant(const Analyzer::Constant *constant) const override
const StringDictionaryProxy::IdMap * getStringProxyTranslationMap(const shared::StringDictKey &source_dict_key, const shared::StringDictKey &dest_dict_key, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
Definition: Execute.cpp:602
size_t getArity() const
Definition: Analyzer.h:1674
void * visit(const Analyzer::Expr *expr) const
shared::StringDictKey defaultResult() const override
size_t getLiteralsArity() const
Definition: Analyzer.h:1676
shared::StringDictKey visitStringOper(const Analyzer::StringOper *string_oper) const override
LiteralArgMap getLiteralArgs() const
Definition: Analyzer.cpp:4281
TransientStringLiteralsVisitor(StringDictionaryProxy *sdp, Executor *executor)
std::pair< std::string, bool > apply_string_op_to_literals(const StringOpInfo &string_op_info)
Definition: StringOps.cpp:1134
shared::StringDictKey visitUOper(const Analyzer::UOper *uoper) const override
int32_t getOrAddTransient(const std::string &)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
std::string * stringval
Definition: Datum.h:79
Expression class for string functions. The "arg" constructor parameter must be an expression that reso...
Definition: Analyzer.h:1601
SqlStringOpKind get_kind() const
Definition: Analyzer.h:1672
const Expr * get_operand() const
Definition: Analyzer.h:384
Datum get_constval() const
Definition: Analyzer.h:348
#define CHECK(condition)
Definition: Logger.h:291
bool is_string() const
Definition: sqltypes.h:559
const shared::StringDictKey & getDictKey() const noexcept
size_t getNonLiteralsArity() const
Definition: Analyzer.h:1686
std::vector< std::shared_ptr< Analyzer::Expr > > getChainedStringOpExprs() const
Definition: Analyzer.h:1700
shared::StringDictKey visitCaseExpr(const Analyzer::CaseExpr *case_expr) const override
const Expr * getArg(const size_t i) const
Definition: Analyzer.h:1688
SQLOps get_optype() const
Definition: Analyzer.h:383
const shared::StringDictKey & getStringDictKey() const
Definition: sqltypes.h:1055