OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringOpsIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Execute.h"
19 
20 #include "../Shared/funcannotations.h"
21 #include "../Shared/sqldefs.h"
22 #include "Parser/ParserNode.h"
24 #include "StringOps/StringOps.h"
25 
26 #include <boost/locale/conversion.hpp>
27 
28 extern "C" RUNTIME_EXPORT StringView string_decode(int8_t* chunk_iter_, int64_t pos) {
29  auto chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
30  VarlenDatum vd;
31  bool is_end;
32  ChunkIter_get_nth(chunk_iter, pos, false, &vd, &is_end);
33  CHECK(!is_end);
34  return vd.is_null ? StringView{nullptr, 0u}
35  : StringView{reinterpret_cast<char const*>(vd.pointer), vd.length};
36 }
37 
38 extern "C" RUNTIME_EXPORT StringView string_decompress(const int32_t string_id,
39  const int64_t string_dict_handle) {
40  if (string_id == NULL_INT) {
41  return {nullptr, 0};
42  }
43  auto string_dict_proxy =
44  reinterpret_cast<const StringDictionaryProxy*>(string_dict_handle);
45  auto string_bytes = string_dict_proxy->getStringBytes(string_id);
46  CHECK(string_bytes.first);
47  return {string_bytes.first, string_bytes.second};
48 }
49 
50 extern "C" RUNTIME_EXPORT int32_t string_compress(const StringView string_view,
51  const int64_t string_dict_handle) {
52  std::string_view const sv = string_view.stringView();
53  if (sv.empty()) {
54  return inline_int_null_value<int32_t>();
55  }
56  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
57  return string_dict_proxy->getOrAddTransient(sv);
58 }
59 
60 extern "C" RUNTIME_EXPORT int32_t
61 apply_string_ops_and_encode(const char* str_ptr,
62  const int32_t str_len,
63  const int64_t string_ops_handle,
64  const int64_t string_dict_handle) {
65  std::string raw_str(str_ptr, str_len);
66  auto string_ops =
67  reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle);
68  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
69  const auto result_str = string_ops->operator()(raw_str);
70  if (result_str.empty()) {
71  return inline_int_null_value<int32_t>();
72  }
73  return string_dict_proxy->getOrAddTransient(result_str);
74 }
75 
76 extern "C" RUNTIME_EXPORT int32_t
78  const int32_t str1_len,
79  const char* str2_ptr,
80  const int32_t str2_len,
81  const int64_t string_ops_handle,
82  const int64_t string_dict_handle) {
83  std::string_view raw_str1(str1_ptr, str1_len);
84  std::string_view raw_str2(str2_ptr, str2_len);
85  auto string_ops =
86  reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle);
87  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
88  const auto result_str = string_ops->multi_input_eval(raw_str1, raw_str2);
89  if (result_str.empty()) {
90  return inline_int_null_value<int32_t>();
91  }
92  return string_dict_proxy->getOrAddTransient(result_str);
93 }
94 
95 extern "C" RUNTIME_EXPORT int32_t
97  const int64_t source_string_dict_handle,
98  const int64_t dest_string_dict_handle) {
99  const auto source_string_dict_proxy =
100  reinterpret_cast<StringDictionaryProxy*>(source_string_dict_handle);
101  auto dest_string_dict_proxy =
102  reinterpret_cast<StringDictionaryProxy*>(dest_string_dict_handle);
103  // Can we have StringDictionaryProxy::getString return a reference?
104  const auto source_str = source_string_dict_proxy->getString(string_id);
105  if (source_str.empty()) {
106  return inline_int_null_value<int32_t>();
107  }
108  return dest_string_dict_proxy->getIdOfString(source_str);
109 }
110 
111 extern "C" RUNTIME_EXPORT int32_t
113  const int64_t source_string_dict_handle,
114  const int64_t dest_string_dict_handle) {
115  const auto source_string_dict_proxy =
116  reinterpret_cast<StringDictionaryProxy*>(source_string_dict_handle);
117  auto dest_string_dict_proxy =
118  reinterpret_cast<StringDictionaryProxy*>(dest_string_dict_handle);
119  // Can we have StringDictionaryProxy::getString return a reference?
120  const auto source_str = source_string_dict_proxy->getString(string_id);
121  if (source_str.empty()) {
122  return inline_int_null_value<int32_t>();
123  }
124  return dest_string_dict_proxy->getOrAddTransient(source_str);
125 }
126 
127 #define DEF_APPLY_NUMERIC_STRING_OPS(value_type, value_name) \
128  extern "C" RUNTIME_EXPORT ALWAYS_INLINE value_type \
129  apply_numeric_string_ops_##value_name( \
130  const char* str_ptr, const int32_t str_len, const int64_t string_ops_handle) { \
131  const std::string_view raw_str(str_ptr, str_len); \
132  auto string_ops = \
133  reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle); \
134  const auto result_datum = string_ops->numericEval(raw_str); \
135  return result_datum.value_name##val; \
136  }
137 
138 DEF_APPLY_NUMERIC_STRING_OPS(int8_t, bool)
139 DEF_APPLY_NUMERIC_STRING_OPS(int8_t, tinyint)
140 DEF_APPLY_NUMERIC_STRING_OPS(int16_t, smallint)
141 DEF_APPLY_NUMERIC_STRING_OPS(int32_t, int)
142 DEF_APPLY_NUMERIC_STRING_OPS(int64_t, bigint)
143 DEF_APPLY_NUMERIC_STRING_OPS(float, float)
144 DEF_APPLY_NUMERIC_STRING_OPS(double, double)
145 
146 #undef DEF_APPLY_NUMERIC_STRING_OPS
147 
// Two-input counterpart of the numeric string-op generator. Produces
//   apply_multi_input_numeric_string_ops_<SUFFIX>(str1_ptr, str1_len,
//                                                 str2_ptr, str2_len,
//                                                 string_ops_handle)
// which wraps both inputs in std::string_view, calls the two-argument
// StringOps::numericEval, and returns the `<SUFFIX>val` member of the result.
#define DEF_APPLY_MULTI_INPUT_NUMERIC_STRING_OPS(RESULT_T, SUFFIX)                  \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE RESULT_T                                   \
      apply_multi_input_numeric_string_ops_##SUFFIX(const char* str1_ptr,           \
                                                    const int32_t str1_len,         \
                                                    const char* str2_ptr,           \
                                                    const int32_t str2_len,         \
                                                    const int64_t string_ops_handle) { \
    const auto* ops =                                                                \
        reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle); \
    const std::string_view first_input(str1_ptr, str1_len);                          \
    const std::string_view second_input(str2_ptr, str2_len);                         \
    const auto datum = ops->numericEval(first_input, second_input);                  \
    return datum.SUFFIX##val;                                                        \
  }
163 
165 
166 #undef DEF_APPLY_MULTI_INPUT_NUMERIC_STRING_OPS
167 
168 inline int32_t write_string_to_proxy(const std::string& str,
169  const int64_t string_dict_handle) {
170  if (str.empty()) {
171  return inline_int_null_value<int32_t>();
172  }
173  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
174  return string_dict_proxy->getOrAddTransient(str);
175 }
176 
// Stamps out convert_to_string_and_encode_<SUFFIX>(operand, string_dict_handle):
// the operand (of type OPERAND_T) is rendered with std::to_string and the text
// is handed to write_string_to_proxy, whose result is returned.
#define DEF_CONVERT_TO_STRING_AND_ENCODE(OPERAND_T, SUFFIX)                        \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t                                  \
      convert_to_string_and_encode_##SUFFIX(const OPERAND_T operand,               \
                                            const int64_t string_dict_handle) {    \
    const std::string rendered = std::to_string(operand);                          \
    return write_string_to_proxy(rendered, string_dict_handle);                    \
  }
183 
184 DEF_CONVERT_TO_STRING_AND_ENCODE(int8_t, tinyint)
185 DEF_CONVERT_TO_STRING_AND_ENCODE(int16_t, smallint)
187 DEF_CONVERT_TO_STRING_AND_ENCODE(int64_t, bigint)
189 DEF_CONVERT_TO_STRING_AND_ENCODE(double, double)
190 
191 #undef DEF_CONVERT_TO_STRING_AND_ENCODE
192 
193 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
195  const int64_t string_dict_handle) {
196  return write_string_to_proxy(operand == 1 ? "true" : "false", string_dict_handle);
197 }
198 
199 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
201  const int32_t precision,
202  const int32_t scale,
203  const int64_t string_dict_handle) {
204  constexpr size_t buf_size = 64;
205  char buf[buf_size]; // Hold "2000-03-01 12:34:56.123456789" and large years.
206  const double v = static_cast<double>(operand) * shared::power10inv(scale);
207  snprintf(buf, buf_size, "%*.*f", precision, scale, v);
208  return write_string_to_proxy(buf, string_dict_handle);
209 }
210 
211 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
212 convert_to_string_and_encode_time(const int64_t operand,
213  const int64_t string_dict_handle) {
214  constexpr size_t buf_size = 64;
215  char buf[buf_size];
216  shared::formatHMS(buf, buf_size, operand);
217  return write_string_to_proxy(buf, string_dict_handle);
218 }
219 
220 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
222  const int32_t dimension,
223  const int64_t string_dict_handle) {
224  constexpr size_t buf_size = 64;
225  char buf[buf_size]; // Hold "2000-03-01 12:34:56.123456789" and large years.
226  shared::formatDateTime(buf, buf_size, operand, dimension);
227  return write_string_to_proxy(buf, string_dict_handle);
228 }
229 
230 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
231 convert_to_string_and_encode_date(const int64_t operand,
232  const int64_t string_dict_handle) {
233  constexpr size_t buf_size = 64;
234  char buf[buf_size]; // Hold "2000-03-01 12:34:56.123456789" and large years.
235  shared::formatDate(buf, buf_size, operand);
236  return write_string_to_proxy(buf, string_dict_handle);
237 }
238 
240  const CompilationOptions& co) {
242  auto str_lv = codegen(expr->get_arg(), true, co);
243  if (str_lv.size() != 3) {
244  CHECK_EQ(size_t(1), str_lv.size());
245  str_lv.push_back(cgen_state_->ir_builder_.CreateExtractValue(str_lv.front(), 0));
246  str_lv.push_back(cgen_state_->ir_builder_.CreateExtractValue(str_lv.front(), 1));
247  str_lv.back() = cgen_state_->ir_builder_.CreateTrunc(
248  str_lv.back(), llvm::Type::getInt32Ty(cgen_state_->context_));
250  throw QueryMustRunOnCpu();
251  }
252  }
253  std::vector<llvm::Value*> charlength_args{str_lv[1], str_lv[2]};
254  std::string fn_name("char_length");
255  if (expr->get_calc_encoded_length()) {
256  fn_name += "_encoded";
257  }
258  const bool is_nullable{!expr->get_arg()->get_type_info().get_notnull()};
259  if (is_nullable) {
260  fn_name += "_nullable";
261  charlength_args.push_back(cgen_state_->inlineIntNull(expr->get_type_info()));
262  }
263  return expr->get_calc_encoded_length()
265  fn_name, get_int_type(32, cgen_state_->context_), charlength_args)
266  : cgen_state_->emitCall(fn_name, charlength_args);
267 }
268 
270  const CompilationOptions& co) {
272  auto str_lv = codegen(expr->get_arg(), true, co);
273  CHECK_EQ(size_t(1), str_lv.size());
274  return cgen_state_->emitCall("key_for_string_encoded", str_lv);
275 }
276 
277 std::vector<StringOps_Namespace::StringOpInfo> getStringOpInfos(
278  const Analyzer::StringOper* expr) {
279  std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
280  auto chained_string_op_exprs = expr->getChainedStringOpExprs();
281  if (chained_string_op_exprs.empty()) {
282  // Likely will change the below to a CHECK but until we have more confidence
283  // that all potential query patterns have nodes that might contain string ops folded,
284  // leaving as an error for now
285  throw std::runtime_error(
286  "Expected folded string operator but found operator unfolded.");
287  }
288  // Consider encapsulating below in an Analyzer::StringOper method to dedup
289  for (const auto& chained_string_op_expr : chained_string_op_exprs) {
290  auto chained_string_op =
291  dynamic_cast<const Analyzer::StringOper*>(chained_string_op_expr.get());
292  CHECK(chained_string_op);
293  StringOps_Namespace::StringOpInfo string_op_info(chained_string_op->get_kind(),
294  chained_string_op->get_type_info(),
295  chained_string_op->getLiteralArgs());
296  string_op_infos.emplace_back(string_op_info);
297  }
298  return string_op_infos;
299 }
300 
301 std::pair<std::vector<llvm::Value*>, std::unique_ptr<CodeGenerator::NullCheckCodegen>>
303  const CompilationOptions& co,
304  const size_t arg_idx,
305  const bool codegen_nullcheck) {
306  CHECK_LT(arg_idx, expr->getArity());
307  const auto& arg_ti = expr->getArg(arg_idx)->get_type_info();
308 
309  auto primary_str_lv = codegen(expr->getArg(arg_idx), true, co);
310  std::unique_ptr<CodeGenerator::NullCheckCodegen> nullcheck_codegen;
311  if (primary_str_lv.size() != 3 && arg_ti.is_dict_encoded_string()) {
312  // If this is the case we should have a transient dictionary from a previous op
313  // We can't use the dictionary values without decoding as this op occurs directly
314  // inline on top of whatever operation created the transient dictionary
315  CHECK_EQ(size_t(1), primary_str_lv.size());
316  const bool is_nullable = !arg_ti.get_notnull();
317  if (codegen_nullcheck && is_nullable) {
318  const auto decoded_input_ti = SQLTypeInfo(kTEXT, is_nullable, kENCODING_DICT);
319  nullcheck_codegen = std::make_unique<CodeGenerator::NullCheckCodegen>(
320  cgen_state_,
321  executor_,
322  primary_str_lv[0],
323  decoded_input_ti,
324  "transient_dict_per_row_nullcheck");
325  }
326  const auto sdp_ptr = reinterpret_cast<int64_t>(executor()->getStringDictionaryProxy(
327  arg_ti.getStringDictKey(), executor()->getRowSetMemoryOwner(), true));
328  const auto string_view =
329  cgen_state_->emitExternalCall("string_decompress",
331  {primary_str_lv[0], cgen_state_->llInt(sdp_ptr)});
332  primary_str_lv.push_back(cgen_state_->ir_builder_.CreateExtractValue(string_view, 0));
333  primary_str_lv.push_back(cgen_state_->ir_builder_.CreateExtractValue(string_view, 1));
334  primary_str_lv.back() = cgen_state_->ir_builder_.CreateTrunc(
335  primary_str_lv.back(), llvm::Type::getInt32Ty(cgen_state_->context_));
336  } else if (primary_str_lv.size() == 1 and arg_ti.is_none_encoded_string()) {
337  // real (not dictionary-encoded) strings
338  CHECK(primary_str_lv[0]->getType()->isPointerTy());
339  const auto none_enc_string = cgen_state_->ir_builder_.CreateLoad(
340  primary_str_lv[0]->getType()->getPointerElementType(), primary_str_lv[0]);
341  primary_str_lv.push_back(
342  cgen_state_->ir_builder_.CreateExtractValue(none_enc_string, 0));
343  primary_str_lv.push_back(cgen_state_->ir_builder_.CreateTrunc(
344  cgen_state_->ir_builder_.CreateExtractValue(none_enc_string, 1),
345  llvm::Type::getInt32Ty(cgen_state_->context_)));
346  }
347  CHECK_EQ(size_t(3), primary_str_lv.size());
348  return std::make_pair(primary_str_lv, std::move(nullcheck_codegen));
349 }
350 
352  const CompilationOptions& co) {
354  CHECK_GE(expr->getArity(), 1UL);
355  const auto non_literals_arity = expr->getNonLiteralsArity();
356  CHECK_GE(non_literals_arity, 1UL);
357  CHECK_LE(non_literals_arity, 2UL);
358  const auto& return_ti = expr->get_type_info();
359  if (g_cluster && return_ti.is_dict_encoded_string()) {
360  throw std::runtime_error(
361  "Cast from none-encoded string to dictionary-encoded not supported for "
362  "distributed queries");
363  }
365  throw QueryMustRunOnCpu();
366  }
367  const auto [primary_str_lv, nullcheck_codegen] =
368  codegenStringFetchAndEncode(expr, co, 0UL, false);
369  CHECK_EQ(size_t(3), primary_str_lv.size());
370 
371  const auto string_op_infos = getStringOpInfos(expr);
372  CHECK(string_op_infos.size());
373 
374  const auto string_ops =
375  executor()->getRowSetMemoryOwner()->getStringOps(string_op_infos);
376  const int64_t string_ops_handle = reinterpret_cast<int64_t>(string_ops);
377  auto string_ops_handle_lv = cgen_state_->llInt(string_ops_handle);
378 
379  if (!return_ti.is_string()) {
380  CHECK_GE(non_literals_arity, 1UL);
381  CHECK_LE(non_literals_arity, 2UL);
382  std::vector<llvm::Value*> string_oper_lvs;
383  if (non_literals_arity == 1UL) {
384  string_oper_lvs = {primary_str_lv[1], primary_str_lv[2], string_ops_handle_lv};
385  } else {
386  const auto [secondary_str_lv, secondary_nullcheck_codegen] =
387  codegenStringFetchAndEncode(expr, co, 1UL, false);
388  CHECK_EQ(size_t(3), secondary_str_lv.size());
389  string_oper_lvs = {primary_str_lv[1],
390  primary_str_lv[2],
391  secondary_str_lv[1],
392  secondary_str_lv[2],
393  string_ops_handle_lv};
394  }
395  const auto return_type = return_ti.get_type();
396  std::string fn_call = non_literals_arity == 1UL
397  ? "apply_numeric_string_ops_"
398  : "apply_multi_input_numeric_string_ops_";
399  switch (return_type) {
400  case kBOOLEAN: {
401  fn_call += "bool";
402  break;
403  }
404  case kTINYINT:
405  case kSMALLINT:
406  case kINT:
407  case kBIGINT:
408  case kFLOAT:
409  case kDOUBLE: {
410  fn_call += to_lower(toString(return_type));
411  break;
412  }
413  case kNUMERIC:
414  case kDECIMAL:
415  case kTIME:
416  case kTIMESTAMP:
417  case kDATE: {
418  fn_call += "bigint";
419  break;
420  }
421  default: {
422  throw std::runtime_error("Unimplemented type for string-to-numeric translation");
423  }
424  }
425  const auto logical_size = return_ti.get_logical_size() * 8;
426  auto llvm_return_type = return_ti.is_fp()
427  ? get_fp_type(logical_size, cgen_state_->context_)
428  : get_int_type(logical_size, cgen_state_->context_);
429  auto ret = cgen_state_->emitExternalCall(fn_call, llvm_return_type, string_oper_lvs);
430  if (nullcheck_codegen) {
431  ret = nullcheck_codegen->finalize(cgen_state_->inlineNull(return_ti), ret);
432  }
433  return ret;
434  }
435 
436  // If here we are outputting a string dictionary column
437  CHECK(return_ti.is_dict_encoded_string());
438  const int64_t dest_string_proxy_handle =
439  reinterpret_cast<int64_t>(executor()->getStringDictionaryProxy(
440  return_ti.getStringDictKey(), executor()->getRowSetMemoryOwner(), true));
441  auto dest_string_proxy_handle_lv = cgen_state_->llInt(dest_string_proxy_handle);
442  if (non_literals_arity == 1UL) {
443  std::vector<llvm::Value*> string_oper_lvs{primary_str_lv[1],
444  primary_str_lv[2],
445  string_ops_handle_lv,
446  dest_string_proxy_handle_lv};
447 
448  auto ret = cgen_state_->emitExternalCall("apply_string_ops_and_encode",
450  string_oper_lvs);
451  if (nullcheck_codegen) {
452  ret = nullcheck_codegen->finalize(cgen_state_->inlineNull(return_ti), ret);
453  }
454  return ret;
455  } else {
456  // For now only CONCAT is supported, which takes up to 2 non-literal string
457  // arguments. In the future (likely when we can codegen the StringOps to enable
458  // generic, multi-branch execution rather than linear chains of functors as we do
459  // today), we will generalize this to functions that take
460  // any number of string and numeric non-literal arguments, in which case
461  // we will need to make apply_multi_input_string_ops_and_encode take
462  // a vector of arguments. For now, however, expecting exactly 2 arguments
463  // suffices.
464  CHECK_EQ(non_literals_arity, 2UL);
467  const auto [secondary_str_lv, secondary_nullcheck_codegen] =
468  codegenStringFetchAndEncode(expr, co, 1UL, false);
469  CHECK_EQ(size_t(3), secondary_str_lv.size());
470  std::vector<llvm::Value*> string_oper_lvs{primary_str_lv[1],
471  primary_str_lv[2],
472  secondary_str_lv[1],
473  secondary_str_lv[2],
474  string_ops_handle_lv,
475  dest_string_proxy_handle_lv};
476  auto ret = cgen_state_->emitExternalCall("apply_multi_input_string_ops_and_encode",
478  string_oper_lvs);
479  if (secondary_nullcheck_codegen) {
480  ret =
481  secondary_nullcheck_codegen->finalize(cgen_state_->inlineNull(return_ti), ret);
482  }
483  if (nullcheck_codegen) {
484  ret = nullcheck_codegen->finalize(cgen_state_->inlineNull(return_ti), ret);
485  }
486  return ret;
487  }
488 }
489 
490 std::unique_ptr<StringDictionaryTranslationMgr> translate_dict_strings(
491  const Analyzer::StringOper* expr,
492  const ExecutorDeviceType device_type,
493  Executor* executor) {
494  const auto& expr_ti = expr->get_type_info();
495  const auto& primary_input_expr_ti = expr->getArg(0)->get_type_info();
496  const auto& dict_id = primary_input_expr_ti.getStringDictKey();
497  const auto string_op_infos = getStringOpInfos(expr);
498  CHECK(string_op_infos.size());
499 
500  if (string_op_infos.back().getReturnType().is_dict_encoded_string()) {
501  // string->string translation
502  auto string_dictionary_translation_mgr =
503  std::make_unique<StringDictionaryTranslationMgr>(
504  dict_id,
505  dict_id,
506  false, // translate_intersection_only
507  expr_ti,
508  string_op_infos,
511  executor->deviceCount(device_type),
512  executor,
513  executor->getDataMgr(),
514  false /* delay_translation */);
515  return string_dictionary_translation_mgr;
516  } else {
517  // string->numeric translation
518  auto string_dictionary_translation_mgr =
519  std::make_unique<StringDictionaryTranslationMgr>(
520  dict_id,
521  expr_ti,
522  string_op_infos,
525  executor->deviceCount(device_type),
526  executor,
527  executor->getDataMgr(),
528  false /* delay_translation */);
529  return string_dictionary_translation_mgr;
530  }
531 }
532 
534  const CompilationOptions& co) {
535  CHECK_GE(expr->getArity(), 1UL);
536  if (expr->requiresPerRowTranslation()) {
537  return codegenPerRowStringOper(expr, co);
538  }
539 
541 
542  auto string_dictionary_translation_mgr =
544 
545  auto str_id_lv = codegen(expr->getArg(0), true, co);
546  CHECK_EQ(size_t(1), str_id_lv.size());
547  const auto& expr_ti = expr->get_type_info();
548 
549  return cgen_state_
550  ->moveStringDictionaryTranslationMgr(std::move(string_dictionary_translation_mgr))
551  ->codegen(str_id_lv[0], expr_ti, true /* add_nullcheck */, co);
552 }
553 
554 // Method below is for join probes, as we cast the StringOper nodes to ColumnVars early to
555 // not special case that codepath (but retain the StringOpInfos, which we use here to
556 // execute the same string ops as we would on a native StringOper node)
558  const Analyzer::ColumnVar* expr,
559  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
560  const CompilationOptions& co) {
562  const auto& expr_ti = expr->get_type_info();
563  const auto& dict_id = expr_ti.getStringDictKey();
564 
565  auto string_dictionary_translation_mgr =
566  std::make_unique<StringDictionaryTranslationMgr>(
567  dict_id,
568  dict_id,
569  false, // translate_intersection_only
570  expr->get_type_info(),
571  string_op_infos,
574  executor()->deviceCount(co.device_type),
575  executor(),
576  executor()->getDataMgr(),
577  false /* delay_translation */);
578 
579  auto str_id_lv = codegen(expr, true /* fetch_column */, co);
580  CHECK_EQ(size_t(1), str_id_lv.size());
581 
582  return cgen_state_
583  ->moveStringDictionaryTranslationMgr(std::move(string_dictionary_translation_mgr))
584  ->codegen(str_id_lv[0], expr_ti, true /* add_nullcheck */, co);
585 }
586 
588  const CompilationOptions& co) {
590  if (is_unnest(extract_cast_arg(expr->get_arg()))) {
591  throw std::runtime_error("LIKE not supported for unnested expressions");
592  }
593  char escape_char{'\\'};
594  if (expr->get_escape_expr()) {
595  auto escape_char_expr =
596  dynamic_cast<const Analyzer::Constant*>(expr->get_escape_expr());
597  CHECK(escape_char_expr);
598  CHECK(escape_char_expr->get_type_info().is_string());
599  CHECK_EQ(size_t(1), escape_char_expr->get_constval().stringval->size());
600  escape_char = (*escape_char_expr->get_constval().stringval)[0];
601  }
602  auto pattern = dynamic_cast<const Analyzer::Constant*>(expr->get_like_expr());
603  CHECK(pattern);
604  auto fast_dict_like_lv = codegenDictLike(expr->get_own_arg(),
605  pattern,
606  expr->get_is_ilike(),
607  expr->get_is_simple(),
608  escape_char,
609  co);
610  if (fast_dict_like_lv) {
611  return fast_dict_like_lv;
612  }
613  const auto& ti = expr->get_arg()->get_type_info();
614  CHECK(ti.is_string());
615  if (g_enable_watchdog && ti.get_compression() != kENCODING_NONE) {
616  throw WatchdogException(
617  "Cannot do LIKE / ILIKE on this dictionary encoded column, its cardinality is "
618  "too high");
619  }
620  auto str_lv = codegen(expr->get_arg(), true, co);
621  if (str_lv.size() != 3) {
622  CHECK_EQ(size_t(1), str_lv.size());
623  str_lv.push_back(cgen_state_->ir_builder_.CreateExtractValue(str_lv.front(), 0));
624  str_lv.push_back(cgen_state_->ir_builder_.CreateExtractValue(str_lv.front(), 1));
625  str_lv.back() = cgen_state_->ir_builder_.CreateTrunc(
626  str_lv.back(), llvm::Type::getInt32Ty(cgen_state_->context_));
628  throw QueryMustRunOnCpu();
629  }
630  }
631  auto like_expr_arg_lvs = codegen(expr->get_like_expr(), true, co);
632  CHECK_EQ(size_t(3), like_expr_arg_lvs.size());
633  const bool is_nullable{!expr->get_arg()->get_type_info().get_notnull()};
634  std::vector<llvm::Value*> str_like_args{
635  str_lv[1], str_lv[2], like_expr_arg_lvs[1], like_expr_arg_lvs[2]};
636  std::string fn_name{expr->get_is_ilike() ? "string_ilike" : "string_like"};
637  if (expr->get_is_simple()) {
638  fn_name += "_simple";
639  } else {
640  str_like_args.push_back(cgen_state_->llInt(int8_t(escape_char)));
641  }
642  if (is_nullable) {
643  fn_name += "_nullable";
644  str_like_args.push_back(cgen_state_->inlineIntNull(expr->get_type_info()));
645  }
646  return cgen_state_->emitCall(fn_name, str_like_args);
647 }
648 
650  Executor* executor) {
651  // If here we are operating on top of one or more string functions, i.e. LOWER(str),
652  // and before running the dictionary LIKE/ILIKE or REGEXP_LIKE,
653  // we need to translate the strings first.
654 
655  // This approach is a temporary solution until we can implement the next stage
656  // of the string translation project, which will broaden the StringOper class to include
657  // operations that operate on strings but do not necessarily return strings, like
658  // LIKE/ILIKE/REGEXP_LIKE/CHAR_LENGTH. At this point these aforementioned operators,
659  // including LIKE/ILIKE, will just become part of a StringOps chain (which will also
660  // avoid the overhead of serializing the transformed raw strings from previous string
661  // opers to the dictionary to only read back out and perform LIKE/ILIKE.)
662  CHECK_GT(string_oper->getArity(), 0UL);
663  const auto& string_oper_primary_arg_ti = string_oper->getArg(0)->get_type_info();
664  CHECK(string_oper_primary_arg_ti.is_dict_encoded_string());
665  CHECK_NE(string_oper_primary_arg_ti.getStringDictKey().dict_id, TRANSIENT_DICT_ID);
666  // Note the actual translation below will be cached by RowSetMemOwner
667  translate_dict_strings(string_oper, ExecutorDeviceType::CPU, executor);
668 }
669 
671  const std::shared_ptr<Analyzer::Expr> like_arg,
672  const Analyzer::Constant* pattern,
673  const bool ilike,
674  const bool is_simple,
675  const char escape_char,
676  const CompilationOptions& co) {
678  const auto cast_oper = std::dynamic_pointer_cast<Analyzer::UOper>(like_arg);
679  if (!cast_oper) {
680  return nullptr;
681  }
682  CHECK(cast_oper);
683  CHECK_EQ(kCAST, cast_oper->get_optype());
684  const auto dict_like_arg = cast_oper->get_own_operand();
685  const auto& dict_like_arg_ti = dict_like_arg->get_type_info();
686  if (!dict_like_arg_ti.is_string()) {
687  throw(std::runtime_error("Cast from " + dict_like_arg_ti.get_type_name() + " to " +
688  cast_oper->get_type_info().get_type_name() +
689  " not supported"));
690  }
691  CHECK_EQ(kENCODING_DICT, dict_like_arg_ti.get_compression());
692  const auto sdp = executor()->getStringDictionaryProxy(
693  dict_like_arg_ti.getStringDictKey(), executor()->getRowSetMemoryOwner(), true);
694  if (sdp->storageEntryCount() > 200000000) {
695  return nullptr;
696  }
697  if (sdp->getDictKey().isTransientDict()) {
698  // If we have a literal dictionary it was a product
699  // of string ops applied to none-encoded strings, and
700  // will not be populated at codegen-time, so we
701  // cannot use the fast path
702 
703  // Todo(todd): Once string ops support non-string-producing
704  // operators (like like/ilike), like/ilike can be chained and
705  // we can avoid the string translation
706  return nullptr;
707  }
708  const auto string_oper = dynamic_cast<const Analyzer::StringOper*>(dict_like_arg.get());
709  if (string_oper) {
710  pre_translate_string_ops(string_oper, executor());
711  }
712  const auto& pattern_ti = pattern->get_type_info();
713  CHECK(pattern_ti.is_string());
714  CHECK_EQ(kENCODING_NONE, pattern_ti.get_compression());
715  const auto& pattern_datum = pattern->get_constval();
716  const auto& pattern_str = *pattern_datum.stringval;
717  const auto matching_ids = sdp->getLike(pattern_str, ilike, is_simple, escape_char);
718  // InIntegerSet requires 64-bit values
719  std::vector<int64_t> matching_ids_64(matching_ids.size());
720  std::copy(matching_ids.begin(), matching_ids.end(), matching_ids_64.begin());
721  const auto in_values = std::make_shared<Analyzer::InIntegerSet>(
722  dict_like_arg, matching_ids_64, dict_like_arg_ti.get_notnull());
723  return codegen(in_values.get(), co);
724 }
725 
726 namespace {
727 
728 std::vector<int32_t> get_compared_ids(const StringDictionaryProxy* dict,
729  const SQLOps compare_operator,
730  const std::string& pattern) {
731  std::vector<int> ret;
732  switch (compare_operator) {
733  case kLT:
734  ret = dict->getCompare(pattern, "<");
735  break;
736  case kLE:
737  ret = dict->getCompare(pattern, "<=");
738  break;
739  case kEQ:
740  case kBW_EQ:
741  ret = dict->getCompare(pattern, "=");
742  break;
743  case kGT:
744  ret = dict->getCompare(pattern, ">");
745  break;
746  case kGE:
747  ret = dict->getCompare(pattern, ">=");
748  break;
749  case kNE:
750  ret = dict->getCompare(pattern, "<>");
751  break;
752  default:
753  std::runtime_error("unsuported operator for string comparision");
754  }
755  return ret;
756 }
757 } // namespace
758 
759 llvm::Value* CodeGenerator::codegenDictStrCmp(const std::shared_ptr<Analyzer::Expr> lhs,
760  const std::shared_ptr<Analyzer::Expr> rhs,
761  const SQLOps compare_operator,
762  const CompilationOptions& co) {
764  auto rhs_cast_oper = std::dynamic_pointer_cast<const Analyzer::UOper>(rhs);
765  auto lhs_cast_oper = std::dynamic_pointer_cast<const Analyzer::UOper>(lhs);
766  auto rhs_col_var = std::dynamic_pointer_cast<const Analyzer::ColumnVar>(rhs);
767  auto lhs_col_var = std::dynamic_pointer_cast<const Analyzer::ColumnVar>(lhs);
768  std::shared_ptr<const Analyzer::UOper> cast_oper;
769  std::shared_ptr<const Analyzer::ColumnVar> col_var;
770  auto compare_opr = compare_operator;
771  if (lhs_col_var && rhs_col_var) {
772  if (lhs_col_var->get_type_info().getStringDictKey() ==
773  rhs_col_var->get_type_info().getStringDictKey()) {
774  if (compare_operator == kEQ || compare_operator == kNE) {
775  // TODO (vraj): implement compare between two dictionary encoded columns which
776  // share a dictionary
777  return nullptr;
778  }
779  }
780  // TODO (vraj): implement compare between two dictionary encoded columns which don't
781  // shared dictionary
782  throw std::runtime_error("Decoding two Dictionary encoded columns will be slow");
783  } else if (lhs_col_var && rhs_cast_oper) {
784  cast_oper.swap(rhs_cast_oper);
785  col_var.swap(lhs_col_var);
786  } else if (lhs_cast_oper && rhs_col_var) {
787  cast_oper.swap(lhs_cast_oper);
788  col_var.swap(rhs_col_var);
789  switch (compare_operator) {
790  case kLT:
791  compare_opr = kGT;
792  break;
793  case kLE:
794  compare_opr = kGE;
795  break;
796  case kGT:
797  compare_opr = kLT;
798  break;
799  case kGE:
800  compare_opr = kLE;
801  default:
802  break;
803  }
804  }
805  if (!cast_oper || !col_var) {
806  return nullptr;
807  }
808  CHECK_EQ(kCAST, cast_oper->get_optype());
809 
810  const auto const_expr =
811  dynamic_cast<Analyzer::Constant*>(cast_oper->get_own_operand().get());
812  if (!const_expr) {
813  // Analyzer casts dictionary encoded columns to none encoded if there is a comparison
814  // between two encoded columns. Which we currently do not handle.
815  return nullptr;
816  }
817  const auto& const_val = const_expr->get_constval();
818 
819  const auto col_ti = col_var->get_type_info();
820  CHECK(col_ti.is_string());
821  CHECK_EQ(kENCODING_DICT, col_ti.get_compression());
822  const auto sdp = executor()->getStringDictionaryProxy(
823  col_ti.getStringDictKey(), executor()->getRowSetMemoryOwner(), true);
824 
825  if (sdp->storageEntryCount() > 200000000) {
826  std::runtime_error("Cardinality for string dictionary is too high");
827  return nullptr;
828  }
829 
830  const auto& pattern_str = *const_val.stringval;
831  const auto matching_ids = get_compared_ids(sdp, compare_opr, pattern_str);
832 
833  // InIntegerSet requires 64-bit values
834  std::vector<int64_t> matching_ids_64(matching_ids.size());
835  std::copy(matching_ids.begin(), matching_ids.end(), matching_ids_64.begin());
836 
837  const auto in_values = std::make_shared<Analyzer::InIntegerSet>(
838  col_var, matching_ids_64, col_ti.get_notnull());
839  return codegen(in_values.get(), co);
840 }
841 
843  const CompilationOptions& co) {
// Generates code for a regular-expression match expression (`expr`).
//
// NOTE(review): this is a rendered source listing. The first line of the
// signature (listing line 842) and listing lines 844, 872, 893 and 896 are
// absent from this view, so the notes below cover only the visible statements.
//
// Visible flow:
//  1. Reject unnested arguments with std::runtime_error.
//  2. Resolve the escape character (backslash unless an escape expression is
//     supplied).
//  3. Try codegenDictRegexp(); if it yields a non-null value, return it.
//  4. Otherwise fall back to a per-row call to "regexp_like" or
//     "regexp_like_nullable".
845  if (is_unnest(extract_cast_arg(expr->get_arg()))) {
846  throw std::runtime_error("REGEXP not supported for unnested expressions");
847  }
// Default escape character is a backslash. An explicit escape expression must
// be a constant string of exactly one character (enforced by the CHECKs).
848  char escape_char{'\\'};
849  if (expr->get_escape_expr()) {
850  auto escape_char_expr =
851  dynamic_cast<const Analyzer::Constant*>(expr->get_escape_expr());
852  CHECK(escape_char_expr);
853  CHECK(escape_char_expr->get_type_info().is_string());
854  CHECK_EQ(size_t(1), escape_char_expr->get_constval().stringval->size());
855  escape_char = (*escape_char_expr->get_constval().stringval)[0];
856  }
// The pattern expression must be an Analyzer::Constant (enforced by CHECK).
857  auto pattern = dynamic_cast<const Analyzer::Constant*>(expr->get_pattern_expr());
858  CHECK(pattern);
// Dictionary-based path first; a null result means it did not apply and the
// per-row fallback below is used instead.
859  auto fast_dict_pattern_lv =
860  codegenDictRegexp(expr->get_own_arg(), pattern, escape_char, co);
861  if (fast_dict_pattern_lv) {
862  return fast_dict_pattern_lv;
863  }
864  const auto& ti = expr->get_arg()->get_type_info();
865  CHECK(ti.is_string());
// With the watchdog enabled, an argument whose compression is anything other
// than kENCODING_NONE is rejected here rather than evaluated per row.
866  if (g_enable_watchdog && ti.get_compression() != kENCODING_NONE) {
867  throw WatchdogException(
868  "Cannot do REGEXP_LIKE on this dictionary encoded column, its cardinality is too "
869  "high");
870  }
871  // Now we know we are working on NONE ENCODED column. So switch back to CPU
// NOTE(review): listing line 872 is missing. The closing brace on 874 implies
// the throw below sits inside a conditional (presumably a device-type check,
// given the comment above) — confirm against the real file.
873  throw QueryMustRunOnCpu();
874  }
// codegen() of the argument is expected to give either three values or one.
// In the single-value case, two more are derived by extracting fields 0 and 1
// of that value, and the second extracted value is truncated to a 32-bit
// integer (presumably a string pointer and its length — TODO confirm).
875  auto str_lv = codegen(expr->get_arg(), true, co);
876  if (str_lv.size() != 3) {
877  CHECK_EQ(size_t(1), str_lv.size());
878  str_lv.push_back(cgen_state_->ir_builder_.CreateExtractValue(str_lv.front(), 0));
879  str_lv.push_back(cgen_state_->ir_builder_.CreateExtractValue(str_lv.front(), 1));
880  str_lv.back() = cgen_state_->ir_builder_.CreateTrunc(
881  str_lv.back(), llvm::Type::getInt32Ty(cgen_state_->context_));
882  }
// The pattern must codegen to exactly three values; elements 1 and 2 are
// passed to the runtime function.
883  auto regexp_expr_arg_lvs = codegen(expr->get_pattern_expr(), true, co);
884  CHECK_EQ(size_t(3), regexp_expr_arg_lvs.size());
885  const bool is_nullable{!expr->get_arg()->get_type_info().get_notnull()};
// Call arguments, in order: the two argument-string values, the two pattern
// values, then the escape character as an 8-bit integer constant.
886  std::vector<llvm::Value*> regexp_args{
887  str_lv[1], str_lv[2], regexp_expr_arg_lvs[1], regexp_expr_arg_lvs[2]};
888  std::string fn_name("regexp_like");
889  regexp_args.push_back(cgen_state_->llInt(int8_t(escape_char)));
// Nullable arguments use the "_nullable" variant, which takes one extra
// argument (the inline integer null for this expression's type) and is
// requested with an 8-bit integer return type.
// NOTE(review): the line that begins each call expression (listing lines 893
// and 896) is missing from this view; only the argument lists are visible.
890  if (is_nullable) {
891  fn_name += "_nullable";
892  regexp_args.push_back(cgen_state_->inlineIntNull(expr->get_type_info()));
894  fn_name, get_int_type(8, cgen_state_->context_), regexp_args);
895  }
// Non-nullable case: same argument list, 1-bit integer return type.
897  fn_name, get_int_type(1, cgen_state_->context_), regexp_args);
898 }
899 
901  const std::shared_ptr<Analyzer::Expr> pattern_arg,
902  const Analyzer::Constant* pattern,
903  const char escape_char,
904  const CompilationOptions& co) {
// Dictionary-based path for a regexp match: the pattern is evaluated against
// the string dictionary while generating code, and the match is then expressed
// as an Analyzer::InIntegerSet over the matching ids.
//
// NOTE(review): the line carrying the function name (listing line 900) and
// listing line 905 are absent from this rendered view; the parameter list
// matches the codegenDictRegexp(...) call made by the regexp codegen, so this
// is presumably that function — confirm against the real file.
//
// Parameters:
//   pattern_arg  expression expected to be a cast (UOper) over a
//                dictionary-encoded string operand.
//   pattern      constant, none-encoded string holding the regular expression.
//   escape_char  escape character forwarded to getRegexpLike().
//   co           compilation options forwarded to codegen().
//
// Returns nullptr when this path does not apply: `pattern_arg` is not a UOper,
// the dictionary has more than 15,000,000 storage entries, or the dictionary
// is transient. Otherwise returns the result of codegen() on the InIntegerSet.
906  const auto cast_oper = std::dynamic_pointer_cast<Analyzer::UOper>(pattern_arg);
907  if (!cast_oper) {
908  return nullptr;
909  }
// Redundant with the null check just above; kept as-is.
910  CHECK(cast_oper);
// Any UOper reaching this point must be a cast whose operand is a
// dictionary-encoded string.
911  CHECK_EQ(kCAST, cast_oper->get_optype());
912  const auto dict_regexp_arg = cast_oper->get_own_operand();
913  const auto& dict_regexp_arg_ti = dict_regexp_arg->get_type_info();
914  CHECK(dict_regexp_arg_ti.is_string());
915  CHECK_EQ(kENCODING_DICT, dict_regexp_arg_ti.get_compression());
916  const auto& dict_key = dict_regexp_arg_ti.getStringDictKey();
917  const auto sdp = executor()->getStringDictionaryProxy(
918  dict_key, executor()->getRowSetMemoryOwner(), true);
// Size cutoff: dictionaries above this entry count do not take this path.
919  if (sdp->storageEntryCount() > 15000000) {
920  return nullptr;
921  }
922  if (sdp->getDictKey().isTransientDict()) {
923  // If we have a literal dictionary it was a product
924  // of string ops applied to none-encoded strings, and
925  // will not be populated at codegen-time, so we
926  // cannot use the fast path
927 
928  // Todo(todd): Once string ops support non-string producing
929  // operators (like regexp_like), these operators can be chained
930  // and we can avoid the string translation
931  return nullptr;
932  }
// If the cast operand is itself a string operation, run
// pre_translate_string_ops() on it before querying the dictionary.
933  const auto string_oper =
934  dynamic_cast<const Analyzer::StringOper*>(dict_regexp_arg.get());
935  if (string_oper) {
936  pre_translate_string_ops(string_oper, executor());
937  }
// The pattern must be a none-encoded string constant.
938  const auto& pattern_ti = pattern->get_type_info();
939  CHECK(pattern_ti.is_string());
940  CHECK_EQ(kENCODING_NONE, pattern_ti.get_compression());
941  const auto& pattern_datum = pattern->get_constval();
942  const auto& pattern_str = *pattern_datum.stringval;
// Ask the dictionary proxy for the ids of all entries matching the pattern.
943  const auto matching_ids = sdp->getRegexpLike(pattern_str, escape_char);
944  // InIntegerSet requires 64-bit values
945  std::vector<int64_t> matching_ids_64(matching_ids.size());
946  std::copy(matching_ids.begin(), matching_ids.end(), matching_ids_64.begin());
// Build the set expression over the dictionary-encoded operand (not the cast)
// and generate code for it; the operand's not-null flag is passed through.
947  const auto in_values = std::make_shared<Analyzer::InIntegerSet>(
948  dict_regexp_arg, matching_ids_64, dict_regexp_arg_ti.get_notnull());
949  return codegen(in_values.get(), co);
950 }
std::string to_lower(const std::string &str)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::pair< const char *, size_t > getStringBytes(int32_t string_id) const noexcept
llvm::Value * codegenPerRowStringOper(const Analyzer::StringOper *string_oper, const CompilationOptions &co)
const std::shared_ptr< Analyzer::Expr > get_own_arg() const
Definition: Analyzer.h:1134
RUNTIME_EXPORT int32_t union_translate_string_id_to_other_dict(const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
std::vector< int32_t > get_compared_ids(const StringDictionaryProxy *dict, const SQLOps compare_operator, const std::string &pattern)
const Expr * get_escape_expr() const
Definition: Analyzer.h:1064
#define DEF_APPLY_NUMERIC_STRING_OPS(value_type, value_name)
Definition: sqltypes.h:76
__device__ StringView string_decode(int8_t *chunk_iter_, int64_t pos)
CgenState * cgen_state_
bool is_null
Definition: Datum.h:57
std::unique_ptr< StringDictionaryTranslationMgr > translate_dict_strings(const Analyzer::StringOper *expr, const ExecutorDeviceType device_type, Executor *executor)
void pre_translate_string_ops(const Analyzer::StringOper *string_oper, Executor *executor)
const Expr * get_escape_expr() const
Definition: Analyzer.h:1136
SQLOps
Definition: sqldefs.h:28
Definition: sqldefs.h:34
llvm::IRBuilder ir_builder_
Definition: CgenState.h:384
Definition: sqldefs.h:35
#define CHECK_GE(x, y)
Definition: Logger.h:306
Definition: sqldefs.h:48
llvm::Value * codegenPseudoStringOper(const Analyzer::ColumnVar *, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, const CompilationOptions &)
Definition: sqldefs.h:29
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
const Expr * get_arg() const
Definition: Analyzer.h:1133
size_t getArity() const
Definition: Analyzer.h:1674
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
Definition: Execute.h:222
std::string getString(int32_t string_id) const
bool requiresPerRowTranslation() const
Definition: Analyzer.h:1704
std::string toString(const QueryDescriptionType &type)
Definition: Types.h:64
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define CHECK_GT(x, y)
Definition: Logger.h:305
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:182
const Expr * get_arg() const
Definition: Analyzer.h:1061
RUNTIME_EXPORT int32_t apply_multi_input_string_ops_and_encode(const char *str1_ptr, const int32_t str1_len, const char *str2_ptr, const int32_t str2_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
Definition: StringOpsIR.cpp:77
ExecutorDeviceType
size_t formatHMS(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:96
#define DEF_CONVERT_TO_STRING_AND_ENCODE(value_type, value_name)
int8_t * pointer
Definition: Datum.h:56
#define NULL_INT
std::vector< int32_t > getCompare(const std::string &pattern, const std::string &comp_operator) const
bool get_calc_encoded_length() const
Definition: Analyzer.h:870
llvm::LLVMContext & context_
Definition: CgenState.h:382
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Definition: CgenState.cpp:395
Classes representing a parse tree.
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_timestamp(const int64_t operand, const int32_t dimension, const int64_t string_dict_handle)
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
#define CHECK_NE(x, y)
Definition: Logger.h:302
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:65
Executor * executor_
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_date(const int64_t operand, const int64_t string_dict_handle)
bool g_enable_watchdog
bool get_is_simple() const
Definition: Analyzer.h:1066
std::string_view stringView() const
Definition: Datum.h:44
llvm::Value * codegenDictStrCmp(const std::shared_ptr< Analyzer::Expr >, const std::shared_ptr< Analyzer::Expr >, const SQLOps, const CompilationOptions &co)
llvm::Value * codegenDictRegexp(const std::shared_ptr< Analyzer::Expr > arg, const Analyzer::Constant *pattern, const char escape_char, const CompilationOptions &)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
double power10inv(unsigned const x)
Definition: misc.h:282
int32_t getOrAddTransient(const std::string &)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:217
#define DEF_APPLY_MULTI_INPUT_NUMERIC_STRING_OPS(value_type, value_name)
std::string * stringval
Definition: Datum.h:79
llvm::Constant * inlineNull(const SQLTypeInfo &)
Definition: CgenState.cpp:116
ExecutorDeviceType device_type
#define RUNTIME_EXPORT
std::vector< StringOps_Namespace::StringOpInfo > getStringOpInfos(const Analyzer::StringOper *expr)
Definition: sqldefs.h:33
size_t formatDate(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:27
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_decimal(const int64_t operand, const int32_t precision, const int32_t scale, const int64_t string_dict_handle)
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
const Expr * get_pattern_expr() const
Definition: Analyzer.h:1135
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:79
Definition: sqltypes.h:80
Expression class for string functions. The "arg" constructor parameter must be an expression that reso...
Definition: Analyzer.h:1601
#define CHECK_LE(x, y)
Definition: Logger.h:304
RUNTIME_EXPORT int32_t intersect_translate_string_id_to_other_dict(const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
Definition: StringOpsIR.cpp:96
RUNTIME_EXPORT int32_t apply_string_ops_and_encode(const char *str_ptr, const int32_t str_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
Definition: StringOpsIR.cpp:61
SqlStringOpKind get_kind() const
Definition: Analyzer.h:1672
const Expr * get_like_expr() const
Definition: Analyzer.h:1063
Datum get_constval() const
Definition: Analyzer.h:348
Definition: sqldefs.h:31
const Expr * get_arg() const
Definition: Analyzer.h:868
llvm::StructType * createStringViewStructType()
size_t formatDateTime(char *buf, size_t const max, int64_t const timestamp, int const dimension, bool use_iso_format)
Definition: misc.cpp:45
std::pair< std::vector< llvm::Value * >, std::unique_ptr< CodeGenerator::NullCheckCodegen > > codegenStringFetchAndEncode(const Analyzer::StringOper *expr, const CompilationOptions &co, const size_t arg_idx, const bool codegen_nullcheck)
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_time(const int64_t operand, const int64_t string_dict_handle)
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_bool(const int8_t operand, const int64_t string_dict_handle)
const Expr * get_arg() const
Definition: Analyzer.h:917
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:249
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:30
bool g_cluster
Definition: sqldefs.h:32
const StringDictionaryTranslationMgr * moveStringDictionaryTranslationMgr(std::unique_ptr< const StringDictionaryTranslationMgr > &&str_dict_translation_mgr)
Definition: CgenState.h:199
Definition: sqltypes.h:72
const std::shared_ptr< Analyzer::Expr > get_own_arg() const
Definition: Analyzer.h:1062
bool is_unnest(const Analyzer::Expr *expr)
Definition: Execute.h:1694
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
llvm::Value * codegen(llvm::Value *str_id_input, const SQLTypeInfo &input_ti, const bool add_nullcheck, const CompilationOptions &co) const
#define ALWAYS_INLINE
size_t getNonLiteralsArity() const
Definition: Analyzer.h:1686
std::vector< std::shared_ptr< Analyzer::Expr > > getChainedStringOpExprs() const
Definition: Analyzer.h:1700
const Expr * getArg(const size_t i) const
Definition: Analyzer.h:1688
RUNTIME_EXPORT int32_t string_compress(const StringView string_view, const int64_t string_dict_handle)
Definition: StringOpsIR.cpp:50
bool get_is_ilike() const
Definition: Analyzer.h:1065
const shared::StringDictKey & getStringDictKey() const
Definition: sqltypes.h:1055
llvm::Value * codegenDictLike(const std::shared_ptr< Analyzer::Expr > arg, const Analyzer::Constant *pattern, const bool ilike, const bool is_simple, const char escape_char, const CompilationOptions &)
size_t length
Definition: Datum.h:55
Executor * executor() const
RUNTIME_EXPORT StringView string_decompress(const int32_t string_id, const int64_t string_dict_handle)
Definition: StringOpsIR.cpp:38