OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringOpsIR.cpp File Reference
#include "CodeGenerator.h"
#include "Execute.h"
#include "../Shared/funcannotations.h"
#include "../Shared/sqldefs.h"
#include "Parser/ParserNode.h"
#include "QueryEngine/ExpressionRewrite.h"
#include "StringOps/StringOps.h"
#include <boost/locale/conversion.hpp>
+ Include dependency graph for StringOpsIR.cpp:

Go to the source code of this file.

Namespaces

 anonymous_namespace{StringOpsIR.cpp}
 

Macros

#define DEF_APPLY_NUMERIC_STRING_OPS(value_type, value_name)
 
#define DEF_CONVERT_TO_STRING_AND_ENCODE(value_type, value_name)
 

Functions

RUNTIME_EXPORT StringView string_decode (int8_t *chunk_iter_, int64_t pos)
 
RUNTIME_EXPORT StringView string_decompress (const int32_t string_id, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t string_compress (const StringView string_view, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t apply_string_ops_and_encode (const char *str_ptr, const int32_t str_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t apply_multi_input_string_ops_and_encode (const char *str1_ptr, const int32_t str1_len, const char *str2_ptr, const int32_t str2_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t intersect_translate_string_id_to_other_dict (const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
 
RUNTIME_EXPORT int32_t union_translate_string_id_to_other_dict (const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
 
int32_t write_string_to_proxy (const std::string &str, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_bool (const int8_t operand, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_decimal (const int64_t operand, const int32_t precision, const int32_t scale, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_time (const int64_t operand, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_timestamp (const int64_t operand, const int32_t dimension, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_date (const int64_t operand, const int64_t string_dict_handle)
 
std::vector
< StringOps_Namespace::StringOpInfo >
getStringOpInfos (const Analyzer::StringOper *expr)
 
std::unique_ptr
< StringDictionaryTranslationMgr >
translate_dict_strings (const Analyzer::StringOper *expr, const ExecutorDeviceType device_type, Executor *executor)
 
void pre_translate_string_ops (const Analyzer::StringOper *string_oper, Executor *executor)
 
std::vector< int32_t > anonymous_namespace{StringOpsIR.cpp}::get_compared_ids (const StringDictionaryProxy *dict, const SQLOps compare_operator, const std::string &pattern)
 

Macro Definition Documentation

#define DEF_APPLY_NUMERIC_STRING_OPS (   value_type,
  value_name 
)
Value:
extern "C" RUNTIME_EXPORT ALWAYS_INLINE value_type \
apply_numeric_string_ops_##value_name( \
const char* str_ptr, const int32_t str_len, const int64_t string_ops_handle) { \
const std::string_view raw_str(str_ptr, str_len); \
auto string_ops = \
reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle); \
const auto result_datum = string_ops->numericEval(raw_str); \
return result_datum.value_name##val; \
}
#define RUNTIME_EXPORT
#define ALWAYS_INLINE

Definition at line 127 of file StringOpsIR.cpp.

#define DEF_CONVERT_TO_STRING_AND_ENCODE (   value_type,
  value_name 
)
Value:
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t \
convert_to_string_and_encode_##value_name(const value_type operand, \
const int64_t string_dict_handle) { \
return write_string_to_proxy(std::to_string(operand), string_dict_handle); \
}
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
std::string to_string(char const *&&v)
#define RUNTIME_EXPORT
#define ALWAYS_INLINE

Definition at line 157 of file StringOpsIR.cpp.

Function Documentation

RUNTIME_EXPORT int32_t apply_multi_input_string_ops_and_encode ( const char *  str1_ptr,
const int32_t  str1_len,
const char *  str2_ptr,
const int32_t  str2_len,
const int64_t  string_ops_handle,
const int64_t  string_dict_handle 
)

Definition at line 77 of file StringOpsIR.cpp.

References StringDictionaryProxy::getOrAddTransient().

82  {
83  std::string_view raw_str1(str1_ptr, str1_len);
84  std::string_view raw_str2(str2_ptr, str2_len);
85  auto string_ops =
86  reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle);
87  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
88  const auto result_str = string_ops->multi_input_eval(raw_str1, raw_str2);
89  if (result_str.empty()) {
90  return inline_int_null_value<int32_t>();
91  }
92  return string_dict_proxy->getOrAddTransient(result_str);
93 }
int32_t getOrAddTransient(const std::string &)

+ Here is the call graph for this function:

RUNTIME_EXPORT int32_t apply_string_ops_and_encode ( const char *  str_ptr,
const int32_t  str_len,
const int64_t  string_ops_handle,
const int64_t  string_dict_handle 
)

Definition at line 61 of file StringOpsIR.cpp.

References StringDictionaryProxy::getOrAddTransient().

64  {
65  std::string raw_str(str_ptr, str_len);
66  auto string_ops =
67  reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle);
68  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
69  const auto result_str = string_ops->operator()(raw_str);
70  if (result_str.empty()) {
71  return inline_int_null_value<int32_t>();
72  }
73  return string_dict_proxy->getOrAddTransient(result_str);
74 }
int32_t getOrAddTransient(const std::string &)

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_bool ( const int8_t  operand,
const int64_t  string_dict_handle 
)

Definition at line 174 of file StringOpsIR.cpp.

References write_string_to_proxy().

175  {
176  return write_string_to_proxy(operand == 1 ? "true" : "false", string_dict_handle);
177 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_date ( const int64_t  operand,
const int64_t  string_dict_handle 
)

Definition at line 211 of file StringOpsIR.cpp.

References shared::formatDate(), and write_string_to_proxy().

212  {
213  constexpr size_t buf_size = 64;
214  char buf[buf_size]; // Hold "2000-03-01 12:34:56.123456789" and large years.
215  shared::formatDate(buf, buf_size, operand);
216  return write_string_to_proxy(buf, string_dict_handle);
217 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
size_t formatDate(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:27

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_decimal ( const int64_t  operand,
const int32_t  precision,
const int32_t  scale,
const int64_t  string_dict_handle 
)

Definition at line 180 of file StringOpsIR.cpp.

References shared::power10inv(), and write_string_to_proxy().

183  {
184  constexpr size_t buf_size = 64;
185  char buf[buf_size]; // Holds the formatted decimal; snprintf truncates output to buf_size.
186  const double v = static_cast<double>(operand) * shared::power10inv(scale);
187  snprintf(buf, buf_size, "%*.*f", precision, scale, v);
188  return write_string_to_proxy(buf, string_dict_handle);
189 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
double power10inv(unsigned const x)
Definition: misc.h:282

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_time ( const int64_t  operand,
const int64_t  string_dict_handle 
)

Definition at line 192 of file StringOpsIR.cpp.

References shared::formatHMS(), and write_string_to_proxy().

193  {
194  constexpr size_t buf_size = 64;
195  char buf[buf_size];
196  shared::formatHMS(buf, buf_size, operand);
197  return write_string_to_proxy(buf, string_dict_handle);
198 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
size_t formatHMS(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:96

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_timestamp ( const int64_t  operand,
const int32_t  dimension,
const int64_t  string_dict_handle 
)

Definition at line 201 of file StringOpsIR.cpp.

References shared::formatDateTime(), and write_string_to_proxy().

203  {
204  constexpr size_t buf_size = 64;
205  char buf[buf_size]; // Hold "2000-03-01 12:34:56.123456789" and large years.
206  shared::formatDateTime(buf, buf_size, operand, dimension);
207  return write_string_to_proxy(buf, string_dict_handle);
208 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
size_t formatDateTime(char *buf, size_t const max, int64_t const timestamp, int const dimension, bool use_iso_format)
Definition: misc.cpp:45

+ Here is the call graph for this function:

std::vector<StringOps_Namespace::StringOpInfo> getStringOpInfos ( const Analyzer::StringOper *  expr)

Definition at line 257 of file StringOpsIR.cpp.

References CHECK, and Analyzer::StringOper::getChainedStringOpExprs().

Referenced by CodeGenerator::codegenPerRowStringOper(), and translate_dict_strings().

258  {
259  std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
260  auto chained_string_op_exprs = expr->getChainedStringOpExprs();
261  if (chained_string_op_exprs.empty()) {
262  // Likely will change the below to a CHECK but until we have more confidence
263  // that all potential query patterns have nodes that might contain string ops folded,
264  // leaving as an error for now
265  throw std::runtime_error(
266  "Expected folded string operator but found operator unfolded.");
267  }
268  // Consider encapsulating below in an Analyzer::StringOper method to dedup
269  for (const auto& chained_string_op_expr : chained_string_op_exprs) {
270  auto chained_string_op =
271  dynamic_cast<const Analyzer::StringOper*>(chained_string_op_expr.get());
272  CHECK(chained_string_op);
273  StringOps_Namespace::StringOpInfo string_op_info(chained_string_op->get_kind(),
274  chained_string_op->get_type_info(),
275  chained_string_op->getLiteralArgs());
276  string_op_infos.emplace_back(string_op_info);
277  }
278  return string_op_infos;
279 }
Expression class for string functions The "arg" constructor parameter must be an expression that reso...
Definition: Analyzer.h:1479
#define CHECK(condition)
Definition: Logger.h:291
std::vector< std::shared_ptr< Analyzer::Expr > > getChainedStringOpExprs() const
Definition: Analyzer.h:1574

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RUNTIME_EXPORT int32_t intersect_translate_string_id_to_other_dict ( const int32_t  string_id,
const int64_t  source_string_dict_handle,
const int64_t  dest_string_dict_handle 
)

Definition at line 96 of file StringOpsIR.cpp.

References StringDictionaryProxy::getString().

98  {
99  const auto source_string_dict_proxy =
100  reinterpret_cast<StringDictionaryProxy*>(source_string_dict_handle);
101  auto dest_string_dict_proxy =
102  reinterpret_cast<StringDictionaryProxy*>(dest_string_dict_handle);
103  // Can we have StringDictionaryProxy::getString return a reference?
104  const auto source_str = source_string_dict_proxy->getString(string_id);
105  if (source_str.empty()) {
106  return inline_int_null_value<int32_t>();
107  }
108  return dest_string_dict_proxy->getIdOfString(source_str);
109 }
std::string getString(int32_t string_id) const

+ Here is the call graph for this function:

void pre_translate_string_ops ( const Analyzer::StringOper *  string_oper,
Executor *  executor 
)

Definition at line 606 of file StringOpsIR.cpp.

References CHECK, CHECK_GT, CHECK_NE, CPU, Analyzer::Expr::get_type_info(), Analyzer::StringOper::getArg(), Analyzer::StringOper::getArity(), TRANSIENT_DICT_ID, and translate_dict_strings().

Referenced by CodeGenerator::codegenDictLike(), and CodeGenerator::codegenDictRegexp().

607  {
608  // If here we are operating on top of one or more string functions, i.e. LOWER(str),
609  // and before running the dictionary LIKE/ILIKE or REGEXP_LIKE,
610  // we need to translate the strings first.
611 
612  // This approach is a temporary solution until we can implement the next stage
613  // of the string translation project, which will broaden the StringOper class to include
614  // operations that operate on strings but do not necessarily return strings like
615  // LIKE/ILIKE/REGEXP_LIKE/CHAR_LENGTH. At this point these aforementioned operators,
616  // including LIKE/ILIKE, will just become part of a StringOps chain (which will also
617  // avoid the overhead of serializing the transformed raw strings from previous string
618  // opers to the dictionary to only read back out and perform LIKE/ILIKE.)
619  CHECK_GT(string_oper->getArity(), 0UL);
620  const auto& string_oper_primary_arg_ti = string_oper->getArg(0)->get_type_info();
621  CHECK(string_oper_primary_arg_ti.is_dict_encoded_string());
622  CHECK_NE(string_oper_primary_arg_ti.getStringDictKey().dict_id, TRANSIENT_DICT_ID);
623  // Note the actual translation below will be cached by RowSetMemOwner
624  translate_dict_strings(string_oper, ExecutorDeviceType::CPU, executor);
625 }
std::unique_ptr< StringDictionaryTranslationMgr > translate_dict_strings(const Analyzer::StringOper *expr, const ExecutorDeviceType device_type, Executor *executor)
size_t getArity() const
Definition: Analyzer.h:1548
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
#define CHECK_GT(x, y)
Definition: Logger.h:305
#define CHECK_NE(x, y)
Definition: Logger.h:302
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
#define CHECK(condition)
Definition: Logger.h:291
const Expr * getArg(const size_t i) const
Definition: Analyzer.h:1562

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RUNTIME_EXPORT int32_t string_compress ( const StringView  string_view,
const int64_t  string_dict_handle 
)

Definition at line 50 of file StringOpsIR.cpp.

References StringDictionaryProxy::getOrAddTransient(), and StringView::stringView().

51  {
52  std::string_view const sv = string_view.stringView();
53  if (sv.empty()) {
54  return inline_int_null_value<int32_t>();
55  }
56  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
57  return string_dict_proxy->getOrAddTransient(sv);
58 }
std::string_view stringView() const
Definition: Datum.h:44
int32_t getOrAddTransient(const std::string &)

+ Here is the call graph for this function:

RUNTIME_EXPORT StringView string_decode ( int8_t *  chunk_iter_,
int64_t  pos 
)

Definition at line 28 of file StringOpsIR.cpp.

References CHECK, ChunkIter_get_nth(), VarlenDatum::is_null, VarlenDatum::length, and VarlenDatum::pointer.

28  {
29  auto chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
30  VarlenDatum vd;
31  bool is_end;
32  ChunkIter_get_nth(chunk_iter, pos, false, &vd, &is_end);
33  CHECK(!is_end);
34  return vd.is_null ? StringView{nullptr, 0u}
35  : StringView{reinterpret_cast<char const*>(vd.pointer), vd.length};
36 }
bool is_null
Definition: Datum.h:55
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:182
int8_t * pointer
Definition: Datum.h:54
#define CHECK(condition)
Definition: Logger.h:291
size_t length
Definition: Datum.h:53

+ Here is the call graph for this function:

RUNTIME_EXPORT StringView string_decompress ( const int32_t  string_id,
const int64_t  string_dict_handle 
)

Definition at line 38 of file StringOpsIR.cpp.

References CHECK, StringDictionaryProxy::getStringBytes(), and NULL_INT.

39  {
40  if (string_id == NULL_INT) {
41  return {nullptr, 0};
42  }
43  auto string_dict_proxy =
44  reinterpret_cast<const StringDictionaryProxy*>(string_dict_handle);
45  auto string_bytes = string_dict_proxy->getStringBytes(string_id);
46  CHECK(string_bytes.first);
47  return {string_bytes.first, string_bytes.second};
48 }
std::pair< const char *, size_t > getStringBytes(int32_t string_id) const noexcept
#define NULL_INT
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::unique_ptr<StringDictionaryTranslationMgr> translate_dict_strings ( const Analyzer::StringOper *  expr,
const ExecutorDeviceType  device_type,
Executor *  executor 
)

Definition at line 447 of file StringOpsIR.cpp.

References CHECK, Data_Namespace::CPU_LEVEL, Analyzer::Expr::get_type_info(), Analyzer::StringOper::getArg(), SQLTypeInfo::getStringDictKey(), getStringOpInfos(), GPU, and Data_Namespace::GPU_LEVEL.

Referenced by CodeGenerator::codegen(), and pre_translate_string_ops().

450  {
451  const auto& expr_ti = expr->get_type_info();
452  const auto& primary_input_expr_ti = expr->getArg(0)->get_type_info();
453  const auto& dict_id = primary_input_expr_ti.getStringDictKey();
454  const auto string_op_infos = getStringOpInfos(expr);
455  CHECK(string_op_infos.size());
456 
457  if (string_op_infos.back().getReturnType().is_dict_encoded_string()) {
458  // string->string translation
459  auto string_dictionary_translation_mgr =
460  std::make_unique<StringDictionaryTranslationMgr>(
461  dict_id,
462  dict_id,
463  false, // translate_intersection_only
464  expr_ti,
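465  string_op_infos,
466  device_type == ExecutorDeviceType::GPU ? Data_Namespace::GPU_LEVEL
467  : Data_Namespace::CPU_LEVEL,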
468  executor->deviceCount(device_type),
469  executor,
470  executor->getDataMgr(),
471  false /* delay_translation */);
472  return string_dictionary_translation_mgr;
473  } else {
474  // string->numeric translation
475  auto string_dictionary_translation_mgr =
476  std::make_unique<StringDictionaryTranslationMgr>(
477  dict_id,
478  expr_ti,
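479  string_op_infos,
480  device_type == ExecutorDeviceType::GPU ? Data_Namespace::GPU_LEVEL
481  : Data_Namespace::CPU_LEVEL,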
482  executor->deviceCount(device_type),
483  executor,
484  executor->getDataMgr(),
485  false /* delay_translation */);
486  return string_dictionary_translation_mgr;
487  }
488 }
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
std::vector< StringOps_Namespace::StringOpInfo > getStringOpInfos(const Analyzer::StringOper *expr)
#define CHECK(condition)
Definition: Logger.h:291
const Expr * getArg(const size_t i) const
Definition: Analyzer.h:1562
const shared::StringDictKey & getStringDictKey() const
Definition: sqltypes.h:1021

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RUNTIME_EXPORT int32_t union_translate_string_id_to_other_dict ( const int32_t  string_id,
const int64_t  source_string_dict_handle,
const int64_t  dest_string_dict_handle 
)

Definition at line 112 of file StringOpsIR.cpp.

References StringDictionaryProxy::getString().

114  {
115  const auto source_string_dict_proxy =
116  reinterpret_cast<StringDictionaryProxy*>(source_string_dict_handle);
117  auto dest_string_dict_proxy =
118  reinterpret_cast<StringDictionaryProxy*>(dest_string_dict_handle);
119  // Can we have StringDictionaryProxy::getString return a reference?
120  const auto source_str = source_string_dict_proxy->getString(string_id);
121  if (source_str.empty()) {
122  return inline_int_null_value<int32_t>();
123  }
124  return dest_string_dict_proxy->getOrAddTransient(source_str);
125 }
std::string getString(int32_t string_id) const

+ Here is the call graph for this function:

int32_t write_string_to_proxy ( const std::string &  str,
const int64_t  string_dict_handle 
)
inline

Definition at line 148 of file StringOpsIR.cpp.

References StringDictionaryProxy::getOrAddTransient().

Referenced by convert_to_string_and_encode_bool(), convert_to_string_and_encode_date(), convert_to_string_and_encode_decimal(), convert_to_string_and_encode_time(), and convert_to_string_and_encode_timestamp().

149  {
150  if (str.empty()) {
151  return inline_int_null_value<int32_t>();
152  }
153  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
154  return string_dict_proxy->getOrAddTransient(str);
155 }
int32_t getOrAddTransient(const std::string &)

+ Here is the call graph for this function:

+ Here is the caller graph for this function: