OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringOpsIR.cpp File Reference
#include "CodeGenerator.h"
#include "Execute.h"
#include "../Shared/funcannotations.h"
#include "../Shared/sqldefs.h"
#include "Parser/ParserNode.h"
#include "QueryEngine/ExpressionRewrite.h"
#include "StringOps/StringOps.h"
#include <boost/locale/conversion.hpp>
+ Include dependency graph for StringOpsIR.cpp:

Go to the source code of this file.

Namespaces

 anonymous_namespace{StringOpsIR.cpp}
 

Functions

RUNTIME_EXPORT uint64_t string_decode (int8_t *chunk_iter_, int64_t pos)
 
RUNTIME_EXPORT uint64_t string_decompress (const int32_t string_id, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t string_compress (const int64_t ptr_and_len, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t apply_string_ops_and_encode (const char *str_ptr, const int32_t str_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t intersect_translate_string_id_to_other_dict (const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
 
RUNTIME_EXPORT int32_t union_translate_string_id_to_other_dict (const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
 
std::vector< StringOps_Namespace::StringOpInfo > getStringOpInfos (const Analyzer::StringOper *expr)
 
std::unique_ptr< StringDictionaryTranslationMgr > translate_dict_strings (const Analyzer::StringOper *expr, const ExecutorDeviceType device_type, Executor *executor)
 
void pre_translate_string_ops (const Analyzer::StringOper *string_oper, Executor *executor)
 
std::vector< int32_t > anonymous_namespace{StringOpsIR.cpp}::get_compared_ids (const StringDictionaryProxy *dict, const SQLOps compare_operator, const std::string &pattern)
 

Function Documentation

RUNTIME_EXPORT int32_t apply_string_ops_and_encode ( const char *  str_ptr,
const int32_t  str_len,
const int64_t  string_ops_handle,
const int64_t  string_dict_handle 
)

Definition at line 64 of file StringOpsIR.cpp.

References StringDictionaryProxy::getOrAddTransient().

67  {
68  std::string raw_str(str_ptr, str_len);
69  auto string_ops =
70  reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle);
71  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
72  const auto result_str = string_ops->operator()(raw_str);
73  if (result_str.empty()) {
74  return inline_int_null_value<int32_t>();
75  }
76  return string_dict_proxy->getOrAddTransient(result_str);
77 }
int32_t getOrAddTransient(const std::string &str)

+ Here is the call graph for this function:

std::vector<StringOps_Namespace::StringOpInfo> getStringOpInfos ( const Analyzer::StringOper *  expr)

Definition at line 147 of file StringOpsIR.cpp.

References CHECK, and Analyzer::StringOper::getChainedStringOpExprs().

Referenced by CodeGenerator::codegenPerRowStringOper(), and translate_dict_strings().

148  {
149  std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
150  auto chained_string_op_exprs = expr->getChainedStringOpExprs();
151  if (chained_string_op_exprs.empty()) {
152  // Likely will change the below to a CHECK but until we have more confidence
153  // that all potential query patterns have nodes that might contain string ops folded,
154  // leaving as an error for now
155  throw std::runtime_error(
156  "Expected folded string operator but found operator unfolded.");
157  }
158  // Consider encapsulating below in an Analyzer::StringOper method to dedup
159  for (const auto& chained_string_op_expr : chained_string_op_exprs) {
160  auto chained_string_op =
161  dynamic_cast<const Analyzer::StringOper*>(chained_string_op_expr.get());
162  CHECK(chained_string_op);
163  StringOps_Namespace::StringOpInfo string_op_info(chained_string_op->get_kind(),
164  chained_string_op->getLiteralArgs());
165  string_op_infos.emplace_back(string_op_info);
166  }
167  return string_op_infos;
168 }
Expression class for string functions. The "arg" constructor parameter must be an expression that reso...
Definition: Analyzer.h:1463
#define CHECK(condition)
Definition: Logger.h:222
std::vector< std::shared_ptr< Analyzer::Expr > > getChainedStringOpExprs() const
Definition: Analyzer.h:1530

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RUNTIME_EXPORT int32_t intersect_translate_string_id_to_other_dict ( const int32_t  string_id,
const int64_t  source_string_dict_handle,
const int64_t  dest_string_dict_handle 
)

Definition at line 80 of file StringOpsIR.cpp.

References StringDictionaryProxy::getString().

82  {
83  const auto source_string_dict_proxy =
84  reinterpret_cast<StringDictionaryProxy*>(source_string_dict_handle);
85  auto dest_string_dict_proxy =
86  reinterpret_cast<StringDictionaryProxy*>(dest_string_dict_handle);
87  // Can we have StringDictionaryProxy::getString return a reference?
88  const auto source_str = source_string_dict_proxy->getString(string_id);
89  if (source_str.empty()) {
90  return inline_int_null_value<int32_t>();
91  }
92  return dest_string_dict_proxy->getIdOfString(source_str);
93 }
std::string getString(int32_t string_id) const

+ Here is the call graph for this function:

void pre_translate_string_ops ( const Analyzer::StringOper *  string_oper,
Executor *  executor 
)

Definition at line 348 of file StringOpsIR.cpp.

References CHECK, CHECK_GT, CHECK_NE, CPU, Analyzer::Expr::get_type_info(), Analyzer::StringOper::getArg(), Analyzer::StringOper::getArity(), TRANSIENT_DICT_ID, and translate_dict_strings().

Referenced by CodeGenerator::codegenDictLike(), and CodeGenerator::codegenDictRegexp().

349  {
350  // If here we are operating on top of one or more string functions, i.e. LOWER(str),
351  // and before running the dictionary LIKE/ILIKE or REGEXP_LIKE,
352  // we need to translate the strings first.
353 
354  // This approach is a temporary solution until we can implement the next stage
355  // of the string translation project, which will broaden the StringOper class to include
356  operations that operate on strings but do not necessarily return strings like
357  LIKE/ILIKE/REGEXP_LIKE/CHAR_LENGTH. At this point these aforementioned operators,
358  // including LIKE/ILIKE, will just become part of a StringOps chain (which will also
359  // avoid the overhead of serializing the transformed raw strings from previous string
360  // opers to the dictionary to only read back out and perform LIKE/ILIKE.)
361  CHECK_GT(string_oper->getArity(), 0UL);
362  const auto& string_oper_primary_arg_ti = string_oper->getArg(0)->get_type_info();
363  CHECK(string_oper_primary_arg_ti.is_dict_encoded_string());
364  CHECK_NE(string_oper_primary_arg_ti.get_comp_param(), TRANSIENT_DICT_ID);
365  // Note the actual translation below will be cached by RowSetMemOwner
366  translate_dict_strings(string_oper, ExecutorDeviceType::CPU, executor);
367 }
std::unique_ptr< StringDictionaryTranslationMgr > translate_dict_strings(const Analyzer::StringOper *expr, const ExecutorDeviceType device_type, Executor *executor)
size_t getArity() const
Definition: Analyzer.h:1506
#define CHECK_GT(x, y)
Definition: Logger.h:234
#define CHECK_NE(x, y)
Definition: Logger.h:231
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:81
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:259
#define CHECK(condition)
Definition: Logger.h:222
const Expr * getArg(const size_t i) const
Definition: Analyzer.h:1518

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RUNTIME_EXPORT int32_t string_compress ( const int64_t  ptr_and_len,
const int64_t  string_dict_handle 
)

Definition at line 52 of file StringOpsIR.cpp.

References extract_str_len_noinline(), extract_str_ptr_noinline(), and StringDictionaryProxy::getOrAddTransient().

53  {
54  std::string raw_str(reinterpret_cast<char*>(extract_str_ptr_noinline(ptr_and_len)),
55  extract_str_len_noinline(ptr_and_len));
56  if (raw_str.empty()) {
57  return inline_int_null_value<int32_t>();
58  }
59  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
60  return string_dict_proxy->getOrAddTransient(raw_str);
61 }
RUNTIME_EXPORT NEVER_INLINE int32_t extract_str_len_noinline(const uint64_t str_and_len)
RUNTIME_EXPORT NEVER_INLINE int8_t * extract_str_ptr_noinline(const uint64_t str_and_len)
int32_t getOrAddTransient(const std::string &str)

+ Here is the call graph for this function:

RUNTIME_EXPORT uint64_t string_decode ( int8_t *  chunk_iter_,
int64_t  pos 
)

Definition at line 28 of file StringOpsIR.cpp.

References CHECK, ChunkIter_get_nth(), VarlenDatum::is_null, VarlenDatum::length, and VarlenDatum::pointer.

28  {
29  auto chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
30  VarlenDatum vd;
31  bool is_end;
32  ChunkIter_get_nth(chunk_iter, pos, false, &vd, &is_end);
33  CHECK(!is_end);
34  return vd.is_null ? 0
35  : (reinterpret_cast<uint64_t>(vd.pointer) & 0xffffffffffff) |
36  (static_cast<uint64_t>(vd.length) << 48);
37 }
bool is_null
Definition: sqltypes.h:153
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:182
int8_t * pointer
Definition: sqltypes.h:152
#define CHECK(condition)
Definition: Logger.h:222
size_t length
Definition: sqltypes.h:151

+ Here is the call graph for this function:

RUNTIME_EXPORT uint64_t string_decompress ( const int32_t  string_id,
const int64_t  string_dict_handle 
)

Definition at line 39 of file StringOpsIR.cpp.

References CHECK, StringDictionaryProxy::getStringBytes(), and NULL_INT.

40  {
41  if (string_id == NULL_INT) {
42  return 0;
43  }
44  auto string_dict_proxy =
45  reinterpret_cast<const StringDictionaryProxy*>(string_dict_handle);
46  auto string_bytes = string_dict_proxy->getStringBytes(string_id);
47  CHECK(string_bytes.first);
48  return (reinterpret_cast<uint64_t>(string_bytes.first) & 0xffffffffffff) |
49  (static_cast<uint64_t>(string_bytes.second) << 48);
50 }
std::pair< const char *, size_t > getStringBytes(int32_t string_id) const noexcept
#define NULL_INT
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

std::unique_ptr<StringDictionaryTranslationMgr> translate_dict_strings ( const Analyzer::StringOper *  expr,
const ExecutorDeviceType  device_type,
Executor *  executor 
)

Definition at line 212 of file StringOpsIR.cpp.

References CHECK, Data_Namespace::CPU_LEVEL, SQLTypeInfo::get_comp_param(), Analyzer::Expr::get_type_info(), getStringOpInfos(), GPU, and Data_Namespace::GPU_LEVEL.

Referenced by CodeGenerator::codegen(), and pre_translate_string_ops().

215  {
216  const auto& expr_ti = expr->get_type_info();
217  const auto dict_id = expr_ti.get_comp_param();
218  const auto string_op_infos = getStringOpInfos(expr);
219  CHECK(string_op_infos.size());
220 
221  auto string_dictionary_translation_mgr =
222  std::make_unique<StringDictionaryTranslationMgr>(
223  dict_id,
224  dict_id,
225  false, // translate_intersection_only
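226  string_op_infos,
227  device_type == ExecutorDeviceType::GPU ? Data_Namespace::GPU_LEVEL
228  : Data_Namespace::CPU_LEVEL,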
229  executor->deviceCount(device_type),
230  executor,
231  &executor->getCatalog()->getDataMgr(),
232  false /* delay_translation */);
233  return string_dictionary_translation_mgr;
234 }
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:81
std::vector< StringOps_Namespace::StringOpInfo > getStringOpInfos(const Analyzer::StringOper *expr)
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:338
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RUNTIME_EXPORT int32_t union_translate_string_id_to_other_dict ( const int32_t  string_id,
const int64_t  source_string_dict_handle,
const int64_t  dest_string_dict_handle 
)

Definition at line 96 of file StringOpsIR.cpp.

References StringDictionaryProxy::getString().

98  {
99  const auto source_string_dict_proxy =
100  reinterpret_cast<StringDictionaryProxy*>(source_string_dict_handle);
101  auto dest_string_dict_proxy =
102  reinterpret_cast<StringDictionaryProxy*>(dest_string_dict_handle);
103  // Can we have StringDictionaryProxy::getString return a reference?
104  const auto source_str = source_string_dict_proxy->getString(string_id);
105  if (source_str.empty()) {
106  return inline_int_null_value<int32_t>();
107  }
108  return dest_string_dict_proxy->getOrAddTransient(source_str);
109 }
std::string getString(int32_t string_id) const

+ Here is the call graph for this function: