20 #include "../Shared/funcannotations.h"
21 #include "../Shared/sqldefs.h"
26 #include <boost/locale/conversion.hpp>
29 auto chunk_iter =
reinterpret_cast<ChunkIter*
>(chunk_iter_);
35 : (
reinterpret_cast<uint64_t
>(vd.
pointer) & 0xffffffffffff) |
36 (
static_cast<uint64_t
>(vd.
length) << 48);
40 const int64_t string_dict_handle) {
44 auto string_dict_proxy =
47 CHECK(string_bytes.first);
48 return (reinterpret_cast<uint64_t>(string_bytes.first) & 0xffffffffffff) |
49 (
static_cast<uint64_t
>(string_bytes.second) << 48);
53 const int64_t string_dict_handle) {
56 if (raw_str.empty()) {
57 return inline_int_null_value<int32_t>();
65 const int32_t str_len,
66 const int64_t string_ops_handle,
67 const int64_t string_dict_handle) {
68 std::string raw_str(str_ptr, str_len);
70 reinterpret_cast<const StringOps_Namespace::StringOps*
>(string_ops_handle);
72 const auto result_str = string_ops->operator()(raw_str);
73 if (result_str.empty()) {
74 return inline_int_null_value<int32_t>();
81 const int64_t source_string_dict_handle,
82 const int64_t dest_string_dict_handle) {
83 const auto source_string_dict_proxy =
85 auto dest_string_dict_proxy =
88 const auto source_str = source_string_dict_proxy->
getString(string_id);
89 if (source_str.empty()) {
90 return inline_int_null_value<int32_t>();
92 return dest_string_dict_proxy->getIdOfString(source_str);
97 const int64_t source_string_dict_handle,
98 const int64_t dest_string_dict_handle) {
99 const auto source_string_dict_proxy =
101 auto dest_string_dict_proxy =
104 const auto source_str = source_string_dict_proxy->
getString(string_id);
105 if (source_str.empty()) {
106 return inline_int_null_value<int32_t>();
108 return dest_string_dict_proxy->getOrAddTransient(source_str);
115 if (str_lv.size() != 3) {
123 std::vector<llvm::Value*> charlength_args{str_lv[1], str_lv[2]};
124 std::string fn_name(
"char_length");
125 if (expr->get_calc_encoded_length()) {
126 fn_name +=
"_encoded";
128 const bool is_nullable{!expr->get_arg()->get_type_info().get_notnull()};
130 fn_name +=
"_nullable";
131 charlength_args.push_back(cgen_state_->inlineIntNull(expr->get_type_info()));
133 return expr->get_calc_encoded_length()
134 ? cgen_state_->emitExternalCall(
135 fn_name,
get_int_type(32, cgen_state_->context_), charlength_args)
136 : cgen_state_->emitCall(fn_name, charlength_args);
149 std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
151 if (chained_string_op_exprs.empty()) {
155 throw std::runtime_error(
156 "Expected folded string operator but found operator unfolded.");
159 for (
const auto& chained_string_op_expr : chained_string_op_exprs) {
160 auto chained_string_op =
162 CHECK(chained_string_op);
164 chained_string_op->getLiteralArgs());
165 string_op_infos.emplace_back(string_op_info);
167 return string_op_infos;
178 CHECK(primary_arg->get_type_info().is_none_encoded_string());
181 throw std::runtime_error(
182 "Cast from none-encoded string to dictionary-encoded not supported for "
183 "distributed queries");
188 auto primary_str_lv =
codegen(primary_arg,
true, co);
189 CHECK_EQ(
size_t(3), primary_str_lv.size());
191 CHECK(string_op_infos.size());
193 const auto string_ops =
194 executor()->getRowSetMemoryOwner()->getStringOps(string_op_infos);
195 const int64_t string_ops_handle =
reinterpret_cast<int64_t
>(string_ops);
198 const int64_t dest_string_proxy_handle =
199 reinterpret_cast<int64_t
>(
executor()->getStringDictionaryProxy(
200 expr_ti.get_comp_param(),
executor()->getRowSetMemoryOwner(),
true));
201 auto dest_string_proxy_handle_lv =
cgen_state_->
llInt(dest_string_proxy_handle);
202 std::vector<llvm::Value*> string_oper_lvs{primary_str_lv[1],
204 string_ops_handle_lv,
205 dest_string_proxy_handle_lv};
215 Executor* executor) {
219 CHECK(string_op_infos.size());
221 auto string_dictionary_translation_mgr =
222 std::make_unique<StringDictionaryTranslationMgr>(
229 executor->deviceCount(device_type),
231 &executor->getCatalog()->getDataMgr(),
233 return string_dictionary_translation_mgr;
245 auto string_dictionary_translation_mgr =
249 CHECK_EQ(
size_t(1), str_id_lv.size());
253 ->
codegen(str_id_lv[0], expr_ti,
true , co);
261 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
267 auto string_dictionary_translation_mgr =
268 std::make_unique<StringDictionaryTranslationMgr>(
277 &
executor()->getCatalog()->getDataMgr(),
280 auto str_id_lv =
codegen(expr,
true , co);
281 CHECK_EQ(
size_t(1), str_id_lv.size());
285 ->
codegen(str_id_lv[0], expr_ti,
true , co);
292 throw std::runtime_error(
"LIKE not supported for unnested expressions");
294 char escape_char{
'\\'};
296 auto escape_char_expr =
298 CHECK(escape_char_expr);
299 CHECK(escape_char_expr->get_type_info().is_string());
300 CHECK_EQ(
size_t(1), escape_char_expr->get_constval().stringval->size());
301 escape_char = (*escape_char_expr->get_constval().stringval)[0];
311 if (fast_dict_like_lv) {
312 return fast_dict_like_lv;
315 CHECK(ti.is_string());
318 "Cannot do LIKE / ILIKE on this dictionary encoded column, its cardinality is "
322 if (str_lv.size() != 3) {
330 auto like_expr_arg_lvs = codegen(expr->get_like_expr(),
true, co);
331 CHECK_EQ(
size_t(3), like_expr_arg_lvs.size());
332 const bool is_nullable{!expr->get_arg()->get_type_info().get_notnull()};
333 std::vector<llvm::Value*> str_like_args{
334 str_lv[1], str_lv[2], like_expr_arg_lvs[1], like_expr_arg_lvs[2]};
335 std::string fn_name{expr->get_is_ilike() ?
"string_ilike" :
"string_like"};
336 if (expr->get_is_simple()) {
337 fn_name +=
"_simple";
339 str_like_args.push_back(cgen_state_->llInt(int8_t(escape_char)));
342 fn_name +=
"_nullable";
343 str_like_args.push_back(cgen_state_->inlineIntNull(expr->get_type_info()));
345 return cgen_state_->emitCall(fn_name, str_like_args);
349 Executor* executor) {
363 CHECK(string_oper_primary_arg_ti.is_dict_encoded_string());
370 const std::shared_ptr<Analyzer::Expr> like_arg,
373 const bool is_simple,
374 const char escape_char,
377 const auto cast_oper = std::dynamic_pointer_cast<
Analyzer::UOper>(like_arg);
383 const auto dict_like_arg = cast_oper->get_own_operand();
384 const auto& dict_like_arg_ti = dict_like_arg->get_type_info();
385 if (!dict_like_arg_ti.is_string()) {
386 throw(std::runtime_error(
"Cast from " + dict_like_arg_ti.get_type_name() +
" to " +
387 cast_oper->get_type_info().get_type_name() +
391 const auto sdp =
executor()->getStringDictionaryProxy(
392 dict_like_arg_ti.get_comp_param(),
executor()->getRowSetMemoryOwner(),
true);
393 if (sdp->storageEntryCount() > 200000000) {
412 CHECK(pattern_ti.is_string());
415 const auto& pattern_str = *pattern_datum.
stringval;
416 const auto matching_ids = sdp->getLike(pattern_str, ilike, is_simple, escape_char);
418 std::vector<int64_t> matching_ids_64(matching_ids.size());
419 std::copy(matching_ids.begin(), matching_ids.end(), matching_ids_64.begin());
420 const auto in_values = std::make_shared<Analyzer::InIntegerSet>(
421 dict_like_arg, matching_ids_64, dict_like_arg_ti.get_notnull());
422 return codegen(in_values.get(), co);
428 const SQLOps compare_operator,
429 const std::string& pattern) {
430 std::vector<int> ret;
431 switch (compare_operator) {
452 // Unsupported operators must fail loudly. Previously the exception object was
// constructed and immediately discarded (no `throw`), so this statement had no
// effect.
throw std::runtime_error(
"unsuported operator for string comparision");
459 const std::shared_ptr<Analyzer::Expr> rhs,
460 const SQLOps compare_operator,
463 auto rhs_cast_oper = std::dynamic_pointer_cast<
const Analyzer::UOper>(rhs);
464 auto lhs_cast_oper = std::dynamic_pointer_cast<
const Analyzer::UOper>(lhs);
467 std::shared_ptr<const Analyzer::UOper> cast_oper;
468 std::shared_ptr<const Analyzer::ColumnVar> col_var;
469 auto compare_opr = compare_operator;
470 if (lhs_col_var && rhs_col_var) {
471 if (lhs_col_var->get_type_info().get_comp_param() ==
472 rhs_col_var->get_type_info().get_comp_param()) {
473 if (compare_operator ==
kEQ || compare_operator ==
kNE) {
481 throw std::runtime_error(
"Decoding two Dictionary encoded columns will be slow");
482 }
else if (lhs_col_var && rhs_cast_oper) {
483 cast_oper.swap(rhs_cast_oper);
484 col_var.swap(lhs_col_var);
485 }
else if (lhs_cast_oper && rhs_col_var) {
486 cast_oper.swap(lhs_cast_oper);
487 col_var.swap(rhs_col_var);
488 switch (compare_operator) {
504 if (!cast_oper || !col_var) {
509 const auto const_expr =
518 const auto col_ti = col_var->get_type_info();
519 CHECK(col_ti.is_string());
521 const auto sdp =
executor()->getStringDictionaryProxy(
522 col_ti.get_comp_param(),
executor()->getRowSetMemoryOwner(),
true);
524 if (sdp->storageEntryCount() > 200000000) {
525 // NOTE(review): this statement only constructs a temporary std::runtime_error and
// discards it; nothing is thrown here. Confirm whether the high-cardinality
// case is meant to throw or to fall back silently, then either add `throw` or
// remove this dead statement.
std::runtime_error(
"Cardinality for string dictionary is too high");
529 const auto& pattern_str = *const_val.stringval;
533 std::vector<int64_t> matching_ids_64(matching_ids.size());
534 std::copy(matching_ids.begin(), matching_ids.end(), matching_ids_64.begin());
536 const auto in_values = std::make_shared<Analyzer::InIntegerSet>(
537 col_var, matching_ids_64, col_ti.get_notnull());
538 return codegen(in_values.get(), co);
545 throw std::runtime_error(
"REGEXP not supported for unnested expressions");
547 char escape_char{
'\\'};
549 auto escape_char_expr =
551 CHECK(escape_char_expr);
552 CHECK(escape_char_expr->get_type_info().is_string());
553 CHECK_EQ(
size_t(1), escape_char_expr->get_constval().stringval->size());
554 escape_char = (*escape_char_expr->get_constval().stringval)[0];
558 auto fast_dict_pattern_lv =
560 if (fast_dict_pattern_lv) {
561 return fast_dict_pattern_lv;
564 CHECK(ti.is_string());
567 "Cannot do REGEXP_LIKE on this dictionary encoded column, its cardinality is too "
575 if (str_lv.size() != 3) {
580 auto regexp_expr_arg_lvs = codegen(expr->get_pattern_expr(),
true, co);
581 CHECK_EQ(
size_t(3), regexp_expr_arg_lvs.size());
582 const bool is_nullable{!expr->get_arg()->get_type_info().get_notnull()};
583 std::vector<llvm::Value*> regexp_args{
584 str_lv[1], str_lv[2], regexp_expr_arg_lvs[1], regexp_expr_arg_lvs[2]};
585 std::string fn_name(
"regexp_like");
586 regexp_args.push_back(cgen_state_->llInt(int8_t(escape_char)));
588 fn_name +=
"_nullable";
589 regexp_args.push_back(cgen_state_->inlineIntNull(expr->get_type_info()));
590 return cgen_state_->emitExternalCall(
591 fn_name,
get_int_type(8, cgen_state_->context_), regexp_args);
593 return cgen_state_->emitExternalCall(
594 fn_name,
get_int_type(1, cgen_state_->context_), regexp_args);
598 const std::shared_ptr<Analyzer::Expr> pattern_arg,
600 const char escape_char,
603 const auto cast_oper = std::dynamic_pointer_cast<
Analyzer::UOper>(pattern_arg);
609 const auto dict_regexp_arg = cast_oper->get_own_operand();
610 const auto& dict_regexp_arg_ti = dict_regexp_arg->get_type_info();
611 CHECK(dict_regexp_arg_ti.is_string());
613 const auto comp_param = dict_regexp_arg_ti.get_comp_param();
614 const auto sdp =
executor()->getStringDictionaryProxy(
615 comp_param,
executor()->getRowSetMemoryOwner(),
true);
616 if (sdp->storageEntryCount() > 15000000) {
630 const auto string_oper =
636 CHECK(pattern_ti.is_string());
639 const auto& pattern_str = *pattern_datum.
stringval;
640 const auto matching_ids = sdp->getRegexpLike(pattern_str, escape_char);
642 std::vector<int64_t> matching_ids_64(matching_ids.size());
643 std::copy(matching_ids.begin(), matching_ids.end(), matching_ids_64.begin());
644 const auto in_values = std::make_shared<Analyzer::InIntegerSet>(
645 dict_regexp_arg, matching_ids_64, dict_regexp_arg_ti.get_notnull());
646 return codegen(in_values.get(), co);
std::pair< const char *, size_t > getStringBytes(int32_t string_id) const noexcept
llvm::Value * codegenPerRowStringOper(const Analyzer::StringOper *string_oper, const CompilationOptions &co)
bool hasNoneEncodedTextArg() const
const std::shared_ptr< Analyzer::Expr > get_own_arg() const
RUNTIME_EXPORT int32_t union_translate_string_id_to_other_dict(const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
std::vector< int32_t > get_compared_ids(const StringDictionaryProxy *dict, const SQLOps compare_operator, const std::string &pattern)
const Expr * get_escape_expr() const
std::shared_ptr< Analyzer::Expr > remove_cast(const std::shared_ptr< Analyzer::Expr > &expr)
std::unique_ptr< StringDictionaryTranslationMgr > translate_dict_strings(const Analyzer::StringOper *expr, const ExecutorDeviceType device_type, Executor *executor)
void pre_translate_string_ops(const Analyzer::StringOper *string_oper, Executor *executor)
const Expr * get_escape_expr() const
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
llvm::Value * codegenPseudoStringOper(const Analyzer::ColumnVar *, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, const CompilationOptions &)
const Expr * get_arg() const
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
std::string getString(int32_t string_id) const
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
const Expr * get_arg() const
std::vector< int32_t > getCompare(const std::string &pattern, const std::string &comp_operator) const
RUNTIME_EXPORT NEVER_INLINE int32_t extract_str_len_noinline(const uint64_t str_and_len)
llvm::LLVMContext & context_
Classes representing a parse tree.
DEVICE auto copy(ARGS &&...args)
bool get_is_simple() const
llvm::Value * codegenDictStrCmp(const std::shared_ptr< Analyzer::Expr >, const std::shared_ptr< Analyzer::Expr >, const SQLOps, const CompilationOptions &co)
llvm::Value * codegenDictRegexp(const std::shared_ptr< Analyzer::Expr > arg, const Analyzer::Constant *pattern, const char escape_char, const CompilationOptions &)
const SQLTypeInfo & get_type_info() const
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
ExecutorDeviceType device_type
std::vector< StringOps_Namespace::StringOpInfo > getStringOpInfos(const Analyzer::StringOper *expr)
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
const Expr * get_pattern_expr() const
RUNTIME_EXPORT NEVER_INLINE int8_t * extract_str_ptr_noinline(const uint64_t str_and_len)
Expression class for string functions The "arg" constructor parameter must be an expression that reso...
#define TRANSIENT_DICT_ID
RUNTIME_EXPORT int32_t intersect_translate_string_id_to_other_dict(const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
RUNTIME_EXPORT int32_t apply_string_ops_and_encode(const char *str_ptr, const int32_t str_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
const Expr * get_like_expr() const
Datum get_constval() const
const Expr * get_arg() const
RUNTIME_EXPORT int32_t string_compress(const int64_t ptr_and_len, const int64_t string_dict_handle)
HOST DEVICE int get_comp_param() const
const Expr * get_arg() const
llvm::ConstantInt * llInt(const T v) const
int32_t getOrAddTransient(const std::string &str)
const StringDictionaryTranslationMgr * moveStringDictionaryTranslationMgr(std::unique_ptr< const StringDictionaryTranslationMgr > &&str_dict_translation_mgr)
const std::shared_ptr< Analyzer::Expr > get_own_arg() const
bool is_unnest(const Analyzer::Expr *expr)
__device__ uint64_t string_decode(int8_t *chunk_iter_, int64_t pos)
llvm::Value * codegen(llvm::Value *str_id_input, const SQLTypeInfo &input_ti, const bool add_nullcheck, const CompilationOptions &co) const
std::vector< std::shared_ptr< Analyzer::Expr > > getChainedStringOpExprs() const
const Expr * getArg(const size_t i) const
RUNTIME_EXPORT uint64_t string_decompress(const int32_t string_id, const int64_t string_dict_handle)
bool get_is_ilike() const
llvm::Value * codegenDictLike(const std::shared_ptr< Analyzer::Expr > arg, const Analyzer::Constant *pattern, const bool ilike, const bool is_simple, const char escape_char, const CompilationOptions &)
Executor * executor() const