20 #include "../Shared/funcannotations.h"
21 #include "../Shared/sqldefs.h"
26 #include <boost/locale/conversion.hpp>
29 auto chunk_iter =
reinterpret_cast<ChunkIter*
>(chunk_iter_);
39 const int64_t string_dict_handle) {
43 auto string_dict_proxy =
46 CHECK(string_bytes.first);
47 return {string_bytes.first, string_bytes.second};
51 const int64_t string_dict_handle) {
52 std::string_view
const sv = string_view.
stringView();
54 return inline_int_null_value<int32_t>();
62 const int32_t str_len,
63 const int64_t string_ops_handle,
64 const int64_t string_dict_handle) {
65 std::string raw_str(str_ptr, str_len);
67 reinterpret_cast<const StringOps_Namespace::StringOps*
>(string_ops_handle);
69 const auto result_str = string_ops->operator()(raw_str);
70 if (result_str.empty()) {
71 return inline_int_null_value<int32_t>();
78 const int32_t str1_len,
80 const int32_t str2_len,
81 const int64_t string_ops_handle,
82 const int64_t string_dict_handle) {
83 std::string_view raw_str1(str1_ptr, str1_len);
84 std::string_view raw_str2(str2_ptr, str2_len);
86 reinterpret_cast<const StringOps_Namespace::StringOps*
>(string_ops_handle);
88 const auto result_str = string_ops->multi_input_eval(raw_str1, raw_str2);
89 if (result_str.empty()) {
90 return inline_int_null_value<int32_t>();
97 const int64_t source_string_dict_handle,
98 const int64_t dest_string_dict_handle) {
99 const auto source_string_dict_proxy =
101 auto dest_string_dict_proxy =
104 const auto source_str = source_string_dict_proxy->
getString(string_id);
105 if (source_str.empty()) {
106 return inline_int_null_value<int32_t>();
108 return dest_string_dict_proxy->getIdOfString(source_str);
113 const int64_t source_string_dict_handle,
114 const int64_t dest_string_dict_handle) {
115 const auto source_string_dict_proxy =
117 auto dest_string_dict_proxy =
120 const auto source_str = source_string_dict_proxy->
getString(string_id);
121 if (source_str.empty()) {
122 return inline_int_null_value<int32_t>();
124 return dest_string_dict_proxy->getOrAddTransient(source_str);
127 #define DEF_APPLY_NUMERIC_STRING_OPS(value_type, value_name) \
128 extern "C" RUNTIME_EXPORT ALWAYS_INLINE value_type \
129 apply_numeric_string_ops_##value_name( \
130 const char* str_ptr, const int32_t str_len, const int64_t string_ops_handle) { \
131 const std::string_view raw_str(str_ptr, str_len); \
133 reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle); \
134 const auto result_datum = string_ops->numericEval(raw_str); \
135 return result_datum.value_name##val; \
146 #undef DEF_APPLY_NUMERIC_STRING_OPS
149 const int64_t string_dict_handle) {
151 return inline_int_null_value<int32_t>();
157 #define DEF_CONVERT_TO_STRING_AND_ENCODE(value_type, value_name) \
158 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t \
159 convert_to_string_and_encode_##value_name(const value_type operand, \
160 const int64_t string_dict_handle) { \
161 return write_string_to_proxy(std::to_string(operand), string_dict_handle); \
171 #undef DEF_CONVERT_TO_STRING_AND_ENCODE
175 const int64_t string_dict_handle) {
181 const int32_t precision,
183 const int64_t string_dict_handle) {
184 constexpr
size_t buf_size = 64;
187 snprintf(buf, buf_size,
"%*.*f", precision, scale, v);
193 const int64_t string_dict_handle) {
194 constexpr
size_t buf_size = 64;
202 const int32_t dimension,
203 const int64_t string_dict_handle) {
204 constexpr
size_t buf_size = 64;
212 const int64_t string_dict_handle) {
213 constexpr
size_t buf_size = 64;
223 if (str_lv.size() != 3) {
233 std::vector<llvm::Value*> charlength_args{str_lv[1], str_lv[2]};
234 std::string fn_name(
"char_length");
236 fn_name +=
"_encoded";
240 fn_name +=
"_nullable";
259 std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
261 if (chained_string_op_exprs.empty()) {
265 throw std::runtime_error(
266 "Expected folded string operator but found operator unfolded.");
269 for (
const auto& chained_string_op_expr : chained_string_op_exprs) {
270 auto chained_string_op =
272 CHECK(chained_string_op);
274 chained_string_op->get_type_info(),
275 chained_string_op->getLiteralArgs());
276 string_op_infos.emplace_back(string_op_info);
278 return string_op_infos;
281 std::pair<std::vector<llvm::Value*>, std::unique_ptr<CodeGenerator::NullCheckCodegen>>
284 const size_t arg_idx,
285 const bool codegen_nullcheck) {
289 auto primary_str_lv =
codegen(expr->
getArg(arg_idx),
true, co);
290 std::unique_ptr<CodeGenerator::NullCheckCodegen> nullcheck_codegen;
291 if (primary_str_lv.size() != 3) {
295 CHECK_EQ(
size_t(1), primary_str_lv.size());
296 CHECK(arg_ti.is_dict_encoded_string());
297 const bool is_nullable = !arg_ti.get_notnull();
298 if (codegen_nullcheck && is_nullable) {
300 nullcheck_codegen = std::make_unique<CodeGenerator::NullCheckCodegen>(
305 "transient_dict_per_row_nullcheck");
307 const auto sdp_ptr =
reinterpret_cast<int64_t
>(
executor()->getStringDictionaryProxy(
308 arg_ti.getStringDictKey(),
executor()->getRowSetMemoryOwner(),
true));
309 const auto string_view =
318 CHECK_EQ(
size_t(3), primary_str_lv.size());
319 return std::make_pair(primary_str_lv, std::move(nullcheck_codegen));
330 if (
g_cluster && return_ti.is_dict_encoded_string()) {
331 throw std::runtime_error(
332 "Cast from none-encoded string to dictionary-encoded not supported for "
333 "distributed queries");
338 const auto [primary_str_lv, nullcheck_codegen] =
340 CHECK_EQ(
size_t(3), primary_str_lv.size());
343 CHECK(string_op_infos.size());
345 const auto string_ops =
346 executor()->getRowSetMemoryOwner()->getStringOps(string_op_infos);
347 const int64_t string_ops_handle =
reinterpret_cast<int64_t
>(string_ops);
350 if (!return_ti.is_string()) {
352 std::vector<llvm::Value*> string_oper_lvs{
353 primary_str_lv[1], primary_str_lv[2], string_ops_handle_lv};
354 const auto return_type = return_ti.get_type();
355 std::string fn_call =
"apply_numeric_string_ops_";
356 switch (return_type) {
379 throw std::runtime_error(
"Unimplemented type for string-to-numeric translation");
382 const auto logical_size = return_ti.get_logical_size() * 8;
383 auto llvm_return_type = return_ti.is_fp()
387 if (nullcheck_codegen) {
394 CHECK(return_ti.is_dict_encoded_string());
395 const int64_t dest_string_proxy_handle =
396 reinterpret_cast<int64_t
>(
executor()->getStringDictionaryProxy(
397 return_ti.getStringDictKey(),
executor()->getRowSetMemoryOwner(),
true));
398 auto dest_string_proxy_handle_lv =
cgen_state_->
llInt(dest_string_proxy_handle);
399 if (non_literals_arity == 1UL) {
400 std::vector<llvm::Value*> string_oper_lvs{primary_str_lv[1],
402 string_ops_handle_lv,
403 dest_string_proxy_handle_lv};
408 if (nullcheck_codegen) {
424 const auto [secondary_str_lv, secondary_nullcheck_codegen] =
426 CHECK_EQ(
size_t(3), secondary_str_lv.size());
427 std::vector<llvm::Value*> string_oper_lvs{primary_str_lv[1],
431 string_ops_handle_lv,
432 dest_string_proxy_handle_lv};
436 if (secondary_nullcheck_codegen) {
440 if (nullcheck_codegen) {
450 Executor* executor) {
455 CHECK(string_op_infos.size());
457 if (string_op_infos.back().getReturnType().is_dict_encoded_string()) {
459 auto string_dictionary_translation_mgr =
460 std::make_unique<StringDictionaryTranslationMgr>(
468 executor->deviceCount(device_type),
470 executor->getDataMgr(),
472 return string_dictionary_translation_mgr;
475 auto string_dictionary_translation_mgr =
476 std::make_unique<StringDictionaryTranslationMgr>(
482 executor->deviceCount(device_type),
484 executor->getDataMgr(),
486 return string_dictionary_translation_mgr;
499 auto string_dictionary_translation_mgr =
503 CHECK_EQ(
size_t(1), str_id_lv.size());
508 ->
codegen(str_id_lv[0], expr_ti,
true , co);
516 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
522 auto string_dictionary_translation_mgr =
523 std::make_unique<StringDictionaryTranslationMgr>(
536 auto str_id_lv =
codegen(expr,
true , co);
537 CHECK_EQ(
size_t(1), str_id_lv.size());
541 ->
codegen(str_id_lv[0], expr_ti,
true , co);
548 throw std::runtime_error(
"LIKE not supported for unnested expressions");
550 char escape_char{
'\\'};
552 auto escape_char_expr =
554 CHECK(escape_char_expr);
555 CHECK(escape_char_expr->get_type_info().is_string());
556 CHECK_EQ(
size_t(1), escape_char_expr->get_constval().stringval->size());
557 escape_char = (*escape_char_expr->get_constval().stringval)[0];
567 if (fast_dict_like_lv) {
568 return fast_dict_like_lv;
571 CHECK(ti.is_string());
574 "Cannot do LIKE / ILIKE on this dictionary encoded column, its cardinality is "
578 if (str_lv.size() != 3) {
589 CHECK_EQ(
size_t(3), like_expr_arg_lvs.size());
591 std::vector<llvm::Value*> str_like_args{
592 str_lv[1], str_lv[2], like_expr_arg_lvs[1], like_expr_arg_lvs[2]};
593 std::string fn_name{expr->
get_is_ilike() ?
"string_ilike" :
"string_like"};
595 fn_name +=
"_simple";
600 fn_name +=
"_nullable";
607 Executor* executor) {
621 CHECK(string_oper_primary_arg_ti.is_dict_encoded_string());
628 const std::shared_ptr<Analyzer::Expr> like_arg,
631 const bool is_simple,
632 const char escape_char,
635 const auto cast_oper = std::dynamic_pointer_cast<
Analyzer::UOper>(like_arg);
641 const auto dict_like_arg = cast_oper->get_own_operand();
642 const auto& dict_like_arg_ti = dict_like_arg->get_type_info();
643 if (!dict_like_arg_ti.is_string()) {
644 throw(std::runtime_error(
"Cast from " + dict_like_arg_ti.get_type_name() +
" to " +
645 cast_oper->get_type_info().get_type_name() +
649 const auto sdp =
executor()->getStringDictionaryProxy(
650 dict_like_arg_ti.getStringDictKey(),
executor()->getRowSetMemoryOwner(),
true);
651 if (sdp->storageEntryCount() > 200000000) {
654 if (sdp->getDictKey().isTransientDict()) {
670 CHECK(pattern_ti.is_string());
673 const auto& pattern_str = *pattern_datum.
stringval;
674 const auto matching_ids = sdp->getLike(pattern_str, ilike, is_simple, escape_char);
676 std::vector<int64_t> matching_ids_64(matching_ids.size());
677 std::copy(matching_ids.begin(), matching_ids.end(), matching_ids_64.begin());
678 const auto in_values = std::make_shared<Analyzer::InIntegerSet>(
679 dict_like_arg, matching_ids_64, dict_like_arg_ti.get_notnull());
680 return codegen(in_values.get(), co);
686 const SQLOps compare_operator,
687 const std::string& pattern) {
688 std::vector<int> ret;
689 switch (compare_operator) {
710 std::runtime_error(
"unsuported operator for string comparision");
717 const std::shared_ptr<Analyzer::Expr> rhs,
718 const SQLOps compare_operator,
721 auto rhs_cast_oper = std::dynamic_pointer_cast<
const Analyzer::UOper>(rhs);
722 auto lhs_cast_oper = std::dynamic_pointer_cast<
const Analyzer::UOper>(lhs);
725 std::shared_ptr<const Analyzer::UOper> cast_oper;
726 std::shared_ptr<const Analyzer::ColumnVar> col_var;
727 auto compare_opr = compare_operator;
728 if (lhs_col_var && rhs_col_var) {
729 if (lhs_col_var->get_type_info().getStringDictKey() ==
730 rhs_col_var->get_type_info().getStringDictKey()) {
731 if (compare_operator ==
kEQ || compare_operator ==
kNE) {
739 throw std::runtime_error(
"Decoding two Dictionary encoded columns will be slow");
740 }
else if (lhs_col_var && rhs_cast_oper) {
741 cast_oper.swap(rhs_cast_oper);
742 col_var.swap(lhs_col_var);
743 }
else if (lhs_cast_oper && rhs_col_var) {
744 cast_oper.swap(lhs_cast_oper);
745 col_var.swap(rhs_col_var);
746 switch (compare_operator) {
762 if (!cast_oper || !col_var) {
767 const auto const_expr =
776 const auto col_ti = col_var->get_type_info();
777 CHECK(col_ti.is_string());
779 const auto sdp =
executor()->getStringDictionaryProxy(
780 col_ti.getStringDictKey(),
executor()->getRowSetMemoryOwner(),
true);
782 if (sdp->storageEntryCount() > 200000000) {
783 std::runtime_error(
"Cardinality for string dictionary is too high");
787 const auto& pattern_str = *const_val.stringval;
791 std::vector<int64_t> matching_ids_64(matching_ids.size());
792 std::copy(matching_ids.begin(), matching_ids.end(), matching_ids_64.begin());
794 const auto in_values = std::make_shared<Analyzer::InIntegerSet>(
795 col_var, matching_ids_64, col_ti.get_notnull());
796 return codegen(in_values.get(), co);
803 throw std::runtime_error(
"REGEXP not supported for unnested expressions");
805 char escape_char{
'\\'};
807 auto escape_char_expr =
809 CHECK(escape_char_expr);
810 CHECK(escape_char_expr->get_type_info().is_string());
811 CHECK_EQ(
size_t(1), escape_char_expr->get_constval().stringval->size());
812 escape_char = (*escape_char_expr->get_constval().stringval)[0];
816 auto fast_dict_pattern_lv =
818 if (fast_dict_pattern_lv) {
819 return fast_dict_pattern_lv;
822 CHECK(ti.is_string());
825 "Cannot do REGEXP_LIKE on this dictionary encoded column, its cardinality is too "
833 if (str_lv.size() != 3) {
841 CHECK_EQ(
size_t(3), regexp_expr_arg_lvs.size());
843 std::vector<llvm::Value*> regexp_args{
844 str_lv[1], str_lv[2], regexp_expr_arg_lvs[1], regexp_expr_arg_lvs[2]};
845 std::string fn_name(
"regexp_like");
848 fn_name +=
"_nullable";
858 const std::shared_ptr<Analyzer::Expr> pattern_arg,
860 const char escape_char,
863 const auto cast_oper = std::dynamic_pointer_cast<
Analyzer::UOper>(pattern_arg);
869 const auto dict_regexp_arg = cast_oper->get_own_operand();
870 const auto& dict_regexp_arg_ti = dict_regexp_arg->get_type_info();
871 CHECK(dict_regexp_arg_ti.is_string());
873 const auto& dict_key = dict_regexp_arg_ti.getStringDictKey();
874 const auto sdp =
executor()->getStringDictionaryProxy(
875 dict_key,
executor()->getRowSetMemoryOwner(),
true);
876 if (sdp->storageEntryCount() > 15000000) {
879 if (sdp->getDictKey().isTransientDict()) {
890 const auto string_oper =
896 CHECK(pattern_ti.is_string());
899 const auto& pattern_str = *pattern_datum.
stringval;
900 const auto matching_ids = sdp->getRegexpLike(pattern_str, escape_char);
902 std::vector<int64_t> matching_ids_64(matching_ids.size());
903 std::copy(matching_ids.begin(), matching_ids.end(), matching_ids_64.begin());
904 const auto in_values = std::make_shared<Analyzer::InIntegerSet>(
905 dict_regexp_arg, matching_ids_64, dict_regexp_arg_ti.get_notnull());
906 return codegen(in_values.get(), co);
std::pair< const char *, size_t > getStringBytes(int32_t string_id) const noexcept
llvm::Value * codegenPerRowStringOper(const Analyzer::StringOper *string_oper, const CompilationOptions &co)
const std::shared_ptr< Analyzer::Expr > get_own_arg() const
RUNTIME_EXPORT int32_t union_translate_string_id_to_other_dict(const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
std::vector< int32_t > get_compared_ids(const StringDictionaryProxy *dict, const SQLOps compare_operator, const std::string &pattern)
const Expr * get_escape_expr() const
#define DEF_APPLY_NUMERIC_STRING_OPS(value_type, value_name)
__device__ StringView string_decode(int8_t *chunk_iter_, int64_t pos)
std::unique_ptr< StringDictionaryTranslationMgr > translate_dict_strings(const Analyzer::StringOper *expr, const ExecutorDeviceType device_type, Executor *executor)
void pre_translate_string_ops(const Analyzer::StringOper *string_oper, Executor *executor)
const Expr * get_escape_expr() const
llvm::IRBuilder ir_builder_
llvm::Value * codegenPseudoStringOper(const Analyzer::ColumnVar *, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, const CompilationOptions &)
llvm::Type * get_fp_type(const int width, llvm::LLVMContext &context)
const Expr * get_arg() const
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
std::string getString(int32_t string_id) const
bool requiresPerRowTranslation() const
std::string toString(const QueryDescriptionType &type)
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
#define TRANSIENT_DICT_ID
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
const Expr * get_arg() const
RUNTIME_EXPORT int32_t apply_multi_input_string_ops_and_encode(const char *str1_ptr, const int32_t str1_len, const char *str2_ptr, const int32_t str2_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
size_t formatHMS(char *buf, size_t const max, int64_t const unixtime)
#define DEF_CONVERT_TO_STRING_AND_ENCODE(value_type, value_name)
std::vector< int32_t > getCompare(const std::string &pattern, const std::string &comp_operator) const
bool get_calc_encoded_length() const
llvm::LLVMContext & context_
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Classes representing a parse tree.
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_timestamp(const int64_t operand, const int32_t dimension, const int64_t string_dict_handle)
DEVICE auto copy(ARGS &&...args)
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_date(const int64_t operand, const int64_t string_dict_handle)
bool get_is_simple() const
std::string_view stringView() const
llvm::Value * codegenDictStrCmp(const std::shared_ptr< Analyzer::Expr >, const std::shared_ptr< Analyzer::Expr >, const SQLOps, const CompilationOptions &co)
llvm::Value * codegenDictRegexp(const std::shared_ptr< Analyzer::Expr > arg, const Analyzer::Constant *pattern, const char escape_char, const CompilationOptions &)
double power10inv(unsigned const x)
int32_t getOrAddTransient(const std::string &)
const SQLTypeInfo & get_type_info() const
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
llvm::Constant * inlineNull(const SQLTypeInfo &)
ExecutorDeviceType device_type
std::vector< StringOps_Namespace::StringOpInfo > getStringOpInfos(const Analyzer::StringOper *expr)
size_t formatDate(char *buf, size_t const max, int64_t const unixtime)
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_decimal(const int64_t operand, const int32_t precision, const int32_t scale, const int64_t string_dict_handle)
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
const Expr * get_pattern_expr() const
Expression class for string functions The "arg" constructor parameter must be an expression that reso...
RUNTIME_EXPORT int32_t intersect_translate_string_id_to_other_dict(const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
RUNTIME_EXPORT int32_t apply_string_ops_and_encode(const char *str_ptr, const int32_t str_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
SqlStringOpKind get_kind() const
const Expr * get_like_expr() const
Datum get_constval() const
const Expr * get_arg() const
llvm::StructType * createStringViewStructType()
size_t formatDateTime(char *buf, size_t const max, int64_t const timestamp, int const dimension, bool use_iso_format)
std::pair< std::vector< llvm::Value * >, std::unique_ptr< CodeGenerator::NullCheckCodegen > > codegenStringFetchAndEncode(const Analyzer::StringOper *expr, const CompilationOptions &co, const size_t arg_idx, const bool codegen_nullcheck)
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_time(const int64_t operand, const int64_t string_dict_handle)
RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_bool(const int8_t operand, const int64_t string_dict_handle)
const Expr * get_arg() const
llvm::ConstantInt * llInt(const T v) const
const StringDictionaryTranslationMgr * moveStringDictionaryTranslationMgr(std::unique_ptr< const StringDictionaryTranslationMgr > &&str_dict_translation_mgr)
const std::shared_ptr< Analyzer::Expr > get_own_arg() const
bool is_unnest(const Analyzer::Expr *expr)
HOST DEVICE bool get_notnull() const
llvm::Value * codegen(llvm::Value *str_id_input, const SQLTypeInfo &input_ti, const bool add_nullcheck, const CompilationOptions &co) const
size_t getNonLiteralsArity() const
std::vector< std::shared_ptr< Analyzer::Expr > > getChainedStringOpExprs() const
const Expr * getArg(const size_t i) const
RUNTIME_EXPORT int32_t string_compress(const StringView string_view, const int64_t string_dict_handle)
bool get_is_ilike() const
const shared::StringDictKey & getStringDictKey() const
llvm::Value * codegenDictLike(const std::shared_ptr< Analyzer::Expr > arg, const Analyzer::Constant *pattern, const bool ilike, const bool is_simple, const char escape_char, const CompilationOptions &)
Executor * executor() const
RUNTIME_EXPORT StringView string_decompress(const int32_t string_id, const int64_t string_dict_handle)