33 #include <string_view>
37 namespace StringOps_Namespace {
// String value paired with a null flag. Both constructors visible here set
// is_null when the input is empty, so they do not distinguish an empty string
// from a null one. (Member declarations and any further constructors are
// elided in this excerpt.)
39 struct NullableStrType {
// Builds from a std::string; the value is flagged null when str is empty.
40 NullableStrType(
const std::string& str) : str(str),
is_null(str.empty()) {}
// Builds from a string_view, copying it into str; flagged null when sv is empty.
41 NullableStrType(
const std::string_view sv) : str(sv),
is_null(sv.empty()) {}
// Returns the stored string and the null flag as a (str, is_null) pair.
44 std::pair<std::string, bool> toPair()
const {
return {str,
is_null}; }
// Tail of a StringOp constructor (its opening lines are elided in this
// excerpt). has_var_str_literal_ records whether an optional literal was
// supplied; var_str_literal_ wraps the literal's value when one is present
// (the branch taken when it is absent is not visible here).
53 const std::optional<std::string>& var_str_optional_literal)
56 , has_var_str_literal_(var_str_optional_literal.has_value())
57 , var_str_literal_(!var_str_optional_literal.has_value()
59 : NullableStrType(var_str_optional_literal.value())) {}
// Tail of a second constructor overload that additionally stores a
// caller-supplied return type in return_ti_.
63 const std::optional<std::string>& var_str_optional_literal)
65 , return_ti_(return_ti)
66 , has_var_str_literal_(var_str_optional_literal.has_value())
67 , var_str_literal_(!var_str_optional_literal.has_value()
69 : NullableStrType(var_str_optional_literal.value())) {}
// Virtual destructor: StringOp is used as a polymorphic base.
71 virtual ~StringOp() =
default;
// Single-input evaluation; every concrete operator must implement this.
73 virtual NullableStrType operator()(std::string
const&)
const = 0;
// Two-input evaluation. The base version reports UNREACHABLE; operators that
// accept two strings override it. The return only satisfies the signature.
75 virtual NullableStrType operator()(
const std::string& str1,
76 const std::string& str2)
const {
77 UNREACHABLE() <<
"operator(str1, str2) not allowed for this method";
79 return NullableStrType();
// Zero-input evaluation against the stored literal. CHECKs that a literal was
// supplied, returns the literal unchanged when it is null, and otherwise
// forwards its string to the single-input operator().
82 virtual NullableStrType operator()()
const {
83 CHECK(hasVarStringLiteral());
84 if (var_str_literal_.is_null) {
85 return var_str_literal_;
87 return operator()(var_str_literal_.str);
// Single-input numeric evaluation. The base version reports UNREACHABLE;
// operators that produce a Datum override it.
90 virtual Datum numericEval(
const std::string_view str)
const {
91 UNREACHABLE() <<
"numericEval not allowed for this method";
// Two-input numeric evaluation. The base version reports UNREACHABLE.
96 virtual Datum numericEval(
const std::string_view str1,
97 const std::string_view str2)
const {
98 UNREACHABLE() <<
"numericEval not allowed for this method";
// Zero-input numeric evaluation against the stored literal. CHECKs that a
// literal was supplied; the body of the null branch is elided in this
// excerpt. A non-null literal is forwarded to the single-input numericEval.
103 virtual Datum numericEval()
const {
104 CHECK(hasVarStringLiteral());
105 if (var_str_literal_.is_null) {
108 return numericEval(var_str_literal_.str);
// Returns the SQL type info stored in return_ti_.
111 virtual const SQLTypeInfo& getReturnType()
const {
return return_ti_; }
// Returns the stored literal's string; CHECKs that a literal was supplied.
113 const std::string& getVarStringLiteral()
const {
114 CHECK(hasVarStringLiteral());
115 return var_str_literal_.str;
// True when the operator was constructed with a literal input.
118 bool hasVarStringLiteral()
const {
return has_var_str_literal_; }
// Static helper returning a boost::regex for the named operator. Only the
// declaration is visible here; RegexpSubstr passes true and RegexpReplace
// passes false for supports_sub_matches.
121 static boost::regex generateRegex(
const std::string& op_name,
122 const std::string& regex_pattern,
123 const std::string& regex_params,
124 const bool supports_sub_matches);
// Whether a literal was supplied, and the literal itself; both are const and
// fixed at construction.
128 const bool has_var_str_literal_{
false};
129 const NullableStrType var_str_literal_;
// StringOp subclass that overrides both the string-returning operator() and
// the Datum-returning numericEval for a single input. The constructor's
// opening and initializer list are elided in this excerpt, and the
// definitions live elsewhere.
132 struct TryStringCast :
public StringOp {
135 const std::optional<std::string>& var_str_optional_literal)
138 NullableStrType operator()(
const std::string& str)
const override;
139 Datum numericEval(
const std::string_view str)
const override;
// StringOp subclass holding a search string and a start offset. It overrides
// operator() and the single-input numericEval; definitions live elsewhere.
// Presumably reports where search_str_ occurs in the input - confirm against
// the implementation.
142 struct Position :
public StringOp {
// Overload without an explicit start; how start_ is initialised here is
// elided in this excerpt.
144 Position(
const std::optional<std::string>& var_str_optional_literal,
145 const std::string& search_str)
148 var_str_optional_literal)
149 , search_str_(search_str)
// Overload with an explicit start (its parameter line is elided). A positive
// start is stored minus one, i.e. converted from 1-based to 0-based; zero and
// negative values are stored unchanged.
152 Position(
const std::optional<std::string>& var_str_optional_literal,
153 const std::string& search_str,
157 var_str_optional_literal)
158 , search_str_(search_str)
159 , start_(start > 0 ? start - 1 : start) {}
161 NullableStrType operator()(
const std::string& str)
const override;
162 Datum numericEval(
const std::string_view str)
const override;
// Copy of the search string, and the adjusted start offset.
165 const std::string search_str_;
166 const int64_t start_;
// StringOp subclass that compares strings numerically: it overrides
// operator() plus the one- and two-input numericEval. Definitions live
// elsewhere; the metric is presumably Jaro-Winkler similarity, per the name.
169 struct JarowinklerSimilarity :
public StringOp {
// Overload that fixes one side of the comparison to str_literal.
170 JarowinklerSimilarity(
const std::optional<std::string>& var_str_optional_literal,
171 const std::string& str_literal)
174 var_str_optional_literal)
175 , str_literal_(str_literal) {}
// Overload without a fixed literal; its initializer list is elided here.
177 JarowinklerSimilarity(
const std::optional<std::string>& var_str_optional_literal)
180 NullableStrType operator()(
const std::string& str)
const override;
182 Datum numericEval(
const std::string_view str)
const override;
183 Datum numericEval(
const std::string_view str1,
184 const std::string_view str2)
const override;
// Literal operand captured by the two-argument constructor.
186 const std::string str_literal_;
// StringOp subclass with the same shape as JarowinklerSimilarity: it
// overrides operator() plus the one- and two-input numericEval. Definitions
// live elsewhere; the metric is presumably Levenshtein edit distance, per
// the name.
189 struct LevenshteinDistance :
public StringOp {
// Overload that fixes one side of the comparison to str_literal.
190 LevenshteinDistance(
const std::optional<std::string>& var_str_optional_literal,
191 const std::string& str_literal)
194 var_str_optional_literal)
195 , str_literal_(str_literal) {}
// Overload without a fixed literal; its initializer list is elided here.
197 LevenshteinDistance(
const std::optional<std::string>& var_str_optional_literal)
200 NullableStrType operator()(
const std::string& str)
const override;
202 Datum numericEval(
const std::string_view str)
const override;
203 Datum numericEval(
const std::string_view str1,
204 const std::string_view str2)
const override;
// Literal operand captured by the two-argument constructor.
206 const std::string str_literal_;
// StringOp subclass declaring only the single-input operator() override. The
// constructor's initializer list is elided and the definition lives
// elsewhere; presumably lower-cases its input, per the name.
209 struct Lower :
public StringOp {
210 Lower(
const std::optional<std::string>& var_str_optional_literal)
213 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subclass declaring only the single-input operator() override. The
// constructor's initializer list is elided and the definition lives
// elsewhere; presumably upper-cases its input, per the name.
216 struct Upper :
public StringOp {
217 Upper(
const std::optional<std::string>& var_str_optional_literal)
219 NullableStrType operator()(
const std::string& str)
const override;
/**
 * Builds a 256-entry membership table for the bytes of chars_to_set.
 *
 * Each byte is converted through unsigned char before it is used as an index.
 * Where plain char is signed, bytes >= 0x80 (for example the bytes of a
 * multi-byte UTF-8 character) are negative; passed directly they convert to an
 * out-of-range size_t and std::bitset::set throws std::out_of_range.
 *
 * @param chars_to_set bytes to mark in the table; may be empty
 * @return bitset with bit b set for every byte value b present in chars_to_set
 *
 * NOTE(review): code that tests membership should index with the same
 * unsigned-char conversion - confirm at the call sites.
 */
inline std::bitset<256> build_char_bitmap(const std::string& chars_to_set) {
  std::bitset<256> char_bitmap;
  for (const char str_char : chars_to_set) {
    char_bitmap.set(static_cast<unsigned char>(str_char));
  }
  return char_bitmap;
}
// StringOp subclass that precomputes a bitmap of delimiter characters at
// construction. The operator() definition lives elsewhere; presumably the
// bitmap marks word boundaries for capitalisation - confirm there.
230 struct InitCap :
public StringOp {
// Builds delimiter_bitmap_ from the fixed delimiter_chars set (the base-class
// initializer is elided in this excerpt).
231 InitCap(
const std::optional<std::string>& var_str_optional_literal)
233 , delimiter_bitmap_(build_char_bitmap(InitCap::delimiter_chars)) {}
235 NullableStrType operator()(
const std::string& str)
const override;
// Fixed delimiter set, written as a raw string literal.
238 static constexpr
char const* delimiter_chars = R
"(!?@"^#$&~_,.:;+-*%/|\[](){}<>)";
// Per-byte membership table derived from delimiter_chars.
239 const std::bitset<256> delimiter_bitmap_;
// StringOp subclass declaring only the single-input operator() override. The
// constructor's initializer list is elided and the definition lives
// elsewhere; presumably reverses its input, per the name.
242 struct Reverse :
public StringOp {
243 Reverse(
const std::optional<std::string>& var_str_optional_literal)
246 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subclass parameterised by a repeat count n. The operator()
// definition and the n_ member declaration are not visible in this excerpt.
249 struct Repeat :
public StringOp {
// n_ is initialised to n, or to 0 when n is negative. The constructor body
// throws std::runtime_error with the message below; the guarding condition is
// elided here (presumably n < 0 - confirm).
251 Repeat(
const std::optional<std::string>& var_str_optional_literal,
const int64_t
n)
253 , n_(n >= 0 ? n : 0UL) {
255 throw std::runtime_error(
"Number of repeats must be >= 0");
259 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subclass holding an optional literal operand and an ordering flag.
// It overrides both the single-input and the two-input operator();
// definitions live elsewhere.
265 struct Concat :
public StringOp {
// Overload for combining the input with a fixed literal. reverse_order is
// stored as given; presumably it selects which operand comes first - confirm
// in the definition.
266 Concat(
const std::optional<std::string>& var_str_optional_literal,
267 const std::string& str_literal,
268 const bool reverse_order)
270 var_str_optional_literal)
271 , str_literal_(str_literal)
272 , reverse_order_(reverse_order) {}
// Overload without a literal operand; reverse_order_ is fixed to false. The
// rest of the initializer list is elided in this excerpt.
274 Concat(
const std::optional<std::string>& var_str_optional_literal)
276 , reverse_order_(
false) {}
278 NullableStrType operator()(
const std::string& str)
const override;
280 NullableStrType operator()(
const std::string& str1,
281 const std::string& str2)
const override;
// Literal operand and ordering flag, both fixed at construction.
283 const std::string str_literal_;
284 const bool reverse_order_;
// StringOp subclass that pads on the left or the right. The direction is
// derived from the operator kind at construction; lpad/rpad and operator()
// are defined elsewhere.
287 struct Pad :
public StringOp {
// Side on which padding is applied.
289 enum class PadMode {
LEFT, RIGHT };
// The op_kind parameter's declaration line is elided in this excerpt. An
// empty padding_string falls back to a single space for both padding_string_
// and padding_char_.
// NOTE(review): padded_length is cast to size_t with no range check, so a
// negative value wraps to a very large length - confirm callers validate it.
// NOTE(review): padding_string_length_ is taken from the argument, not from
// padding_string_, so it is 0 when padding_string is empty even though
// padding_string_ is then " " - confirm lpad/rpad tolerate a zero length.
291 Pad(
const std::optional<std::string>& var_str_optional_literal,
293 const int64_t padded_length,
294 const std::string& padding_string)
295 : StringOp(op_kind, var_str_optional_literal)
296 , pad_mode_(Pad::op_kind_to_pad_mode(op_kind))
297 , padded_length_(static_cast<size_t>(padded_length))
298 , padding_string_(padding_string.empty() ?
" " : padding_string)
299 , padding_string_length_(padding_string.size())
300 , padding_char_(padding_string.empty() ?
' ' : padding_string[0]) {}
302 NullableStrType operator()(
const std::string& str)
const override;
// Helper declarations; presumably operator() dispatches to one of them based
// on pad_mode_ - confirm in the definition.
305 std::string lpad(
const std::string& str)
const;
307 std::string rpad(
const std::string& str)
const;
// Configuration fixed at construction. Declaration order matters:
// padding_string_ is initialised before padding_string_length_ and
// padding_char_.
311 const PadMode pad_mode_;
312 const size_t padded_length_;
313 const std::string padding_string_;
314 const size_t padding_string_length_;
315 const char padding_char_;
// StringOp subclass that strips a set of characters. The mode is derived from
// the operator kind at construction; operator() is defined elsewhere.
318 struct Trim :
public StringOp {
// Which end(s) of the string are trimmed.
320 enum class TrimMode {
LEFT, RIGHT, BOTH };
// The op_kind parameter's declaration line is elided in this excerpt. An
// empty trim_chars falls back to a single space before the per-byte bitmap is
// built.
322 Trim(
const std::optional<std::string>& var_str_optional_literal,
324 const std::string& trim_chars)
325 : StringOp(op_kind, var_str_optional_literal)
326 , trim_mode_(Trim::op_kind_to_trim_mode(op_kind))
327 , trim_char_bitmap_(build_char_bitmap(trim_chars.empty() ?
" " : trim_chars)) {}
329 NullableStrType operator()(
const std::string& str)
const override;
// Trim direction, and the membership table of characters to strip.
334 const TrimMode trim_mode_;
335 const std::bitset<256> trim_char_bitmap_;
// StringOp subclass holding a start offset and a length. operator() is
// defined elsewhere.
338 struct Substring :
public StringOp {
// Overload without a length (the start parameter line is elided here). A
// positive start is stored minus one (1-based to 0-based); zero and negative
// values are stored unchanged. length_ is std::string::npos.
344 Substring(
const std::optional<std::string>& var_str_optional_literal,
347 , start_(start > 0 ? start - 1 : start)
348 , length_(std::string::npos) {}
// Overload with an explicit length. start is adjusted as above; a negative
// length is clamped to 0 before the cast to size_t.
354 Substring(
const std::optional<std::string>& var_str_optional_literal,
356 const int64_t length)
358 , start_(start > 0 ? start - 1 : start)
359 , length_(static_cast<size_t>(length >= 0 ? length : 0)) {}
361 NullableStrType operator()(
const std::string& str)
const override;
// Adjusted start offset, and the length (npos when none was given).
364 const int64_t start_;
365 const size_t length_;
// StringOp subclass holding a string to insert, a start offset and the number
// of characters to replace. operator() is defined elsewhere.
368 struct Overlay :
public StringOp {
// Overload without a replacement length (the start parameter line is elided
// here). A positive start is stored minus one; the replacement length
// defaults to the size of the inserted string. insert_str_ is declared before
// replacement_length_, so reading it in the initializer is well-defined.
369 Overlay(
const std::optional<std::string>& var_str_optional_literal,
370 const std::string& insert_str,
373 , insert_str_(insert_str)
374 , start_(start > 0 ? start - 1 : start)
375 , replacement_length_(insert_str_.size()) {}
// Overload with an explicit replacement length; a negative value is clamped
// to 0 before the cast to size_t.
377 Overlay(
const std::optional<std::string>& var_str_optional_literal,
378 const std::string& insert_str,
380 const int64_t replacement_length)
382 , insert_str_(insert_str)
383 , start_(start > 0 ? start - 1 : start)
384 , replacement_length_(
385 static_cast<size_t>(replacement_length >= 0 ? replacement_length : 0)) {}
387 NullableStrType operator()(
const std::string& base_str)
const override;
// Inserted text, adjusted start offset, and replacement length.
390 const std::string insert_str_;
391 const int64_t start_;
392 const size_t replacement_length_;
// StringOp subclass holding a pattern string and its replacement. operator()
// is defined elsewhere.
395 struct Replace :
public StringOp {
// Copies both strings and caches their lengths, taken from the constructor
// arguments (the base-class initializer is elided in this excerpt).
396 Replace(
const std::optional<std::string>& var_str_optional_literal,
397 const std::string& pattern_str,
398 const std::string& replacement_str)
400 , pattern_str_(pattern_str)
401 , replacement_str_(replacement_str)
402 , pattern_str_len_(pattern_str.size())
403 , replacement_str_len_(replacement_str.size()) {}
405 NullableStrType operator()(
const std::string& str)
const override;
// Pattern and replacement text with their cached lengths.
407 const std::string pattern_str_;
408 const std::string replacement_str_;
409 const size_t pattern_str_len_;
410 const size_t replacement_str_len_;
// StringOp subclass holding a delimiter and a part index. operator() is
// defined elsewhere.
413 struct SplitPart :
public StringOp {
// split_part of 0 is stored as 1; otherwise its absolute value is stored.
// reverse_ is set when split_part is negative; presumably parts are then
// counted from the end - confirm in the definition. The reverse_ member
// declaration is elided in this excerpt.
// NOTE(review): std::abs is undefined for the minimum int64_t value -
// confirm callers cannot pass it.
414 SplitPart(
const std::optional<std::string>& var_str_optional_literal,
415 const std::string& delimiter,
416 const int64_t split_part)
418 , delimiter_(delimiter)
419 , split_part_(split_part == 0 ? 1UL : std::abs(split_part))
420 , delimiter_length_(delimiter.size())
421 , reverse_(split_part < 0) {}
423 NullableStrType operator()(
const std::string& str)
const override;
// Delimiter text, normalised part index, and cached delimiter length.
427 const std::string delimiter_;
428 const size_t split_part_;
429 const size_t delimiter_length_;
// StringOp subclass holding a compiled regex, a start position, an occurrence
// index and sub-match settings. operator() and the static helpers are defined
// elsewhere.
433 struct RegexpSubstr :
public StringOp {
// Positive start_pos and occurrence values are stored minus one (1-based to
// 0-based); zero and negative values are stored unchanged. The regex is built
// by StringOp::generateRegex with supports_sub_matches = true. Parts of the
// initializer list are elided in this excerpt.
// NOTE(review): set_sub_match_info is called with regex_params, while its
// declaration names the first parameter regex_pattern - confirm which is
// intended.
435 RegexpSubstr(
const std::optional<std::string>& var_str_optional_literal,
436 const std::string& regex_pattern,
437 const int64_t start_pos,
438 const int64_t occurrence,
439 const std::string& regex_params,
440 const int64_t sub_match_group_idx)
442 , regex_pattern_str_(
445 StringOp::generateRegex(
"REGEXP_SUBSTR", regex_pattern, regex_params,
true))
446 , start_pos_(start_pos > 0 ? start_pos - 1 : start_pos)
447 , occurrence_(occurrence > 0 ? occurrence - 1 : occurrence)
448 , sub_match_info_(set_sub_match_info(regex_params, sub_match_group_idx)) {}
450 NullableStrType operator()(
const std::string& str)
const override;
// Static helper taking a match and the (bool, index) sub-match settings and
// returning a string; declaration only.
453 static std::string get_sub_match(
const boost::smatch& match,
454 const std::pair<bool, int64_t> sub_match_info);
// Static helper producing the (bool, index) sub-match settings stored in
// sub_match_info_; declaration only.
456 static std::pair<bool, int64_t> set_sub_match_info(
const std::string& regex_pattern,
457 const int64_t sub_match_group_idx);
// Pattern text, compiled regex, adjusted start position and occurrence, and
// sub-match settings.
459 const std::string regex_pattern_str_;
460 const boost::regex regex_pattern_;
461 const int64_t start_pos_;
462 const int64_t occurrence_;
463 const std::pair<bool, int64_t> sub_match_info_;
// StringOp subclass holding a compiled regex, replacement text, a start
// position and an occurrence number. operator() and get_nth_regex_match are
// defined elsewhere.
466 struct RegexpReplace :
public StringOp {
// A positive start_pos is stored minus one (1-based to 0-based). occurrence
// is stored as given, unlike RegexpSubstr, which decrements positive values.
// The regex is built by StringOp::generateRegex with supports_sub_matches =
// false. Parts of the initializer list are elided in this excerpt.
468 RegexpReplace(
const std::optional<std::string>& var_str_optional_literal,
469 const std::string& regex_pattern,
470 const std::string& replacement,
471 const int64_t start_pos,
472 const int64_t occurrence,
473 const std::string& regex_params)
475 , regex_pattern_str_(
478 StringOp::generateRegex(
"REGEXP_REPLACE", regex_pattern, regex_params,
false))
479 , replacement_(replacement)
480 , start_pos_(start_pos > 0 ? start_pos - 1 : start_pos)
481 , occurrence_(occurrence) {}
483 NullableStrType operator()(
const std::string& str)
const override;
// Static helper returning a pair of size_t for the requested occurrence;
// presumably a match position and length - confirm in the definition.
486 static std::pair<size_t, size_t> get_nth_regex_match(
const std::string& str,
487 const size_t start_pos,
488 const boost::regex& regex_pattern,
489 const int64_t occurrence);
// Pattern text, compiled regex, replacement text, adjusted start position,
// and occurrence number.
491 const std::string regex_pattern_str_;
492 const boost::regex regex_pattern_;
493 const std::string replacement_;
494 const int64_t start_pos_;
495 const int64_t occurrence_;
// StringOp subclass that parses a JSON path at construction into a parse mode
// and a list of keys. operator() and the two path parsers are defined
// elsewhere.
516 struct JsonValue :
public StringOp {
// Derives both the parse mode and the key list from json_path (the base-class
// initializer is elided in this excerpt).
518 JsonValue(
const std::optional<std::string>& var_str_optional_literal,
519 const std::string& json_path)
521 , json_parse_mode_(parse_json_parse_mode(json_path))
522 , json_keys_(parse_json_path(json_path)) {}
524 NullableStrType operator()(
const std::string& str)
const override;
// A path step addresses either an object member or an array element.
527 enum class JsonKeyKind { JSON_OBJECT, JSON_ARRAY };
// Lax mode makes the error handlers below return a default-constructed
// NullableStrType; the throw that follows each lax check covers the other
// mode.
528 enum class JsonParseMode { PARSE_MODE_LAX, PARSE_MODE_STRICT };
// Fields and constructors of the JsonKey path-step type (its struct header
// and the array_key field declaration are elided in this excerpt).
531 JsonKeyKind key_kind;
532 std::string object_key;
// Object-member step.
536 JsonKey(
const std::string& object_key)
537 : key_kind(JsonKeyKind::JSON_OBJECT), object_key(object_key) {}
// Array-element step.
538 JsonKey(
const size_t array_key)
539 : key_kind(JsonKeyKind::JSON_ARRAY), array_key(array_key) {}
// Path parsers; declarations only.
542 static JsonParseMode parse_json_parse_mode(std::string_view json_path);
543 static std::vector<JsonKey> parse_json_path(
const std::string& json_path);
// Parse-failure handler: lax mode returns a default-constructed
// NullableStrType; the throw below reports the offending JSON text.
544 inline NullableStrType handle_parse_error(
const std::string&
json_str)
const {
545 if (json_parse_mode_ == JsonParseMode::PARSE_MODE_LAX) {
546 return NullableStrType();
548 throw std::runtime_error(
"Could not parse: " + json_str +
".");
// Key-lookup-failure handler with the same lax/throw split (the end of the
// throw expression is elided in this excerpt).
552 inline NullableStrType handle_key_error(
const std::string& json_str)
const {
553 if (json_parse_mode_ == JsonParseMode::PARSE_MODE_LAX) {
554 return NullableStrType();
556 throw std::runtime_error(
"Key not found or did not contain value in: " + json_str +
// Compile-time switch, currently false; presumably gates strict mode in
// parse_json_parse_mode - confirm in the definition.
560 static constexpr
bool allow_strict_json_parsing{
false};
// Parse mode and path steps, both fixed at construction.
561 const JsonParseMode json_parse_mode_;
563 const std::vector<JsonKey> json_keys_;
// StringOp subclass declaring only the single-input operator() override. The
// constructor's initializer list is elided and the definition lives
// elsewhere; presumably Base64-encodes its input, per the name.
566 struct Base64Encode :
public StringOp {
567 Base64Encode(
const std::optional<std::string>& var_str_optional_literal)
570 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subclass declaring only the single-input operator() override. The
// constructor's initializer list is elided and the definition lives
// elsewhere; presumably Base64-decodes its input, per the name.
573 struct Base64Decode :
public StringOp {
574 Base64Decode(
const std::optional<std::string>& var_str_optional_literal)
577 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subclass whose operator() ignores its input and returns a
// default-constructed NullableStrType. The remaining constructor parameters
// and initializer list are elided in this excerpt.
580 struct NullOp :
public StringOp {
581 NullOp(
const std::optional<std::string>& var_str_optional_literal,
// str is not read; the result does not depend on the input.
585 NullableStrType operator()(
const std::string& str)
const override {
586 return NullableStrType();
// Factory declaration: returns an owning pointer to an immutable StringOp for
// the given StringOpInfo. The definition lives elsewhere.
592 std::unique_ptr<const StringOp>
gen_string_op(
const StringOpInfo& string_op_info);
// Trailing parameter line of another declaration taking a StringOpInfo; its
// return type and name are elided in this excerpt.
595 const StringOpInfo& string_op_info);
// Members of StringOps, a container of StringOp instances (the class header
// is elided in this excerpt). All evaluation methods are defined elsewhere.
// Default constructor: builds the op list from an empty info list; size() is 0.
// NOTE(review): genStringOpsFromOpInfos is a non-static member called from the
// initializer list, before string_ops_ and num_ops_ exist - confirm it reads
// no member state.
601 StringOps() : string_ops_(genStringOpsFromOpInfos({})), num_ops_(0UL) {}
// Builds the op list from string_op_infos; num_ops_ is the number of infos.
603 StringOps(
const std::vector<StringOpInfo>& string_op_infos)
604 : string_ops_(genStringOpsFromOpInfos(string_op_infos))
605 , num_ops_(string_op_infos.size()) {}
// Single-input evaluation returning a std::string.
607 std::string operator()(
const std::string& str)
const;
// Two-input evaluation returning a std::string.
609 std::string multi_input_eval(
const std::string_view str1,
610 const std::string_view str2)
const;
// string_view overload; presumably sv_storage backs the returned view, so it
// must outlive the result - confirm against the definition.
612 std::string_view operator()(
const std::string_view sv, std::string& sv_storage)
const;
// One- and two-input evaluation returning a Datum.
614 Datum numericEval(
const std::string_view str)
const;
615 Datum numericEval(
const std::string_view str1,
const std::string_view str2)
const;
// Number of ops recorded at construction.
617 size_t size()
const {
return num_ops_; }
// Converts op descriptions into owned StringOp instances; declaration only.
620 std::vector<std::unique_ptr<const StringOp>> genStringOpsFromOpInfos(
621 const std::vector<StringOpInfo>& string_op_infos)
const;
// Owned ops and their count, both fixed at construction.
623 const std::vector<std::unique_ptr<const StringOp>> string_ops_;
624 const size_t num_ops_;
Datum apply_numeric_op_to_literals(const StringOpInfo &string_op_info)
const std::string json_str(const rapidjson::Value &obj) noexcept
Constants for Builtin SQL Types supported by HEAVY.AI.
CONSTEXPR DEVICE bool is_null(const T &value)
std::pair< std::string, bool > apply_string_op_to_literals(const StringOpInfo &string_op_info)
bool g_enable_smem_group_by true
Datum NullDatum(const SQLTypeInfo &ti)
bool g_enable_watchdog false
Common Enum definitions for SQL processing.
std::unique_ptr< const StringOp > gen_string_op(const StringOpInfo &string_op_info)