33 #include <string_view>
37 namespace StringOps_Namespace {
39 struct NullableStrType {
40 NullableStrType(
const std::string& str) : str(str),
is_null(str.empty()) {}
41 NullableStrType(
const std::string_view sv) : str(sv),
is_null(sv.empty()) {}
44 std::pair<std::string, bool> toPair()
const {
return {str,
is_null}; }
53 const std::optional<std::string>& var_str_optional_literal)
56 , has_var_str_literal_(var_str_optional_literal.has_value())
57 , var_str_literal_(!var_str_optional_literal.has_value()
59 : NullableStrType(var_str_optional_literal.value())) {}
63 const std::optional<std::string>& var_str_optional_literal)
65 , return_ti_(return_ti)
66 , has_var_str_literal_(var_str_optional_literal.has_value())
67 , var_str_literal_(!var_str_optional_literal.has_value()
69 : NullableStrType(var_str_optional_literal.value())) {}
71 virtual ~StringOp() =
default;
73 virtual NullableStrType operator()(std::string
const&)
const = 0;
75 virtual NullableStrType operator()(
const std::string& str1,
76 const std::string& str2)
const {
77 UNREACHABLE() <<
"operator(str1, str2) not allowed for this method";
79 return NullableStrType();
82 virtual NullableStrType operator()()
const {
83 CHECK(hasVarStringLiteral());
84 if (var_str_literal_.is_null) {
85 return var_str_literal_;
87 return operator()(var_str_literal_.str);
90 virtual Datum numericEval(
const std::string_view str)
const {
91 UNREACHABLE() <<
"numericEval not allowed for this method";
96 virtual Datum numericEval()
const {
97 CHECK(hasVarStringLiteral());
98 if (var_str_literal_.is_null) {
101 return numericEval(var_str_literal_.str);
104 virtual const SQLTypeInfo& getReturnType()
const {
return return_ti_; }
106 const std::string& getVarStringLiteral()
const {
107 CHECK(hasVarStringLiteral());
108 return var_str_literal_.str;
111 bool hasVarStringLiteral()
const {
return has_var_str_literal_; }
114 static boost::regex generateRegex(
const std::string& op_name,
115 const std::string& regex_pattern,
116 const std::string& regex_params,
117 const bool supports_sub_matches);
121 const bool has_var_str_literal_{
false};
122 const NullableStrType var_str_literal_;
125 struct TryStringCast :
public StringOp {
128 const std::optional<std::string>& var_str_optional_literal)
131 NullableStrType operator()(
const std::string& str)
const override;
132 Datum numericEval(
const std::string_view str)
const override;
135 struct Position :
public StringOp {
137 Position(
const std::optional<std::string>& var_str_optional_literal,
138 const std::string& search_str)
141 var_str_optional_literal)
142 , search_str_(search_str)
145 Position(
const std::optional<std::string>& var_str_optional_literal,
146 const std::string& search_str,
150 var_str_optional_literal)
151 , search_str_(search_str)
152 , start_(start > 0 ? start - 1 : start) {}
154 NullableStrType operator()(
const std::string& str)
const override;
155 Datum numericEval(
const std::string_view str)
const override;
158 const std::string search_str_;
159 const int64_t start_;
162 struct Lower :
public StringOp {
163 Lower(
const std::optional<std::string>& var_str_optional_literal)
166 NullableStrType operator()(
const std::string& str)
const override;
169 struct Upper :
public StringOp {
170 Upper(
const std::optional<std::string>& var_str_optional_literal)
172 NullableStrType operator()(
const std::string& str)
const override;
/**
 * @brief Builds a 256-entry membership bitmap from the bytes of a string.
 *
 * Each byte of @p chars_to_set turns on the bit at that byte's value (0-255).
 * Duplicate bytes are harmless; an empty string yields an all-zero bitmap.
 *
 * @param chars_to_set Bytes whose bits should be set.
 * @return Bitmap with one bit set per distinct byte value in the input.
 */
inline std::bitset<256> build_char_bitmap(const std::string& chars_to_set) {
  std::bitset<256> char_bitmap;
  for (const char str_char : chars_to_set) {
    // Plain char may be signed. Passing a byte >= 0x80 directly would convert a
    // negative value to a huge size_t and make bitset::set throw
    // std::out_of_range, so index by the unsigned byte value instead.
    char_bitmap.set(static_cast<unsigned char>(str_char));
  }
  return char_bitmap;
}
183 struct InitCap :
public StringOp {
184 InitCap(
const std::optional<std::string>& var_str_optional_literal)
186 , delimiter_bitmap_(build_char_bitmap(InitCap::delimiter_chars)) {}
188 NullableStrType operator()(
const std::string& str)
const override;
191 static constexpr
char const* delimiter_chars = R
"(!?@"^#$&~_,.:;+-*%/|\[](){}<>)";
192 const std::bitset<256> delimiter_bitmap_;
195 struct Reverse :
public StringOp {
196 Reverse(
const std::optional<std::string>& var_str_optional_literal)
199 NullableStrType operator()(
const std::string& str)
const override;
202 struct Repeat :
public StringOp {
204 Repeat(
const std::optional<std::string>& var_str_optional_literal,
const int64_t
n)
206 , n_(n >= 0 ? n : 0UL) {
208 throw std::runtime_error(
"Number of repeats must be >= 0");
212 NullableStrType operator()(
const std::string& str)
const override;
218 struct Concat :
public StringOp {
219 Concat(
const std::optional<std::string>& var_str_optional_literal,
220 const std::string& str_literal,
221 const bool reverse_order)
223 var_str_optional_literal)
224 , str_literal_(str_literal)
225 , reverse_order_(reverse_order) {}
227 Concat(
const std::optional<std::string>& var_str_optional_literal)
229 , reverse_order_(
false) {}
231 NullableStrType operator()(
const std::string& str)
const override;
233 NullableStrType operator()(
const std::string& str1,
234 const std::string& str2)
const override;
236 const std::string str_literal_;
237 const bool reverse_order_;
240 struct Pad :
public StringOp {
242 enum class PadMode {
LEFT, RIGHT };
244 Pad(
const std::optional<std::string>& var_str_optional_literal,
246 const int64_t padded_length,
247 const std::string& padding_string)
248 : StringOp(op_kind, var_str_optional_literal)
249 , pad_mode_(Pad::op_kind_to_pad_mode(op_kind))
250 , padded_length_(static_cast<size_t>(padded_length))
251 , padding_string_(padding_string.empty() ?
" " : padding_string)
252 , padding_string_length_(padding_string.size())
253 , padding_char_(padding_string.empty() ?
' ' : padding_string[0]) {}
255 NullableStrType operator()(
const std::string& str)
const override;
258 std::string lpad(
const std::string& str)
const;
260 std::string rpad(
const std::string& str)
const;
264 const PadMode pad_mode_;
265 const size_t padded_length_;
266 const std::string padding_string_;
267 const size_t padding_string_length_;
268 const char padding_char_;
271 struct Trim :
public StringOp {
273 enum class TrimMode {
LEFT, RIGHT, BOTH };
275 Trim(
const std::optional<std::string>& var_str_optional_literal,
277 const std::string& trim_chars)
278 : StringOp(op_kind, var_str_optional_literal)
279 , trim_mode_(Trim::op_kind_to_trim_mode(op_kind))
280 , trim_char_bitmap_(build_char_bitmap(trim_chars.empty() ?
" " : trim_chars)) {}
282 NullableStrType operator()(
const std::string& str)
const override;
287 const TrimMode trim_mode_;
288 const std::bitset<256> trim_char_bitmap_;
291 struct Substring :
public StringOp {
297 Substring(
const std::optional<std::string>& var_str_optional_literal,
300 , start_(start > 0 ? start - 1 : start)
301 , length_(std::string::npos) {}
307 Substring(
const std::optional<std::string>& var_str_optional_literal,
309 const int64_t length)
311 , start_(start > 0 ? start - 1 : start)
312 , length_(static_cast<size_t>(length >= 0 ? length : 0)) {}
314 NullableStrType operator()(
const std::string& str)
const override;
317 const int64_t start_;
318 const size_t length_;
321 struct Overlay :
public StringOp {
322 Overlay(
const std::optional<std::string>& var_str_optional_literal,
323 const std::string& insert_str,
326 , insert_str_(insert_str)
327 , start_(start > 0 ? start - 1 : start)
328 , replacement_length_(insert_str_.size()) {}
330 Overlay(
const std::optional<std::string>& var_str_optional_literal,
331 const std::string& insert_str,
333 const int64_t replacement_length)
335 , insert_str_(insert_str)
336 , start_(start > 0 ? start - 1 : start)
337 , replacement_length_(
338 static_cast<size_t>(replacement_length >= 0 ? replacement_length : 0)) {}
340 NullableStrType operator()(
const std::string& base_str)
const override;
343 const std::string insert_str_;
344 const int64_t start_;
345 const size_t replacement_length_;
348 struct Replace :
public StringOp {
349 Replace(
const std::optional<std::string>& var_str_optional_literal,
350 const std::string& pattern_str,
351 const std::string& replacement_str)
353 , pattern_str_(pattern_str)
354 , replacement_str_(replacement_str)
355 , pattern_str_len_(pattern_str.size())
356 , replacement_str_len_(replacement_str.size()) {}
358 NullableStrType operator()(
const std::string& str)
const override;
360 const std::string pattern_str_;
361 const std::string replacement_str_;
362 const size_t pattern_str_len_;
363 const size_t replacement_str_len_;
366 struct SplitPart :
public StringOp {
367 SplitPart(
const std::optional<std::string>& var_str_optional_literal,
368 const std::string& delimiter,
369 const int64_t split_part)
371 , delimiter_(delimiter)
372 , split_part_(split_part == 0 ? 1UL : std::abs(split_part))
373 , delimiter_length_(delimiter.size())
374 , reverse_(split_part < 0) {}
376 NullableStrType operator()(
const std::string& str)
const override;
380 const std::string delimiter_;
381 const size_t split_part_;
382 const size_t delimiter_length_;
386 struct RegexpSubstr :
public StringOp {
388 RegexpSubstr(
const std::optional<std::string>& var_str_optional_literal,
389 const std::string& regex_pattern,
390 const int64_t start_pos,
391 const int64_t occurrence,
392 const std::string& regex_params,
393 const int64_t sub_match_group_idx)
395 , regex_pattern_str_(
398 StringOp::generateRegex(
"REGEXP_SUBSTR", regex_pattern, regex_params,
true))
399 , start_pos_(start_pos > 0 ? start_pos - 1 : start_pos)
400 , occurrence_(occurrence > 0 ? occurrence - 1 : occurrence)
401 , sub_match_info_(set_sub_match_info(regex_params, sub_match_group_idx)) {}
403 NullableStrType operator()(
const std::string& str)
const override;
406 static std::string get_sub_match(
const boost::smatch& match,
407 const std::pair<bool, int64_t> sub_match_info);
409 static std::pair<bool, int64_t> set_sub_match_info(
const std::string& regex_pattern,
410 const int64_t sub_match_group_idx);
412 const std::string regex_pattern_str_;
413 const boost::regex regex_pattern_;
414 const int64_t start_pos_;
415 const int64_t occurrence_;
416 const std::pair<bool, int64_t> sub_match_info_;
419 struct RegexpReplace :
public StringOp {
421 RegexpReplace(
const std::optional<std::string>& var_str_optional_literal,
422 const std::string& regex_pattern,
423 const std::string& replacement,
424 const int64_t start_pos,
425 const int64_t occurrence,
426 const std::string& regex_params)
428 , regex_pattern_str_(
431 StringOp::generateRegex(
"REGEXP_REPLACE", regex_pattern, regex_params,
false))
432 , replacement_(replacement)
433 , start_pos_(start_pos > 0 ? start_pos - 1 : start_pos)
434 , occurrence_(occurrence) {}
436 NullableStrType operator()(
const std::string& str)
const override;
439 static std::pair<size_t, size_t> get_nth_regex_match(
const std::string& str,
440 const size_t start_pos,
441 const boost::regex& regex_pattern,
442 const int64_t occurrence);
444 const std::string regex_pattern_str_;
445 const boost::regex regex_pattern_;
446 const std::string replacement_;
447 const int64_t start_pos_;
448 const int64_t occurrence_;
469 struct JsonValue :
public StringOp {
471 JsonValue(
const std::optional<std::string>& var_str_optional_literal,
472 const std::string& json_path)
474 , json_parse_mode_(parse_json_parse_mode(json_path))
475 , json_keys_(parse_json_path(json_path)) {}
477 NullableStrType operator()(
const std::string& str)
const override;
480 enum class JsonKeyKind { JSON_OBJECT, JSON_ARRAY };
481 enum class JsonParseMode { PARSE_MODE_LAX, PARSE_MODE_STRICT };
484 JsonKeyKind key_kind;
485 std::string object_key;
489 JsonKey(
const std::string& object_key)
490 : key_kind(JsonKeyKind::JSON_OBJECT), object_key(object_key) {}
491 JsonKey(
const size_t array_key)
492 : key_kind(JsonKeyKind::JSON_ARRAY), array_key(array_key) {}
495 static JsonParseMode parse_json_parse_mode(std::string_view json_path);
496 static std::vector<JsonKey> parse_json_path(
const std::string& json_path);
497 inline NullableStrType handle_parse_error(
const std::string&
json_str)
const {
498 if (json_parse_mode_ == JsonParseMode::PARSE_MODE_LAX) {
499 return NullableStrType();
501 throw std::runtime_error(
"Could not parse: " + json_str +
".");
505 inline NullableStrType handle_key_error(
const std::string& json_str)
const {
506 if (json_parse_mode_ == JsonParseMode::PARSE_MODE_LAX) {
507 return NullableStrType();
509 throw std::runtime_error(
"Key not found or did not contain value in: " + json_str +
513 static constexpr
bool allow_strict_json_parsing{
false};
514 const JsonParseMode json_parse_mode_;
516 const std::vector<JsonKey> json_keys_;
519 struct Base64Encode :
public StringOp {
520 Base64Encode(
const std::optional<std::string>& var_str_optional_literal)
523 NullableStrType operator()(
const std::string& str)
const override;
526 struct Base64Decode :
public StringOp {
527 Base64Decode(
const std::optional<std::string>& var_str_optional_literal)
530 NullableStrType operator()(
const std::string& str)
const override;
533 struct NullOp :
public StringOp {
534 NullOp(
const std::optional<std::string>& var_str_optional_literal,
538 NullableStrType operator()(
const std::string& str)
const override {
539 return NullableStrType();
545 std::unique_ptr<const StringOp>
gen_string_op(
const StringOpInfo& string_op_info);
548 const StringOpInfo& string_op_info);
554 StringOps() : string_ops_(genStringOpsFromOpInfos({})), num_ops_(0UL) {}
556 StringOps(
const std::vector<StringOpInfo>& string_op_infos)
557 : string_ops_(genStringOpsFromOpInfos(string_op_infos))
558 , num_ops_(string_op_infos.size()) {}
560 std::string operator()(
const std::string& str)
const;
562 std::string multi_input_eval(
const std::string_view str1,
563 const std::string_view str2)
const;
565 std::string_view operator()(
const std::string_view sv, std::string& sv_storage)
const;
567 Datum numericEval(
const std::string_view str)
const;
569 size_t size()
const {
return num_ops_; }
572 std::vector<std::unique_ptr<const StringOp>> genStringOpsFromOpInfos(
573 const std::vector<StringOpInfo>& string_op_infos)
const;
575 const std::vector<std::unique_ptr<const StringOp>> string_ops_;
576 const size_t num_ops_;
Datum apply_numeric_op_to_literals(const StringOpInfo &string_op_info)
const std::string json_str(const rapidjson::Value &obj) noexcept
Constants for Builtin SQL Types supported by HEAVY.AI.
CONSTEXPR DEVICE bool is_null(const T &value)
std::pair< std::string, bool > apply_string_op_to_literals(const StringOpInfo &string_op_info)
bool g_enable_smem_group_by true
Datum NullDatum(const SQLTypeInfo &ti)
bool g_enable_watchdog false
Common Enum definitions for SQL processing.
std::unique_ptr< const StringOp > gen_string_op(const StringOpInfo &string_op_info)