OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringLike.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #include "StringLike.h"
25 
// Outcome of a single LIKE/ILIKE matching attempt, as produced by
// string_like_match() and consumed by string_like()/string_ilike().
enum LikeStatus {
  kLIKE_TRUE,   // the string matches the pattern
  kLIKE_FALSE,  // the string does not match the pattern
  kLIKE_ABORT,  // means we run out of string characters to match against pattern, can
                // abort early
  kLIKE_ERROR   // error condition
};
33 
// Folds an ASCII uppercase letter ('A'..'Z') to its lowercase counterpart.
// Any other character value is returned unchanged, widened to int.
DEVICE static int inline lowercase(char c) {
  return (c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c;
}
40 
// Case-sensitive substring search: reports whether the pat_len bytes of
// `pattern` occur contiguously anywhere inside the str_len bytes of `str`.
// No wildcard or escape handling is performed. A zero-length pattern is found
// in every string, including the empty one.
extern "C" RUNTIME_EXPORT DEVICE bool string_like_simple(const char* str,
                                                         const int32_t str_len,
                                                         const char* pattern,
                                                         const int32_t pat_len) {
  // Number of start offsets at which the whole pattern still fits inside str.
  const int num_offsets = str_len - pat_len + 1;
  for (int offset = 0; offset < num_offsets; ++offset) {
    int matched = 0;
    while (matched < pat_len && pattern[matched] == str[offset + matched]) {
      ++matched;
    }
    if (matched >= pat_len) {
      return true;
    }
  }
  return false;
}
56 
57 extern "C" RUNTIME_EXPORT DEVICE bool string_ilike_simple(const char* str,
58  const int32_t str_len,
59  const char* pattern,
60  const int32_t pat_len) {
61  int i, j;
62  int search_len = str_len - pat_len + 1;
63  for (i = 0; i < search_len; ++i) {
64  for (j = 0; j < pat_len && pattern[j] == lowercase(str[j + i]); ++j) {
65  }
66  if (j >= pat_len) {
67  return true;
68  }
69  }
70  return false;
71 }
72 
73 #define STR_LIKE_SIMPLE_NULLABLE(base_func) \
74  extern "C" RUNTIME_EXPORT DEVICE int8_t base_func##_nullable(const char* lhs, \
75  const int32_t lhs_len, \
76  const char* rhs, \
77  const int32_t rhs_len, \
78  const int8_t bool_null) { \
79  if (!lhs || !rhs) { \
80  return bool_null; \
81  } \
82  return base_func(lhs, lhs_len, rhs, rhs_len) ? 1 : 0; \
83  }
84 
87 
88 #undef STR_LIKE_SIMPLE_NULLABLE
89 
90 // internal recursive function for performing LIKE matching.
91 // when is_ilike is true, pattern is assumed to be already converted to all lowercase
92 DEVICE static LikeStatus string_like_match(const char* str,
93  const int32_t str_len,
94  const char* pattern,
95  const int32_t pat_len,
96  const char escape_char,
97  const bool is_ilike) {
98  const char* s = str;
99  int slen = str_len;
100  const char* p = pattern;
101  int plen = pat_len;
102 
103  while (slen > 0 && plen > 0) {
104  if (*p == escape_char) {
105  // next pattern char must match literally, whatever it is
106  p++;
107  plen--;
108  if (plen <= 0) {
109  return kLIKE_ERROR;
110  }
111  if ((!is_ilike && *s != *p) || (is_ilike && lowercase(*s) != *p)) {
112  return kLIKE_FALSE;
113  }
114  } else if (*p == '%') {
115  char firstpat;
116  p++;
117  plen--;
118  while (plen > 0) {
119  if (*p == '%') {
120  p++;
121  plen--;
122  } else if (*p == '_') {
123  if (slen <= 0) {
124  return kLIKE_ABORT;
125  }
126  s++;
127  slen--;
128  p++;
129  plen--;
130  } else {
131  break;
132  }
133  }
134  if (plen <= 0) {
135  return kLIKE_TRUE;
136  }
137  if (*p == escape_char) {
138  if (plen < 2) {
139  return kLIKE_ERROR;
140  }
141  firstpat = p[1];
142  } else {
143  firstpat = *p;
144  }
145 
146  while (slen > 0) {
147  bool match = false;
148  if (firstpat == '[' && *p != escape_char) {
149  const char* pp = p + 1;
150  int pplen = plen - 1;
151  while (pplen > 0 && *pp != ']') {
152  if ((!is_ilike && *s == *pp) || (is_ilike && lowercase(*s) == *pp)) {
153  match = true;
154  break;
155  }
156  pp++;
157  pplen--;
158  }
159  if (pplen <= 0) {
160  return kLIKE_ERROR; // malformed
161  }
162  } else if ((!is_ilike && *s == firstpat) ||
163  (is_ilike && lowercase(*s) == firstpat)) {
164  match = true;
165  }
166  if (match) {
167  LikeStatus status = string_like_match(s, slen, p, plen, escape_char, is_ilike);
168  if (status != kLIKE_FALSE) {
169  return status;
170  }
171  }
172  s++;
173  slen--;
174  }
175  return kLIKE_ABORT;
176  } else if (*p == '_') {
177  s++;
178  slen--;
179  p++;
180  plen--;
181  continue;
182  } else if (*p == '[') {
183  const char* pp = p + 1;
184  int pplen = plen - 1;
185  bool match = false;
186  while (pplen > 0 && *pp != ']') {
187  if ((!is_ilike && *s == *pp) || (is_ilike && lowercase(*s) == *pp)) {
188  match = true;
189  break;
190  }
191  pp++;
192  pplen--;
193  }
194  if (match) {
195  s++;
196  slen--;
197  pplen--;
198  const char* x;
199  for (x = pp + 1; *x != ']' && pplen > 0; x++, pplen--) {
200  ;
201  }
202  if (pplen <= 0) {
203  return kLIKE_ERROR; // malformed
204  }
205  plen -= (x - p + 1);
206  p = x + 1;
207  continue;
208  } else {
209  return kLIKE_FALSE;
210  }
211  } else if ((!is_ilike && *s != *p) || (is_ilike && lowercase(*s) != *p)) {
212  return kLIKE_FALSE;
213  }
214  s++;
215  slen--;
216  p++;
217  plen--;
218  }
219  if (slen > 0) {
220  return kLIKE_FALSE;
221  }
222  while (plen > 0 && *p == '%') {
223  p++;
224  plen--;
225  }
226  if (plen <= 0) {
227  return kLIKE_TRUE;
228  }
229  return kLIKE_ABORT;
230 }
231 
232 /*
233  * @brief string_like performs the SQL LIKE and ILIKE operation
234  * @param str string argument to be matched against pattern. single-byte
235  * character set only for now. null-termination not required.
236  * @param str_len length of str
237  * @param pattern pattern string for SQL LIKE
238  * @param pat_len length of pattern
239  * @param escape_char the escape character. '\\' is expected by default.
240  * @param is_ilike true if it is ILIKE, i.e., case-insensitive matching
241  * @return true if str matchs pattern, false otherwise. error condition
242  * not handled for now.
243  */
244 extern "C" RUNTIME_EXPORT DEVICE bool string_like(const char* str,
245  const int32_t str_len,
246  const char* pattern,
247  const int32_t pat_len,
248  const char escape_char) {
249  // @TODO(wei/alex) add runtime error handling
250  LikeStatus status =
251  string_like_match(str, str_len, pattern, pat_len, escape_char, false);
252  return status == kLIKE_TRUE;
253 }
254 
255 extern "C" RUNTIME_EXPORT DEVICE bool string_ilike(const char* str,
256  const int32_t str_len,
257  const char* pattern,
258  const int32_t pat_len,
259  const char escape_char) {
260  // @TODO(wei/alex) add runtime error handling
261  LikeStatus status =
262  string_like_match(str, str_len, pattern, pat_len, escape_char, true);
263  return status == kLIKE_TRUE;
264 }
265 
// Lexicographic comparison of two length-delimited byte strings (neither has
// to be null-terminated). Bytes are ordered by their unsigned value.
//
// Returns 0 when both strings have the same length and content, a negative
// value when s1 orders before s2 and a positive value when s1 orders after s2.
// At the first differing position the result is the difference of the two
// bytes; a string that ended counts as byte 0 there. A string that is a
// proper prefix of the other always orders first, even when the longer string
// continues with a NUL byte (in which case the result is -1 or 1).
extern "C" RUNTIME_EXPORT DEVICE int32_t StringCompare(const char* s1,
                                                       const int32_t s1_len,
                                                       const char* s2,
                                                       const int32_t s2_len) {
  const char* s1_ = s1;
  const char* s2_ = s2;
  const char* const s1_end = s1 + s1_len;
  const char* const s2_end = s2 + s2_len;

  // Skip the common prefix.
  while (s1_ < s1_end && s2_ < s2_end && *s1_ == *s2_) {
    s1_++;
    s2_++;
  }

  const bool s1_done = s1_ >= s1_end;
  const bool s2_done = s2_ >= s2_end;
  if (s1_done && s2_done) {
    return 0;
  }

  const unsigned char c1 = s1_done ? 0 : *(const unsigned char*)s1_;
  const unsigned char c2 = s2_done ? 0 : *(const unsigned char*)s2_;
  if (c1 != c2) {
    return c1 - c2;
  }
  // Equal bytes here can only mean that exactly one string has ended and the
  // other continues with a NUL byte; the shorter string must still order first.
  return s1_done ? -1 : 1;
}
283 
284 #define STR_LIKE_NULLABLE(base_func) \
285  extern "C" RUNTIME_EXPORT DEVICE int8_t base_func##_nullable(const char* lhs, \
286  const int32_t lhs_len, \
287  const char* rhs, \
288  const int32_t rhs_len, \
289  const char escape_char, \
290  const int8_t bool_null) { \
291  if (!lhs || !rhs) { \
292  return bool_null; \
293  } \
294  return base_func(lhs, lhs_len, rhs, rhs_len, escape_char) ? 1 : 0; \
295  }
296 
299 
300 #undef STR_LIKE_NULLABLE
301 
302 extern "C" RUNTIME_EXPORT DEVICE bool string_lt(const char* lhs,
303  const int32_t lhs_len,
304  const char* rhs,
305  const int32_t rhs_len) {
306  return StringCompare(lhs, lhs_len, rhs, rhs_len) < 0;
307 }
308 
309 extern "C" RUNTIME_EXPORT DEVICE bool string_le(const char* lhs,
310  const int32_t lhs_len,
311  const char* rhs,
312  const int32_t rhs_len) {
313  return StringCompare(lhs, lhs_len, rhs, rhs_len) <= 0;
314 }
315 
316 extern "C" RUNTIME_EXPORT DEVICE bool string_gt(const char* lhs,
317  const int32_t lhs_len,
318  const char* rhs,
319  const int32_t rhs_len) {
320  return StringCompare(lhs, lhs_len, rhs, rhs_len) > 0;
321 }
322 
323 extern "C" RUNTIME_EXPORT DEVICE bool string_ge(const char* lhs,
324  const int32_t lhs_len,
325  const char* rhs,
326  const int32_t rhs_len) {
327  return StringCompare(lhs, lhs_len, rhs, rhs_len) >= 0;
328 }
329 
330 extern "C" RUNTIME_EXPORT DEVICE bool string_eq(const char* lhs,
331  const int32_t lhs_len,
332  const char* rhs,
333  const int32_t rhs_len) {
334  return StringCompare(lhs, lhs_len, rhs, rhs_len) == 0;
335 }
336 
337 extern "C" RUNTIME_EXPORT DEVICE bool string_ne(const char* lhs,
338  const int32_t lhs_len,
339  const char* rhs,
340  const int32_t rhs_len) {
341  return StringCompare(lhs, lhs_len, rhs, rhs_len) != 0;
342 }
343 
344 #define STR_CMP_NULLABLE(base_func) \
345  extern "C" RUNTIME_EXPORT DEVICE int8_t base_func##_nullable(const char* lhs, \
346  const int32_t lhs_len, \
347  const char* rhs, \
348  const int32_t rhs_len, \
349  const int8_t bool_null) { \
350  if (!lhs || !rhs) { \
351  return bool_null; \
352  } \
353  return base_func(lhs, lhs_len, rhs, rhs_len) ? 1 : 0; \
354  }
355 
362 
363 #undef STR_CMP_NULLABLE
RUNTIME_EXPORT DEVICE int32_t StringCompare(const char *s1, const int32_t s1_len, const char *s2, const int32_t s2_len)
Definition: StringLike.cpp:266
RUNTIME_EXPORT DEVICE bool string_eq(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:330
RUNTIME_EXPORT DEVICE bool string_gt(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:316
RUNTIME_EXPORT DEVICE bool string_le(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:309
RUNTIME_EXPORT DEVICE bool string_ge(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:323
RUNTIME_EXPORT DEVICE bool string_lt(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:302
#define DEVICE
#define STR_LIKE_NULLABLE(base_func)
Definition: StringLike.cpp:284
#define STR_LIKE_SIMPLE_NULLABLE(base_func)
Definition: StringLike.cpp:73
static DEVICE LikeStatus string_like_match(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char, const bool is_ilike)
Definition: StringLike.cpp:92
RUNTIME_EXPORT DEVICE bool string_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:244
static DEVICE int lowercase(char c)
Definition: StringLike.cpp:34
RUNTIME_EXPORT DEVICE bool string_like_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:41
Functions to support the LIKE and ILIKE operator in SQL. Only single-byte character set is supported ...
#define RUNTIME_EXPORT
RUNTIME_EXPORT DEVICE bool string_ilike_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:57
LikeStatus
Definition: StringLike.cpp:26
#define STR_CMP_NULLABLE(base_func)
Definition: StringLike.cpp:344
RUNTIME_EXPORT DEVICE bool string_ne(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:337
RUNTIME_EXPORT DEVICE bool string_ilike(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:255