OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StringLike.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
26 #include "StringLike.h"
27 
// Outcome of one string_like_match() attempt.
enum LikeStatus {
  kLIKE_TRUE,   // the string matched the pattern
  kLIKE_FALSE,  // mismatch at the current position
  kLIKE_ABORT,  // means we run out of string characters to match against pattern, can
                // abort early
  kLIKE_ERROR   // error condition
};
35 
36 DEVICE static int inline lowercase(char c) {
37  if ('A' <= c && c <= 'Z') {
38  return 'a' + (c - 'A');
39  }
40  return c;
41 }
42 
43 extern "C" DEVICE bool string_like_simple(const char* str,
44  const int32_t str_len,
45  const char* pattern,
46  const int32_t pat_len) {
47  int i, j;
48  int search_len = str_len - pat_len + 1;
49  for (i = 0; i < search_len; ++i) {
50  for (j = 0; j < pat_len && pattern[j] == str[j + i]; ++j) {
51  }
52  if (j >= pat_len) {
53  return true;
54  }
55  }
56  return false;
57 }
58 
59 extern "C" DEVICE bool string_ilike_simple(const char* str,
60  const int32_t str_len,
61  const char* pattern,
62  const int32_t pat_len) {
63  int i, j;
64  int search_len = str_len - pat_len + 1;
65  for (i = 0; i < search_len; ++i) {
66  for (j = 0; j < pat_len && pattern[j] == lowercase(str[j + i]); ++j) {
67  }
68  if (j >= pat_len) {
69  return true;
70  }
71  }
72  return false;
73 }
74 
75 #define STR_LIKE_SIMPLE_NULLABLE(base_func) \
76  extern "C" DEVICE int8_t base_func##_nullable(const char* lhs, \
77  const int32_t lhs_len, \
78  const char* rhs, \
79  const int32_t rhs_len, \
80  const int8_t bool_null) { \
81  if (!lhs || !rhs) { \
82  return bool_null; \
83  } \
84  return base_func(lhs, lhs_len, rhs, rhs_len) ? 1 : 0; \
85  }
86 
89 
90 #undef STR_LIKE_SIMPLE_NULLABLE
91 
92 // internal recursive function for performing LIKE matching.
93 // when is_ilike is true, pattern is assumed to be already converted to all lowercase
94 DEVICE static LikeStatus string_like_match(const char* str,
95  const int32_t str_len,
96  const char* pattern,
97  const int32_t pat_len,
98  const char escape_char,
99  const bool is_ilike) {
100  const char* s = str;
101  int slen = str_len;
102  const char* p = pattern;
103  int plen = pat_len;
104 
105  while (slen > 0 && plen > 0) {
106  if (*p == escape_char) {
107  // next pattern char must match literally, whatever it is
108  p++;
109  plen--;
110  if (plen <= 0) {
111  return kLIKE_ERROR;
112  }
113  if ((!is_ilike && *s != *p) || (is_ilike && lowercase(*s) != *p)) {
114  return kLIKE_FALSE;
115  }
116  } else if (*p == '%') {
117  char firstpat;
118  p++;
119  plen--;
120  while (plen > 0) {
121  if (*p == '%') {
122  p++;
123  plen--;
124  } else if (*p == '_') {
125  if (slen <= 0) {
126  return kLIKE_ABORT;
127  }
128  s++;
129  slen--;
130  p++;
131  plen--;
132  } else {
133  break;
134  }
135  }
136  if (plen <= 0) {
137  return kLIKE_TRUE;
138  }
139  if (*p == escape_char) {
140  if (plen < 2) {
141  return kLIKE_ERROR;
142  }
143  firstpat = p[1];
144  } else {
145  firstpat = *p;
146  }
147 
148  while (slen > 0) {
149  bool match = false;
150  if (firstpat == '[' && *p != escape_char) {
151  const char* pp = p + 1;
152  int pplen = plen - 1;
153  while (pplen > 0 && *pp != ']') {
154  if ((!is_ilike && *s == *pp) || (is_ilike && lowercase(*s) == *pp)) {
155  match = true;
156  break;
157  }
158  pp++;
159  pplen--;
160  }
161  if (pplen <= 0) {
162  return kLIKE_ERROR; // malformed
163  }
164  } else if ((!is_ilike && *s == firstpat) ||
165  (is_ilike && lowercase(*s) == firstpat)) {
166  match = true;
167  }
168  if (match) {
169  LikeStatus status = string_like_match(s, slen, p, plen, escape_char, is_ilike);
170  if (status != kLIKE_FALSE) {
171  return status;
172  }
173  }
174  s++;
175  slen--;
176  }
177  return kLIKE_ABORT;
178  } else if (*p == '_') {
179  s++;
180  slen--;
181  p++;
182  plen--;
183  continue;
184  } else if (*p == '[') {
185  const char* pp = p + 1;
186  int pplen = plen - 1;
187  bool match = false;
188  while (pplen > 0 && *pp != ']') {
189  if ((!is_ilike && *s == *pp) || (is_ilike && lowercase(*s) == *pp)) {
190  match = true;
191  break;
192  }
193  pp++;
194  pplen--;
195  }
196  if (match) {
197  s++;
198  slen--;
199  pplen--;
200  const char* x;
201  for (x = pp + 1; *x != ']' && pplen > 0; x++, pplen--) {
202  ;
203  }
204  if (pplen <= 0) {
205  return kLIKE_ERROR; // malformed
206  }
207  plen -= (x - p + 1);
208  p = x + 1;
209  continue;
210  } else {
211  return kLIKE_FALSE;
212  }
213  } else if ((!is_ilike && *s != *p) || (is_ilike && lowercase(*s) != *p)) {
214  return kLIKE_FALSE;
215  }
216  s++;
217  slen--;
218  p++;
219  plen--;
220  }
221  if (slen > 0) {
222  return kLIKE_FALSE;
223  }
224  while (plen > 0 && *p == '%') {
225  p++;
226  plen--;
227  }
228  if (plen <= 0) {
229  return kLIKE_TRUE;
230  }
231  return kLIKE_ABORT;
232 }
233 
234 /*
235  * @brief string_like performs the SQL LIKE and ILIKE operation
236  * @param str string argument to be matched against pattern. single-byte
237  * character set only for now. null-termination not required.
238  * @param str_len length of str
239  * @param pattern pattern string for SQL LIKE
240  * @param pat_len length of pattern
241  * @param escape_char the escape character. '\\' is expected by default.
242  * @param is_ilike true if it is ILIKE, i.e., case-insensitive matching
243  * @return true if str matchs pattern, false otherwise. error condition
244  * not handled for now.
245  */
246 extern "C" DEVICE bool string_like(const char* str,
247  const int32_t str_len,
248  const char* pattern,
249  const int32_t pat_len,
250  const char escape_char) {
251  // @TODO(wei/alex) add runtime error handling
252  LikeStatus status =
253  string_like_match(str, str_len, pattern, pat_len, escape_char, false);
254  return status == kLIKE_TRUE;
255 }
256 
257 extern "C" DEVICE bool string_ilike(const char* str,
258  const int32_t str_len,
259  const char* pattern,
260  const int32_t pat_len,
261  const char escape_char) {
262  // @TODO(wei/alex) add runtime error handling
263  LikeStatus status =
264  string_like_match(str, str_len, pattern, pat_len, escape_char, true);
265  return status == kLIKE_TRUE;
266 }
267 
268 extern "C" DEVICE int32_t StringCompare(const char* s1,
269  const int32_t s1_len,
270  const char* s2,
271  const int32_t s2_len) {
272  const char* s1_ = s1;
273  const char* s2_ = s2;
274 
275  while (s1_ < s1 + s1_len && s2_ < s2 + s2_len && *s1_ == *s2_) {
276  s1_++;
277  s2_++;
278  }
279 
280  unsigned char c1 = (s1_ < s1 + s1_len) ? (*(unsigned char*)s1_) : 0;
281  unsigned char c2 = (s2_ < s2 + s2_len) ? (*(unsigned char*)s2_) : 0;
282 
283  return c1 - c2;
284 }
285 
286 #define STR_LIKE_NULLABLE(base_func) \
287  extern "C" DEVICE int8_t base_func##_nullable(const char* lhs, \
288  const int32_t lhs_len, \
289  const char* rhs, \
290  const int32_t rhs_len, \
291  const char escape_char, \
292  const int8_t bool_null) { \
293  if (!lhs || !rhs) { \
294  return bool_null; \
295  } \
296  return base_func(lhs, lhs_len, rhs, rhs_len, escape_char) ? 1 : 0; \
297  }
298 
301 
302 #undef STR_LIKE_NULLABLE
303 
304 extern "C" DEVICE bool string_lt(const char* lhs,
305  const int32_t lhs_len,
306  const char* rhs,
307  const int32_t rhs_len) {
308  return StringCompare(lhs, lhs_len, rhs, rhs_len) < 0;
309 }
310 
311 extern "C" DEVICE bool string_le(const char* lhs,
312  const int32_t lhs_len,
313  const char* rhs,
314  const int32_t rhs_len) {
315  return StringCompare(lhs, lhs_len, rhs, rhs_len) <= 0;
316 }
317 
318 extern "C" DEVICE bool string_gt(const char* lhs,
319  const int32_t lhs_len,
320  const char* rhs,
321  const int32_t rhs_len) {
322  return StringCompare(lhs, lhs_len, rhs, rhs_len) > 0;
323 }
324 
325 extern "C" DEVICE bool string_ge(const char* lhs,
326  const int32_t lhs_len,
327  const char* rhs,
328  const int32_t rhs_len) {
329  return StringCompare(lhs, lhs_len, rhs, rhs_len) >= 0;
330 }
331 
332 extern "C" DEVICE bool string_eq(const char* lhs,
333  const int32_t lhs_len,
334  const char* rhs,
335  const int32_t rhs_len) {
336  return StringCompare(lhs, lhs_len, rhs, rhs_len) == 0;
337 }
338 
339 extern "C" DEVICE bool string_ne(const char* lhs,
340  const int32_t lhs_len,
341  const char* rhs,
342  const int32_t rhs_len) {
343  return StringCompare(lhs, lhs_len, rhs, rhs_len) != 0;
344 }
345 
346 #define STR_CMP_NULLABLE(base_func) \
347  extern "C" DEVICE int8_t base_func##_nullable(const char* lhs, \
348  const int32_t lhs_len, \
349  const char* rhs, \
350  const int32_t rhs_len, \
351  const int8_t bool_null) { \
352  if (!lhs || !rhs) { \
353  return bool_null; \
354  } \
355  return base_func(lhs, lhs_len, rhs, rhs_len) ? 1 : 0; \
356  }
357 
364 
365 #undef STR_CMP_NULLABLE
DEVICE bool string_ge(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:325
DEVICE bool string_gt(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:318
DEVICE bool string_eq(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:332
#define DEVICE
DEVICE bool string_le(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:311
DEVICE bool string_lt(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:304
#define STR_LIKE_NULLABLE(base_func)
Definition: StringLike.cpp:286
#define STR_LIKE_SIMPLE_NULLABLE(base_func)
Definition: StringLike.cpp:75
DEVICE bool string_ilike(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:257
static DEVICE LikeStatus string_like_match(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char, const bool is_ilike)
Definition: StringLike.cpp:94
static DEVICE int lowercase(char c)
Definition: StringLike.cpp:36
DEVICE bool string_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:246
DEVICE bool string_ilike_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:59
Functions to support the LIKE and ILIKE operator in SQL. Only single-byte character set is supported ...
LikeStatus
Definition: StringLike.cpp:28
DEVICE int32_t StringCompare(const char *s1, const int32_t s1_len, const char *s2, const int32_t s2_len)
Definition: StringLike.cpp:268
#define STR_CMP_NULLABLE(base_func)
Definition: StringLike.cpp:346
DEVICE bool string_like_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len)
Definition: StringLike.cpp:43
DEVICE bool string_ne(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:339