OmniSciDB  b28c0d5765
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ArrayOps.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include <cstdint>
24 #include "../Shared/funcannotations.h"
25 #include "../Utils/ChunkIter.h"
26 #include "TypePunning.h"
27 
28 #ifdef EXECUTE_INCLUDE
29 
30 extern "C" DEVICE RUNTIME_EXPORT int32_t array_size(int8_t* chunk_iter_,
31  const uint64_t row_pos,
32  const uint32_t elem_log_sz) {
33  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
34  ArrayDatum ad;
35  bool is_end;
36  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
37  return ad.is_null ? 0 : ad.length >> elem_log_sz;
38 }
39 
40 extern "C" DEVICE RUNTIME_EXPORT int32_t array_size_nullable(int8_t* chunk_iter_,
41  const uint64_t row_pos,
42  const uint32_t elem_log_sz,
43  const int32_t null_val) {
44  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
45  ArrayDatum ad;
46  bool is_end;
47  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
48  return ad.is_null ? null_val : ad.length >> elem_log_sz;
49 }
50 
51 extern "C" DEVICE RUNTIME_EXPORT int32_t array_size_1_nullable(int8_t* chunk_iter_,
52  const uint64_t row_pos,
53  const int32_t null_val) {
54  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
55  ArrayDatum ad;
56  bool is_end;
57  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
58  return ad.is_null ? null_val : 1;
59 }
60 
61 extern "C" DEVICE RUNTIME_EXPORT bool array_is_null(int8_t* chunk_iter_,
62  const uint64_t row_pos) {
63  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
64  ArrayDatum ad;
65  bool is_end;
66  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
67  return ad.is_null;
68 }
69 
70 extern "C" DEVICE RUNTIME_EXPORT bool point_coord_array_is_null(int8_t* chunk_iter_,
71  const uint64_t row_pos) {
72  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
73  ArrayDatum ad;
74  bool is_end;
75  ChunkIter_get_nth_point_coords(chunk_iter, row_pos, &ad, &is_end);
76  return ad.is_null;
77 }
78 
79 #define ARRAY_AT(type) \
80  extern "C" DEVICE RUNTIME_EXPORT type array_at_##type( \
81  int8_t* chunk_iter_, const uint64_t row_pos, const uint32_t elem_idx) { \
82  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
83  ArrayDatum ad; \
84  bool is_end; \
85  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
86  return reinterpret_cast<type*>(ad.pointer)[elem_idx]; \
87  }
88 
89 ARRAY_AT(int8_t)
90 ARRAY_AT(int16_t)
91 ARRAY_AT(int32_t)
92 ARRAY_AT(int64_t)
93 ARRAY_AT(float)
94 ARRAY_AT(double)
95 
96 #undef ARRAY_AT
97 
98 #define VARLEN_ARRAY_AT(type) \
99  extern "C" DEVICE RUNTIME_EXPORT type varlen_array_at_##type( \
100  int8_t* chunk_iter_, const uint64_t row_pos, const uint32_t elem_idx) { \
101  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
102  ArrayDatum ad; \
103  bool is_end; \
104  ChunkIter_get_nth_varlen(chunk_iter, row_pos, &ad, &is_end); \
105  return reinterpret_cast<type*>(ad.pointer)[elem_idx]; \
106  }
107 
108 VARLEN_ARRAY_AT(int8_t)
109 VARLEN_ARRAY_AT(int16_t)
110 VARLEN_ARRAY_AT(int32_t)
111 VARLEN_ARRAY_AT(int64_t)
112 VARLEN_ARRAY_AT(float)
113 VARLEN_ARRAY_AT(double)
114 
115 #undef VARLEN_ARRAY_AT
116 
117 #define VARLEN_NOTNULL_ARRAY_AT(type) \
118  extern "C" DEVICE RUNTIME_EXPORT type varlen_notnull_array_at_##type( \
119  int8_t* chunk_iter_, const uint64_t row_pos, const uint32_t elem_idx) { \
120  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
121  ArrayDatum ad; \
122  bool is_end; \
123  ChunkIter_get_nth_varlen_notnull(chunk_iter, row_pos, &ad, &is_end); \
124  return reinterpret_cast<type*>(ad.pointer)[elem_idx]; \
125  }
126 
127 VARLEN_NOTNULL_ARRAY_AT(int8_t)
128 VARLEN_NOTNULL_ARRAY_AT(int16_t)
129 VARLEN_NOTNULL_ARRAY_AT(int32_t)
130 VARLEN_NOTNULL_ARRAY_AT(int64_t)
131 VARLEN_NOTNULL_ARRAY_AT(float)
132 VARLEN_NOTNULL_ARRAY_AT(double)
133 
134 #undef VARLEN_NOTNULL_ARRAY_AT
135 
// Generates array_any_<oper_name>_<type>_<needle_type>(): returns true as soon
// as one element of the array at row_pos is not equal to null_val and
// satisfies `element oper needle`; returns false otherwise (including for an
// array with no elements). Each element is converted to needle_type before
// both the null comparison and the operator comparison.
#define ARRAY_ANY(type, needle_type, oper_name, oper)                          \
  extern "C" DEVICE RUNTIME_EXPORT bool                                        \
      array_any_##oper_name##_##type##_##needle_type(                          \
          int8_t* chunk_iter_,                                                 \
          const uint64_t row_pos,                                              \
          const needle_type needle,                                            \
          const type null_val) {                                               \
    ArrayDatum datum;                                                          \
    bool at_end;                                                               \
    ChunkIter_get_nth(                                                         \
        reinterpret_cast<ChunkIter*>(chunk_iter_), row_pos, &datum, &at_end);  \
    const size_t elem_total = datum.length / sizeof(type);                     \
    for (size_t idx = 0; idx != elem_total; ++idx) {                           \
      const needle_type candidate = reinterpret_cast<type*>(datum.pointer)[idx]; \
      if (candidate == null_val) {                                             \
        continue; /* NULL elements never match */                              \
      }                                                                        \
      if (candidate oper needle) {                                             \
        return true;                                                           \
      }                                                                        \
    }                                                                          \
    return false;                                                              \
  }
155 
// Generates array_all_<oper_name>_<type>_<needle_type>(): returns false as
// soon as one element of the array at row_pos equals null_val or fails
// `element oper needle`; returns true otherwise (including for an array with
// no elements). Each element is converted to needle_type before both
// comparisons.
#define ARRAY_ALL(type, needle_type, oper_name, oper)                          \
  extern "C" DEVICE RUNTIME_EXPORT bool                                        \
      array_all_##oper_name##_##type##_##needle_type(                          \
          int8_t* chunk_iter_,                                                 \
          const uint64_t row_pos,                                              \
          const needle_type needle,                                            \
          const type null_val) {                                               \
    ArrayDatum datum;                                                          \
    bool at_end;                                                               \
    ChunkIter_get_nth(                                                         \
        reinterpret_cast<ChunkIter*>(chunk_iter_), row_pos, &datum, &at_end);  \
    const size_t elem_total = datum.length / sizeof(type);                     \
    for (size_t idx = 0; idx != elem_total; ++idx) {                           \
      const needle_type candidate = reinterpret_cast<type*>(datum.pointer)[idx]; \
      /* A NULL element or a failed comparison breaks the ALL condition. */    \
      if (candidate == null_val || !(candidate oper needle)) {                 \
        return false;                                                          \
      }                                                                        \
    }                                                                          \
    return true;                                                               \
  }
175 
176 #define ARRAY_ALL_ANY_ALL_TYPES(oper_name, oper, needle_type) \
177  ARRAY_ANY(int8_t, needle_type, oper_name, oper) \
178  ARRAY_ALL(int8_t, needle_type, oper_name, oper) \
179  ARRAY_ANY(int16_t, needle_type, oper_name, oper) \
180  ARRAY_ALL(int16_t, needle_type, oper_name, oper) \
181  ARRAY_ANY(int32_t, needle_type, oper_name, oper) \
182  ARRAY_ALL(int32_t, needle_type, oper_name, oper) \
183  ARRAY_ANY(int64_t, needle_type, oper_name, oper) \
184  ARRAY_ALL(int64_t, needle_type, oper_name, oper) \
185  ARRAY_ANY(float, needle_type, oper_name, oper) \
186  ARRAY_ALL(float, needle_type, oper_name, oper) \
187  ARRAY_ANY(double, needle_type, oper_name, oper) \
188  ARRAY_ALL(double, needle_type, oper_name, oper)
189 
190 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int8_t)
191 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int8_t)
192 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int8_t)
193 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int8_t)
194 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int8_t)
195 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int8_t)
196 
197 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int16_t)
198 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int16_t)
199 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int16_t)
200 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int16_t)
201 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int16_t)
202 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int16_t)
203 
204 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int32_t)
205 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int32_t)
206 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int32_t)
207 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int32_t)
208 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int32_t)
209 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int32_t)
210 
211 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int64_t)
212 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int64_t)
213 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int64_t)
214 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int64_t)
215 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int64_t)
216 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int64_t)
217 
218 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, float)
219 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, float)
220 ARRAY_ALL_ANY_ALL_TYPES(lt, <, float)
221 ARRAY_ALL_ANY_ALL_TYPES(le, <=, float)
222 ARRAY_ALL_ANY_ALL_TYPES(gt, >, float)
223 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, float)
224 
225 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, double)
226 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, double)
227 ARRAY_ALL_ANY_ALL_TYPES(lt, <, double)
228 ARRAY_ALL_ANY_ALL_TYPES(le, <=, double)
229 ARRAY_ALL_ANY_ALL_TYPES(gt, >, double)
230 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, double)
231 
232 #undef ARRAY_ALL_ANY_ALL_TYPES
233 #undef ARRAY_ALL
234 #undef ARRAY_ANY
235 
236 #define ARRAY_AT_CHECKED(type) \
237  extern "C" DEVICE RUNTIME_EXPORT type array_at_##type##_checked( \
238  int8_t* chunk_iter_, \
239  const uint64_t row_pos, \
240  const int64_t elem_idx, \
241  const type null_val) { \
242  if (elem_idx <= 0) { \
243  return null_val; \
244  } \
245  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
246  ArrayDatum ad; \
247  bool is_end; \
248  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
249  if (ad.is_null || static_cast<size_t>(elem_idx) > ad.length / sizeof(type)) { \
250  return null_val; \
251  } \
252  return reinterpret_cast<type*>(ad.pointer)[elem_idx - 1]; \
253  }
254 
255 ARRAY_AT_CHECKED(int8_t)
256 ARRAY_AT_CHECKED(int16_t)
257 ARRAY_AT_CHECKED(int32_t)
258 ARRAY_AT_CHECKED(int64_t)
259 ARRAY_AT_CHECKED(float)
260 ARRAY_AT_CHECKED(double)
261 
262 #undef ARRAY_AT_CHECKED
263 
264 extern "C" DEVICE RUNTIME_EXPORT int8_t* allocate_varlen_buffer(int64_t element_count,
265  int64_t element_size) {
266 #ifndef __CUDACC__
267  int8_t* varlen_buffer =
268  reinterpret_cast<int8_t*>(checked_malloc((element_count + 1) * element_size));
269  return varlen_buffer;
270 #else
271  return nullptr;
272 #endif
273 }
274 
275 extern "C" DEVICE RUNTIME_EXPORT ALWAYS_INLINE int32_t
276 fast_fixlen_array_size(int8_t* chunk_iter_, const uint32_t elem_log_sz) {
277  ChunkIter* it = reinterpret_cast<ChunkIter*>(chunk_iter_);
278  return it->skip_size >> elem_log_sz;
279 }
280 
281 extern "C" DEVICE RUNTIME_EXPORT ALWAYS_INLINE int8_t* fast_fixlen_array_buff(
282  int8_t* chunk_iter_,
283  const uint64_t row_pos) {
284  ChunkIter* it = reinterpret_cast<ChunkIter*>(chunk_iter_);
285  auto n = static_cast<int>(row_pos);
286  int8_t* current_pos = it->start_pos + n * it->skip_size;
287  return current_pos;
288 }
289 
290 extern "C" DEVICE RUNTIME_EXPORT int8_t* array_buff(int8_t* chunk_iter_,
291  const uint64_t row_pos) {
292  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
293  ArrayDatum ad;
294  bool is_end;
295  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
296  return ad.pointer;
297 }
298 
299 #ifndef __CUDACC__
300 
301 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_int8_t(const int8_t val) {
302  return val;
303 }
304 
305 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_int16_t(const int16_t val) {
306  return val;
307 }
308 
309 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_int32_t(const int32_t val) {
310  return val;
311 }
312 
313 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_int64_t(const int64_t val) {
314  return val;
315 }
316 
317 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_float(const float val) {
318  const double dval{val};
319  return *reinterpret_cast<const int64_t*>(may_alias_ptr(&dval));
320 }
321 
322 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_double(const double val) {
323  return *reinterpret_cast<const int64_t*>(may_alias_ptr(&val));
324 }
325 
326 #define COUNT_DISTINCT_ARRAY(type) \
327  extern "C" RUNTIME_EXPORT void agg_count_distinct_array_##type( \
328  int64_t* agg, int8_t* chunk_iter_, const uint64_t row_pos, const type null_val) { \
329  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
330  ArrayDatum ad; \
331  bool is_end; \
332  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
333  const size_t elem_count{ad.length / sizeof(type)}; \
334  for (size_t i = 0; i < elem_count; ++i) { \
335  const auto val = reinterpret_cast<type*>(ad.pointer)[i]; \
336  if (val != null_val) { \
337  reinterpret_cast<CountDistinctSet*>(*agg)->insert(elem_bitcast_##type(val)); \
338  } \
339  } \
340  }
341 
342 COUNT_DISTINCT_ARRAY(int8_t)
343 COUNT_DISTINCT_ARRAY(int16_t)
344 COUNT_DISTINCT_ARRAY(int32_t)
345 COUNT_DISTINCT_ARRAY(int64_t)
346 COUNT_DISTINCT_ARRAY(float)
347 COUNT_DISTINCT_ARRAY(double)
348 
349 #undef COUNT_DISTINCT_ARRAY
350 
351 #include <string>
352 
353 extern "C" RUNTIME_EXPORT uint64_t string_decompress(const int32_t string_id,
354  const int64_t string_dict_handle);
355 
356 #define ARRAY_STR_ANY(type, oper_name, oper) \
357  extern "C" RUNTIME_EXPORT bool array_any_##oper_name##_str_##type( \
358  int8_t* chunk_iter_, \
359  const uint64_t row_pos, \
360  const char* needle_ptr, \
361  const uint32_t needle_len, \
362  const int64_t string_dict_handle, \
363  const type null_val) { \
364  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
365  ArrayDatum ad; \
366  bool is_end; \
367  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
368  const size_t elem_count = ad.length / sizeof(type); \
369  std::string needle_str(needle_ptr, needle_len); \
370  for (size_t i = 0; i < elem_count; ++i) { \
371  const type val = reinterpret_cast<type*>(ad.pointer)[i]; \
372  if (val != null_val) { \
373  uint64_t str_and_len = string_decompress(val, string_dict_handle); \
374  const char* str = reinterpret_cast<const char*>(str_and_len & 0xffffffffffff); \
375  const uint16_t len = str_and_len >> 48; \
376  std::string val_str(str, len); \
377  if (val_str oper needle_str) { \
378  return true; \
379  } \
380  } \
381  } \
382  return false; \
383  }
384 
// Generates array_all_<oper_name>_str_<type>(): returns false as soon as one
// element of the dictionary-encoded string array at row_pos is NULL or, once
// decoded through string_decompress (low 48 bits = pointer, high 16 bits =
// length), fails `decoded oper needle` using std::string comparison. Returns
// true otherwise, including for an array with no elements. Host-only.
#define ARRAY_STR_ALL(type, oper_name, oper)                                   \
  extern "C" RUNTIME_EXPORT bool array_all_##oper_name##_str_##type(           \
      int8_t* chunk_iter_,                                                     \
      const uint64_t row_pos,                                                  \
      const char* needle_ptr,                                                  \
      const uint32_t needle_len,                                               \
      const int64_t string_dict_handle,                                        \
      const type null_val) {                                                   \
    ArrayDatum datum;                                                          \
    bool at_end;                                                               \
    ChunkIter_get_nth(                                                         \
        reinterpret_cast<ChunkIter*>(chunk_iter_), row_pos, &datum, &at_end);  \
    const size_t elem_total = datum.length / sizeof(type);                     \
    const std::string needle(needle_ptr, needle_len);                          \
    for (size_t idx = 0; idx != elem_total; ++idx) {                           \
      const type str_id = reinterpret_cast<type*>(datum.pointer)[idx];         \
      if (str_id == null_val) {                                                \
        return false; /* a NULL element breaks the ALL condition */            \
      }                                                                        \
      const uint64_t packed = string_decompress(str_id, string_dict_handle);   \
      const uint16_t decoded_len = packed >> 48;                               \
      const char* decoded_ptr =                                                \
          reinterpret_cast<const char*>(packed & 0xffffffffffff);              \
      const std::string decoded(decoded_ptr, decoded_len);                     \
      const bool satisfied = decoded oper needle;                              \
      if (!satisfied) {                                                        \
        return false;                                                          \
      }                                                                        \
    }                                                                          \
    return true;                                                               \
  }
414 
415 #define ARRAY_STR_ALL_ANY_ALL_TYPES(oper_name, oper) \
416  ARRAY_STR_ANY(int8_t, oper_name, oper) \
417  ARRAY_STR_ALL(int8_t, oper_name, oper) \
418  ARRAY_STR_ANY(int16_t, oper_name, oper) \
419  ARRAY_STR_ALL(int16_t, oper_name, oper) \
420  ARRAY_STR_ANY(int32_t, oper_name, oper) \
421  ARRAY_STR_ALL(int32_t, oper_name, oper) \
422  ARRAY_STR_ANY(int64_t, oper_name, oper) \
423  ARRAY_STR_ALL(int64_t, oper_name, oper)
424 
425 ARRAY_STR_ALL_ANY_ALL_TYPES(eq, ==)
426 ARRAY_STR_ALL_ANY_ALL_TYPES(ne, !=)
427 ARRAY_STR_ALL_ANY_ALL_TYPES(lt, <)
428 ARRAY_STR_ALL_ANY_ALL_TYPES(le, <=)
429 ARRAY_STR_ALL_ANY_ALL_TYPES(gt, >)
430 ARRAY_STR_ALL_ANY_ALL_TYPES(ge, >=)
431 
432 #undef ARRAY_ALL_ANY_ALL_TYPES
433 #undef ARRAY_STR_ALL
434 #undef ARRAY_STR_ANY
435 
436 #endif
437 
438 #endif // EXECUTE_INCLUDE
int8_t * start_pos
Definition: ChunkIter.h:34
DEVICE void ChunkIter_get_nth_point_coords(ChunkIter *it, int n, ArrayDatum *result, bool *is_end)
Definition: ChunkIter.cpp:317
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:182
#define DEVICE
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:217
EXTENSION_NOINLINE int8_t * allocate_varlen_buffer(int64_t element_count, int64_t element_size)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
#define RUNTIME_EXPORT
int skip_size
Definition: ChunkIter.h:37
constexpr double n
Definition: Utm.h:38
#define ALWAYS_INLINE
RUNTIME_EXPORT uint64_t string_decompress(const int32_t string_id, const int64_t string_dict_handle)
Definition: StringOpsIR.cpp:39