OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ArrayOps.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include <cstdint>
24 #include "../Shared/funcannotations.h"
25 #include "../Utils/ChunkIter.h"
26 #include "TypePunning.h"
27 
28 #ifdef EXECUTE_INCLUDE
29 
30 extern "C" DEVICE RUNTIME_EXPORT int32_t array_size(int8_t* chunk_iter_,
31  const uint64_t row_pos,
32  const uint32_t elem_log_sz) {
33  if (!chunk_iter_) {
34  return 0;
35  }
36  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
37  ArrayDatum ad;
38  bool is_end;
39  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
40  return ad.is_null ? 0 : ad.length >> elem_log_sz;
41 }
42 
43 extern "C" DEVICE RUNTIME_EXPORT int32_t array_size_nullable(int8_t* chunk_iter_,
44  const uint64_t row_pos,
45  const uint32_t elem_log_sz,
46  const int32_t null_val) {
47  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
48  ArrayDatum ad;
49  bool is_end;
50  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
51  return ad.is_null ? null_val : ad.length >> elem_log_sz;
52 }
53 
54 extern "C" DEVICE RUNTIME_EXPORT int32_t array_size_1_nullable(int8_t* chunk_iter_,
55  const uint64_t row_pos,
56  const int32_t null_val) {
57  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
58  ArrayDatum ad;
59  bool is_end;
60  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
61  return ad.is_null ? null_val : 1;
62 }
63 
64 extern "C" DEVICE RUNTIME_EXPORT bool array_is_null(int8_t* chunk_iter_,
65  const uint64_t row_pos) {
66  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
67  ArrayDatum ad;
68  bool is_end;
69  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
70  return ad.is_null;
71 }
72 
73 extern "C" DEVICE RUNTIME_EXPORT bool point_coord_array_is_null(int8_t* chunk_iter_,
74  const uint64_t row_pos) {
75  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
76  ArrayDatum ad;
77  bool is_end;
78  ChunkIter_get_nth_point_coords(chunk_iter, row_pos, &ad, &is_end);
79  return ad.is_null;
80 }
81 
82 #define ARRAY_AT(type) \
83  extern "C" DEVICE RUNTIME_EXPORT type array_at_##type( \
84  int8_t* chunk_iter_, const uint64_t row_pos, const uint32_t elem_idx) { \
85  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
86  ArrayDatum ad; \
87  bool is_end; \
88  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
89  return reinterpret_cast<type*>(ad.pointer)[elem_idx]; \
90  }
91 
92 ARRAY_AT(int8_t)
93 ARRAY_AT(int16_t)
94 ARRAY_AT(int32_t)
95 ARRAY_AT(int64_t)
96 ARRAY_AT(float)
97 ARRAY_AT(double)
98 
99 #undef ARRAY_AT
100 
101 #define VARLEN_ARRAY_AT(type) \
102  extern "C" DEVICE RUNTIME_EXPORT type varlen_array_at_##type( \
103  int8_t* chunk_iter_, const uint64_t row_pos, const uint32_t elem_idx) { \
104  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
105  ArrayDatum ad; \
106  bool is_end; \
107  ChunkIter_get_nth_varlen(chunk_iter, row_pos, &ad, &is_end); \
108  return reinterpret_cast<type*>(ad.pointer)[elem_idx]; \
109  }
110 
111 VARLEN_ARRAY_AT(int8_t)
112 VARLEN_ARRAY_AT(int16_t)
113 VARLEN_ARRAY_AT(int32_t)
114 VARLEN_ARRAY_AT(int64_t)
115 VARLEN_ARRAY_AT(float)
116 VARLEN_ARRAY_AT(double)
117 
118 #undef VARLEN_ARRAY_AT
119 
120 #define VARLEN_NOTNULL_ARRAY_AT(type) \
121  extern "C" DEVICE RUNTIME_EXPORT type varlen_notnull_array_at_##type( \
122  int8_t* chunk_iter_, const uint64_t row_pos, const uint32_t elem_idx) { \
123  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
124  ArrayDatum ad; \
125  bool is_end; \
126  ChunkIter_get_nth_varlen_notnull(chunk_iter, row_pos, &ad, &is_end); \
127  return reinterpret_cast<type*>(ad.pointer)[elem_idx]; \
128  }
129 
130 VARLEN_NOTNULL_ARRAY_AT(int8_t)
131 VARLEN_NOTNULL_ARRAY_AT(int16_t)
132 VARLEN_NOTNULL_ARRAY_AT(int32_t)
133 VARLEN_NOTNULL_ARRAY_AT(int64_t)
134 VARLEN_NOTNULL_ARRAY_AT(float)
135 VARLEN_NOTNULL_ARRAY_AT(double)
136 
137 #undef VARLEN_NOTNULL_ARRAY_AT
138 
139 #define ARRAY_ANY(type, needle_type, oper_name, oper) \
140  extern "C" DEVICE RUNTIME_EXPORT bool array_any_##oper_name##_##type##_##needle_type( \
141  int8_t* chunk_iter_, \
142  const uint64_t row_pos, \
143  const needle_type needle, \
144  const type null_val) { \
145  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
146  ArrayDatum ad; \
147  bool is_end; \
148  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
149  const size_t elem_count = ad.length / sizeof(type); \
150  for (size_t i = 0; i < elem_count; ++i) { \
151  const needle_type val = reinterpret_cast<type*>(ad.pointer)[i]; \
152  if (val != null_val && val oper needle) { \
153  return true; \
154  } \
155  } \
156  return false; \
157  }
158 
159 #define ARRAY_ALL(type, needle_type, oper_name, oper) \
160  extern "C" DEVICE RUNTIME_EXPORT bool array_all_##oper_name##_##type##_##needle_type( \
161  int8_t* chunk_iter_, \
162  const uint64_t row_pos, \
163  const needle_type needle, \
164  const type null_val) { \
165  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
166  ArrayDatum ad; \
167  bool is_end; \
168  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
169  const size_t elem_count = ad.length / sizeof(type); \
170  for (size_t i = 0; i < elem_count; ++i) { \
171  const needle_type val = reinterpret_cast<type*>(ad.pointer)[i]; \
172  if (!(val != null_val && val oper needle)) { \
173  return false; \
174  } \
175  } \
176  return true; \
177  }
178 
179 #define ARRAY_ALL_ANY_ALL_TYPES(oper_name, oper, needle_type) \
180  ARRAY_ANY(int8_t, needle_type, oper_name, oper) \
181  ARRAY_ALL(int8_t, needle_type, oper_name, oper) \
182  ARRAY_ANY(int16_t, needle_type, oper_name, oper) \
183  ARRAY_ALL(int16_t, needle_type, oper_name, oper) \
184  ARRAY_ANY(int32_t, needle_type, oper_name, oper) \
185  ARRAY_ALL(int32_t, needle_type, oper_name, oper) \
186  ARRAY_ANY(int64_t, needle_type, oper_name, oper) \
187  ARRAY_ALL(int64_t, needle_type, oper_name, oper) \
188  ARRAY_ANY(float, needle_type, oper_name, oper) \
189  ARRAY_ALL(float, needle_type, oper_name, oper) \
190  ARRAY_ANY(double, needle_type, oper_name, oper) \
191  ARRAY_ALL(double, needle_type, oper_name, oper)
192 
193 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int8_t)
194 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int8_t)
195 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int8_t)
196 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int8_t)
197 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int8_t)
198 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int8_t)
199 
200 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int16_t)
201 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int16_t)
202 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int16_t)
203 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int16_t)
204 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int16_t)
205 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int16_t)
206 
207 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int32_t)
208 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int32_t)
209 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int32_t)
210 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int32_t)
211 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int32_t)
212 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int32_t)
213 
214 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int64_t)
215 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int64_t)
216 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int64_t)
217 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int64_t)
218 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int64_t)
219 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int64_t)
220 
221 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, float)
222 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, float)
223 ARRAY_ALL_ANY_ALL_TYPES(lt, <, float)
224 ARRAY_ALL_ANY_ALL_TYPES(le, <=, float)
225 ARRAY_ALL_ANY_ALL_TYPES(gt, >, float)
226 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, float)
227 
228 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, double)
229 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, double)
230 ARRAY_ALL_ANY_ALL_TYPES(lt, <, double)
231 ARRAY_ALL_ANY_ALL_TYPES(le, <=, double)
232 ARRAY_ALL_ANY_ALL_TYPES(gt, >, double)
233 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, double)
234 
235 #undef ARRAY_ALL_ANY_ALL_TYPES
236 #undef ARRAY_ALL
237 #undef ARRAY_ANY
238 
239 #define ARRAY_AT_CHECKED(type) \
240  extern "C" DEVICE RUNTIME_EXPORT type array_at_##type##_checked( \
241  int8_t* chunk_iter_, \
242  const uint64_t row_pos, \
243  const int64_t elem_idx, \
244  const type null_val) { \
245  if (elem_idx <= 0) { \
246  return null_val; \
247  } \
248  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
249  ArrayDatum ad; \
250  bool is_end; \
251  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
252  if (ad.is_null || static_cast<size_t>(elem_idx) > ad.length / sizeof(type)) { \
253  return null_val; \
254  } \
255  return reinterpret_cast<type*>(ad.pointer)[elem_idx - 1]; \
256  }
257 
258 ARRAY_AT_CHECKED(int8_t)
259 ARRAY_AT_CHECKED(int16_t)
260 ARRAY_AT_CHECKED(int32_t)
261 ARRAY_AT_CHECKED(int64_t)
262 ARRAY_AT_CHECKED(float)
263 ARRAY_AT_CHECKED(double)
264 
265 #undef ARRAY_AT_CHECKED
266 
267 extern "C" DEVICE RUNTIME_EXPORT int8_t* allocate_varlen_buffer(int64_t element_count,
268  int64_t element_size) {
269 #ifndef __CUDACC__
270  int8_t* varlen_buffer =
271  reinterpret_cast<int8_t*>(checked_malloc((element_count + 1) * element_size));
272  return varlen_buffer;
273 #else
274  return nullptr;
275 #endif
276 }
277 
278 extern "C" DEVICE RUNTIME_EXPORT ALWAYS_INLINE int32_t
279 fast_fixlen_array_size(int8_t* chunk_iter_, const uint32_t elem_log_sz) {
280  ChunkIter* it = reinterpret_cast<ChunkIter*>(chunk_iter_);
281  return it->skip_size >> elem_log_sz;
282 }
283 
284 extern "C" DEVICE RUNTIME_EXPORT ALWAYS_INLINE int8_t* fast_fixlen_array_buff(
285  int8_t* chunk_iter_,
286  const uint64_t row_pos) {
287  if (!chunk_iter_) {
288  return nullptr;
289  }
290  ChunkIter* it = reinterpret_cast<ChunkIter*>(chunk_iter_);
291  auto n = static_cast<int>(row_pos);
292  int8_t* current_pos = it->start_pos + n * it->skip_size;
293  return current_pos;
294 }
295 
296 extern "C" DEVICE RUNTIME_EXPORT ALWAYS_INLINE int64_t
297 determine_fixed_array_len(int8_t* chunk_iter, int64_t valid_len) {
298  return chunk_iter ? valid_len : 0;
299 }
300 
301 extern "C" DEVICE RUNTIME_EXPORT int8_t* array_buff(int8_t* chunk_iter_,
302  const uint64_t row_pos) {
303  if (!chunk_iter_) {
304  return nullptr;
305  }
306  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
307  ArrayDatum ad;
308  bool is_end;
309  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
310  return ad.pointer;
311 }
312 
313 #ifndef __CUDACC__
314 
315 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_int8_t(const int8_t val) {
316  return val;
317 }
318 
319 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_int16_t(const int16_t val) {
320  return val;
321 }
322 
323 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_int32_t(const int32_t val) {
324  return val;
325 }
326 
327 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_int64_t(const int64_t val) {
328  return val;
329 }
330 
331 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_float(const float val) {
332  const double dval{val};
333  return *reinterpret_cast<const int64_t*>(may_alias_ptr(&dval));
334 }
335 
336 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t elem_bitcast_double(const double val) {
337  return *reinterpret_cast<const int64_t*>(may_alias_ptr(&val));
338 }
339 
340 #define COUNT_DISTINCT_ARRAY(type) \
341  extern "C" RUNTIME_EXPORT void agg_count_distinct_array_##type( \
342  int64_t* agg, int8_t* chunk_iter_, const uint64_t row_pos, const type null_val) { \
343  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
344  ArrayDatum ad; \
345  bool is_end; \
346  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
347  const size_t elem_count{ad.length / sizeof(type)}; \
348  for (size_t i = 0; i < elem_count; ++i) { \
349  const auto val = reinterpret_cast<type*>(ad.pointer)[i]; \
350  if (val != null_val) { \
351  reinterpret_cast<CountDistinctSet*>(*agg)->insert(elem_bitcast_##type(val)); \
352  } \
353  } \
354  }
355 
356 COUNT_DISTINCT_ARRAY(int8_t)
357 COUNT_DISTINCT_ARRAY(int16_t)
358 COUNT_DISTINCT_ARRAY(int32_t)
359 COUNT_DISTINCT_ARRAY(int64_t)
360 COUNT_DISTINCT_ARRAY(float)
361 COUNT_DISTINCT_ARRAY(double)
362 
363 #undef COUNT_DISTINCT_ARRAY
364 
365 #include <functional>
366 #include <string_view>
367 
368 extern "C" RUNTIME_EXPORT StringView string_decompress(const int32_t string_id,
369  const int64_t string_dict_handle);
370 
371 template <typename T>
372 bool array_any(int8_t* const chunk_iter_i8,
373  uint64_t const row_pos,
374  std::string_view const needle_str,
375  int64_t const string_dict_handle,
376  T const null_val,
377  std::function<bool(std::string_view, std::string_view)> const cmp) {
378  ChunkIter* const chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_i8);
379  ArrayDatum ad;
380  bool is_end;
381  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
382  size_t const elem_count = ad.length / sizeof(T);
383  for (size_t i = 0; i < elem_count; ++i) {
384  T const val = reinterpret_cast<T*>(ad.pointer)[i];
385  if (val != null_val) {
386  StringView const sv = string_decompress(val, string_dict_handle);
387  if (cmp(sv.stringView(), needle_str)) {
388  return true;
389  }
390  }
391  }
392  return false;
393 }
394 
395 template <typename T>
396 bool array_all(int8_t* const chunk_iter_i8,
397  uint64_t const row_pos,
398  std::string_view const needle_str,
399  int64_t const string_dict_handle,
400  T const null_val,
401  std::function<bool(std::string_view, std::string_view)> const cmp) {
402  ChunkIter* const chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_i8);
403  ArrayDatum ad;
404  bool is_end;
405  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
406  size_t const elem_count = ad.length / sizeof(T);
407  for (size_t i = 0; i < elem_count; ++i) {
408  T const val = reinterpret_cast<T*>(ad.pointer)[i];
409  if (val == null_val) {
410  return false;
411  }
412  StringView const sv = string_decompress(val, string_dict_handle);
413  if (!cmp(sv.stringView(), needle_str)) {
414  return false;
415  }
416  }
417  return true;
418 }
419 
420 #define ARRAY_STR_ANY(type, oper_name, oper) \
421  extern "C" RUNTIME_EXPORT bool array_any_##oper_name##_str_##type( \
422  int8_t* const chunk_iter_i8, \
423  uint64_t const row_pos, \
424  char const* const needle_ptr, \
425  uint32_t const needle_len, \
426  int64_t const string_dict_handle, \
427  type const null_val) { \
428  return array_any(chunk_iter_i8, \
429  row_pos, \
430  std::string_view{needle_ptr, needle_len}, \
431  string_dict_handle, \
432  null_val, \
433  std::oper<std::string_view>{}); \
434  }
435 
436 #define ARRAY_STR_ALL(type, oper_name, oper) \
437  extern "C" RUNTIME_EXPORT bool array_all_##oper_name##_str_##type( \
438  int8_t* const chunk_iter_i8, \
439  uint64_t const row_pos, \
440  char const* const needle_ptr, \
441  uint32_t const needle_len, \
442  int64_t const string_dict_handle, \
443  type const null_val) { \
444  return array_all(chunk_iter_i8, \
445  row_pos, \
446  std::string_view{needle_ptr, needle_len}, \
447  string_dict_handle, \
448  null_val, \
449  std::oper<std::string_view>{}); \
450  }
451 
452 #define ARRAY_STR_ALL_ANY_ALL_TYPES(oper_name, oper) \
453  ARRAY_STR_ANY(int8_t, oper_name, oper) \
454  ARRAY_STR_ALL(int8_t, oper_name, oper) \
455  ARRAY_STR_ANY(int16_t, oper_name, oper) \
456  ARRAY_STR_ALL(int16_t, oper_name, oper) \
457  ARRAY_STR_ANY(int32_t, oper_name, oper) \
458  ARRAY_STR_ALL(int32_t, oper_name, oper) \
459  ARRAY_STR_ANY(int64_t, oper_name, oper) \
460  ARRAY_STR_ALL(int64_t, oper_name, oper)
461 
462 ARRAY_STR_ALL_ANY_ALL_TYPES(eq, equal_to)
463 ARRAY_STR_ALL_ANY_ALL_TYPES(ne, not_equal_to)
464 ARRAY_STR_ALL_ANY_ALL_TYPES(lt, less)
465 ARRAY_STR_ALL_ANY_ALL_TYPES(le, less_equal)
466 ARRAY_STR_ALL_ANY_ALL_TYPES(gt, greater)
467 ARRAY_STR_ALL_ANY_ALL_TYPES(ge, greater_equal)
468 
469 #undef ARRAY_ALL_ANY_ALL_TYPES
470 #undef ARRAY_STR_ALL
471 #undef ARRAY_STR_ANY
472 
473 #endif
474 
475 #endif // EXECUTE_INCLUDE
int8_t * start_pos
Definition: ChunkIter.h:34
DEVICE void ChunkIter_get_nth_point_coords(ChunkIter *it, int n, ArrayDatum *result, bool *is_end)
Definition: ChunkIter.cpp:321
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:182
#define DEVICE
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
EXTENSION_NOINLINE int8_t * allocate_varlen_buffer(int64_t element_count, int64_t element_size)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::string_view stringView() const
Definition: Datum.h:44
#define RUNTIME_EXPORT
int skip_size
Definition: ChunkIter.h:37
constexpr double n
Definition: Utm.h:38
#define ALWAYS_INLINE
RUNTIME_EXPORT StringView string_decompress(const int32_t string_id, const int64_t string_dict_handle)
Definition: StringOpsIR.cpp:38