OmniSciDB  04ee39c94c
ArrayOps.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include <cstdint>
26 #include "../Shared/funcannotations.h"
27 #include "../Utils/ChunkIter.h"
28 #include "TypePunning.h"
29 
30 #ifdef EXECUTE_INCLUDE
31 
32 extern "C" DEVICE int32_t array_size(int8_t* chunk_iter_,
33  const uint64_t row_pos,
34  const uint32_t elem_log_sz) {
35  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
36  ArrayDatum ad;
37  bool is_end;
38  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
39  return ad.is_null ? 0 : ad.length >> elem_log_sz;
40 }
41 
42 extern "C" DEVICE int32_t array_size_nullable(int8_t* chunk_iter_,
43  const uint64_t row_pos,
44  const uint32_t elem_log_sz,
45  const int32_t null_val) {
46  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
47  ArrayDatum ad;
48  bool is_end;
49  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
50  return ad.is_null ? null_val : ad.length >> elem_log_sz;
51 }
52 
53 extern "C" DEVICE bool array_is_null(int8_t* chunk_iter_, const uint64_t row_pos) {
54  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
55  ArrayDatum ad;
56  bool is_end;
57  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
58  return ad.is_null;
59 }
60 
61 #define ARRAY_AT(type) \
62  extern "C" DEVICE type array_at_##type( \
63  int8_t* chunk_iter_, const uint64_t row_pos, const uint32_t elem_idx) { \
64  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
65  ArrayDatum ad; \
66  bool is_end; \
67  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
68  return reinterpret_cast<type*>(ad.pointer)[elem_idx]; \
69  }
70 
71 ARRAY_AT(int8_t)
72 ARRAY_AT(int16_t)
73 ARRAY_AT(int32_t)
74 ARRAY_AT(int64_t)
75 ARRAY_AT(float)
76 ARRAY_AT(double)
77 
78 #undef ARRAY_AT
79 
80 #define VARLEN_ARRAY_AT(type) \
81  extern "C" DEVICE type varlen_array_at_##type( \
82  int8_t* chunk_iter_, const uint64_t row_pos, const uint32_t elem_idx) { \
83  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
84  ArrayDatum ad; \
85  bool is_end; \
86  ChunkIter_get_nth_varlen(chunk_iter, row_pos, &ad, &is_end); \
87  return reinterpret_cast<type*>(ad.pointer)[elem_idx]; \
88  }
89 
90 VARLEN_ARRAY_AT(int8_t)
91 VARLEN_ARRAY_AT(int16_t)
92 VARLEN_ARRAY_AT(int32_t)
93 VARLEN_ARRAY_AT(int64_t)
94 VARLEN_ARRAY_AT(float)
95 VARLEN_ARRAY_AT(double)
96 
97 #undef VARLEN_ARRAY_AT
98 
99 #define VARLEN_NOTNULL_ARRAY_AT(type) \
100  extern "C" DEVICE type varlen_notnull_array_at_##type( \
101  int8_t* chunk_iter_, const uint64_t row_pos, const uint32_t elem_idx) { \
102  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
103  ArrayDatum ad; \
104  bool is_end; \
105  ChunkIter_get_nth_varlen_notnull(chunk_iter, row_pos, &ad, &is_end); \
106  return reinterpret_cast<type*>(ad.pointer)[elem_idx]; \
107  }
108 
109 VARLEN_NOTNULL_ARRAY_AT(int8_t)
110 VARLEN_NOTNULL_ARRAY_AT(int16_t)
111 VARLEN_NOTNULL_ARRAY_AT(int32_t)
112 VARLEN_NOTNULL_ARRAY_AT(int64_t)
113 VARLEN_NOTNULL_ARRAY_AT(float)
114 VARLEN_NOTNULL_ARRAY_AT(double)
115 
116 #undef VARLEN_NOTNULL_ARRAY_AT
117 
// Generates array_any_<oper_name>_<type>_<needle_type>(chunk_iter_, row_pos,
// needle, null_val): returns true as soon as one non-null element `e` of the
// array at `row_pos` satisfies `e <oper> needle`; returns false otherwise
// (including when the array holds no elements).
//
// Each element is kept in its storage type <type>. The null test is therefore
// done against `null_val` without any lossy conversion, and the comparison with
// `needle` relies on the usual arithmetic conversions. Converting the element
// to <needle_type> first would truncate it whenever <needle_type> is narrower
// than <type> (and would let a null sentinel slip through as a regular value).
#define ARRAY_ANY(type, needle_type, oper_name, oper)                    \
  extern "C" DEVICE bool array_any_##oper_name##_##type##_##needle_type( \
      int8_t* chunk_iter_,                                               \
      const uint64_t row_pos,                                            \
      const needle_type needle,                                          \
      const type null_val) {                                             \
    ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);   \
    ArrayDatum ad;                                                       \
    bool is_end;                                                         \
    ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);                \
    const size_t elem_count = ad.length / sizeof(type);                  \
    const type* elems = reinterpret_cast<const type*>(ad.pointer);       \
    for (size_t i = 0; i < elem_count; ++i) {                            \
      /* read in the storage type; do not narrow to needle_type */       \
      const type val = elems[i];                                         \
      if (val != null_val && val oper needle) {                          \
        return true;                                                     \
      }                                                                  \
    }                                                                    \
    return false;                                                        \
  }
137 
// Generates array_all_<oper_name>_<type>_<needle_type>(chunk_iter_, row_pos,
// needle, null_val): returns false as soon as one element is null or fails
// `e <oper> needle`; returns true otherwise (including when the array holds no
// elements).
//
// Each element is kept in its storage type <type>. The null test is therefore
// done against `null_val` without any lossy conversion, and the comparison with
// `needle` relies on the usual arithmetic conversions. Converting the element
// to <needle_type> first would truncate it whenever <needle_type> is narrower
// than <type> (and would let a null sentinel slip through as a regular value).
#define ARRAY_ALL(type, needle_type, oper_name, oper)                    \
  extern "C" DEVICE bool array_all_##oper_name##_##type##_##needle_type( \
      int8_t* chunk_iter_,                                               \
      const uint64_t row_pos,                                            \
      const needle_type needle,                                          \
      const type null_val) {                                             \
    ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);   \
    ArrayDatum ad;                                                       \
    bool is_end;                                                         \
    ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);                \
    const size_t elem_count = ad.length / sizeof(type);                  \
    const type* elems = reinterpret_cast<const type*>(ad.pointer);       \
    for (size_t i = 0; i < elem_count; ++i) {                            \
      /* read in the storage type; do not narrow to needle_type */       \
      const type val = elems[i];                                         \
      if (!(val != null_val && val oper needle)) {                       \
        return false;                                                    \
      }                                                                  \
    }                                                                    \
    return true;                                                         \
  }
157 
158 #define ARRAY_ALL_ANY_ALL_TYPES(oper_name, oper, needle_type) \
159  ARRAY_ANY(int8_t, needle_type, oper_name, oper) \
160  ARRAY_ALL(int8_t, needle_type, oper_name, oper) \
161  ARRAY_ANY(int16_t, needle_type, oper_name, oper) \
162  ARRAY_ALL(int16_t, needle_type, oper_name, oper) \
163  ARRAY_ANY(int32_t, needle_type, oper_name, oper) \
164  ARRAY_ALL(int32_t, needle_type, oper_name, oper) \
165  ARRAY_ANY(int64_t, needle_type, oper_name, oper) \
166  ARRAY_ALL(int64_t, needle_type, oper_name, oper) \
167  ARRAY_ANY(float, needle_type, oper_name, oper) \
168  ARRAY_ALL(float, needle_type, oper_name, oper) \
169  ARRAY_ANY(double, needle_type, oper_name, oper) \
170  ARRAY_ALL(double, needle_type, oper_name, oper)
171 
172 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int8_t)
173 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int8_t)
174 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int8_t)
175 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int8_t)
176 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int8_t)
177 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int8_t)
178 
179 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int16_t)
180 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int16_t)
181 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int16_t)
182 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int16_t)
183 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int16_t)
184 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int16_t)
185 
186 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int32_t)
187 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int32_t)
188 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int32_t)
189 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int32_t)
190 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int32_t)
191 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int32_t)
192 
193 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, int64_t)
194 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, int64_t)
195 ARRAY_ALL_ANY_ALL_TYPES(lt, <, int64_t)
196 ARRAY_ALL_ANY_ALL_TYPES(le, <=, int64_t)
197 ARRAY_ALL_ANY_ALL_TYPES(gt, >, int64_t)
198 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, int64_t)
199 
200 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, float)
201 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, float)
202 ARRAY_ALL_ANY_ALL_TYPES(lt, <, float)
203 ARRAY_ALL_ANY_ALL_TYPES(le, <=, float)
204 ARRAY_ALL_ANY_ALL_TYPES(gt, >, float)
205 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, float)
206 
207 ARRAY_ALL_ANY_ALL_TYPES(eq, ==, double)
208 ARRAY_ALL_ANY_ALL_TYPES(ne, !=, double)
209 ARRAY_ALL_ANY_ALL_TYPES(lt, <, double)
210 ARRAY_ALL_ANY_ALL_TYPES(le, <=, double)
211 ARRAY_ALL_ANY_ALL_TYPES(gt, >, double)
212 ARRAY_ALL_ANY_ALL_TYPES(ge, >=, double)
213 
214 #undef ARRAY_ALL_ANY_ALL_TYPES
215 #undef ARRAY_ALL
216 #undef ARRAY_ANY
217 
218 #define ARRAY_AT_CHECKED(type) \
219  extern "C" DEVICE type array_at_##type##_checked(int8_t* chunk_iter_, \
220  const uint64_t row_pos, \
221  const int64_t elem_idx, \
222  const type null_val) { \
223  if (elem_idx <= 0) { \
224  return null_val; \
225  } \
226  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
227  ArrayDatum ad; \
228  bool is_end; \
229  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
230  if (ad.is_null || static_cast<size_t>(elem_idx) > ad.length / sizeof(type)) { \
231  return null_val; \
232  } \
233  return reinterpret_cast<type*>(ad.pointer)[elem_idx - 1]; \
234  }
235 
236 ARRAY_AT_CHECKED(int8_t)
237 ARRAY_AT_CHECKED(int16_t)
238 ARRAY_AT_CHECKED(int32_t)
239 ARRAY_AT_CHECKED(int64_t)
240 ARRAY_AT_CHECKED(float)
241 ARRAY_AT_CHECKED(double)
242 
243 #undef ARRAY_AT_CHECKED
244 
245 extern "C" DEVICE int8_t* allocate_varlen_buffer(int64_t element_count,
246  int64_t element_size) {
247 #ifndef __CUDACC__
248  int8_t* varlen_buffer =
249  reinterpret_cast<int8_t*>(checked_malloc((element_count + 1) * element_size));
250  return varlen_buffer;
251 #else
252  return nullptr;
253 #endif
254 }
255 
256 extern "C" DEVICE ALWAYS_INLINE int32_t
257 fast_fixlen_array_size(int8_t* chunk_iter_, const uint32_t elem_log_sz) {
258  ChunkIter* it = reinterpret_cast<ChunkIter*>(chunk_iter_);
259  return it->skip_size >> elem_log_sz;
260 }
261 
262 extern "C" DEVICE ALWAYS_INLINE int8_t* fast_fixlen_array_buff(int8_t* chunk_iter_,
263  const uint64_t row_pos) {
264  ChunkIter* it = reinterpret_cast<ChunkIter*>(chunk_iter_);
265  auto n = static_cast<int>(row_pos);
266  int8_t* current_pos = it->start_pos + n * it->skip_size;
267  return current_pos;
268 }
269 
270 extern "C" DEVICE int8_t* array_buff(int8_t* chunk_iter_, const uint64_t row_pos) {
271  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
272  ArrayDatum ad;
273  bool is_end;
274  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end);
275  return ad.pointer;
276 }
277 
278 #ifndef __CUDACC__
279 
280 #include <set>
281 
282 extern "C" ALWAYS_INLINE int64_t elem_bitcast_int8_t(const int8_t val) {
283  return val;
284 }
285 
286 extern "C" ALWAYS_INLINE int64_t elem_bitcast_int16_t(const int16_t val) {
287  return val;
288 }
289 
290 extern "C" ALWAYS_INLINE int64_t elem_bitcast_int32_t(const int32_t val) {
291  return val;
292 }
293 
294 extern "C" ALWAYS_INLINE int64_t elem_bitcast_int64_t(const int64_t val) {
295  return val;
296 }
297 
298 extern "C" ALWAYS_INLINE int64_t elem_bitcast_float(const float val) {
299  const double dval{val};
300  return *reinterpret_cast<const int64_t*>(may_alias_ptr(&dval));
301 }
302 
303 extern "C" ALWAYS_INLINE int64_t elem_bitcast_double(const double val) {
304  return *reinterpret_cast<const int64_t*>(may_alias_ptr(&val));
305 }
306 
307 #define COUNT_DISTINCT_ARRAY(type) \
308  extern "C" void agg_count_distinct_array_##type( \
309  int64_t* agg, int8_t* chunk_iter_, const uint64_t row_pos, const type null_val) { \
310  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_); \
311  ArrayDatum ad; \
312  bool is_end; \
313  ChunkIter_get_nth(chunk_iter, row_pos, &ad, &is_end); \
314  const size_t elem_count{ad.length / sizeof(type)}; \
315  for (size_t i = 0; i < elem_count; ++i) { \
316  const auto val = reinterpret_cast<type*>(ad.pointer)[i]; \
317  if (val != null_val) { \
318  reinterpret_cast<std::set<int64_t>*>(*agg)->insert(elem_bitcast_##type(val)); \
319  } \
320  } \
321  }
322 
323 COUNT_DISTINCT_ARRAY(int8_t)
324 COUNT_DISTINCT_ARRAY(int16_t)
325 COUNT_DISTINCT_ARRAY(int32_t)
326 COUNT_DISTINCT_ARRAY(int64_t)
327 COUNT_DISTINCT_ARRAY(float)
328 COUNT_DISTINCT_ARRAY(double)
329 
330 #undef COUNT_DISTINCT_ARRAY
331 
332 #include <string>
333 
334 extern "C" uint64_t string_decompress(const int32_t string_id,
335  const int64_t string_dict_handle);
336 
// Generates array_any_<oper_name>_str_<type>(...): treats the array at
// `row_pos` as a list of <type> string ids, resolves every id different from
// `null_val` through string_decompress, and returns true as soon as one
// resolved string satisfies `str <oper> needle`. Returns false otherwise.
// The 64-bit value from string_decompress is unpacked as: upper 16 bits =
// string length, lower 48 bits = address of the characters.
#define ARRAY_STR_ANY(type, oper_name, oper)                                       \
  extern "C" bool array_any_##oper_name##_str_##type(int8_t* chunk_iter_,          \
                                                     const uint64_t row_pos,       \
                                                     const char* needle_ptr,       \
                                                     const uint32_t needle_len,    \
                                                     const int64_t string_dict_handle, \
                                                     const type null_val) {        \
    ArrayDatum datum;                                                              \
    bool reached_end;                                                              \
    ChunkIter_get_nth(reinterpret_cast<ChunkIter*>(chunk_iter_),                   \
                      row_pos,                                                     \
                      &datum,                                                      \
                      &reached_end);                                               \
    const size_t id_count = datum.length / sizeof(type);                           \
    const type* ids = reinterpret_cast<type*>(datum.pointer);                      \
    const std::string needle(needle_ptr, needle_len);                              \
    for (size_t pos = 0; pos < id_count; ++pos) {                                  \
      const type id = ids[pos];                                                    \
      if (id == null_val) {                                                        \
        continue; /* null entries can never match */                               \
      }                                                                            \
      const uint64_t packed = string_decompress(id, string_dict_handle);           \
      const char* chars = reinterpret_cast<const char*>(packed & 0xffffffffffff);  \
      const uint16_t char_count = packed >> 48;                                    \
      const std::string candidate(chars, char_count);                              \
      if (candidate oper needle) {                                                 \
        return true;                                                               \
      }                                                                            \
    }                                                                              \
    return false;                                                                  \
  }
364 
// Generates array_all_<oper_name>_str_<type>(...): treats the array at
// `row_pos` as a list of <type> string ids and returns false as soon as an id
// equals `null_val` or its resolved string fails `str <oper> needle`. Returns
// true otherwise (including when the array holds no ids).
// The 64-bit value from string_decompress is unpacked as: upper 16 bits =
// string length, lower 48 bits = address of the characters.
#define ARRAY_STR_ALL(type, oper_name, oper)                                       \
  extern "C" bool array_all_##oper_name##_str_##type(int8_t* chunk_iter_,          \
                                                     const uint64_t row_pos,       \
                                                     const char* needle_ptr,       \
                                                     const uint32_t needle_len,    \
                                                     const int64_t string_dict_handle, \
                                                     const type null_val) {        \
    ArrayDatum datum;                                                              \
    bool reached_end;                                                              \
    ChunkIter_get_nth(reinterpret_cast<ChunkIter*>(chunk_iter_),                   \
                      row_pos,                                                     \
                      &datum,                                                      \
                      &reached_end);                                               \
    const size_t id_count = datum.length / sizeof(type);                           \
    const type* ids = reinterpret_cast<type*>(datum.pointer);                      \
    const std::string needle(needle_ptr, needle_len);                              \
    for (size_t pos = 0; pos < id_count; ++pos) {                                  \
      const type id = ids[pos];                                                    \
      if (id == null_val) {                                                        \
        return false; /* a null entry breaks the ALL quantifier */                 \
      }                                                                            \
      const uint64_t packed = string_decompress(id, string_dict_handle);           \
      const char* chars = reinterpret_cast<const char*>(packed & 0xffffffffffff);  \
      const uint16_t char_count = packed >> 48;                                    \
      const std::string candidate(chars, char_count);                              \
      if (candidate oper needle) {                                                 \
        continue;                                                                  \
      }                                                                            \
      return false;                                                                \
    }                                                                              \
    return true;                                                                   \
  }
393 
394 #define ARRAY_STR_ALL_ANY_ALL_TYPES(oper_name, oper) \
395  ARRAY_STR_ANY(int8_t, oper_name, oper) \
396  ARRAY_STR_ALL(int8_t, oper_name, oper) \
397  ARRAY_STR_ANY(int16_t, oper_name, oper) \
398  ARRAY_STR_ALL(int16_t, oper_name, oper) \
399  ARRAY_STR_ANY(int32_t, oper_name, oper) \
400  ARRAY_STR_ALL(int32_t, oper_name, oper) \
401  ARRAY_STR_ANY(int64_t, oper_name, oper) \
402  ARRAY_STR_ALL(int64_t, oper_name, oper)
403 
404 ARRAY_STR_ALL_ANY_ALL_TYPES(eq, ==)
405 ARRAY_STR_ALL_ANY_ALL_TYPES(ne, !=)
406 ARRAY_STR_ALL_ANY_ALL_TYPES(lt, <)
407 ARRAY_STR_ALL_ANY_ALL_TYPES(le, <=)
408 ARRAY_STR_ALL_ANY_ALL_TYPES(gt, >)
409 ARRAY_STR_ALL_ANY_ALL_TYPES(ge, >=)
410 
411 #undef ARRAY_ALL_ANY_ALL_TYPES
412 #undef ARRAY_STR_ALL
413 #undef ARRAY_STR_ANY
414 
415 #endif
416 
417 #endif // EXECUTE_INCLUDE
int8_t * start_pos
Definition: ChunkIter.h:33
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:181
#define DEVICE
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
int skip_size
Definition: ChunkIter.h:36
uint64_t string_decompress(const int32_t string_id, const int64_t string_dict_handle)
Definition: StringOpsIR.cpp:34
#define ALWAYS_INLINE
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:119