OmniSciDB  ab4938a6a3
RuntimeFunctions.cpp
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef __CUDACC__
18 #error This code is not intended to be compiled with a CUDA C++ compiler
19 #endif // __CUDACC__
20 
21 #include "RuntimeFunctions.h"
22 #include "../Shared/funcannotations.h"
23 #include "BufferCompaction.h"
24 #include "HyperLogLogRank.h"
25 #include "MurmurHash.h"
26 #include "TypePunning.h"
27 
28 #include <algorithm>
29 #include <atomic>
30 #include <chrono>
31 #include <cmath>
32 #include <cstring>
33 #include <thread>
34 #include <tuple>
35 
36 // decoder implementations
37 
38 #include "DecodersImpl.h"
39 
40 // arithmetic operator implementations
41 
42 #define DEF_ARITH_NULLABLE(type, null_type, opname, opsym) \
43  extern "C" ALWAYS_INLINE type opname##_##type##_nullable( \
44  const type lhs, const type rhs, const null_type null_val) { \
45  if (lhs != null_val && rhs != null_val) { \
46  return lhs opsym rhs; \
47  } \
48  return null_val; \
49  }
50 
51 #define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym) \
52  extern "C" ALWAYS_INLINE type opname##_##type##_nullable_lhs( \
53  const type lhs, const type rhs, const null_type null_val) { \
54  if (lhs != null_val) { \
55  return lhs opsym rhs; \
56  } \
57  return null_val; \
58  }
59 
60 #define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym) \
61  extern "C" ALWAYS_INLINE type opname##_##type##_nullable_rhs( \
62  const type lhs, const type rhs, const null_type null_val) { \
63  if (rhs != null_val) { \
64  return lhs opsym rhs; \
65  } \
66  return null_val; \
67  }
68 
69 #define DEF_CMP_NULLABLE(type, null_type, opname, opsym) \
70  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable( \
71  const type lhs, \
72  const type rhs, \
73  const null_type null_val, \
74  const int8_t null_bool_val) { \
75  if (lhs != null_val && rhs != null_val) { \
76  return lhs opsym rhs; \
77  } \
78  return null_bool_val; \
79  }
80 
81 #define DEF_CMP_NULLABLE_LHS(type, null_type, opname, opsym) \
82  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable_lhs( \
83  const type lhs, \
84  const type rhs, \
85  const null_type null_val, \
86  const int8_t null_bool_val) { \
87  if (lhs != null_val) { \
88  return lhs opsym rhs; \
89  } \
90  return null_bool_val; \
91  }
92 
93 #define DEF_CMP_NULLABLE_RHS(type, null_type, opname, opsym) \
94  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable_rhs( \
95  const type lhs, \
96  const type rhs, \
97  const null_type null_val, \
98  const int8_t null_bool_val) { \
99  if (rhs != null_val) { \
100  return lhs opsym rhs; \
101  } \
102  return null_bool_val; \
103  }
104 
105 #define DEF_SAFE_DIV_NULLABLE(type, null_type, opname) \
106  extern "C" ALWAYS_INLINE type safe_div_##type( \
107  const type lhs, const type rhs, const null_type null_val) { \
108  if (lhs != null_val && rhs != null_val && rhs != 0) { \
109  return lhs / rhs; \
110  } \
111  return null_val; \
112  }
113 
114 #define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type) \
115  DEF_ARITH_NULLABLE(type, null_type, add, +) \
116  DEF_ARITH_NULLABLE(type, null_type, sub, -) \
117  DEF_ARITH_NULLABLE(type, null_type, mul, *) \
118  DEF_ARITH_NULLABLE(type, null_type, div, /) \
119  DEF_SAFE_DIV_NULLABLE(type, null_type, safe_div) \
120  DEF_ARITH_NULLABLE_LHS(type, null_type, add, +) \
121  DEF_ARITH_NULLABLE_LHS(type, null_type, sub, -) \
122  DEF_ARITH_NULLABLE_LHS(type, null_type, mul, *) \
123  DEF_ARITH_NULLABLE_LHS(type, null_type, div, /) \
124  DEF_ARITH_NULLABLE_RHS(type, null_type, add, +) \
125  DEF_ARITH_NULLABLE_RHS(type, null_type, sub, -) \
126  DEF_ARITH_NULLABLE_RHS(type, null_type, mul, *) \
127  DEF_ARITH_NULLABLE_RHS(type, null_type, div, /) \
128  DEF_CMP_NULLABLE(type, null_type, eq, ==) \
129  DEF_CMP_NULLABLE(type, null_type, ne, !=) \
130  DEF_CMP_NULLABLE(type, null_type, lt, <) \
131  DEF_CMP_NULLABLE(type, null_type, gt, >) \
132  DEF_CMP_NULLABLE(type, null_type, le, <=) \
133  DEF_CMP_NULLABLE(type, null_type, ge, >=) \
134  DEF_CMP_NULLABLE_LHS(type, null_type, eq, ==) \
135  DEF_CMP_NULLABLE_LHS(type, null_type, ne, !=) \
136  DEF_CMP_NULLABLE_LHS(type, null_type, lt, <) \
137  DEF_CMP_NULLABLE_LHS(type, null_type, gt, >) \
138  DEF_CMP_NULLABLE_LHS(type, null_type, le, <=) \
139  DEF_CMP_NULLABLE_LHS(type, null_type, ge, >=) \
140  DEF_CMP_NULLABLE_RHS(type, null_type, eq, ==) \
141  DEF_CMP_NULLABLE_RHS(type, null_type, ne, !=) \
142  DEF_CMP_NULLABLE_RHS(type, null_type, lt, <) \
143  DEF_CMP_NULLABLE_RHS(type, null_type, gt, >) \
144  DEF_CMP_NULLABLE_RHS(type, null_type, le, <=) \
145  DEF_CMP_NULLABLE_RHS(type, null_type, ge, >=)
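// Illustrative expansion (for reference, not generated output): for example,
// DEF_ARITH_NULLABLE(int32_t, int64_t, add, +) defines
//   extern "C" ALWAYS_INLINE int32_t add_int32_t_nullable(
//       const int32_t lhs, const int32_t rhs, const int64_t null_val);
// which returns lhs + rhs when neither operand equals the null sentinel, and
// propagates the sentinel otherwise.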
146 
147 DEF_BINARY_NULLABLE_ALL_OPS(int8_t, int64_t)
148 DEF_BINARY_NULLABLE_ALL_OPS(int16_t, int64_t)
149 DEF_BINARY_NULLABLE_ALL_OPS(int32_t, int64_t)
150 DEF_BINARY_NULLABLE_ALL_OPS(int64_t, int64_t)
151 DEF_BINARY_NULLABLE_ALL_OPS(float, float)
152 DEF_BINARY_NULLABLE_ALL_OPS(double, double)
153 DEF_ARITH_NULLABLE(int8_t, int64_t, mod, %)
154 DEF_ARITH_NULLABLE(int16_t, int64_t, mod, %)
155 DEF_ARITH_NULLABLE(int32_t, int64_t, mod, %)
156 DEF_ARITH_NULLABLE(int64_t, int64_t, mod, %)
157 DEF_ARITH_NULLABLE_LHS(int8_t, int64_t, mod, %)
158 DEF_ARITH_NULLABLE_LHS(int16_t, int64_t, mod, %)
159 DEF_ARITH_NULLABLE_LHS(int32_t, int64_t, mod, %)
160 DEF_ARITH_NULLABLE_LHS(int64_t, int64_t, mod, %)
161 DEF_ARITH_NULLABLE_RHS(int8_t, int64_t, mod, %)
162 DEF_ARITH_NULLABLE_RHS(int16_t, int64_t, mod, %)
163 DEF_ARITH_NULLABLE_RHS(int32_t, int64_t, mod, %)
164 DEF_ARITH_NULLABLE_RHS(int64_t, int64_t, mod, %)
165 
166 #undef DEF_BINARY_NULLABLE_ALL_OPS
167 #undef DEF_SAFE_DIV_NULLABLE
168 #undef DEF_CMP_NULLABLE_RHS
169 #undef DEF_CMP_NULLABLE_LHS
170 #undef DEF_CMP_NULLABLE
171 #undef DEF_ARITH_NULLABLE_RHS
172 #undef DEF_ARITH_NULLABLE_LHS
173 #undef DEF_ARITH_NULLABLE
174 
175 extern "C" ALWAYS_INLINE int64_t scale_decimal_up(const int64_t operand,
176  const uint64_t scale,
177  const int64_t operand_null_val,
178  const int64_t result_null_val) {
179  return operand != operand_null_val ? operand * scale : result_null_val;
180 }
181 
182 extern "C" ALWAYS_INLINE int64_t scale_decimal_down_nullable(const int64_t operand,
183  const int64_t scale,
184  const int64_t null_val) {
185  // rounded scale down of a decimal
186  if (operand == null_val) {
187  return null_val;
188  }
189 
190  int64_t tmp = scale >> 1;
191  tmp = operand >= 0 ? operand + tmp : operand - tmp;
192  return tmp / scale;
193 }
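// Adding half the scale with the sign of the operand before dividing rounds to
// the nearest unit rather than truncating toward zero; e.g. with scale = 100,
// an operand of 150 yields (150 + 50) / 100 = 2 and -150 yields -2.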
194 
195 extern "C" ALWAYS_INLINE int64_t scale_decimal_down_not_nullable(const int64_t operand,
196  const int64_t scale,
197  const int64_t null_val) {
198  int64_t tmp = scale >> 1;
199  tmp = operand >= 0 ? operand + tmp : operand - tmp;
200  return tmp / scale;
201 }
202 
203 #define DEF_UMINUS_NULLABLE(type, null_type) \
204  extern "C" ALWAYS_INLINE type uminus_##type##_nullable(const type operand, \
205  const null_type null_val) { \
206  return operand == null_val ? null_val : -operand; \
207  }
208 
209 DEF_UMINUS_NULLABLE(int8_t, int8_t)
210 DEF_UMINUS_NULLABLE(int16_t, int16_t)
211 DEF_UMINUS_NULLABLE(int32_t, int32_t)
212 DEF_UMINUS_NULLABLE(int64_t, int64_t)
213 DEF_UMINUS_NULLABLE(float, float)
214 DEF_UMINUS_NULLABLE(double, double)
215 
216 #undef DEF_UMINUS_NULLABLE
217 
218 #define DEF_CAST_NULLABLE(from_type, to_type) \
219  extern "C" ALWAYS_INLINE to_type cast_##from_type##_to_##to_type##_nullable( \
220  const from_type operand, \
221  const from_type from_null_val, \
222  const to_type to_null_val) { \
223  return operand == from_null_val ? to_null_val : operand; \
224  }
225 
226 #define DEF_CAST_NULLABLE_BIDIR(type1, type2) \
227  DEF_CAST_NULLABLE(type1, type2) \
228  DEF_CAST_NULLABLE(type2, type1)
229 
230 DEF_CAST_NULLABLE_BIDIR(int8_t, int16_t)
231 DEF_CAST_NULLABLE_BIDIR(int8_t, int32_t)
232 DEF_CAST_NULLABLE_BIDIR(int8_t, int64_t)
233 DEF_CAST_NULLABLE_BIDIR(int16_t, int32_t)
234 DEF_CAST_NULLABLE_BIDIR(int16_t, int64_t)
235 DEF_CAST_NULLABLE_BIDIR(int32_t, int64_t)
236 DEF_CAST_NULLABLE_BIDIR(float, double)
237 DEF_CAST_NULLABLE_BIDIR(float, int8_t)
238 DEF_CAST_NULLABLE_BIDIR(float, int16_t)
239 DEF_CAST_NULLABLE_BIDIR(float, int32_t)
240 DEF_CAST_NULLABLE_BIDIR(float, int64_t)
241 DEF_CAST_NULLABLE_BIDIR(double, int8_t)
242 DEF_CAST_NULLABLE_BIDIR(double, int16_t)
243 DEF_CAST_NULLABLE_BIDIR(double, int32_t)
244 DEF_CAST_NULLABLE_BIDIR(double, int64_t)
245 DEF_CAST_NULLABLE(uint8_t, int32_t)
246 DEF_CAST_NULLABLE(uint16_t, int32_t)
247 
248 #undef DEF_CAST_NULLABLE_BIDIR
249 #undef DEF_CAST_NULLABLE
250 
251 extern "C" ALWAYS_INLINE int8_t logical_not(const int8_t operand, const int8_t null_val) {
252  return operand == null_val ? operand : (operand ? 0 : 1);
253 }
254 
255 extern "C" ALWAYS_INLINE int8_t logical_and(const int8_t lhs,
256  const int8_t rhs,
257  const int8_t null_val) {
258  if (lhs == null_val) {
259  return rhs == 0 ? rhs : null_val;
260  }
261  if (rhs == null_val) {
262  return lhs == 0 ? lhs : null_val;
263  }
264  return (lhs && rhs) ? 1 : 0;
265 }
266 
267 extern "C" ALWAYS_INLINE int8_t logical_or(const int8_t lhs,
268  const int8_t rhs,
269  const int8_t null_val) {
270  if (lhs == null_val) {
271  return rhs == 0 ? null_val : rhs;
272  }
273  if (rhs == null_val) {
274  return lhs == 0 ? null_val : lhs;
275  }
276  return (lhs || rhs) ? 1 : 0;
277 }
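// The three functions above implement SQL three-valued logic over int8_t
// booleans: a NULL operand yields a definite result only when the other operand
// already decides the expression (FALSE AND NULL -> FALSE, TRUE OR NULL -> TRUE),
// and yields NULL otherwise.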
278 
279 // aggregator implementations
280 
281 extern "C" ALWAYS_INLINE uint64_t agg_count(uint64_t* agg, const int64_t) {
282  return (*agg)++;
283 }
284 
285 extern "C" ALWAYS_INLINE void agg_count_distinct_bitmap(int64_t* agg,
286  const int64_t val,
287  const int64_t min_val) {
288  const uint64_t bitmap_idx = val - min_val;
289  reinterpret_cast<int8_t*>(*agg)[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
290 }
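// *agg holds a pointer to a caller-allocated bitmap; the bit for (val - min_val)
// is set by indexing the byte at bitmap_idx >> 3 and or-ing in 1 << (bitmap_idx & 7).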
291 
292 #define GPU_RT_STUB NEVER_INLINE __attribute__((optnone))
293 
294 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_gpu(int64_t*,
295  const int64_t,
296  const int64_t,
297  const int64_t,
298  const int64_t,
299  const uint64_t,
300  const uint64_t) {}
301 
302 extern "C" NEVER_INLINE void agg_approximate_count_distinct(int64_t* agg,
303  const int64_t key,
304  const uint32_t b) {
305  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
306  const uint32_t index = hash >> (64 - b);
307  const uint8_t rank = get_rank(hash << b, 64 - b);
308  uint8_t* M = reinterpret_cast<uint8_t*>(*agg);
309  M[index] = std::max(M[index], rank);
310 }
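// HyperLogLog-style update: the top b bits of the 64-bit MurmurHash select one
// of 2^b registers pointed to by *agg, and the rank (position of the leftmost
// set bit) of the remaining bits is merged into that register with a max, as in
// the standard HLL sketch.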
311 
312 extern "C" GPU_RT_STUB void agg_approximate_count_distinct_gpu(int64_t*,
313  const int64_t,
314  const uint32_t,
315  const int64_t,
316  const int64_t) {}
317 
318 extern "C" ALWAYS_INLINE int8_t bit_is_set(const int64_t bitset,
319  const int64_t val,
320  const int64_t min_val,
321  const int64_t max_val,
322  const int64_t null_val,
323  const int8_t null_bool_val) {
324  if (val == null_val) {
325  return null_bool_val;
326  }
327  if (val < min_val || val > max_val) {
328  return 0;
329  }
330  if (!bitset) {
331  return 0;
332  }
333  const uint64_t bitmap_idx = val - min_val;
334  return (reinterpret_cast<const int8_t*>(bitset))[bitmap_idx >> 3] &
335  (1 << (bitmap_idx & 7))
336  ? 1
337  : 0;
338 }
339 
340 extern "C" ALWAYS_INLINE int64_t agg_sum(int64_t* agg, const int64_t val) {
341  const auto old = *agg;
342  *agg += val;
343  return old;
344 }
345 
346 extern "C" ALWAYS_INLINE void agg_max(int64_t* agg, const int64_t val) {
347  *agg = std::max(*agg, val);
348 }
349 
350 extern "C" ALWAYS_INLINE void agg_min(int64_t* agg, const int64_t val) {
351  *agg = std::min(*agg, val);
352 }
353 
354 extern "C" ALWAYS_INLINE void agg_id(int64_t* agg, const int64_t val) {
355  *agg = val;
356 }
357 
358 extern "C" ALWAYS_INLINE int32_t checked_single_agg_id(int64_t* agg,
359  const int64_t val,
360  const int64_t null_val) {
361  if (val == null_val) {
362  return 0;
363  }
364 
365  if (*agg == val) {
366  return 0;
367  } else if (*agg == null_val) {
368  *agg = val;
369  return 0;
370  } else {
371  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
372  return 15;
373  }
374 }
375 
376 extern "C" ALWAYS_INLINE void agg_count_distinct_bitmap_skip_val(int64_t* agg,
377  const int64_t val,
378  const int64_t min_val,
379  const int64_t skip_val) {
380  if (val != skip_val) {
381  agg_count_distinct_bitmap(agg, val, min_val);
382  }
383 }
384 
385 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_skip_val_gpu(int64_t*,
386  const int64_t,
387  const int64_t,
388  const int64_t,
389  const int64_t,
390  const int64_t,
391  const uint64_t,
392  const uint64_t) {}
393 
394 extern "C" ALWAYS_INLINE uint32_t agg_count_int32(uint32_t* agg, const int32_t) {
395  return (*agg)++;
396 }
397 
398 extern "C" ALWAYS_INLINE int32_t agg_sum_int32(int32_t* agg, const int32_t val) {
399  const auto old = *agg;
400  *agg += val;
401  return old;
402 }
403 
404 #define DEF_AGG_MAX_INT(n) \
405  extern "C" ALWAYS_INLINE void agg_max_int##n(int##n##_t* agg, const int##n##_t val) { \
406  *agg = std::max(*agg, val); \
407  }
408 
409 DEF_AGG_MAX_INT(32)
410 DEF_AGG_MAX_INT(16)
411 DEF_AGG_MAX_INT(8)
412 #undef DEF_AGG_MAX_INT
413 
414 #define DEF_AGG_MIN_INT(n) \
415  extern "C" ALWAYS_INLINE void agg_min_int##n(int##n##_t* agg, const int##n##_t val) { \
416  *agg = std::min(*agg, val); \
417  }
418 
419 DEF_AGG_MIN_INT(32)
420 DEF_AGG_MIN_INT(16)
421 DEF_AGG_MIN_INT(8)
422 #undef DEF_AGG_MIN_INT
423 
424 #define DEF_AGG_ID_INT(n) \
425  extern "C" ALWAYS_INLINE void agg_id_int##n(int##n##_t* agg, const int##n##_t val) { \
426  *agg = val; \
427  }
428 
429 #define DEF_CHECKED_SINGLE_AGG_ID_INT(n) \
430  extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_int##n( \
431  int##n##_t* agg, const int##n##_t val, const int##n##_t null_val) { \
432  if (val == null_val) { \
433  return 0; \
434  } \
435  if (*agg == val) { \
436  return 0; \
437  } else if (*agg == null_val) { \
438  *agg = val; \
439  return 0; \
440  } else { \
441  /* see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES*/ \
442  return 15; \
443  } \
444  }
445 
446 DEF_AGG_ID_INT(32)
447 DEF_AGG_ID_INT(16)
448 DEF_AGG_ID_INT(8)
449 
450 DEF_CHECKED_SINGLE_AGG_ID_INT(32)
451 DEF_CHECKED_SINGLE_AGG_ID_INT(16)
452 DEF_CHECKED_SINGLE_AGG_ID_INT(8)
453 
454 #undef DEF_AGG_ID_INT
455 #undef DEF_CHECKED_SINGLE_AGG_ID_INT
456 
457 #define DEF_WRITE_PROJECTION_INT(n) \
458  extern "C" ALWAYS_INLINE void write_projection_int##n( \
459  int8_t* slot_ptr, const int##n##_t val, const int64_t init_val) { \
460  if (val != init_val) { \
461  *reinterpret_cast<int##n##_t*>(slot_ptr) = val; \
462  } \
463  }
464 
465 DEF_WRITE_PROJECTION_INT(64)
466 DEF_WRITE_PROJECTION_INT(32)
467 #undef DEF_WRITE_PROJECTION_INT
468 
469 extern "C" ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t* agg,
470  const int64_t val,
471  const int64_t skip_val) {
472  const auto old = *agg;
473  if (val != skip_val) {
474  if (old != skip_val) {
475  return agg_sum(agg, val);
476  } else {
477  *agg = val;
478  }
479  }
480  return old;
481 }
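// The skip value doubles as the "no value yet" marker for the accumulator: the
// first non-skip input overwrites it directly, and subsequent inputs are added
// on top via agg_sum.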
482 
483 extern "C" ALWAYS_INLINE int32_t agg_sum_int32_skip_val(int32_t* agg,
484  const int32_t val,
485  const int32_t skip_val) {
486  const auto old = *agg;
487  if (val != skip_val) {
488  if (old != skip_val) {
489  return agg_sum_int32(agg, val);
490  } else {
491  *agg = val;
492  }
493  }
494  return old;
495 }
496 
497 extern "C" ALWAYS_INLINE uint64_t agg_count_skip_val(uint64_t* agg,
498  const int64_t val,
499  const int64_t skip_val) {
500  if (val != skip_val) {
501  return agg_count(agg, val);
502  }
503  return *agg;
504 }
505 
506 extern "C" ALWAYS_INLINE uint32_t agg_count_int32_skip_val(uint32_t* agg,
507  const int32_t val,
508  const int32_t skip_val) {
509  if (val != skip_val) {
510  return agg_count_int32(agg, val);
511  }
512  return *agg;
513 }
514 
515 #define DEF_SKIP_AGG_ADD(base_agg_func) \
516  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
517  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
518  if (val != skip_val) { \
519  base_agg_func(agg, val); \
520  } \
521  }
522 
523 #define DEF_SKIP_AGG(base_agg_func) \
524  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
525  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
526  if (val != skip_val) { \
527  const DATA_T old_agg = *agg; \
528  if (old_agg != skip_val) { \
529  base_agg_func(agg, val); \
530  } else { \
531  *agg = val; \
532  } \
533  } \
534  }
535 
536 #define DATA_T int64_t
537 DEF_SKIP_AGG(agg_max)
538 DEF_SKIP_AGG(agg_min)
539 #undef DATA_T
540 
541 #define DATA_T int32_t
542 DEF_SKIP_AGG(agg_max_int32)
543 DEF_SKIP_AGG(agg_min_int32)
544 #undef DATA_T
545 
546 #define DATA_T int16_t
547 DEF_SKIP_AGG(agg_max_int16)
548 DEF_SKIP_AGG(agg_min_int16)
549 #undef DATA_T
550 
551 #define DATA_T int8_t
552 DEF_SKIP_AGG(agg_max_int8)
553 DEF_SKIP_AGG(agg_min_int8)
554 #undef DATA_T
555 
556 #undef DEF_SKIP_AGG_ADD
557 #undef DEF_SKIP_AGG
558 
559 // TODO(alex): fix signature
560 
561 extern "C" ALWAYS_INLINE uint64_t agg_count_double(uint64_t* agg, const double val) {
562  return (*agg)++;
563 }
564 
565 extern "C" ALWAYS_INLINE void agg_sum_double(int64_t* agg, const double val) {
566  const auto r = *reinterpret_cast<const double*>(agg) + val;
567  *agg = *reinterpret_cast<const int64_t*>(may_alias_ptr(&r));
568 }
569 
570 extern "C" ALWAYS_INLINE void agg_max_double(int64_t* agg, const double val) {
571  const auto r = std::max(*reinterpret_cast<const double*>(agg), val);
572  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
573 }
574 
575 extern "C" ALWAYS_INLINE void agg_min_double(int64_t* agg, const double val) {
576  const auto r = std::min(*reinterpret_cast<const double*>(agg), val);
577  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
578 }
579 
580 extern "C" ALWAYS_INLINE void agg_id_double(int64_t* agg, const double val) {
581  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
582 }
583 
584 extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_double(int64_t* agg,
585  const double val,
586  const double null_val) {
587  if (val == null_val) {
588  return 0;
589  }
590 
591  if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)))) {
592  return 0;
593  } else if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&null_val)))) {
594  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
595  return 0;
596  } else {
597  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
598  return 15;
599  }
600 }
601 
602 extern "C" ALWAYS_INLINE uint32_t agg_count_float(uint32_t* agg, const float val) {
603  return (*agg)++;
604 }
605 
606 extern "C" ALWAYS_INLINE void agg_sum_float(int32_t* agg, const float val) {
607  const auto r = *reinterpret_cast<const float*>(agg) + val;
608  *agg = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
609 }
610 
611 extern "C" ALWAYS_INLINE void agg_max_float(int32_t* agg, const float val) {
612  const auto r = std::max(*reinterpret_cast<const float*>(agg), val);
613  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
614 }
615 
616 extern "C" ALWAYS_INLINE void agg_min_float(int32_t* agg, const float val) {
617  const auto r = std::min(*reinterpret_cast<const float*>(agg), val);
618  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
619 }
620 
621 extern "C" ALWAYS_INLINE void agg_id_float(int32_t* agg, const float val) {
622  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
623 }
624 
625 extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_float(int32_t* agg,
626  const float val,
627  const float null_val) {
628  if (val == null_val) {
629  return 0;
630  }
631 
632  if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)))) {
633  return 0;
634  } else if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&null_val)))) {
635  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
636  return 0;
637  } else {
638  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
639  return 15;
640  }
641 }
642 
643 extern "C" ALWAYS_INLINE uint64_t agg_count_double_skip_val(uint64_t* agg,
644  const double val,
645  const double skip_val) {
646  if (val != skip_val) {
647  return agg_count_double(agg, val);
648  }
649  return *agg;
650 }
651 
652 extern "C" ALWAYS_INLINE uint32_t agg_count_float_skip_val(uint32_t* agg,
653  const float val,
654  const float skip_val) {
655  if (val != skip_val) {
656  return agg_count_float(agg, val);
657  }
658  return *agg;
659 }
660 
661 #define DEF_SKIP_AGG_ADD(base_agg_func) \
662  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
663  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
664  if (val != skip_val) { \
665  base_agg_func(agg, val); \
666  } \
667  }
668 
669 #define DEF_SKIP_AGG(base_agg_func) \
670  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
671  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
672  if (val != skip_val) { \
673  const ADDR_T old_agg = *agg; \
674  if (old_agg != *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&skip_val))) { \
675  base_agg_func(agg, val); \
676  } else { \
677  *agg = *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&val)); \
678  } \
679  } \
680  }
681 
682 #define DATA_T double
683 #define ADDR_T int64_t
684 DEF_SKIP_AGG(agg_sum_double)
685 DEF_SKIP_AGG(agg_max_double)
686 DEF_SKIP_AGG(agg_min_double)
687 #undef ADDR_T
688 #undef DATA_T
689 
690 #define DATA_T float
691 #define ADDR_T int32_t
692 DEF_SKIP_AGG(agg_sum_float)
693 DEF_SKIP_AGG(agg_max_float)
694 DEF_SKIP_AGG(agg_min_float)
695 #undef ADDR_T
696 #undef DATA_T
697 
698 #undef DEF_SKIP_AGG_ADD
699 #undef DEF_SKIP_AGG
700 
701 extern "C" ALWAYS_INLINE int64_t decimal_floor(const int64_t x, const int64_t scale) {
702  if (x >= 0) {
703  return x / scale * scale;
704  }
705  if (!(x % scale)) {
706  return x;
707  }
708  return x / scale * scale - scale;
709 }
710 
711 extern "C" ALWAYS_INLINE int64_t decimal_ceil(const int64_t x, const int64_t scale) {
712  return decimal_floor(x, scale) + (x % scale ? scale : 0);
713 }
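// decimal_floor / decimal_ceil round a scaled decimal down or up to the nearest
// multiple of scale (assumed positive); e.g. decimal_floor(-170, 100) == -200
// and decimal_ceil(-170, 100) == -100.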
714 
715 // Shared memory aggregators. Should never be called,
716 // real implementations are in cuda_mapd_rt.cu.
717 #define DEF_SHARED_AGG_RET_STUBS(base_agg_func) \
718  extern "C" GPU_RT_STUB uint64_t base_agg_func##_shared(uint64_t* agg, \
719  const int64_t val) { \
720  return 0; \
721  } \
722  \
723  extern "C" GPU_RT_STUB uint64_t base_agg_func##_skip_val_shared( \
724  uint64_t* agg, const int64_t val, const int64_t skip_val) { \
725  return 0; \
726  } \
727  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_shared(uint32_t* agg, \
728  const int32_t val) { \
729  return 0; \
730  } \
731  \
732  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_skip_val_shared( \
733  uint32_t* agg, const int32_t val, const int32_t skip_val) { \
734  return 0; \
735  } \
736  \
737  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_shared(uint64_t* agg, \
738  const double val) { \
739  return 0; \
740  } \
741  \
742  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_skip_val_shared( \
743  uint64_t* agg, const double val, const double skip_val) { \
744  return 0; \
745  } \
746  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_shared(uint32_t* agg, \
747  const float val) { \
748  return 0; \
749  } \
750  \
751  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_skip_val_shared( \
752  uint32_t* agg, const float val, const float skip_val) { \
753  return 0; \
754  }
755 
756 #define DEF_SHARED_AGG_STUBS(base_agg_func) \
757  extern "C" GPU_RT_STUB void base_agg_func##_shared(int64_t* agg, const int64_t val) {} \
758  \
759  extern "C" GPU_RT_STUB void base_agg_func##_skip_val_shared( \
760  int64_t* agg, const int64_t val, const int64_t skip_val) {} \
761  extern "C" GPU_RT_STUB void base_agg_func##_int32_shared(int32_t* agg, \
762  const int32_t val) {} \
763  extern "C" GPU_RT_STUB void base_agg_func##_int16_shared(int16_t* agg, \
764  const int16_t val) {} \
765  extern "C" GPU_RT_STUB void base_agg_func##_int8_shared(int8_t* agg, \
766  const int8_t val) {} \
767  \
768  extern "C" GPU_RT_STUB void base_agg_func##_int32_skip_val_shared( \
769  int32_t* agg, const int32_t val, const int32_t skip_val) {} \
770  \
771  extern "C" GPU_RT_STUB void base_agg_func##_double_shared(int64_t* agg, \
772  const double val) {} \
773  \
774  extern "C" GPU_RT_STUB void base_agg_func##_double_skip_val_shared( \
775  int64_t* agg, const double val, const double skip_val) {} \
776  extern "C" GPU_RT_STUB void base_agg_func##_float_shared(int32_t* agg, \
777  const float val) {} \
778  \
779  extern "C" GPU_RT_STUB void base_agg_func##_float_skip_val_shared( \
780  int32_t* agg, const float val, const float skip_val) {}
781 
782 DEF_SHARED_AGG_RET_STUBS(agg_count)
783 DEF_SHARED_AGG_STUBS(agg_max)
784 DEF_SHARED_AGG_STUBS(agg_min)
785 DEF_SHARED_AGG_STUBS(agg_id)
786 
787 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_shared(int64_t* agg,
788  const int64_t val,
789  const int64_t null_val) {
790  return 0;
791 }
792 
793 extern "C" GPU_RT_STUB int32_t
794 checked_single_agg_id_int32_shared(int32_t* agg,
795  const int32_t val,
796  const int32_t null_val) {
797  return 0;
798 }
799 extern "C" GPU_RT_STUB int32_t
800 checked_single_agg_id_int16_shared(int16_t* agg,
801  const int16_t val,
802  const int16_t null_val) {
803  return 0;
804 }
805 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_int8_shared(int8_t* agg,
806  const int8_t val,
807  const int8_t null_val) {
808  return 0;
809 }
810 
811 extern "C" GPU_RT_STUB int32_t
812 checked_single_agg_id_double_shared(int64_t* agg,
813  const double val,
814  const double null_val) {
815  return 0;
816 }
817 
818 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_float_shared(int32_t* agg,
819  const float val,
820  const float null_val) {
821  return 0;
822 }
823 
824 extern "C" GPU_RT_STUB void agg_max_int16_skip_val_shared(int16_t* agg,
825  const int16_t val,
826  const int16_t skip_val) {}
827 
828 extern "C" GPU_RT_STUB void agg_max_int8_skip_val_shared(int8_t* agg,
829  const int8_t val,
830  const int8_t skip_val) {}
831 
832 extern "C" GPU_RT_STUB void agg_min_int16_skip_val_shared(int16_t* agg,
833  const int16_t val,
834  const int16_t skip_val) {}
835 
836 extern "C" GPU_RT_STUB void agg_min_int8_skip_val_shared(int8_t* agg,
837  const int8_t val,
838  const int8_t skip_val) {}
839 
840 extern "C" GPU_RT_STUB void agg_id_double_shared_slow(int64_t* agg, const double* val) {}
841 
842 extern "C" GPU_RT_STUB int64_t agg_sum_shared(int64_t* agg, const int64_t val) {
843  return 0;
844 }
845 
846 extern "C" GPU_RT_STUB int64_t agg_sum_skip_val_shared(int64_t* agg,
847  const int64_t val,
848  const int64_t skip_val) {
849  return 0;
850 }
851 extern "C" GPU_RT_STUB int32_t agg_sum_int32_shared(int32_t* agg, const int32_t val) {
852  return 0;
853 }
854 
855 extern "C" GPU_RT_STUB int32_t agg_sum_int32_skip_val_shared(int32_t* agg,
856  const int32_t val,
857  const int32_t skip_val) {
858  return 0;
859 }
860 
861 extern "C" GPU_RT_STUB void agg_sum_double_shared(int64_t* agg, const double val) {}
862 
863 extern "C" GPU_RT_STUB void agg_sum_double_skip_val_shared(int64_t* agg,
864  const double val,
865  const double skip_val) {}
866 extern "C" GPU_RT_STUB void agg_sum_float_shared(int32_t* agg, const float val) {}
867 
868 extern "C" GPU_RT_STUB void agg_sum_float_skip_val_shared(int32_t* agg,
869  const float val,
870  const float skip_val) {}
871 
872 extern "C" GPU_RT_STUB void force_sync() {}
873 
874 extern "C" GPU_RT_STUB void sync_warp() {}
875 extern "C" GPU_RT_STUB void sync_warp_protected(int64_t thread_pos, int64_t row_count) {}
876 extern "C" GPU_RT_STUB void sync_threadblock() {}
877 
878 extern "C" GPU_RT_STUB void write_back_non_grouped_agg(int64_t* input_buffer,
879  int64_t* output_buffer,
880  const int32_t num_agg_cols){};
881 // x64 stride functions
882 
883 extern "C" __attribute__((noinline)) int32_t pos_start_impl(int32_t* error_code) {
884  int32_t row_index_resume{0};
885  if (error_code) {
886  row_index_resume = error_code[0];
887  error_code[0] = 0;
888  }
889  return row_index_resume;
890 }
891 
892 extern "C" __attribute__((noinline)) int32_t group_buff_idx_impl() {
893  return pos_start_impl(nullptr);
894 }
895 
896 extern "C" __attribute__((noinline)) int32_t pos_step_impl() {
897  return 1;
898 }
899 
900 extern "C" GPU_RT_STUB int8_t thread_warp_idx(const int8_t warp_sz) {
901  return 0;
902 }
903 
904 extern "C" GPU_RT_STUB int64_t get_thread_index() {
905  return 0;
906 }
907 
908 extern "C" GPU_RT_STUB int64_t* declare_dynamic_shared_memory() {
909  return nullptr;
910 }
911 
912 extern "C" GPU_RT_STUB int64_t get_block_index() {
913  return 0;
914 }
915 
916 #undef GPU_RT_STUB
917 
918 extern "C" ALWAYS_INLINE int32_t record_error_code(const int32_t err_code,
919  int32_t* error_codes) {
920  // NB: never override persistent error codes (with code greater than zero).
921  // On GPU, a projection query with a limit can run out of slots without it
922  // being an actual error if the limit has been hit. If a persistent error
923  // (division by zero, for example) occurs before running out of slots, we
924  // have to avoid overriding it, because there's a risk that the query would
925  // go through if we override with a potentially benign out-of-slots code.
926  if (err_code && error_codes[pos_start_impl(nullptr)] <= 0) {
927  error_codes[pos_start_impl(nullptr)] = err_code;
928  }
929  return err_code;
930 }
931 
932 // group by helpers
933 
934 extern "C" __attribute__((noinline)) const int64_t* init_shared_mem_nop(
935  const int64_t* groups_buffer,
936  const int32_t groups_buffer_size) {
937  return groups_buffer;
938 }
939 
940 extern "C" __attribute__((noinline)) void write_back_nop(int64_t* dest,
941  int64_t* src,
942  const int32_t sz) {
943  // the body is not really needed, just make sure the call is not optimized away
944  assert(dest);
945 }
946 
947 extern "C" int64_t* init_shared_mem(const int64_t* global_groups_buffer,
948  const int32_t groups_buffer_size) {
949  return nullptr;
950 }
951 
952 extern "C" __attribute__((noinline)) void init_group_by_buffer_gpu(
953  int64_t* groups_buffer,
954  const int64_t* init_vals,
955  const uint32_t groups_buffer_entry_count,
956  const uint32_t key_qw_count,
957  const uint32_t agg_col_count,
958  const bool keyless,
959  const int8_t warp_size) {
960  // the body is not really needed, just make sure the call is not optimized away
961  assert(groups_buffer);
962 }
963 
964 extern "C" __attribute__((noinline)) void init_columnar_group_by_buffer_gpu(
965  int64_t* groups_buffer,
966  const int64_t* init_vals,
967  const uint32_t groups_buffer_entry_count,
968  const uint32_t key_qw_count,
969  const uint32_t agg_col_count,
970  const bool keyless,
971  const bool blocks_share_memory,
972  const int32_t frag_idx) {
973  // the body is not really needed, just make sure the call is not optimized away
974  assert(groups_buffer);
975 }
976 
977 extern "C" __attribute__((noinline)) void init_group_by_buffer_impl(
978  int64_t* groups_buffer,
979  const int64_t* init_vals,
980  const uint32_t groups_buffer_entry_count,
981  const uint32_t key_qw_count,
982  const uint32_t agg_col_count,
983  const bool keyless,
984  const int8_t warp_size) {
985  // the body is not really needed, just make sure the call is not optimized away
986  assert(groups_buffer);
987 }
988 
989 template <typename T>
990 ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
991  const uint32_t h,
992  const T* key,
993  const uint32_t key_count,
994  const uint32_t row_size_quad) {
995  auto off = h * row_size_quad;
996  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
997  if (*row_ptr == get_empty_key<T>()) {
998  memcpy(row_ptr, key, key_count * sizeof(T));
999  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1000  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1001  }
1002  if (memcmp(row_ptr, key, key_count * sizeof(T)) == 0) {
1003  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1004  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1005  }
1006  return nullptr;
1007 }
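// One probe step of the row-wise group-by hash table: if the bucket at position
// h is empty, the key is written there and a pointer to the 64-bit-aligned
// aggregate slots that follow the key is returned; a key match returns the same
// pointer, and nullptr signals a collision so the caller probes another bucket.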
1008 
1009 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
1010  const uint32_t h,
1011  const int64_t* key,
1012  const uint32_t key_count,
1013  const uint32_t key_width,
1014  const uint32_t row_size_quad,
1015  const int64_t* init_vals) {
1016  switch (key_width) {
1017  case 4:
1018  return get_matching_group_value(groups_buffer,
1019  h,
1020  reinterpret_cast<const int32_t*>(key),
1021  key_count,
1022  row_size_quad);
1023  case 8:
1024  return get_matching_group_value(groups_buffer, h, key, key_count, row_size_quad);
1025  default:;
1026  }
1027  return nullptr;
1028 }
1029 
1030 template <typename T>
1031 ALWAYS_INLINE int32_t get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1032  const uint32_t entry_count,
1033  const uint32_t h,
1034  const T* key,
1035  const uint32_t key_count) {
1036  auto off = h;
1037  auto key_buffer = reinterpret_cast<T*>(groups_buffer);
1038  if (key_buffer[off] == get_empty_key<T>()) {
1039  for (size_t i = 0; i < key_count; ++i) {
1040  key_buffer[off] = key[i];
1041  off += entry_count;
1042  }
1043  return h;
1044  }
1045  off = h;
1046  for (size_t i = 0; i < key_count; ++i) {
1047  if (key_buffer[off] != key[i]) {
1048  return -1;
1049  }
1050  off += entry_count;
1051  }
1052  return h;
1053 }
1054 
1055 extern "C" ALWAYS_INLINE int32_t
1056 get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1057  const uint32_t entry_count,
1058  const uint32_t h,
1059  const int64_t* key,
1060  const uint32_t key_count,
1061  const uint32_t key_width) {
1062  switch (key_width) {
1063  case 4:
1064  return get_matching_group_value_columnar_slot(groups_buffer,
1065  entry_count,
1066  h,
1067  reinterpret_cast<const int32_t*>(key),
1068  key_count);
1069  case 8:
1070  return get_matching_group_value_columnar_slot(
1071  groups_buffer, entry_count, h, key, key_count);
1072  default:
1073  return -1;
1074  }
1075  return -1;
1076 }
1077 
1078 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_columnar(
1079  int64_t* groups_buffer,
1080  const uint32_t h,
1081  const int64_t* key,
1082  const uint32_t key_qw_count,
1083  const size_t entry_count) {
1084  auto off = h;
1085  if (groups_buffer[off] == EMPTY_KEY_64) {
1086  for (size_t i = 0; i < key_qw_count; ++i) {
1087  groups_buffer[off] = key[i];
1088  off += entry_count;
1089  }
1090  return &groups_buffer[off];
1091  }
1092  off = h;
1093  for (size_t i = 0; i < key_qw_count; ++i) {
1094  if (groups_buffer[off] != key[i]) {
1095  return nullptr;
1096  }
1097  off += entry_count;
1098  }
1099  return &groups_buffer[off];
1100 }
1101 
1102 /*
1103  * For a particular hashed_index, returns the row-wise offset
1104  * to the first matching agg column in memory.
1105  * It also checks the corresponding group columns, and initializes all the
1106  * keys for the entry if it is still empty (it is assumed all group columns
1107  * are 64-bit wide).
1108  *
1109  * Memory layout:
1110  *
1111  * | prepended group columns (64-bit each) | agg columns |
1112  */
1113 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash(
1114  int64_t* groups_buffer,
1115  const uint32_t hashed_index,
1116  const int64_t* key,
1117  const uint32_t key_count,
1118  const uint32_t row_size_quad) {
1119  uint32_t off = hashed_index * row_size_quad;
1120  if (groups_buffer[off] == EMPTY_KEY_64) {
1121  for (uint32_t i = 0; i < key_count; ++i) {
1122  groups_buffer[off + i] = key[i];
1123  }
1124  }
1125  return groups_buffer + off + key_count;
1126 }
1127 
1134 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash_keyless(
1135  int64_t* groups_buffer,
1136  const uint32_t hashed_index,
1137  const uint32_t row_size_quad) {
1138  return groups_buffer + row_size_quad * hashed_index;
1139 }
1140 
1141 /*
1142  * For a particular hashed_index, find and initialize (if necessary) all the group
1143  * columns corresponding to a key. It is assumed that all group columns are 64-bit wide.
1144  */
1145 extern "C" ALWAYS_INLINE void set_matching_group_value_perfect_hash_columnar(
1146  int64_t* groups_buffer,
1147  const uint32_t hashed_index,
1148  const int64_t* key,
1149  const uint32_t key_count,
1150  const uint32_t entry_count) {
1151  if (groups_buffer[hashed_index] == EMPTY_KEY_64) {
1152  for (uint32_t i = 0; i < key_count; i++) {
1153  groups_buffer[i * entry_count + hashed_index] = key[i];
1154  }
1155  }
1156 }
1157 
1158 #include "GroupByRuntime.cpp"
1160 
1161 extern "C" ALWAYS_INLINE int64_t* get_group_value_fast_keyless(
1162  int64_t* groups_buffer,
1163  const int64_t key,
1164  const int64_t min_key,
1165  const int64_t /* bucket */,
1166  const uint32_t row_size_quad) {
1167  return groups_buffer + row_size_quad * (key - min_key);
1168 }
1169 
1170 extern "C" ALWAYS_INLINE int64_t* get_group_value_fast_keyless_semiprivate(
1171  int64_t* groups_buffer,
1172  const int64_t key,
1173  const int64_t min_key,
1174  const int64_t /* bucket */,
1175  const uint32_t row_size_quad,
1176  const uint8_t thread_warp_idx,
1177  const uint8_t warp_size) {
1178  return groups_buffer + row_size_quad * (warp_size * (key - min_key) + thread_warp_idx);
1179 }
1180 
1181 extern "C" ALWAYS_INLINE int8_t* extract_str_ptr(const uint64_t str_and_len) {
1182  return reinterpret_cast<int8_t*>(str_and_len & 0xffffffffffff);
1183 }
1184 
1185 extern "C" ALWAYS_INLINE int32_t extract_str_len(const uint64_t str_and_len) {
1186  return static_cast<int64_t>(str_and_len) >> 48;
1187 }
1188 
1189 extern "C" __attribute__((noinline)) int8_t* extract_str_ptr_noinline(
1190  const uint64_t str_and_len) {
1191  return extract_str_ptr(str_and_len);
1192 }
1193 
1194 extern "C" __attribute__((noinline)) int32_t extract_str_len_noinline(
1195  const uint64_t str_and_len) {
1196  return extract_str_len(str_and_len);
1197 }
1198 
1199 extern "C" ALWAYS_INLINE uint64_t string_pack(const int8_t* ptr, const int32_t len) {
1200  return (reinterpret_cast<const uint64_t>(ptr) & 0xffffffffffff) |
1201  (static_cast<const uint64_t>(len) << 48);
1202 }
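// Strings are passed around as a single uint64_t: the low 48 bits carry the
// payload pointer and the high 16 bits the length, which extract_str_ptr /
// extract_str_len unpack (the length via an arithmetic shift of the signed value).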
1203 
1204 #ifdef __clang__
1205 #include "../Utils/StringLike.cpp"
1206 #endif
1207 
1208 #ifndef __CUDACC__
1209 #include "TopKRuntime.cpp"
1210 #endif
1211 
1212 extern "C" ALWAYS_INLINE DEVICE int32_t char_length(const char* str,
1213  const int32_t str_len) {
1214  return str_len;
1215 }
1216 
1217 extern "C" ALWAYS_INLINE DEVICE int32_t char_length_nullable(const char* str,
1218  const int32_t str_len,
1219  const int32_t int_null) {
1220  if (!str) {
1221  return int_null;
1222  }
1223  return str_len;
1224 }
1225 
1226 extern "C" ALWAYS_INLINE DEVICE int32_t key_for_string_encoded(const int32_t str_id) {
1227  return str_id;
1228 }
1229 
1230 extern "C" ALWAYS_INLINE int64_t row_number_window_func(const int64_t output_buff,
1231  const int64_t pos) {
1232  return reinterpret_cast<const int64_t*>(output_buff)[pos];
1233 }
1234 
1235 extern "C" ALWAYS_INLINE double percent_window_func(const int64_t output_buff,
1236  const int64_t pos) {
1237  return reinterpret_cast<const double*>(output_buff)[pos];
1238 }
1239 
1240 extern "C" ALWAYS_INLINE double load_double(const int64_t* agg) {
1241  return *reinterpret_cast<const double*>(may_alias_ptr(agg));
1242 }
1243 
1244 extern "C" ALWAYS_INLINE float load_float(const int32_t* agg) {
1245  return *reinterpret_cast<const float*>(may_alias_ptr(agg));
1246 }
1247 
1248 extern "C" ALWAYS_INLINE double load_avg_int(const int64_t* sum,
1249  const int64_t* count,
1250  const double null_val) {
1251  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
1252 }
1253 
1254 extern "C" ALWAYS_INLINE double load_avg_decimal(const int64_t* sum,
1255  const int64_t* count,
1256  const double null_val,
1257  const uint32_t scale) {
1258  return *count != 0 ? (static_cast<double>(*sum) / pow(10, scale)) / *count : null_val;
1259 }
1260 
1261 extern "C" ALWAYS_INLINE double load_avg_double(const int64_t* agg,
1262  const int64_t* count,
1263  const double null_val) {
1264  return *count != 0 ? *reinterpret_cast<const double*>(may_alias_ptr(agg)) / *count
1265  : null_val;
1266 }
1267 
1268 extern "C" ALWAYS_INLINE double load_avg_float(const int32_t* agg,
1269  const int32_t* count,
1270  const double null_val) {
1271  return *count != 0 ? *reinterpret_cast<const float*>(may_alias_ptr(agg)) / *count
1272  : null_val;
1273 }
1274 
1275 extern "C" NEVER_INLINE void linear_probabilistic_count(uint8_t* bitmap,
1276  const uint32_t bitmap_bytes,
1277  const uint8_t* key_bytes,
1278  const uint32_t key_len) {
1279  const uint32_t bit_pos = MurmurHash1(key_bytes, key_len, 0) % (bitmap_bytes * 8);
1280  const uint32_t word_idx = bit_pos / 32;
1281  const uint32_t bit_idx = bit_pos % 32;
1282  reinterpret_cast<uint32_t*>(bitmap)[word_idx] |= 1 << bit_idx;
1283 }
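// Bloom-filter-like update with a single hash: one MurmurHash of the key selects
// a bit in the bitmap; the approximate distinct count is presumably derived
// elsewhere from the fraction of bits still unset (linear probabilistic counting).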
1284 
1285 extern "C" __attribute__((noinline)) void query_stub_hoisted_literals(
1286  const int8_t** col_buffers,
1287  const int8_t* literals,
1288  const int64_t* num_rows,
1289  const uint64_t* frag_row_offsets,
1290  const int32_t* max_matched,
1291  const int64_t* init_agg_value,
1292  int64_t** out,
1293  uint32_t frag_idx,
1294  const int64_t* join_hash_tables,
1295  int32_t* error_code,
1296  int32_t* total_matched) {
1297  assert(col_buffers || literals || num_rows || frag_row_offsets || max_matched ||
1298  init_agg_value || out || frag_idx || error_code || join_hash_tables ||
1299  total_matched);
1300 }
1301 
1302 extern "C" void multifrag_query_hoisted_literals(const int8_t*** col_buffers,
1303  const uint64_t* num_fragments,
1304  const int8_t* literals,
1305  const int64_t* num_rows,
1306  const uint64_t* frag_row_offsets,
1307  const int32_t* max_matched,
1308  int32_t* total_matched,
1309  const int64_t* init_agg_value,
1310  int64_t** out,
1311  int32_t* error_code,
1312  const uint32_t* num_tables_ptr,
1313  const int64_t* join_hash_tables) {
1314  for (uint32_t i = 0; i < *num_fragments; ++i) {
1315  query_stub_hoisted_literals(col_buffers ? col_buffers[i] : nullptr,
1316  literals,
1317  &num_rows[i * (*num_tables_ptr)],
1318  &frag_row_offsets[i * (*num_tables_ptr)],
1319  max_matched,
1320  init_agg_value,
1321  out,
1322  i,
1323  join_hash_tables,
1324  total_matched,
1325  error_code);
1326  }
1327 }
1328 
1329 extern "C" __attribute__((noinline)) void query_stub(const int8_t** col_buffers,
1330  const int64_t* num_rows,
1331  const uint64_t* frag_row_offsets,
1332  const int32_t* max_matched,
1333  const int64_t* init_agg_value,
1334  int64_t** out,
1335  uint32_t frag_idx,
1336  const int64_t* join_hash_tables,
1337  int32_t* error_code,
1338  int32_t* total_matched) {
1339  assert(col_buffers || num_rows || frag_row_offsets || max_matched || init_agg_value ||
1340  out || frag_idx || error_code || join_hash_tables || total_matched);
1341 }
1342 
1343 extern "C" void multifrag_query(const int8_t*** col_buffers,
1344  const uint64_t* num_fragments,
1345  const int64_t* num_rows,
1346  const uint64_t* frag_row_offsets,
1347  const int32_t* max_matched,
1348  int32_t* total_matched,
1349  const int64_t* init_agg_value,
1350  int64_t** out,
1351  int32_t* error_code,
1352  const uint32_t* num_tables_ptr,
1353  const int64_t* join_hash_tables) {
1354  for (uint32_t i = 0; i < *num_fragments; ++i) {
1355  query_stub(col_buffers ? col_buffers[i] : nullptr,
1356  &num_rows[i * (*num_tables_ptr)],
1357  &frag_row_offsets[i * (*num_tables_ptr)],
1358  max_matched,
1359  init_agg_value,
1360  out,
1361  i,
1362  join_hash_tables,
1363  total_matched,
1364  error_code);
1365  }
1366 }
1367 
1368 extern "C" ALWAYS_INLINE DEVICE bool check_interrupt() {
1369  if (check_interrupt_init(static_cast<unsigned>(INT_CHECK))) {
1370  return true;
1371  }
1372  return false;
1373 }
1374 
1375 extern "C" bool check_interrupt_init(unsigned command) {
1376  static std::atomic_bool runtime_interrupt_flag{false};
1377 
1378  if (command == static_cast<unsigned>(INT_CHECK)) {
1379  if (runtime_interrupt_flag.load()) {
1380  return true;
1381  }
1382  return false;
1383  }
1384  if (command == static_cast<unsigned>(INT_ABORT)) {
1385  runtime_interrupt_flag.store(true);
1386  return false;
1387  }
1388  if (command == static_cast<unsigned>(INT_RESET)) {
1389  runtime_interrupt_flag.store(false);
1390  return false;
1391  }
1392  return false;
1393 }
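// A single process-wide std::atomic_bool backs all three commands: INT_CHECK
// reads the flag, INT_ABORT sets it, and INT_RESET clears it, so a CPU kernel
// can poll check_interrupt() while another thread requests cancellation.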