OmniSciDB  2e3a973ef4
RuntimeFunctions.cpp
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef __CUDACC__
18 #error This code is not intended to be compiled with a CUDA C++ compiler
19 #endif // __CUDACC__
20 
21 #include "RuntimeFunctions.h"
22 #include "../Shared/funcannotations.h"
23 #include "BufferCompaction.h"
24 #include "HyperLogLogRank.h"
25 #include "MurmurHash.h"
26 #include "TypePunning.h"
27 
28 #include <algorithm>
29 #include <atomic>
30 #include <chrono>
31 #include <cmath>
32 #include <cstring>
33 #include <thread>
34 #include <tuple>
35 
36 // decoder implementations
37 
38 #include "DecodersImpl.h"
39 
40 // arithmetic operator implementations
41 
42 #define DEF_ARITH_NULLABLE(type, null_type, opname, opsym) \
43  extern "C" ALWAYS_INLINE type opname##_##type##_nullable( \
44  const type lhs, const type rhs, const null_type null_val) { \
45  if (lhs != null_val && rhs != null_val) { \
46  return lhs opsym rhs; \
47  } \
48  return null_val; \
49  }
50 
51 #define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym) \
52  extern "C" ALWAYS_INLINE type opname##_##type##_nullable_lhs( \
53  const type lhs, const type rhs, const null_type null_val) { \
54  if (lhs != null_val) { \
55  return lhs opsym rhs; \
56  } \
57  return null_val; \
58  }
59 
60 #define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym) \
61  extern "C" ALWAYS_INLINE type opname##_##type##_nullable_rhs( \
62  const type lhs, const type rhs, const null_type null_val) { \
63  if (rhs != null_val) { \
64  return lhs opsym rhs; \
65  } \
66  return null_val; \
67  }
68 
69 #define DEF_CMP_NULLABLE(type, null_type, opname, opsym) \
70  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable( \
71  const type lhs, \
72  const type rhs, \
73  const null_type null_val, \
74  const int8_t null_bool_val) { \
75  if (lhs != null_val && rhs != null_val) { \
76  return lhs opsym rhs; \
77  } \
78  return null_bool_val; \
79  }
80 
81 #define DEF_CMP_NULLABLE_LHS(type, null_type, opname, opsym) \
82  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable_lhs( \
83  const type lhs, \
84  const type rhs, \
85  const null_type null_val, \
86  const int8_t null_bool_val) { \
87  if (lhs != null_val) { \
88  return lhs opsym rhs; \
89  } \
90  return null_bool_val; \
91  }
92 
93 #define DEF_CMP_NULLABLE_RHS(type, null_type, opname, opsym) \
94  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable_rhs( \
95  const type lhs, \
96  const type rhs, \
97  const null_type null_val, \
98  const int8_t null_bool_val) { \
99  if (rhs != null_val) { \
100  return lhs opsym rhs; \
101  } \
102  return null_bool_val; \
103  }
104 
105 #define DEF_SAFE_DIV_NULLABLE(type, null_type, opname) \
106  extern "C" ALWAYS_INLINE type safe_div_##type( \
107  const type lhs, const type rhs, const null_type null_val) { \
108  if (lhs != null_val && rhs != null_val && rhs != 0) { \
109  return lhs / rhs; \
110  } \
111  return null_val; \
112  }
113 
114 #define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type) \
115  DEF_ARITH_NULLABLE(type, null_type, add, +) \
116  DEF_ARITH_NULLABLE(type, null_type, sub, -) \
117  DEF_ARITH_NULLABLE(type, null_type, mul, *) \
118  DEF_ARITH_NULLABLE(type, null_type, div, /) \
119  DEF_SAFE_DIV_NULLABLE(type, null_type, safe_div) \
120  DEF_ARITH_NULLABLE_LHS(type, null_type, add, +) \
121  DEF_ARITH_NULLABLE_LHS(type, null_type, sub, -) \
122  DEF_ARITH_NULLABLE_LHS(type, null_type, mul, *) \
123  DEF_ARITH_NULLABLE_LHS(type, null_type, div, /) \
124  DEF_ARITH_NULLABLE_RHS(type, null_type, add, +) \
125  DEF_ARITH_NULLABLE_RHS(type, null_type, sub, -) \
126  DEF_ARITH_NULLABLE_RHS(type, null_type, mul, *) \
127  DEF_ARITH_NULLABLE_RHS(type, null_type, div, /) \
128  DEF_CMP_NULLABLE(type, null_type, eq, ==) \
129  DEF_CMP_NULLABLE(type, null_type, ne, !=) \
130  DEF_CMP_NULLABLE(type, null_type, lt, <) \
131  DEF_CMP_NULLABLE(type, null_type, gt, >) \
132  DEF_CMP_NULLABLE(type, null_type, le, <=) \
133  DEF_CMP_NULLABLE(type, null_type, ge, >=) \
134  DEF_CMP_NULLABLE_LHS(type, null_type, eq, ==) \
135  DEF_CMP_NULLABLE_LHS(type, null_type, ne, !=) \
136  DEF_CMP_NULLABLE_LHS(type, null_type, lt, <) \
137  DEF_CMP_NULLABLE_LHS(type, null_type, gt, >) \
138  DEF_CMP_NULLABLE_LHS(type, null_type, le, <=) \
139  DEF_CMP_NULLABLE_LHS(type, null_type, ge, >=) \
140  DEF_CMP_NULLABLE_RHS(type, null_type, eq, ==) \
141  DEF_CMP_NULLABLE_RHS(type, null_type, ne, !=) \
142  DEF_CMP_NULLABLE_RHS(type, null_type, lt, <) \
143  DEF_CMP_NULLABLE_RHS(type, null_type, gt, >) \
144  DEF_CMP_NULLABLE_RHS(type, null_type, le, <=) \
145  DEF_CMP_NULLABLE_RHS(type, null_type, ge, >=)
146 
147 DEF_BINARY_NULLABLE_ALL_OPS(int8_t, int64_t)
148 DEF_BINARY_NULLABLE_ALL_OPS(int16_t, int64_t)
149 DEF_BINARY_NULLABLE_ALL_OPS(int32_t, int64_t)
150 DEF_BINARY_NULLABLE_ALL_OPS(int64_t, int64_t)
151 DEF_BINARY_NULLABLE_ALL_OPS(float, float)
152 DEF_BINARY_NULLABLE_ALL_OPS(double, double)
153 DEF_ARITH_NULLABLE(int8_t, int64_t, mod, %)
154 DEF_ARITH_NULLABLE(int16_t, int64_t, mod, %)
155 DEF_ARITH_NULLABLE(int32_t, int64_t, mod, %)
156 DEF_ARITH_NULLABLE(int64_t, int64_t, mod, %)
157 DEF_ARITH_NULLABLE_LHS(int8_t, int64_t, mod, %)
158 DEF_ARITH_NULLABLE_LHS(int16_t, int64_t, mod, %)
159 DEF_ARITH_NULLABLE_LHS(int32_t, int64_t, mod, %)
160 DEF_ARITH_NULLABLE_LHS(int64_t, int64_t, mod, %)
161 DEF_ARITH_NULLABLE_RHS(int8_t, int64_t, mod, %)
162 DEF_ARITH_NULLABLE_RHS(int16_t, int64_t, mod, %)
163 DEF_ARITH_NULLABLE_RHS(int32_t, int64_t, mod, %)
164 DEF_ARITH_NULLABLE_RHS(int64_t, int64_t, mod, %)
165 
166 #undef DEF_BINARY_NULLABLE_ALL_OPS
167 #undef DEF_SAFE_DIV_NULLABLE
168 #undef DEF_CMP_NULLABLE_RHS
169 #undef DEF_CMP_NULLABLE_LHS
170 #undef DEF_CMP_NULLABLE
171 #undef DEF_ARITH_NULLABLE_RHS
172 #undef DEF_ARITH_NULLABLE_LHS
173 #undef DEF_ARITH_NULLABLE
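// As an illustration of the macros above (not part of the original source),
// DEF_ARITH_NULLABLE(int32_t, int64_t, add, +) generates a function equivalent to:
//
//   extern "C" ALWAYS_INLINE int32_t add_int32_t_nullable(
//       const int32_t lhs, const int32_t rhs, const int64_t null_val) {
//     if (lhs != null_val && rhs != null_val) {
//       return lhs + rhs;
//     }
//     return null_val;
//   }
//
// The _lhs/_rhs variants only null-check the side that can be NULL, and the
// comparison variants return an int8_t boolean with a separate null_bool_val sentinel.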
174 
175 extern "C" ALWAYS_INLINE int64_t scale_decimal_up(const int64_t operand,
176  const uint64_t scale,
177  const int64_t operand_null_val,
178  const int64_t result_null_val) {
179  return operand != operand_null_val ? operand * scale : result_null_val;
180 }
181 
182 extern "C" ALWAYS_INLINE int64_t scale_decimal_down_nullable(const int64_t operand,
183  const int64_t scale,
184  const int64_t null_val) {
185  // rounded scale down of a decimal
186  if (operand == null_val) {
187  return null_val;
188  }
189 
190  int64_t tmp = scale >> 1;
191  tmp = operand >= 0 ? operand + tmp : operand - tmp;
192  return tmp / scale;
193 }
194 
195 extern "C" ALWAYS_INLINE int64_t scale_decimal_down_not_nullable(const int64_t operand,
196  const int64_t scale,
197  const int64_t null_val) {
198  int64_t tmp = scale >> 1;
199  tmp = operand >= 0 ? operand + tmp : operand - tmp;
200  return tmp / scale;
201 }
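// Worked example (illustrative): with scale = 100, i.e. dropping two decimal digits,
// scale_decimal_down_nullable(1449, 100, null) adds scale / 2 = 50 before dividing:
// (1449 + 50) / 100 == 14, while 1450 rounds up to 15 and -1449 rounds to -14.
// This is round-half-away-from-zero; the _not_nullable variant only skips the null check.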
202 
203 // Return floor(dividend / divisor).
204 // Assumes 0 < divisor.
205 extern "C" ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend,
206  const int64_t divisor) {
207  return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
208 }
209 
210 // Return floor(dividend / divisor) or NULL if dividend IS NULL.
211 // Assumes 0 < divisor.
212 extern "C" ALWAYS_INLINE int64_t floor_div_nullable_lhs(const int64_t dividend,
213  const int64_t divisor,
214  const int64_t null_val) {
215  return dividend == null_val ? null_val : floor_div_lhs(dividend, divisor);
216 }
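// Illustrative example: plain C++ division truncates toward zero (-7 / 2 == -3),
// whereas floor_div_lhs(-7, 2) biases a negative dividend first:
// (-7 - (2 - 1)) / 2 == -8 / 2 == -4, i.e. floor(-3.5). For non-negative dividends
// the two agree: floor_div_lhs(7, 2) == 3.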
217 
218 #define DEF_UMINUS_NULLABLE(type, null_type) \
219  extern "C" ALWAYS_INLINE type uminus_##type##_nullable(const type operand, \
220  const null_type null_val) { \
221  return operand == null_val ? null_val : -operand; \
222  }
223 
224 DEF_UMINUS_NULLABLE(int8_t, int8_t)
225 DEF_UMINUS_NULLABLE(int16_t, int16_t)
226 DEF_UMINUS_NULLABLE(int32_t, int32_t)
227 DEF_UMINUS_NULLABLE(int64_t, int64_t)
228 DEF_UMINUS_NULLABLE(float, float)
229 DEF_UMINUS_NULLABLE(double, double)
230 
231 #undef DEF_UMINUS_NULLABLE
232 
233 #define DEF_CAST_NULLABLE(from_type, to_type) \
234  extern "C" ALWAYS_INLINE to_type cast_##from_type##_to_##to_type##_nullable( \
235  const from_type operand, \
236  const from_type from_null_val, \
237  const to_type to_null_val) { \
238  return operand == from_null_val ? to_null_val : operand; \
239  }
240 
241 #define DEF_CAST_NULLABLE_BIDIR(type1, type2) \
242  DEF_CAST_NULLABLE(type1, type2) \
243  DEF_CAST_NULLABLE(type2, type1)
244 
245 DEF_CAST_NULLABLE_BIDIR(int8_t, int16_t)
246 DEF_CAST_NULLABLE_BIDIR(int8_t, int32_t)
247 DEF_CAST_NULLABLE_BIDIR(int8_t, int64_t)
248 DEF_CAST_NULLABLE_BIDIR(int16_t, int32_t)
249 DEF_CAST_NULLABLE_BIDIR(int16_t, int64_t)
250 DEF_CAST_NULLABLE_BIDIR(int32_t, int64_t)
251 DEF_CAST_NULLABLE_BIDIR(float, double)
252 DEF_CAST_NULLABLE_BIDIR(float, int8_t)
253 DEF_CAST_NULLABLE_BIDIR(float, int16_t)
254 DEF_CAST_NULLABLE_BIDIR(float, int32_t)
255 DEF_CAST_NULLABLE_BIDIR(float, int64_t)
256 DEF_CAST_NULLABLE_BIDIR(double, int8_t)
257 DEF_CAST_NULLABLE_BIDIR(double, int16_t)
258 DEF_CAST_NULLABLE_BIDIR(double, int32_t)
259 DEF_CAST_NULLABLE_BIDIR(double, int64_t)
260 DEF_CAST_NULLABLE(uint8_t, int32_t)
261 DEF_CAST_NULLABLE(uint16_t, int32_t)
262 
263 #undef DEF_CAST_NULLABLE_BIDIR
264 #undef DEF_CAST_NULLABLE
265 
266 extern "C" ALWAYS_INLINE int8_t logical_not(const int8_t operand, const int8_t null_val) {
267  return operand == null_val ? operand : (operand ? 0 : 1);
268 }
269 
270 extern "C" ALWAYS_INLINE int8_t logical_and(const int8_t lhs,
271  const int8_t rhs,
272  const int8_t null_val) {
273  if (lhs == null_val) {
274  return rhs == 0 ? rhs : null_val;
275  }
276  if (rhs == null_val) {
277  return lhs == 0 ? lhs : null_val;
278  }
279  return (lhs && rhs) ? 1 : 0;
280 }
281 
282 extern "C" ALWAYS_INLINE int8_t logical_or(const int8_t lhs,
283  const int8_t rhs,
284  const int8_t null_val) {
285  if (lhs == null_val) {
286  return rhs == 0 ? null_val : rhs;
287  }
288  if (rhs == null_val) {
289  return lhs == 0 ? null_val : lhs;
290  }
291  return (lhs || rhs) ? 1 : 0;
292 }
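// The three functions above implement SQL/Kleene three-valued logic, with null_val
// standing in for NULL. Illustrative truth table (T = nonzero, F = 0, N = null_val):
//   NOT N == N          N AND F == F        N OR T == T
//   NOT T == F          N AND T == N        N OR F == N
//   NOT F == T          N AND N == N        N OR N == N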
293 
294 // aggregator implementations
295 
296 extern "C" ALWAYS_INLINE uint64_t agg_count(uint64_t* agg, const int64_t) {
297  return (*agg)++;
298 }
299 
300 extern "C" ALWAYS_INLINE void agg_count_distinct_bitmap(int64_t* agg,
301  const int64_t val,
302  const int64_t min_val) {
303  const uint64_t bitmap_idx = val - min_val;
304  reinterpret_cast<int8_t*>(*agg)[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
305 }
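// Illustrative example: *agg holds a pointer to a bitmap sized for the value range.
// With min_val == 100 and val == 113, bitmap_idx == 13, so bit 5 (13 & 7) of byte
// 1 (13 >> 3) is set. COUNT(DISTINCT x) is then the population count of the bitmap.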
306 
307 #define GPU_RT_STUB NEVER_INLINE __attribute__((optnone))
308 
309 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_gpu(int64_t*,
310  const int64_t,
311  const int64_t,
312  const int64_t,
313  const int64_t,
314  const uint64_t,
315  const uint64_t) {}
316 
317 extern "C" NEVER_INLINE void agg_approximate_count_distinct(int64_t* agg,
318  const int64_t key,
319  const uint32_t b) {
320  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
321  const uint32_t index = hash >> (64 - b);
322  const uint8_t rank = get_rank(hash << b, 64 - b);
323  uint8_t* M = reinterpret_cast<uint8_t*>(*agg);
324  M[index] = std::max(M[index], rank);
325 }
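// Sketch of the HyperLogLog update above (illustrative): the top b bits of the
// 64-bit Murmur hash pick one of 2^b registers, and the rank of the remaining bits
// (position of the leading 1, via get_rank) is max-merged into that register.
// E.g. with b == 11 there are 2048 one-byte registers and index == hash >> 53.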
326 
327 extern "C" GPU_RT_STUB void agg_approximate_count_distinct_gpu(int64_t*,
328  const int64_t,
329  const uint32_t,
330  const int64_t,
331  const int64_t) {}
332 
333 extern "C" ALWAYS_INLINE int8_t bit_is_set(const int64_t bitset,
334  const int64_t val,
335  const int64_t min_val,
336  const int64_t max_val,
337  const int64_t null_val,
338  const int8_t null_bool_val) {
339  if (val == null_val) {
340  return null_bool_val;
341  }
342  if (val < min_val || val > max_val) {
343  return 0;
344  }
345  if (!bitset) {
346  return 0;
347  }
348  const uint64_t bitmap_idx = val - min_val;
349  return (reinterpret_cast<const int8_t*>(bitset))[bitmap_idx >> 3] &
350  (1 << (bitmap_idx & 7))
351  ? 1
352  : 0;
353 }
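// Illustrative semantics: bit_is_set answers "was val inserted by
// agg_count_distinct_bitmap?". It propagates NULL inputs as null_bool_val, treats
// out-of-range values (and a null bitset) as not present, and otherwise tests the
// same (val - min_val) bit position used when the bitmap was built.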
354 
355 extern "C" ALWAYS_INLINE int64_t agg_sum(int64_t* agg, const int64_t val) {
356  const auto old = *agg;
357  *agg += val;
358  return old;
359 }
360 
361 extern "C" ALWAYS_INLINE void agg_max(int64_t* agg, const int64_t val) {
362  *agg = std::max(*agg, val);
363 }
364 
365 extern "C" ALWAYS_INLINE void agg_min(int64_t* agg, const int64_t val) {
366  *agg = std::min(*agg, val);
367 }
368 
369 extern "C" ALWAYS_INLINE void agg_id(int64_t* agg, const int64_t val) {
370  *agg = val;
371 }
372 
373 extern "C" ALWAYS_INLINE int32_t checked_single_agg_id(int64_t* agg,
374  const int64_t val,
375  const int64_t null_val) {
376  if (val == null_val) {
377  return 0;
378  }
379 
380  if (*agg == val) {
381  return 0;
382  } else if (*agg == null_val) {
383  *agg = val;
384  return 0;
385  } else {
386  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
387  return 15;
388  }
389 }
390 
391 extern "C" ALWAYS_INLINE void agg_count_distinct_bitmap_skip_val(int64_t* agg,
392  const int64_t val,
393  const int64_t min_val,
394  const int64_t skip_val) {
395  if (val != skip_val) {
396  agg_count_distinct_bitmap(agg, val, min_val);
397  }
398 }
399 
399 
400 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_skip_val_gpu(int64_t*,
401  const int64_t,
402  const int64_t,
403  const int64_t,
404  const int64_t,
405  const int64_t,
406  const uint64_t,
407  const uint64_t) {}
408 
409 extern "C" ALWAYS_INLINE uint32_t agg_count_int32(uint32_t* agg, const int32_t) {
410  return (*agg)++;
411 }
412 
413 extern "C" ALWAYS_INLINE int32_t agg_sum_int32(int32_t* agg, const int32_t val) {
414  const auto old = *agg;
415  *agg += val;
416  return old;
417 }
418 
419 #define DEF_AGG_MAX_INT(n) \
420  extern "C" ALWAYS_INLINE void agg_max_int##n(int##n##_t* agg, const int##n##_t val) { \
421  *agg = std::max(*agg, val); \
422  }
423 
424 DEF_AGG_MAX_INT(32)
425 DEF_AGG_MAX_INT(16)
426 DEF_AGG_MAX_INT(8)
427 #undef DEF_AGG_MAX_INT
428 
429 #define DEF_AGG_MIN_INT(n) \
430  extern "C" ALWAYS_INLINE void agg_min_int##n(int##n##_t* agg, const int##n##_t val) { \
431  *agg = std::min(*agg, val); \
432  }
433 
434 DEF_AGG_MIN_INT(32)
435 DEF_AGG_MIN_INT(16)
436 DEF_AGG_MIN_INT(8)
437 #undef DEF_AGG_MIN_INT
438 
439 #define DEF_AGG_ID_INT(n) \
440  extern "C" ALWAYS_INLINE void agg_id_int##n(int##n##_t* agg, const int##n##_t val) { \
441  *agg = val; \
442  }
443 
444 #define DEF_CHECKED_SINGLE_AGG_ID_INT(n) \
445  extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_int##n( \
446  int##n##_t* agg, const int##n##_t val, const int##n##_t null_val) { \
447  if (val == null_val) { \
448  return 0; \
449  } \
450  if (*agg == val) { \
451  return 0; \
452  } else if (*agg == null_val) { \
453  *agg = val; \
454  return 0; \
455  } else { \
456  /* see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES*/ \
457  return 15; \
458  } \
459  }
460 
461 DEF_AGG_ID_INT(32)
462 DEF_AGG_ID_INT(16)
463 DEF_AGG_ID_INT(8)
464 
465 DEF_CHECKED_SINGLE_AGG_ID_INT(32)
466 DEF_CHECKED_SINGLE_AGG_ID_INT(16)
467 DEF_CHECKED_SINGLE_AGG_ID_INT(8)
468 
469 #undef DEF_AGG_ID_INT
470 #undef DEF_CHECKED_SINGLE_AGG_ID_INT
471 
472 #define DEF_WRITE_PROJECTION_INT(n) \
473  extern "C" ALWAYS_INLINE void write_projection_int##n( \
474  int8_t* slot_ptr, const int##n##_t val, const int64_t init_val) { \
475  if (val != init_val) { \
476  *reinterpret_cast<int##n##_t*>(slot_ptr) = val; \
477  } \
478  }
479 
480 DEF_WRITE_PROJECTION_INT(64)
481 DEF_WRITE_PROJECTION_INT(32)
482 #undef DEF_WRITE_PROJECTION_INT
483 
484 extern "C" ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t* agg,
485  const int64_t val,
486  const int64_t skip_val) {
487  const auto old = *agg;
488  if (val != skip_val) {
489  if (old != skip_val) {
490  return agg_sum(agg, val);
491  } else {
492  *agg = val;
493  }
494  }
495  return old;
496 }
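// Note (illustrative): skip_val doubles as the "empty accumulator" marker here.
// Starting from *agg == skip_val, agg_sum_skip_val(agg, 5, skip_val) stores 5
// directly, and a later agg_sum_skip_val(agg, 7, skip_val) adds to give 12;
// values equal to skip_val leave the accumulator untouched.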
497 
498 extern "C" ALWAYS_INLINE int32_t agg_sum_int32_skip_val(int32_t* agg,
499  const int32_t val,
500  const int32_t skip_val) {
501  const auto old = *agg;
502  if (val != skip_val) {
503  if (old != skip_val) {
504  return agg_sum_int32(agg, val);
505  } else {
506  *agg = val;
507  }
508  }
509  return old;
510 }
511 
512 extern "C" ALWAYS_INLINE uint64_t agg_count_skip_val(uint64_t* agg,
513  const int64_t val,
514  const int64_t skip_val) {
515  if (val != skip_val) {
516  return agg_count(agg, val);
517  }
518  return *agg;
519 }
520 
521 extern "C" ALWAYS_INLINE uint32_t agg_count_int32_skip_val(uint32_t* agg,
522  const int32_t val,
523  const int32_t skip_val) {
524  if (val != skip_val) {
525  return agg_count_int32(agg, val);
526  }
527  return *agg;
528 }
529 
530 #define DEF_SKIP_AGG_ADD(base_agg_func) \
531  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
532  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
533  if (val != skip_val) { \
534  base_agg_func(agg, val); \
535  } \
536  }
537 
538 #define DEF_SKIP_AGG(base_agg_func) \
539  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
540  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
541  if (val != skip_val) { \
542  const DATA_T old_agg = *agg; \
543  if (old_agg != skip_val) { \
544  base_agg_func(agg, val); \
545  } else { \
546  *agg = val; \
547  } \
548  } \
549  }
550 
551 #define DATA_T int64_t
552 DEF_SKIP_AGG(agg_max)
553 DEF_SKIP_AGG(agg_min)
554 #undef DATA_T
555 
556 #define DATA_T int32_t
557 DEF_SKIP_AGG(agg_max_int32)
558 DEF_SKIP_AGG(agg_min_int32)
559 #undef DATA_T
560 
561 #define DATA_T int16_t
562 DEF_SKIP_AGG(agg_max_int16)
563 DEF_SKIP_AGG(agg_min_int16)
564 #undef DATA_T
565 
566 #define DATA_T int8_t
567 DEF_SKIP_AGG(agg_max_int8)
568 DEF_SKIP_AGG(agg_min_int8)
569 #undef DATA_T
570 
571 #undef DEF_SKIP_AGG_ADD
572 #undef DEF_SKIP_AGG
573 
574 // TODO(alex): fix signature
575 
576 extern "C" ALWAYS_INLINE uint64_t agg_count_double(uint64_t* agg, const double val) {
577  return (*agg)++;
578 }
579 
580 extern "C" ALWAYS_INLINE void agg_sum_double(int64_t* agg, const double val) {
581  const auto r = *reinterpret_cast<const double*>(agg) + val;
582  *agg = *reinterpret_cast<const int64_t*>(may_alias_ptr(&r));
583 }
584 
585 extern "C" ALWAYS_INLINE void agg_max_double(int64_t* agg, const double val) {
586  const auto r = std::max(*reinterpret_cast<const double*>(agg), val);
587  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
588 }
589 
590 extern "C" ALWAYS_INLINE void agg_min_double(int64_t* agg, const double val) {
591  const auto r = std::min(*reinterpret_cast<const double*>(agg), val);
592  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
593 }
594 
595 extern "C" ALWAYS_INLINE void agg_id_double(int64_t* agg, const double val) {
596  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
597 }
598 
599 extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_double(int64_t* agg,
600  const double val,
601  const double null_val) {
602  if (val == null_val) {
603  return 0;
604  }
605 
606  if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)))) {
607  return 0;
608  } else if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&null_val)))) {
609  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
610  return 0;
611  } else {
612  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
613  return 15;
614  }
615 }
616 
617 extern "C" ALWAYS_INLINE uint32_t agg_count_float(uint32_t* agg, const float val) {
618  return (*agg)++;
619 }
620 
621 extern "C" ALWAYS_INLINE void agg_sum_float(int32_t* agg, const float val) {
622  const auto r = *reinterpret_cast<const float*>(agg) + val;
623  *agg = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
624 }
625 
626 extern "C" ALWAYS_INLINE void agg_max_float(int32_t* agg, const float val) {
627  const auto r = std::max(*reinterpret_cast<const float*>(agg), val);
628  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
629 }
630 
631 extern "C" ALWAYS_INLINE void agg_min_float(int32_t* agg, const float val) {
632  const auto r = std::min(*reinterpret_cast<const float*>(agg), val);
633  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
634 }
635 
636 extern "C" ALWAYS_INLINE void agg_id_float(int32_t* agg, const float val) {
637  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
638 }
639 
640 extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_float(int32_t* agg,
641  const float val,
642  const float null_val) {
643  if (val == null_val) {
644  return 0;
645  }
646 
647  if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)))) {
648  return 0;
649  } else if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&null_val)))) {
650  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
651  return 0;
652  } else {
653  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
654  return 15;
655  }
656 }
657 
658 extern "C" ALWAYS_INLINE uint64_t agg_count_double_skip_val(uint64_t* agg,
659  const double val,
660  const double skip_val) {
661  if (val != skip_val) {
662  return agg_count_double(agg, val);
663  }
664  return *agg;
665 }
666 
667 extern "C" ALWAYS_INLINE uint32_t agg_count_float_skip_val(uint32_t* agg,
668  const float val,
669  const float skip_val) {
670  if (val != skip_val) {
671  return agg_count_float(agg, val);
672  }
673  return *agg;
674 }
675 
676 #define DEF_SKIP_AGG_ADD(base_agg_func) \
677  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
678  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
679  if (val != skip_val) { \
680  base_agg_func(agg, val); \
681  } \
682  }
683 
684 #define DEF_SKIP_AGG(base_agg_func) \
685  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
686  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
687  if (val != skip_val) { \
688  const ADDR_T old_agg = *agg; \
689  if (old_agg != *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&skip_val))) { \
690  base_agg_func(agg, val); \
691  } else { \
692  *agg = *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&val)); \
693  } \
694  } \
695  }
696 
697 #define DATA_T double
698 #define ADDR_T int64_t
699 DEF_SKIP_AGG(agg_sum_double)
700 DEF_SKIP_AGG(agg_max_double)
701 DEF_SKIP_AGG(agg_min_double)
702 #undef ADDR_T
703 #undef DATA_T
704 
705 #define DATA_T float
706 #define ADDR_T int32_t
707 DEF_SKIP_AGG(agg_sum_float)
708 DEF_SKIP_AGG(agg_max_float)
709 DEF_SKIP_AGG(agg_min_float)
710 #undef ADDR_T
711 #undef DATA_T
712 
713 #undef DEF_SKIP_AGG_ADD
714 #undef DEF_SKIP_AGG
715 
716 extern "C" ALWAYS_INLINE int64_t decimal_floor(const int64_t x, const int64_t scale) {
717  if (x >= 0) {
718  return x / scale * scale;
719  }
720  if (!(x % scale)) {
721  return x;
722  }
723  return x / scale * scale - scale;
724 }
725 
726 extern "C" ALWAYS_INLINE int64_t decimal_ceil(const int64_t x, const int64_t scale) {
727  return decimal_floor(x, scale) + (x % scale ? scale : 0);
728 }
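// Worked examples (illustrative), with scale == 100 (two decimal digits):
//   decimal_floor(1449, 100)  ==  1400      decimal_ceil(1449, 100)  ==  1500
//   decimal_floor(-1449, 100) == -1500      decimal_ceil(-1449, 100) == -1400
//   decimal_floor(1400, 100)  ==  1400      decimal_ceil(1400, 100)  ==  1400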
729 
730 // Shared memory aggregators. Should never be called,
731 // real implementations are in cuda_mapd_rt.cu.
732 #define DEF_SHARED_AGG_RET_STUBS(base_agg_func) \
733  extern "C" GPU_RT_STUB uint64_t base_agg_func##_shared(uint64_t* agg, \
734  const int64_t val) { \
735  return 0; \
736  } \
737  \
738  extern "C" GPU_RT_STUB uint64_t base_agg_func##_skip_val_shared( \
739  uint64_t* agg, const int64_t val, const int64_t skip_val) { \
740  return 0; \
741  } \
742  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_shared(uint32_t* agg, \
743  const int32_t val) { \
744  return 0; \
745  } \
746  \
747  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_skip_val_shared( \
748  uint32_t* agg, const int32_t val, const int32_t skip_val) { \
749  return 0; \
750  } \
751  \
752  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_shared(uint64_t* agg, \
753  const double val) { \
754  return 0; \
755  } \
756  \
757  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_skip_val_shared( \
758  uint64_t* agg, const double val, const double skip_val) { \
759  return 0; \
760  } \
761  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_shared(uint32_t* agg, \
762  const float val) { \
763  return 0; \
764  } \
765  \
766  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_skip_val_shared( \
767  uint32_t* agg, const float val, const float skip_val) { \
768  return 0; \
769  }
770 
771 #define DEF_SHARED_AGG_STUBS(base_agg_func) \
772  extern "C" GPU_RT_STUB void base_agg_func##_shared(int64_t* agg, const int64_t val) {} \
773  \
774  extern "C" GPU_RT_STUB void base_agg_func##_skip_val_shared( \
775  int64_t* agg, const int64_t val, const int64_t skip_val) {} \
776  extern "C" GPU_RT_STUB void base_agg_func##_int32_shared(int32_t* agg, \
777  const int32_t val) {} \
778  extern "C" GPU_RT_STUB void base_agg_func##_int16_shared(int16_t* agg, \
779  const int16_t val) {} \
780  extern "C" GPU_RT_STUB void base_agg_func##_int8_shared(int8_t* agg, \
781  const int8_t val) {} \
782  \
783  extern "C" GPU_RT_STUB void base_agg_func##_int32_skip_val_shared( \
784  int32_t* agg, const int32_t val, const int32_t skip_val) {} \
785  \
786  extern "C" GPU_RT_STUB void base_agg_func##_double_shared(int64_t* agg, \
787  const double val) {} \
788  \
789  extern "C" GPU_RT_STUB void base_agg_func##_double_skip_val_shared( \
790  int64_t* agg, const double val, const double skip_val) {} \
791  extern "C" GPU_RT_STUB void base_agg_func##_float_shared(int32_t* agg, \
792  const float val) {} \
793  \
794  extern "C" GPU_RT_STUB void base_agg_func##_float_skip_val_shared( \
795  int32_t* agg, const float val, const float skip_val) {}
796 
797 DEF_SHARED_AGG_RET_STUBS(agg_count)
798 DEF_SHARED_AGG_STUBS(agg_max)
799 DEF_SHARED_AGG_STUBS(agg_min)
800 DEF_SHARED_AGG_STUBS(agg_id)
801 
802 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_shared(int64_t* agg,
803  const int64_t val,
804  const int64_t null_val) {
805  return 0;
806 }
807 
808 extern "C" GPU_RT_STUB int32_t
809 checked_single_agg_id_int32_shared(int32_t* agg,
810  const int32_t val,
811  const int32_t null_val) {
812  return 0;
813 }
814 extern "C" GPU_RT_STUB int32_t
815 checked_single_agg_id_int16_shared(int16_t* agg,
816  const int16_t val,
817  const int16_t null_val) {
818  return 0;
819 }
820 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_int8_shared(int8_t* agg,
821  const int8_t val,
822  const int8_t null_val) {
823  return 0;
824 }
825 
826 extern "C" GPU_RT_STUB int32_t
827 checked_single_agg_id_double_shared(int64_t* agg,
828  const double val,
829  const double null_val) {
830  return 0;
831 }
832 
833 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_float_shared(int32_t* agg,
834  const float val,
835  const float null_val) {
836  return 0;
837 }
838 
839 extern "C" GPU_RT_STUB void agg_max_int16_skip_val_shared(int16_t* agg,
840  const int16_t val,
841  const int16_t skip_val) {}
842 
843 extern "C" GPU_RT_STUB void agg_max_int8_skip_val_shared(int8_t* agg,
844  const int8_t val,
845  const int8_t skip_val) {}
846 
847 extern "C" GPU_RT_STUB void agg_min_int16_skip_val_shared(int16_t* agg,
848  const int16_t val,
849  const int16_t skip_val) {}
850 
851 extern "C" GPU_RT_STUB void agg_min_int8_skip_val_shared(int8_t* agg,
852  const int8_t val,
853  const int8_t skip_val) {}
854 
855 extern "C" GPU_RT_STUB void agg_id_double_shared_slow(int64_t* agg, const double* val) {}
856 
857 extern "C" GPU_RT_STUB int64_t agg_sum_shared(int64_t* agg, const int64_t val) {
858  return 0;
859 }
860 
861 extern "C" GPU_RT_STUB int64_t agg_sum_skip_val_shared(int64_t* agg,
862  const int64_t val,
863  const int64_t skip_val) {
864  return 0;
865 }
866 extern "C" GPU_RT_STUB int32_t agg_sum_int32_shared(int32_t* agg, const int32_t val) {
867  return 0;
868 }
869 
870 extern "C" GPU_RT_STUB int32_t agg_sum_int32_skip_val_shared(int32_t* agg,
871  const int32_t val,
872  const int32_t skip_val) {
873  return 0;
874 }
875 
876 extern "C" GPU_RT_STUB void agg_sum_double_shared(int64_t* agg, const double val) {}
877 
878 extern "C" GPU_RT_STUB void agg_sum_double_skip_val_shared(int64_t* agg,
879  const double val,
880  const double skip_val) {}
881 extern "C" GPU_RT_STUB void agg_sum_float_shared(int32_t* agg, const float val) {}
882 
883 extern "C" GPU_RT_STUB void agg_sum_float_skip_val_shared(int32_t* agg,
884  const float val,
885  const float skip_val) {}
886 
887 extern "C" GPU_RT_STUB void force_sync() {}
888 
889 extern "C" GPU_RT_STUB void sync_warp() {}
890 extern "C" GPU_RT_STUB void sync_warp_protected(int64_t thread_pos, int64_t row_count) {}
891 extern "C" GPU_RT_STUB void sync_threadblock() {}
892 
893 extern "C" GPU_RT_STUB void write_back_non_grouped_agg(int64_t* input_buffer,
894  int64_t* output_buffer,
895  const int32_t num_agg_cols){};
896 // x64 stride functions
897 
898 extern "C" __attribute__((noinline)) int32_t pos_start_impl(int32_t* error_code) {
899  int32_t row_index_resume{0};
900  if (error_code) {
901  row_index_resume = error_code[0];
902  error_code[0] = 0;
903  }
904  return row_index_resume;
905 }
906 
907 extern "C" __attribute__((noinline)) int32_t group_buff_idx_impl() {
908  return pos_start_impl(nullptr);
909 }
910 
911 extern "C" __attribute__((noinline)) int32_t pos_step_impl() {
912  return 1;
913 }
914 
915 extern "C" GPU_RT_STUB int8_t thread_warp_idx(const int8_t warp_sz) {
916  return 0;
917 }
918 
919 extern "C" GPU_RT_STUB int64_t get_thread_index() {
920  return 0;
921 }
922 
923 extern "C" GPU_RT_STUB int64_t* declare_dynamic_shared_memory() {
924  return nullptr;
925 }
926 
927 extern "C" GPU_RT_STUB int64_t get_block_index() {
928  return 0;
929 }
930 
931 #undef GPU_RT_STUB
932 
933 extern "C" ALWAYS_INLINE int32_t record_error_code(const int32_t err_code,
934  int32_t* error_codes) {
935  // NB: never override persistent error codes (with code greater than zero).
936  // On GPU, a projection query with a limit can run out of slots without it
937  // being an actual error if the limit has been hit. If a persistent error
938  // (division by zero, for example) occurs before running out of slots, we
939  // have to avoid overriding it, because there's a risk that the query would
940  // go through if we override with a potentially benign out-of-slots code.
941  if (err_code && error_codes[pos_start_impl(nullptr)] <= 0) {
942  error_codes[pos_start_impl(nullptr)] = err_code;
943  }
944  return err_code;
945 }
946 
947 // group by helpers
948 
949 extern "C" __attribute__((noinline)) const int64_t* init_shared_mem_nop(
950  const int64_t* groups_buffer,
951  const int32_t groups_buffer_size) {
952  return groups_buffer;
953 }
954 
955 extern "C" __attribute__((noinline)) void write_back_nop(int64_t* dest,
956  int64_t* src,
957  const int32_t sz) {
958  // the body is not really needed, just make sure the call is not optimized away
959  assert(dest);
960 }
961 
962 extern "C" int64_t* init_shared_mem(const int64_t* global_groups_buffer,
963  const int32_t groups_buffer_size) {
964  return nullptr;
965 }
966 
967 extern "C" __attribute__((noinline)) void init_group_by_buffer_gpu(
968  int64_t* groups_buffer,
969  const int64_t* init_vals,
970  const uint32_t groups_buffer_entry_count,
971  const uint32_t key_qw_count,
972  const uint32_t agg_col_count,
973  const bool keyless,
974  const int8_t warp_size) {
975  // the body is not really needed, just make sure the call is not optimized away
976  assert(groups_buffer);
977 }
978 
979 extern "C" __attribute__((noinline)) void init_columnar_group_by_buffer_gpu(
980  int64_t* groups_buffer,
981  const int64_t* init_vals,
982  const uint32_t groups_buffer_entry_count,
983  const uint32_t key_qw_count,
984  const uint32_t agg_col_count,
985  const bool keyless,
986  const bool blocks_share_memory,
987  const int32_t frag_idx) {
988  // the body is not really needed, just make sure the call is not optimized away
989  assert(groups_buffer);
990 }
991 
992 extern "C" __attribute__((noinline)) void init_group_by_buffer_impl(
993  int64_t* groups_buffer,
994  const int64_t* init_vals,
995  const uint32_t groups_buffer_entry_count,
996  const uint32_t key_qw_count,
997  const uint32_t agg_col_count,
998  const bool keyless,
999  const int8_t warp_size) {
1000  // the body is not really needed, just make sure the call is not optimized away
1001  assert(groups_buffer);
1002 }
1003 
1004 template <typename T>
1005 ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
1006  const uint32_t h,
1007  const T* key,
1008  const uint32_t key_count,
1009  const uint32_t row_size_quad) {
1010  auto off = h * row_size_quad;
1011  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
1012  if (*row_ptr == get_empty_key<T>()) {
1013  memcpy(row_ptr, key, key_count * sizeof(T));
1014  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1015  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1016  }
1017  if (memcmp(row_ptr, key, key_count * sizeof(T)) == 0) {
1018  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1019  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1020  }
1021  return nullptr;
1022 }
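// Row layout assumed by the probe above (illustrative): each entry occupies
// row_size_quad 64-bit slots starting at groups_buffer + h * row_size_quad.
// The key (key_count values of type T) comes first, followed, after alignment
// to the next 64-bit boundary, by the aggregate columns whose address is
// returned. An entry is free when its first key slot equals get_empty_key<T>().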
1023 
1024 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
1025  const uint32_t h,
1026  const int64_t* key,
1027  const uint32_t key_count,
1028  const uint32_t key_width,
1029  const uint32_t row_size_quad,
1030  const int64_t* init_vals) {
1031  switch (key_width) {
1032  case 4:
1033  return get_matching_group_value(groups_buffer,
1034  h,
1035  reinterpret_cast<const int32_t*>(key),
1036  key_count,
1037  row_size_quad);
1038  case 8:
1039  return get_matching_group_value(groups_buffer, h, key, key_count, row_size_quad);
1040  default:;
1041  }
1042  return nullptr;
1043 }
1044 
1045 template <typename T>
1046 ALWAYS_INLINE int32_t get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1047  const uint32_t entry_count,
1048  const uint32_t h,
1049  const T* key,
1050  const uint32_t key_count) {
1051  auto off = h;
1052  auto key_buffer = reinterpret_cast<T*>(groups_buffer);
1053  if (key_buffer[off] == get_empty_key<T>()) {
1054  for (size_t i = 0; i < key_count; ++i) {
1055  key_buffer[off] = key[i];
1056  off += entry_count;
1057  }
1058  return h;
1059  }
1060  off = h;
1061  for (size_t i = 0; i < key_count; ++i) {
1062  if (key_buffer[off] != key[i]) {
1063  return -1;
1064  }
1065  off += entry_count;
1066  }
1067  return h;
1068 }
1069 
1070 extern "C" ALWAYS_INLINE int32_t
1071 get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1072  const uint32_t entry_count,
1073  const uint32_t h,
1074  const int64_t* key,
1075  const uint32_t key_count,
1076  const uint32_t key_width) {
1077  switch (key_width) {
1078  case 4:
1079  return get_matching_group_value_columnar_slot(groups_buffer,
1080  entry_count,
1081  h,
1082  reinterpret_cast<const int32_t*>(key),
1083  key_count);
1084  case 8:
1085  return get_matching_group_value_columnar_slot(
1086  groups_buffer, entry_count, h, key, key_count);
1087  default:
1088  return -1;
1089  }
1090  return -1;
1091 }
1092 
1093 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_columnar(
1094  int64_t* groups_buffer,
1095  const uint32_t h,
1096  const int64_t* key,
1097  const uint32_t key_qw_count,
1098  const size_t entry_count) {
1099  auto off = h;
1100  if (groups_buffer[off] == EMPTY_KEY_64) {
1101  for (size_t i = 0; i < key_qw_count; ++i) {
1102  groups_buffer[off] = key[i];
1103  off += entry_count;
1104  }
1105  return &groups_buffer[off];
1106  }
1107  off = h;
1108  for (size_t i = 0; i < key_qw_count; ++i) {
1109  if (groups_buffer[off] != key[i]) {
1110  return nullptr;
1111  }
1112  off += entry_count;
1113  }
1114  return &groups_buffer[off];
1115 }
1116 
1117 /*
1118  * For a particular hashed_index, returns the row-wise offset
1119  * to the first matching agg column in memory.
1120  * It also checks the corresponding group column and initializes the entry's
1121  * key columns if the entry is still empty (it is assumed all group columns are
1122  * 64-bit wide).
1123  *
1124  * Memory layout:
1125  *
1126  * | prepended group columns (64-bit each) | agg columns |
1127  */
1128 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash(
1129  int64_t* groups_buffer,
1130  const uint32_t hashed_index,
1131  const int64_t* key,
1132  const uint32_t key_count,
1133  const uint32_t row_size_quad) {
1134  uint32_t off = hashed_index * row_size_quad;
1135  if (groups_buffer[off] == EMPTY_KEY_64) {
1136  for (uint32_t i = 0; i < key_count; ++i) {
1137  groups_buffer[off + i] = key[i];
1138  }
1139  }
1140  return groups_buffer + off + key_count;
1141 }
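// Illustrative example for the perfect-hash layout above: with key_count == 2 and
// row_size_quad == 4, the entry for hashed_index == 3 starts at groups_buffer + 12,
// slots 12-13 hold the (lazily initialized) key, and the returned pointer,
// groups_buffer + 14, addresses the first of the two aggregate slots.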
1142 
1149 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash_keyless(
1150  int64_t* groups_buffer,
1151  const uint32_t hashed_index,
1152  const uint32_t row_size_quad) {
1153  return groups_buffer + row_size_quad * hashed_index;
1154 }
1155 
1156 /*
1157  * For a particular hashed_index, find and initialize (if necessary) all the group
1158  * columns corresponding to a key. It is assumed that all group columns are 64-bit wide.
1159  */
1160 extern "C" ALWAYS_INLINE void set_matching_group_value_perfect_hash_columnar(
1161  int64_t* groups_buffer,
1162  const uint32_t hashed_index,
1163  const int64_t* key,
1164  const uint32_t key_count,
1165  const uint32_t entry_count) {
1166  if (groups_buffer[hashed_index] == EMPTY_KEY_64) {
1167  for (uint32_t i = 0; i < key_count; i++) {
1168  groups_buffer[i * entry_count + hashed_index] = key[i];
1169  }
1170  }
1171 }
1172 
1173 #include "GroupByRuntime.cpp"
1174 #include "JoinHashTableQueryRuntime.cpp"
1175 
1176 extern "C" ALWAYS_INLINE int64_t* get_group_value_fast_keyless(
1177  int64_t* groups_buffer,
1178  const int64_t key,
1179  const int64_t min_key,
1180  const int64_t /* bucket */,
1181  const uint32_t row_size_quad) {
1182  return groups_buffer + row_size_quad * (key - min_key);
1183 }
1184 
1185 extern "C" ALWAYS_INLINE int64_t* get_group_value_fast_keyless_semiprivate(
1186  int64_t* groups_buffer,
1187  const int64_t key,
1188  const int64_t min_key,
1189  const int64_t /* bucket */,
1190  const uint32_t row_size_quad,
1191  const uint8_t thread_warp_idx,
1192  const uint8_t warp_size) {
1193  return groups_buffer + row_size_quad * (warp_size * (key - min_key) + thread_warp_idx);
1194 }
1195 
1196 extern "C" ALWAYS_INLINE int8_t* extract_str_ptr(const uint64_t str_and_len) {
1197  return reinterpret_cast<int8_t*>(str_and_len & 0xffffffffffff);
1198 }
1199 
1200 extern "C" ALWAYS_INLINE int32_t extract_str_len(const uint64_t str_and_len) {
1201  return static_cast<int64_t>(str_and_len) >> 48;
1202 }
1203 
1204 extern "C" __attribute__((noinline)) int8_t* extract_str_ptr_noinline(
1205  const uint64_t str_and_len) {
1206  return extract_str_ptr(str_and_len);
1207 }
1208 
1209 extern "C" __attribute__((noinline)) int32_t extract_str_len_noinline(
1210  const uint64_t str_and_len) {
1211  return extract_str_len(str_and_len);
1212 }
1213 
1214 extern "C" ALWAYS_INLINE uint64_t string_pack(const int8_t* ptr, const int32_t len) {
1215  return (reinterpret_cast<const uint64_t>(ptr) & 0xffffffffffff) |
1216  (static_cast<const uint64_t>(len) << 48);
1217 }
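// Packing scheme (illustrative): a string is carried in a single uint64_t with the
// pointer in the low 48 bits and the length in the high 16 bits, so
// extract_str_ptr(string_pack(ptr, len)) == ptr and
// extract_str_len(string_pack(ptr, len)) == len for lengths that fit in 16 bits
// (extract_str_len uses an arithmetic shift, hence the int64_t cast).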
1218 
1219 #ifdef __clang__
1220 #include "../Utils/StringLike.cpp"
1221 #endif
1222 
1223 #ifndef __CUDACC__
1224 #include "TopKRuntime.cpp"
1225 #endif
1226 
1227 extern "C" ALWAYS_INLINE DEVICE int32_t char_length(const char* str,
1228  const int32_t str_len) {
1229  return str_len;
1230 }
1231 
1232 extern "C" ALWAYS_INLINE DEVICE int32_t char_length_nullable(const char* str,
1233  const int32_t str_len,
1234  const int32_t int_null) {
1235  if (!str) {
1236  return int_null;
1237  }
1238  return str_len;
1239 }
1240 
1241 extern "C" ALWAYS_INLINE DEVICE int32_t key_for_string_encoded(const int32_t str_id) {
1242  return str_id;
1243 }
1244 
1245 extern "C" ALWAYS_INLINE DEVICE bool sample_ratio(const double proportion,
1246  const int64_t row_offset) {
1247  const int64_t threshold = 4294967296 * proportion;
1248  return (row_offset * 2654435761) % 4294967296 < threshold;
1249 }
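// Illustrative reading of sample_ratio: row_offset is scrambled with the Knuth
// multiplicative constant 2654435761 and reduced modulo 2^32; the row passes when
// that value falls below proportion * 2^32. A proportion of 0.1 therefore keeps a
// deterministic, roughly uniform ~10% of row offsets.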
1250 
1251 extern "C" ALWAYS_INLINE int64_t row_number_window_func(const int64_t output_buff,
1252  const int64_t pos) {
1253  return reinterpret_cast<const int64_t*>(output_buff)[pos];
1254 }
1255 
1256 extern "C" ALWAYS_INLINE double percent_window_func(const int64_t output_buff,
1257  const int64_t pos) {
1258  return reinterpret_cast<const double*>(output_buff)[pos];
1259 }
1260 
1261 extern "C" ALWAYS_INLINE double load_double(const int64_t* agg) {
1262  return *reinterpret_cast<const double*>(may_alias_ptr(agg));
1263 }
1264 
1265 extern "C" ALWAYS_INLINE float load_float(const int32_t* agg) {
1266  return *reinterpret_cast<const float*>(may_alias_ptr(agg));
1267 }
1268 
1269 extern "C" ALWAYS_INLINE double load_avg_int(const int64_t* sum,
1270  const int64_t* count,
1271  const double null_val) {
1272  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
1273 }
1274 
1275 extern "C" ALWAYS_INLINE double load_avg_decimal(const int64_t* sum,
1276  const int64_t* count,
1277  const double null_val,
1278  const uint32_t scale) {
1279  return *count != 0 ? (static_cast<double>(*sum) / pow(10, scale)) / *count : null_val;
1280 }
1281 
1282 extern "C" ALWAYS_INLINE double load_avg_double(const int64_t* agg,
1283  const int64_t* count,
1284  const double null_val) {
1285  return *count != 0 ? *reinterpret_cast<const double*>(may_alias_ptr(agg)) / *count
1286  : null_val;
1287 }
1288 
1289 extern "C" ALWAYS_INLINE double load_avg_float(const int32_t* agg,
1290  const int32_t* count,
1291  const double null_val) {
1292  return *count != 0 ? *reinterpret_cast<const float*>(may_alias_ptr(agg)) / *count
1293  : null_val;
1294 }
1295 
1296 extern "C" NEVER_INLINE void linear_probabilistic_count(uint8_t* bitmap,
1297  const uint32_t bitmap_bytes,
1298  const uint8_t* key_bytes,
1299  const uint32_t key_len) {
1300  const uint32_t bit_pos = MurmurHash1(key_bytes, key_len, 0) % (bitmap_bytes * 8);
1301  const uint32_t word_idx = bit_pos / 32;
1302  const uint32_t bit_idx = bit_pos % 32;
1303  reinterpret_cast<uint32_t*>(bitmap)[word_idx] |= 1 << bit_idx;
1304 }
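// Sketch of the update above (illustrative): this is the insert step of linear
// (probabilistic) counting. Each key sets exactly one bit, chosen by
// MurmurHash1(key) modulo the bitmap size in bits; the estimator later derives the
// distinct count from the fraction of bits still zero, so collisions are expected
// and tolerated.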
1305 
1306 extern "C" __attribute__((noinline)) void query_stub_hoisted_literals(
1307  const int8_t** col_buffers,
1308  const int8_t* literals,
1309  const int64_t* num_rows,
1310  const uint64_t* frag_row_offsets,
1311  const int32_t* max_matched,
1312  const int64_t* init_agg_value,
1313  int64_t** out,
1314  uint32_t frag_idx,
1315  const int64_t* join_hash_tables,
1316  int32_t* error_code,
1317  int32_t* total_matched) {
1318  assert(col_buffers || literals || num_rows || frag_row_offsets || max_matched ||
1319  init_agg_value || out || frag_idx || error_code || join_hash_tables ||
1320  total_matched);
1321 }
1322 
1323 extern "C" void multifrag_query_hoisted_literals(const int8_t*** col_buffers,
1324  const uint64_t* num_fragments,
1325  const int8_t* literals,
1326  const int64_t* num_rows,
1327  const uint64_t* frag_row_offsets,
1328  const int32_t* max_matched,
1329  int32_t* total_matched,
1330  const int64_t* init_agg_value,
1331  int64_t** out,
1332  int32_t* error_code,
1333  const uint32_t* num_tables_ptr,
1334  const int64_t* join_hash_tables) {
1335  for (uint32_t i = 0; i < *num_fragments; ++i) {
1336  query_stub_hoisted_literals(col_buffers ? col_buffers[i] : nullptr,
1337  literals,
1338  &num_rows[i * (*num_tables_ptr)],
1339  &frag_row_offsets[i * (*num_tables_ptr)],
1340  max_matched,
1341  init_agg_value,
1342  out,
1343  i,
1344  join_hash_tables,
1345  total_matched,
1346  error_code);
1347  }
1348 }
1349 
1350 extern "C" __attribute__((noinline)) void query_stub(const int8_t** col_buffers,
1351  const int64_t* num_rows,
1352  const uint64_t* frag_row_offsets,
1353  const int32_t* max_matched,
1354  const int64_t* init_agg_value,
1355  int64_t** out,
1356  uint32_t frag_idx,
1357  const int64_t* join_hash_tables,
1358  int32_t* error_code,
1359  int32_t* total_matched) {
1360  assert(col_buffers || num_rows || frag_row_offsets || max_matched || init_agg_value ||
1361  out || frag_idx || error_code || join_hash_tables || total_matched);
1362 }
1363 
1364 extern "C" void multifrag_query(const int8_t*** col_buffers,
1365  const uint64_t* num_fragments,
1366  const int64_t* num_rows,
1367  const uint64_t* frag_row_offsets,
1368  const int32_t* max_matched,
1369  int32_t* total_matched,
1370  const int64_t* init_agg_value,
1371  int64_t** out,
1372  int32_t* error_code,
1373  const uint32_t* num_tables_ptr,
1374  const int64_t* join_hash_tables) {
1375  for (uint32_t i = 0; i < *num_fragments; ++i) {
1376  query_stub(col_buffers ? col_buffers[i] : nullptr,
1377  &num_rows[i * (*num_tables_ptr)],
1378  &frag_row_offsets[i * (*num_tables_ptr)],
1379  max_matched,
1380  init_agg_value,
1381  out,
1382  i,
1383  join_hash_tables,
1384  total_matched,
1385  error_code);
1386  }
1387 }
1388 
1389 extern "C" ALWAYS_INLINE DEVICE bool check_interrupt() {
1390  if (check_interrupt_init(static_cast<unsigned>(INT_CHECK))) {
1391  return true;
1392  }
1393  return false;
1394 }
1395 
1396 extern "C" bool check_interrupt_init(unsigned command) {
1397  static std::atomic_bool runtime_interrupt_flag{false};
1398 
1399  if (command == static_cast<unsigned>(INT_CHECK)) {
1400  if (runtime_interrupt_flag.load()) {
1401  return true;
1402  }
1403  return false;
1404  }
1405  if (command == static_cast<unsigned>(INT_ABORT)) {
1406  runtime_interrupt_flag.store(true);
1407  return false;
1408  }
1409  if (command == static_cast<unsigned>(INT_RESET)) {
1410  runtime_interrupt_flag.store(false);
1411  return false;
1412  }
1413  return false;
1414 }
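// Usage note (illustrative): check_interrupt_init multiplexes three commands over
// one static atomic flag. INT_ABORT raises the flag, check_interrupt() polls it via
// INT_CHECK, and INT_RESET clears it before the next query. For example:
//
//   check_interrupt_init(static_cast<unsigned>(INT_ABORT));  // request interrupt
//   assert(check_interrupt());                               // now observed as true
//   check_interrupt_init(static_cast<unsigned>(INT_RESET));  // clear for next run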