OmniSciDB  eb3a3d0a03
RuntimeFunctions.cpp
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef __CUDACC__
18 #error This code is not intended to be compiled with a CUDA C++ compiler
19 #endif // __CUDACC__
20 
21 #include "RuntimeFunctions.h"
22 #include "../Shared/funcannotations.h"
23 #include "BufferCompaction.h"
24 #include "HyperLogLogRank.h"
25 #include "MurmurHash.h"
26 #include "Shared/quantile.h"
27 #include "TypePunning.h"
28 
29 #include <algorithm>
30 #include <atomic>
31 #include <chrono>
32 #include <cmath>
33 #include <cstdio>
34 #include <cstring>
35 #include <thread>
36 #include <tuple>
37 
38 // decoder implementations
39 
40 #include "DecodersImpl.h"
41 
42 // arithmetic operator implementations
43 
44 #define DEF_ARITH_NULLABLE(type, null_type, opname, opsym) \
45  extern "C" ALWAYS_INLINE type opname##_##type##_nullable( \
46  const type lhs, const type rhs, const null_type null_val) { \
47  if (lhs != null_val && rhs != null_val) { \
48  return lhs opsym rhs; \
49  } \
50  return null_val; \
51  }
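// Illustration (editorial, not part of the original source): for type=int32_t,
// null_type=int64_t, opname=add, opsym=+, DEF_ARITH_NULLABLE expands to roughly the
// following null-propagating helper that the generated query code calls:
//
//   extern "C" ALWAYS_INLINE int32_t add_int32_t_nullable(const int32_t lhs,
//                                                         const int32_t rhs,
//                                                         const int64_t null_val) {
//     if (lhs != null_val && rhs != null_val) {
//       return lhs + rhs;
//     }
//     return null_val;
//   }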
52 
53 #define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym) \
54  extern "C" ALWAYS_INLINE type opname##_##type##_nullable_lhs( \
55  const type lhs, const type rhs, const null_type null_val) { \
56  if (lhs != null_val) { \
57  return lhs opsym rhs; \
58  } \
59  return null_val; \
60  }
61 
62 #define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym) \
63  extern "C" ALWAYS_INLINE type opname##_##type##_nullable_rhs( \
64  const type lhs, const type rhs, const null_type null_val) { \
65  if (rhs != null_val) { \
66  return lhs opsym rhs; \
67  } \
68  return null_val; \
69  }
70 
71 #define DEF_CMP_NULLABLE(type, null_type, opname, opsym) \
72  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable( \
73  const type lhs, \
74  const type rhs, \
75  const null_type null_val, \
76  const int8_t null_bool_val) { \
77  if (lhs != null_val && rhs != null_val) { \
78  return lhs opsym rhs; \
79  } \
80  return null_bool_val; \
81  }
82 
83 #define DEF_CMP_NULLABLE_LHS(type, null_type, opname, opsym) \
84  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable_lhs( \
85  const type lhs, \
86  const type rhs, \
87  const null_type null_val, \
88  const int8_t null_bool_val) { \
89  if (lhs != null_val) { \
90  return lhs opsym rhs; \
91  } \
92  return null_bool_val; \
93  }
94 
95 #define DEF_CMP_NULLABLE_RHS(type, null_type, opname, opsym) \
96  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable_rhs( \
97  const type lhs, \
98  const type rhs, \
99  const null_type null_val, \
100  const int8_t null_bool_val) { \
101  if (rhs != null_val) { \
102  return lhs opsym rhs; \
103  } \
104  return null_bool_val; \
105  }
106 
107 #define DEF_SAFE_DIV_NULLABLE(type, null_type, opname) \
108  extern "C" ALWAYS_INLINE type safe_div_##type( \
109  const type lhs, const type rhs, const null_type null_val) { \
110  if (lhs != null_val && rhs != null_val && rhs != 0) { \
111  return lhs / rhs; \
112  } \
113  return null_val; \
114  }
115 
116 #define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type) \
117  DEF_ARITH_NULLABLE(type, null_type, add, +) \
118  DEF_ARITH_NULLABLE(type, null_type, sub, -) \
119  DEF_ARITH_NULLABLE(type, null_type, mul, *) \
120  DEF_ARITH_NULLABLE(type, null_type, div, /) \
121  DEF_SAFE_DIV_NULLABLE(type, null_type, safe_div) \
122  DEF_ARITH_NULLABLE_LHS(type, null_type, add, +) \
123  DEF_ARITH_NULLABLE_LHS(type, null_type, sub, -) \
124  DEF_ARITH_NULLABLE_LHS(type, null_type, mul, *) \
125  DEF_ARITH_NULLABLE_LHS(type, null_type, div, /) \
126  DEF_ARITH_NULLABLE_RHS(type, null_type, add, +) \
127  DEF_ARITH_NULLABLE_RHS(type, null_type, sub, -) \
128  DEF_ARITH_NULLABLE_RHS(type, null_type, mul, *) \
129  DEF_ARITH_NULLABLE_RHS(type, null_type, div, /) \
130  DEF_CMP_NULLABLE(type, null_type, eq, ==) \
131  DEF_CMP_NULLABLE(type, null_type, ne, !=) \
132  DEF_CMP_NULLABLE(type, null_type, lt, <) \
133  DEF_CMP_NULLABLE(type, null_type, gt, >) \
134  DEF_CMP_NULLABLE(type, null_type, le, <=) \
135  DEF_CMP_NULLABLE(type, null_type, ge, >=) \
136  DEF_CMP_NULLABLE_LHS(type, null_type, eq, ==) \
137  DEF_CMP_NULLABLE_LHS(type, null_type, ne, !=) \
138  DEF_CMP_NULLABLE_LHS(type, null_type, lt, <) \
139  DEF_CMP_NULLABLE_LHS(type, null_type, gt, >) \
140  DEF_CMP_NULLABLE_LHS(type, null_type, le, <=) \
141  DEF_CMP_NULLABLE_LHS(type, null_type, ge, >=) \
142  DEF_CMP_NULLABLE_RHS(type, null_type, eq, ==) \
143  DEF_CMP_NULLABLE_RHS(type, null_type, ne, !=) \
144  DEF_CMP_NULLABLE_RHS(type, null_type, lt, <) \
145  DEF_CMP_NULLABLE_RHS(type, null_type, gt, >) \
146  DEF_CMP_NULLABLE_RHS(type, null_type, le, <=) \
147  DEF_CMP_NULLABLE_RHS(type, null_type, ge, >=)
148 
149 DEF_BINARY_NULLABLE_ALL_OPS(int8_t, int64_t)
150 DEF_BINARY_NULLABLE_ALL_OPS(int16_t, int64_t)
151 DEF_BINARY_NULLABLE_ALL_OPS(int32_t, int64_t)
152 DEF_BINARY_NULLABLE_ALL_OPS(int64_t, int64_t)
153 DEF_BINARY_NULLABLE_ALL_OPS(float, float)
154 DEF_BINARY_NULLABLE_ALL_OPS(double, double)
155 DEF_ARITH_NULLABLE(int8_t, int64_t, mod, %)
156 DEF_ARITH_NULLABLE(int16_t, int64_t, mod, %)
157 DEF_ARITH_NULLABLE(int32_t, int64_t, mod, %)
158 DEF_ARITH_NULLABLE(int64_t, int64_t, mod, %)
159 DEF_ARITH_NULLABLE_LHS(int8_t, int64_t, mod, %)
160 DEF_ARITH_NULLABLE_LHS(int16_t, int64_t, mod, %)
161 DEF_ARITH_NULLABLE_LHS(int32_t, int64_t, mod, %)
162 DEF_ARITH_NULLABLE_LHS(int64_t, int64_t, mod, %)
163 DEF_ARITH_NULLABLE_RHS(int8_t, int64_t, mod, %)
164 DEF_ARITH_NULLABLE_RHS(int16_t, int64_t, mod, %)
165 DEF_ARITH_NULLABLE_RHS(int32_t, int64_t, mod, %)
166 DEF_ARITH_NULLABLE_RHS(int64_t, int64_t, mod, %)
167 
168 #undef DEF_BINARY_NULLABLE_ALL_OPS
169 #undef DEF_SAFE_DIV_NULLABLE
170 #undef DEF_CMP_NULLABLE_RHS
171 #undef DEF_CMP_NULLABLE_LHS
172 #undef DEF_CMP_NULLABLE
173 #undef DEF_ARITH_NULLABLE_RHS
174 #undef DEF_ARITH_NULLABLE_LHS
175 #undef DEF_ARITH_NULLABLE
176 
177 extern "C" ALWAYS_INLINE int64_t scale_decimal_up(const int64_t operand,
178  const uint64_t scale,
179  const int64_t operand_null_val,
180  const int64_t result_null_val) {
181  return operand != operand_null_val ? operand * scale : result_null_val;
182 }
183 
184 extern "C" ALWAYS_INLINE int64_t scale_decimal_down_nullable(const int64_t operand,
185  const int64_t scale,
186  const int64_t null_val) {
187  // rounded scale down of a decimal
188  if (operand == null_val) {
189  return null_val;
190  }
191 
192  int64_t tmp = scale >> 1;
193  tmp = operand >= 0 ? operand + tmp : operand - tmp;
194  return tmp / scale;
195 }
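// Worked example (editorial): for a non-null operand 1250 and scale 100 (two decimal
// digits), scale_decimal_down_nullable computes tmp = 100 >> 1 = 50, then
// (1250 + 50) / 100 = 13, i.e. 12.50 rounded to the nearest unit. A negative operand
// is rounded symmetrically: (-1250 - 50) / 100 = -13.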
196 
197 extern "C" ALWAYS_INLINE int64_t scale_decimal_down_not_nullable(const int64_t operand,
198  const int64_t scale,
199  const int64_t null_val) {
200  int64_t tmp = scale >> 1;
201  tmp = operand >= 0 ? operand + tmp : operand - tmp;
202  return tmp / scale;
203 }
204 
205 // Return floor(dividend / divisor).
206 // Assumes 0 < divisor.
207 extern "C" ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend,
208  const int64_t divisor) {
209  return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
210 }
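// Worked example (editorial): plain C++ integer division truncates toward zero, so
// -7 / 2 == -3, whereas floor_div_lhs(-7, 2) computes (-7 - (2 - 1)) / 2 == -4,
// matching floor(-3.5). For non-negative dividends both agree: floor_div_lhs(7, 2) == 3.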
211 
212 // Return floor(dividend / divisor) or NULL if dividend IS NULL.
213 // Assumes 0 < divisor.
214 extern "C" ALWAYS_INLINE int64_t floor_div_nullable_lhs(const int64_t dividend,
215  const int64_t divisor,
216  const int64_t null_val) {
217  return dividend == null_val ? null_val : floor_div_lhs(dividend, divisor);
218 }
219 
220 #define DEF_UMINUS_NULLABLE(type, null_type) \
221  extern "C" ALWAYS_INLINE type uminus_##type##_nullable(const type operand, \
222  const null_type null_val) { \
223  return operand == null_val ? null_val : -operand; \
224  }
225 
226 DEF_UMINUS_NULLABLE(int8_t, int8_t)
227 DEF_UMINUS_NULLABLE(int16_t, int16_t)
228 DEF_UMINUS_NULLABLE(int32_t, int32_t)
229 DEF_UMINUS_NULLABLE(int64_t, int64_t)
230 DEF_UMINUS_NULLABLE(float, float)
231 DEF_UMINUS_NULLABLE(double, double)
232 
233 #undef DEF_UMINUS_NULLABLE
234 
235 #define DEF_CAST_NULLABLE(from_type, to_type) \
236  extern "C" ALWAYS_INLINE to_type cast_##from_type##_to_##to_type##_nullable( \
237  const from_type operand, \
238  const from_type from_null_val, \
239  const to_type to_null_val) { \
240  return operand == from_null_val ? to_null_val : operand; \
241  }
242 
243 #define DEF_CAST_SCALED_NULLABLE(from_type, to_type) \
244  extern "C" ALWAYS_INLINE to_type cast_##from_type##_to_##to_type##_scaled_nullable( \
245  const from_type operand, \
246  const from_type from_null_val, \
247  const to_type to_null_val, \
248  const to_type multiplier) { \
249  return operand == from_null_val ? to_null_val : multiplier * operand; \
250  }
251 
252 #define DEF_CAST_NULLABLE_BIDIR(type1, type2) \
253  DEF_CAST_NULLABLE(type1, type2) \
254  DEF_CAST_NULLABLE(type2, type1)
255 
256 DEF_CAST_NULLABLE_BIDIR(int8_t, int16_t)
257 DEF_CAST_NULLABLE_BIDIR(int8_t, int32_t)
258 DEF_CAST_NULLABLE_BIDIR(int8_t, int64_t)
259 DEF_CAST_NULLABLE_BIDIR(int16_t, int32_t)
260 DEF_CAST_NULLABLE_BIDIR(int16_t, int64_t)
261 DEF_CAST_NULLABLE_BIDIR(int32_t, int64_t)
262 DEF_CAST_NULLABLE_BIDIR(float, double)
263 DEF_CAST_NULLABLE_BIDIR(float, int8_t)
264 DEF_CAST_NULLABLE_BIDIR(float, int16_t)
265 DEF_CAST_NULLABLE_BIDIR(float, int32_t)
266 DEF_CAST_NULLABLE_BIDIR(float, int64_t)
267 DEF_CAST_NULLABLE_BIDIR(double, int8_t)
268 DEF_CAST_NULLABLE_BIDIR(double, int16_t)
269 DEF_CAST_NULLABLE_BIDIR(double, int32_t)
270 DEF_CAST_NULLABLE_BIDIR(double, int64_t)
271 DEF_CAST_NULLABLE(uint8_t, int32_t)
272 DEF_CAST_NULLABLE(uint16_t, int32_t)
273 DEF_CAST_SCALED_NULLABLE(int64_t, float)
274 DEF_CAST_SCALED_NULLABLE(int64_t, double)
275 
276 #undef DEF_CAST_NULLABLE_BIDIR
277 #undef DEF_CAST_SCALED_NULLABLE
278 #undef DEF_CAST_NULLABLE
279 
280 extern "C" ALWAYS_INLINE int8_t logical_not(const int8_t operand, const int8_t null_val) {
281  return operand == null_val ? operand : (operand ? 0 : 1);
282 }
283 
284 extern "C" ALWAYS_INLINE int8_t logical_and(const int8_t lhs,
285  const int8_t rhs,
286  const int8_t null_val) {
287  if (lhs == null_val) {
288  return rhs == 0 ? rhs : null_val;
289  }
290  if (rhs == null_val) {
291  return lhs == 0 ? lhs : null_val;
292  }
293  return (lhs && rhs) ? 1 : 0;
294 }
295 
296 extern "C" ALWAYS_INLINE int8_t logical_or(const int8_t lhs,
297  const int8_t rhs,
298  const int8_t null_val) {
299  if (lhs == null_val) {
300  return rhs == 0 ? null_val : rhs;
301  }
302  if (rhs == null_val) {
303  return lhs == 0 ? null_val : lhs;
304  }
305  return (lhs || rhs) ? 1 : 0;
306 }
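// Editorial note: logical_and / logical_or implement SQL three-valued logic, with
// null_val standing in for NULL and 0/1 for false/true. A few cases, read off the
// branches above:
//   logical_and(null_val, 0, null_val) == 0         // NULL AND FALSE  -> FALSE
//   logical_and(null_val, 1, null_val) == null_val  // NULL AND TRUE   -> NULL
//   logical_or(null_val, 1, null_val)  == 1         // NULL OR TRUE    -> TRUE
//   logical_or(null_val, 0, null_val)  == null_val  // NULL OR FALSE   -> NULL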
307 
308 // aggregator implementations
309 
310 extern "C" ALWAYS_INLINE uint64_t agg_count(uint64_t* agg, const int64_t) {
311  return (*agg)++;
312 }
313 
314 extern "C" ALWAYS_INLINE void agg_count_distinct_bitmap(int64_t* agg,
315  const int64_t val,
316  const int64_t min_val) {
317  const uint64_t bitmap_idx = val - min_val;
318  reinterpret_cast<int8_t*>(*agg)[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
319 }
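// Worked example (editorial): agg_count_distinct_bitmap treats *agg as the address of
// a bitmap and sets one bit per distinct value. For val = min_val + 10 the bitmap
// index is 10, so byte 10 >> 3 == 1 gets bit 10 & 7 == 2 set, i.e. bitmap[1] |= 0x04.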
320 
321 #ifdef _MSC_VER
322 #define GPU_RT_STUB NEVER_INLINE
323 #else
324 #define GPU_RT_STUB NEVER_INLINE __attribute__((optnone))
325 #endif
326 
327 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_gpu(int64_t*,
328  const int64_t,
329  const int64_t,
330  const int64_t,
331  const int64_t,
332  const uint64_t,
333  const uint64_t) {}
334 
335 extern "C" NEVER_INLINE void agg_approximate_count_distinct(int64_t* agg,
336  const int64_t key,
337  const uint32_t b) {
338  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
339  const uint32_t index = hash >> (64 - b);
340  const uint8_t rank = get_rank(hash << b, 64 - b);
341  uint8_t* M = reinterpret_cast<uint8_t*>(*agg);
342  M[index] = std::max(M[index], rank);
343 }
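// Editorial note: this is the standard HyperLogLog register update. The top b bits of
// the 64-bit MurmurHash select one of 2^b registers, the remaining bits feed
// get_rank() (roughly, the run of leading zero bits plus one), and each register keeps
// the maximum rank observed; the cardinality estimate is derived from the registers
// later, outside this function.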
344 
345 extern "C" GPU_RT_STUB void agg_approximate_count_distinct_gpu(int64_t*,
346  const int64_t,
347  const uint32_t,
348  const int64_t,
349  const int64_t) {}
350 
351 extern "C" ALWAYS_INLINE int8_t bit_is_set(const int64_t bitset,
352  const int64_t val,
353  const int64_t min_val,
354  const int64_t max_val,
355  const int64_t null_val,
356  const int8_t null_bool_val) {
357  if (val == null_val) {
358  return null_bool_val;
359  }
360  if (val < min_val || val > max_val) {
361  return 0;
362  }
363  if (!bitset) {
364  return 0;
365  }
366  const uint64_t bitmap_idx = val - min_val;
367  return (reinterpret_cast<const int8_t*>(bitset))[bitmap_idx >> 3] &
368  (1 << (bitmap_idx & 7))
369  ? 1
370  : 0;
371 }
372 
373 extern "C" ALWAYS_INLINE int64_t agg_sum(int64_t* agg, const int64_t val) {
374  const auto old = *agg;
375  *agg += val;
376  return old;
377 }
378 
379 extern "C" ALWAYS_INLINE void agg_max(int64_t* agg, const int64_t val) {
380  *agg = std::max(*agg, val);
381 }
382 
383 extern "C" ALWAYS_INLINE void agg_min(int64_t* agg, const int64_t val) {
384  *agg = std::min(*agg, val);
385 }
386 
387 extern "C" ALWAYS_INLINE void agg_id(int64_t* agg, const int64_t val) {
388  *agg = val;
389 }
390 
391 extern "C" ALWAYS_INLINE int8_t* agg_id_varlen(int8_t* varlen_buffer,
392  const int64_t offset,
393  const int8_t* value,
394  const int64_t size_bytes) {
395  for (auto i = 0; i < size_bytes; i++) {
396  varlen_buffer[offset + i] = value[i];
397  }
398  return &varlen_buffer[offset];
399 }
400 
401 extern "C" ALWAYS_INLINE int32_t checked_single_agg_id(int64_t* agg,
402  const int64_t val,
403  const int64_t null_val) {
404  if (val == null_val) {
405  return 0;
406  }
407 
408  if (*agg == val) {
409  return 0;
410  } else if (*agg == null_val) {
411  *agg = val;
412  return 0;
413  } else {
414  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
415  return 15;
416  }
417 }
418 
419 extern "C" ALWAYS_INLINE void agg_count_distinct_bitmap_skip_val(int64_t* agg,
420  const int64_t val,
421  const int64_t min_val,
422  const int64_t skip_val) {
423  if (val != skip_val) {
424  agg_count_distinct_bitmap(agg, val, min_val);
425  }
426 }
427 
428 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_skip_val_gpu(int64_t*,
429  const int64_t,
430  const int64_t,
431  const int64_t,
432  const int64_t,
433  const int64_t,
434  const uint64_t,
435  const uint64_t) {}
436 
437 extern "C" ALWAYS_INLINE uint32_t agg_count_int32(uint32_t* agg, const int32_t) {
438  return (*agg)++;
439 }
440 
441 extern "C" ALWAYS_INLINE int32_t agg_sum_int32(int32_t* agg, const int32_t val) {
442  const auto old = *agg;
443  *agg += val;
444  return old;
445 }
446 
447 #define DEF_AGG_MAX_INT(n) \
448  extern "C" ALWAYS_INLINE void agg_max_int##n(int##n##_t* agg, const int##n##_t val) { \
449  *agg = std::max(*agg, val); \
450  }
451 
452 DEF_AGG_MAX_INT(32)
453 DEF_AGG_MAX_INT(16)
454 DEF_AGG_MAX_INT(8)
455 #undef DEF_AGG_MAX_INT
456 
457 #define DEF_AGG_MIN_INT(n) \
458  extern "C" ALWAYS_INLINE void agg_min_int##n(int##n##_t* agg, const int##n##_t val) { \
459  *agg = std::min(*agg, val); \
460  }
461 
462 DEF_AGG_MIN_INT(32)
463 DEF_AGG_MIN_INT(16)
464 DEF_AGG_MIN_INT(8)
465 #undef DEF_AGG_MIN_INT
466 
467 #define DEF_AGG_ID_INT(n) \
468  extern "C" ALWAYS_INLINE void agg_id_int##n(int##n##_t* agg, const int##n##_t val) { \
469  *agg = val; \
470  }
471 
472 #define DEF_CHECKED_SINGLE_AGG_ID_INT(n) \
473  extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_int##n( \
474  int##n##_t* agg, const int##n##_t val, const int##n##_t null_val) { \
475  if (val == null_val) { \
476  return 0; \
477  } \
478  if (*agg == val) { \
479  return 0; \
480  } else if (*agg == null_val) { \
481  *agg = val; \
482  return 0; \
483  } else { \
484  /* see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES*/ \
485  return 15; \
486  } \
487  }
488 
489 DEF_AGG_ID_INT(32)
490 DEF_AGG_ID_INT(16)
491 DEF_AGG_ID_INT(8)
492 
493 DEF_CHECKED_SINGLE_AGG_ID_INT(32)
494 DEF_CHECKED_SINGLE_AGG_ID_INT(16)
495 DEF_CHECKED_SINGLE_AGG_ID_INT(8)
496 
497 #undef DEF_AGG_ID_INT
498 #undef DEF_CHECKED_SINGLE_AGG_ID_INT
499 
500 #define DEF_WRITE_PROJECTION_INT(n) \
501  extern "C" ALWAYS_INLINE void write_projection_int##n( \
502  int8_t* slot_ptr, const int##n##_t val, const int64_t init_val) { \
503  if (val != init_val) { \
504  *reinterpret_cast<int##n##_t*>(slot_ptr) = val; \
505  } \
506  }
507 
508 DEF_WRITE_PROJECTION_INT(64)
509 DEF_WRITE_PROJECTION_INT(32)
510 #undef DEF_WRITE_PROJECTION_INT
511 
512 extern "C" ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t* agg,
513  const int64_t val,
514  const int64_t skip_val) {
515  const auto old = *agg;
516  if (val != skip_val) {
517  if (old != skip_val) {
518  return agg_sum(agg, val);
519  } else {
520  *agg = val;
521  }
522  }
523  return old;
524 }
525 
526 extern "C" ALWAYS_INLINE int32_t agg_sum_int32_skip_val(int32_t* agg,
527  const int32_t val,
528  const int32_t skip_val) {
529  const auto old = *agg;
530  if (val != skip_val) {
531  if (old != skip_val) {
532  return agg_sum_int32(agg, val);
533  } else {
534  *agg = val;
535  }
536  }
537  return old;
538 }
539 
540 extern "C" ALWAYS_INLINE uint64_t agg_count_skip_val(uint64_t* agg,
541  const int64_t val,
542  const int64_t skip_val) {
543  if (val != skip_val) {
544  return agg_count(agg, val);
545  }
546  return *agg;
547 }
548 
549 extern "C" ALWAYS_INLINE uint32_t agg_count_int32_skip_val(uint32_t* agg,
550  const int32_t val,
551  const int32_t skip_val) {
552  if (val != skip_val) {
553  return agg_count_int32(agg, val);
554  }
555  return *agg;
556 }
557 
558 #define DEF_SKIP_AGG_ADD(base_agg_func) \
559  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
560  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
561  if (val != skip_val) { \
562  base_agg_func(agg, val); \
563  } \
564  }
565 
566 #define DEF_SKIP_AGG(base_agg_func) \
567  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
568  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
569  if (val != skip_val) { \
570  const DATA_T old_agg = *agg; \
571  if (old_agg != skip_val) { \
572  base_agg_func(agg, val); \
573  } else { \
574  *agg = val; \
575  } \
576  } \
577  }
578 
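// Illustration (editorial, not part of the original source): with DATA_T defined as
// int64_t below, DEF_SKIP_AGG(agg_max) expands to a wrapper that ignores skip_val
// (the column's null sentinel) and seeds the accumulator on the first non-null value:
//
//   extern "C" ALWAYS_INLINE void agg_max_skip_val(int64_t* agg,
//                                                  const int64_t val,
//                                                  const int64_t skip_val) {
//     if (val != skip_val) {
//       const int64_t old_agg = *agg;
//       if (old_agg != skip_val) {
//         agg_max(agg, val);
//       } else {
//         *agg = val;
//       }
//     }
//   }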
579 #define DATA_T int64_t
580 DEF_SKIP_AGG(agg_max)
581 DEF_SKIP_AGG(agg_min)
582 #undef DATA_T
583 
584 #define DATA_T int32_t
585 DEF_SKIP_AGG(agg_max_int32)
586 DEF_SKIP_AGG(agg_min_int32)
587 #undef DATA_T
588 
589 #define DATA_T int16_t
590 DEF_SKIP_AGG(agg_max_int16)
591 DEF_SKIP_AGG(agg_min_int16)
592 #undef DATA_T
593 
594 #define DATA_T int8_t
595 DEF_SKIP_AGG(agg_max_int8)
596 DEF_SKIP_AGG(agg_min_int8)
597 #undef DATA_T
598 
599 #undef DEF_SKIP_AGG_ADD
600 #undef DEF_SKIP_AGG
601 
602 // TODO(alex): fix signature
603 
604 extern "C" ALWAYS_INLINE uint64_t agg_count_double(uint64_t* agg, const double val) {
605  return (*agg)++;
606 }
607 
608 extern "C" ALWAYS_INLINE void agg_sum_double(int64_t* agg, const double val) {
609  const auto r = *reinterpret_cast<const double*>(agg) + val;
610  *agg = *reinterpret_cast<const int64_t*>(may_alias_ptr(&r));
611 }
612 
613 extern "C" ALWAYS_INLINE void agg_max_double(int64_t* agg, const double val) {
614  const auto r = std::max(*reinterpret_cast<const double*>(agg), val);
615  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
616 }
617 
618 extern "C" ALWAYS_INLINE void agg_min_double(int64_t* agg, const double val) {
619  const auto r = std::min(*reinterpret_cast<const double*>(agg), val);
620  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
621 }
622 
623 extern "C" ALWAYS_INLINE void agg_id_double(int64_t* agg, const double val) {
624  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
625 }
626 
627 extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_double(int64_t* agg,
628  const double val,
629  const double null_val) {
630  if (val == null_val) {
631  return 0;
632  }
633 
634  if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)))) {
635  return 0;
636  } else if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&null_val)))) {
637  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
638  return 0;
639  } else {
640  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
641  return 15;
642  }
643 }
644 
645 extern "C" ALWAYS_INLINE uint32_t agg_count_float(uint32_t* agg, const float val) {
646  return (*agg)++;
647 }
648 
649 extern "C" ALWAYS_INLINE void agg_sum_float(int32_t* agg, const float val) {
650  const auto r = *reinterpret_cast<const float*>(agg) + val;
651  *agg = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
652 }
653 
654 extern "C" ALWAYS_INLINE void agg_max_float(int32_t* agg, const float val) {
655  const auto r = std::max(*reinterpret_cast<const float*>(agg), val);
656  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
657 }
658 
659 extern "C" ALWAYS_INLINE void agg_min_float(int32_t* agg, const float val) {
660  const auto r = std::min(*reinterpret_cast<const float*>(agg), val);
661  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
662 }
663 
664 extern "C" ALWAYS_INLINE void agg_id_float(int32_t* agg, const float val) {
665  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
666 }
667 
668 extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_float(int32_t* agg,
669  const float val,
670  const float null_val) {
671  if (val == null_val) {
672  return 0;
673  }
674 
675  if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)))) {
676  return 0;
677  } else if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&null_val)))) {
678  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
679  return 0;
680  } else {
681  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
682  return 15;
683  }
684 }
685 
686 extern "C" ALWAYS_INLINE uint64_t agg_count_double_skip_val(uint64_t* agg,
687  const double val,
688  const double skip_val) {
689  if (val != skip_val) {
690  return agg_count_double(agg, val);
691  }
692  return *agg;
693 }
694 
695 extern "C" ALWAYS_INLINE uint32_t agg_count_float_skip_val(uint32_t* agg,
696  const float val,
697  const float skip_val) {
698  if (val != skip_val) {
699  return agg_count_float(agg, val);
700  }
701  return *agg;
702 }
703 
704 #define DEF_SKIP_AGG_ADD(base_agg_func) \
705  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
706  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
707  if (val != skip_val) { \
708  base_agg_func(agg, val); \
709  } \
710  }
711 
712 #define DEF_SKIP_AGG(base_agg_func) \
713  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val( \
714  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
715  if (val != skip_val) { \
716  const ADDR_T old_agg = *agg; \
717  if (old_agg != *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&skip_val))) { \
718  base_agg_func(agg, val); \
719  } else { \
720  *agg = *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&val)); \
721  } \
722  } \
723  }
724 
725 #define DATA_T double
726 #define ADDR_T int64_t
727 DEF_SKIP_AGG(agg_sum_double)
728 DEF_SKIP_AGG(agg_max_double)
729 DEF_SKIP_AGG(agg_min_double)
730 #undef ADDR_T
731 #undef DATA_T
732 
733 #define DATA_T float
734 #define ADDR_T int32_t
735 DEF_SKIP_AGG(agg_sum_float)
736 DEF_SKIP_AGG(agg_max_float)
737 DEF_SKIP_AGG(agg_min_float)
738 #undef ADDR_T
739 #undef DATA_T
740 
741 #undef DEF_SKIP_AGG_ADD
742 #undef DEF_SKIP_AGG
743 
744 extern "C" ALWAYS_INLINE int64_t decimal_floor(const int64_t x, const int64_t scale) {
745  if (x >= 0) {
746  return x / scale * scale;
747  }
748  if (!(x % scale)) {
749  return x;
750  }
751  return x / scale * scale - scale;
752 }
753 
754 extern "C" ALWAYS_INLINE int64_t decimal_ceil(const int64_t x, const int64_t scale) {
755  return decimal_floor(x, scale) + (x % scale ? scale : 0);
756 }
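// Worked example (editorial): with scale = 100 (two decimal digits),
// decimal_floor(1250, 100) == 1200 and decimal_floor(-1250, 100) == -1300,
// i.e. 12.50 -> 12.00 and -12.50 -> -13.00; correspondingly,
// decimal_ceil(1250, 100) == 1300 and decimal_ceil(-1250, 100) == -1200.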
757 
758 // Shared memory aggregators. Should never be called,
759 // real implementations are in cuda_mapd_rt.cu.
760 #define DEF_SHARED_AGG_RET_STUBS(base_agg_func) \
761  extern "C" GPU_RT_STUB uint64_t base_agg_func##_shared(uint64_t* agg, \
762  const int64_t val) { \
763  return 0; \
764  } \
765  \
766  extern "C" GPU_RT_STUB uint64_t base_agg_func##_skip_val_shared( \
767  uint64_t* agg, const int64_t val, const int64_t skip_val) { \
768  return 0; \
769  } \
770  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_shared(uint32_t* agg, \
771  const int32_t val) { \
772  return 0; \
773  } \
774  \
775  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_skip_val_shared( \
776  uint32_t* agg, const int32_t val, const int32_t skip_val) { \
777  return 0; \
778  } \
779  \
780  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_shared(uint64_t* agg, \
781  const double val) { \
782  return 0; \
783  } \
784  \
785  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_skip_val_shared( \
786  uint64_t* agg, const double val, const double skip_val) { \
787  return 0; \
788  } \
789  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_shared(uint32_t* agg, \
790  const float val) { \
791  return 0; \
792  } \
793  \
794  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_skip_val_shared( \
795  uint32_t* agg, const float val, const float skip_val) { \
796  return 0; \
797  }
798 
799 #define DEF_SHARED_AGG_STUBS(base_agg_func) \
800  extern "C" GPU_RT_STUB void base_agg_func##_shared(int64_t* agg, const int64_t val) {} \
801  \
802  extern "C" GPU_RT_STUB void base_agg_func##_skip_val_shared( \
803  int64_t* agg, const int64_t val, const int64_t skip_val) {} \
804  extern "C" GPU_RT_STUB void base_agg_func##_int32_shared(int32_t* agg, \
805  const int32_t val) {} \
806  extern "C" GPU_RT_STUB void base_agg_func##_int16_shared(int16_t* agg, \
807  const int16_t val) {} \
808  extern "C" GPU_RT_STUB void base_agg_func##_int8_shared(int8_t* agg, \
809  const int8_t val) {} \
810  \
811  extern "C" GPU_RT_STUB void base_agg_func##_int32_skip_val_shared( \
812  int32_t* agg, const int32_t val, const int32_t skip_val) {} \
813  \
814  extern "C" GPU_RT_STUB void base_agg_func##_double_shared(int64_t* agg, \
815  const double val) {} \
816  \
817  extern "C" GPU_RT_STUB void base_agg_func##_double_skip_val_shared( \
818  int64_t* agg, const double val, const double skip_val) {} \
819  extern "C" GPU_RT_STUB void base_agg_func##_float_shared(int32_t* agg, \
820  const float val) {} \
821  \
822  extern "C" GPU_RT_STUB void base_agg_func##_float_skip_val_shared( \
823  int32_t* agg, const float val, const float skip_val) {}
824 
825 DEF_SHARED_AGG_RET_STUBS(agg_count)
826 DEF_SHARED_AGG_STUBS(agg_max)
827 DEF_SHARED_AGG_STUBS(agg_min)
828 DEF_SHARED_AGG_STUBS(agg_id)
829 
830 extern "C" GPU_RT_STUB int8_t* agg_id_varlen_shared(int8_t* varlen_buffer,
831  const int64_t offset,
832  const int8_t* value,
833  const int64_t size_bytes) {
834  return nullptr;
835 }
836 
837 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_shared(int64_t* agg,
838  const int64_t val,
839  const int64_t null_val) {
840  return 0;
841 }
842 
843 extern "C" GPU_RT_STUB int32_t
844 checked_single_agg_id_int32_shared(int32_t* agg,
845  const int32_t val,
846  const int32_t null_val) {
847  return 0;
848 }
849 extern "C" GPU_RT_STUB int32_t
850 checked_single_agg_id_int16_shared(int16_t* agg,
851  const int16_t val,
852  const int16_t null_val) {
853  return 0;
854 }
855 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_int8_shared(int8_t* agg,
856  const int8_t val,
857  const int8_t null_val) {
858  return 0;
859 }
860 
861 extern "C" GPU_RT_STUB int32_t
862 checked_single_agg_id_double_shared(int64_t* agg,
863  const double val,
864  const double null_val) {
865  return 0;
866 }
867 
868 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_float_shared(int32_t* agg,
869  const float val,
870  const float null_val) {
871  return 0;
872 }
873 
874 extern "C" GPU_RT_STUB void agg_max_int16_skip_val_shared(int16_t* agg,
875  const int16_t val,
876  const int16_t skip_val) {}
877 
878 extern "C" GPU_RT_STUB void agg_max_int8_skip_val_shared(int8_t* agg,
879  const int8_t val,
880  const int8_t skip_val) {}
881 
882 extern "C" GPU_RT_STUB void agg_min_int16_skip_val_shared(int16_t* agg,
883  const int16_t val,
884  const int16_t skip_val) {}
885 
886 extern "C" GPU_RT_STUB void agg_min_int8_skip_val_shared(int8_t* agg,
887  const int8_t val,
888  const int8_t skip_val) {}
889 
890 extern "C" GPU_RT_STUB void agg_id_double_shared_slow(int64_t* agg, const double* val) {}
891 
892 extern "C" GPU_RT_STUB int64_t agg_sum_shared(int64_t* agg, const int64_t val) {
893  return 0;
894 }
895 
896 extern "C" GPU_RT_STUB int64_t agg_sum_skip_val_shared(int64_t* agg,
897  const int64_t val,
898  const int64_t skip_val) {
899  return 0;
900 }
901 extern "C" GPU_RT_STUB int32_t agg_sum_int32_shared(int32_t* agg, const int32_t val) {
902  return 0;
903 }
904 
905 extern "C" GPU_RT_STUB int32_t agg_sum_int32_skip_val_shared(int32_t* agg,
906  const int32_t val,
907  const int32_t skip_val) {
908  return 0;
909 }
910 
911 extern "C" GPU_RT_STUB void agg_sum_double_shared(int64_t* agg, const double val) {}
912 
913 extern "C" GPU_RT_STUB void agg_sum_double_skip_val_shared(int64_t* agg,
914  const double val,
915  const double skip_val) {}
916 extern "C" GPU_RT_STUB void agg_sum_float_shared(int32_t* agg, const float val) {}
917 
918 extern "C" GPU_RT_STUB void agg_sum_float_skip_val_shared(int32_t* agg,
919  const float val,
920  const float skip_val) {}
921 
922 extern "C" GPU_RT_STUB void force_sync() {}
923 
924 extern "C" GPU_RT_STUB void sync_warp() {}
925 extern "C" GPU_RT_STUB void sync_warp_protected(int64_t thread_pos, int64_t row_count) {}
926 extern "C" GPU_RT_STUB void sync_threadblock() {}
927 
928 extern "C" GPU_RT_STUB void write_back_non_grouped_agg(int64_t* input_buffer,
929  int64_t* output_buffer,
930  const int32_t num_agg_cols){};
931 // x64 stride functions
932 
933 extern "C" NEVER_INLINE int32_t pos_start_impl(int32_t* error_code) {
934  int32_t row_index_resume{0};
935  if (error_code) {
936  row_index_resume = error_code[0];
937  error_code[0] = 0;
938  }
939  return row_index_resume;
940 }
941 
942 extern "C" NEVER_INLINE int32_t group_buff_idx_impl() {
943  return pos_start_impl(nullptr);
944 }
945 
946 extern "C" NEVER_INLINE int32_t pos_step_impl() {
947  return 1;
948 }
949 
950 extern "C" GPU_RT_STUB int8_t thread_warp_idx(const int8_t warp_sz) {
951  return 0;
952 }
953 
954 extern "C" GPU_RT_STUB int64_t get_thread_index() {
955  return 0;
956 }
957 
958 extern "C" GPU_RT_STUB int64_t* declare_dynamic_shared_memory() {
959  return nullptr;
960 }
961 
962 extern "C" GPU_RT_STUB int64_t get_block_index() {
963  return 0;
964 }
965 
966 #undef GPU_RT_STUB
967 
968 extern "C" ALWAYS_INLINE void record_error_code(const int32_t err_code,
969  int32_t* error_codes) {
970  // NB: never override persistent error codes (with code greater than zero).
971  // On GPU, a projection query with a limit can run out of slots without it
972  // being an actual error if the limit has been hit. If a persistent error
973  // (division by zero, for example) occurs before running out of slots, we
974  // have to avoid overriding it, because there's a risk that the query would
975  // go through if we override with a potentially benign out-of-slots code.
976  if (err_code && error_codes[pos_start_impl(nullptr)] <= 0) {
977  error_codes[pos_start_impl(nullptr)] = err_code;
978  }
979 }
980 
981 extern "C" ALWAYS_INLINE int32_t get_error_code(int32_t* error_codes) {
982  return error_codes[pos_start_impl(nullptr)];
983 }
984 
985 // group by helpers
986 
987 extern "C" NEVER_INLINE const int64_t* init_shared_mem_nop(
988  const int64_t* groups_buffer,
989  const int32_t groups_buffer_size) {
990  return groups_buffer;
991 }
992 
993 extern "C" NEVER_INLINE void write_back_nop(int64_t* dest,
994  int64_t* src,
995  const int32_t sz) {
996 #ifndef _WIN32
997  // the body is not really needed, just make sure the call is not optimized away
998  assert(dest);
999 #endif
1000 }
1001 
1002 extern "C" int64_t* init_shared_mem(const int64_t* global_groups_buffer,
1003  const int32_t groups_buffer_size) {
1004  return nullptr;
1005 }
1006 
1007 extern "C" void init_group_by_buffer_gpu(
1008  int64_t* groups_buffer,
1009  const int64_t* init_vals,
1010  const uint32_t groups_buffer_entry_count,
1011  const uint32_t key_qw_count,
1012  const uint32_t agg_col_count,
1013  const bool keyless,
1014  const int8_t warp_size) {
1015 #ifndef _WIN32
1016  // the body is not really needed, just make sure the call is not optimized away
1017  assert(groups_buffer);
1018 #endif
1019 }
1020 
1021 extern "C" void init_columnar_group_by_buffer_gpu(
1022  int64_t* groups_buffer,
1023  const int64_t* init_vals,
1024  const uint32_t groups_buffer_entry_count,
1025  const uint32_t key_qw_count,
1026  const uint32_t agg_col_count,
1027  const bool keyless,
1028  const bool blocks_share_memory,
1029  const int32_t frag_idx) {
1030 #ifndef _WIN32
1031  // the body is not really needed, just make sure the call is not optimized away
1032  assert(groups_buffer);
1033 #endif
1034 }
1035 
1036 extern "C" NEVER_INLINE void init_group_by_buffer_impl(
1037  int64_t* groups_buffer,
1038  const int64_t* init_vals,
1039  const uint32_t groups_buffer_entry_count,
1040  const uint32_t key_qw_count,
1041  const uint32_t agg_col_count,
1042  const bool keyless,
1043  const int8_t warp_size) {
1044 #ifndef _WIN32
1045  // the body is not really needed, just make sure the call is not optimized away
1046  assert(groups_buffer);
1047 #endif
1048 }
1049 
1050 template <typename T>
1051 ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
1052  const uint32_t h,
1053  const T* key,
1054  const uint32_t key_count,
1055  const uint32_t row_size_quad) {
1056  auto off = h * row_size_quad;
1057  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
1058  if (*row_ptr == get_empty_key<T>()) {
1059  memcpy(row_ptr, key, key_count * sizeof(T));
1060  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1061  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1062  }
1063  if (memcmp(row_ptr, key, key_count * sizeof(T)) == 0) {
1064  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1065  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1066  }
1067  return nullptr;
1068 }
1069 
1070 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
1071  const uint32_t h,
1072  const int64_t* key,
1073  const uint32_t key_count,
1074  const uint32_t key_width,
1075  const uint32_t row_size_quad) {
1076  switch (key_width) {
1077  case 4:
1078  return get_matching_group_value(groups_buffer,
1079  h,
1080  reinterpret_cast<const int32_t*>(key),
1081  key_count,
1082  row_size_quad);
1083  case 8:
1084  return get_matching_group_value(groups_buffer, h, key, key_count, row_size_quad);
1085  default:;
1086  }
1087  return nullptr;
1088 }
1089 
1090 template <typename T>
1091 ALWAYS_INLINE int32_t get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1092  const uint32_t entry_count,
1093  const uint32_t h,
1094  const T* key,
1095  const uint32_t key_count) {
1096  auto off = h;
1097  auto key_buffer = reinterpret_cast<T*>(groups_buffer);
1098  if (key_buffer[off] == get_empty_key<T>()) {
1099  for (size_t i = 0; i < key_count; ++i) {
1100  key_buffer[off] = key[i];
1101  off += entry_count;
1102  }
1103  return h;
1104  }
1105  off = h;
1106  for (size_t i = 0; i < key_count; ++i) {
1107  if (key_buffer[off] != key[i]) {
1108  return -1;
1109  }
1110  off += entry_count;
1111  }
1112  return h;
1113 }
1114 
1115 extern "C" ALWAYS_INLINE int32_t
1116 get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1117  const uint32_t entry_count,
1118  const uint32_t h,
1119  const int64_t* key,
1120  const uint32_t key_count,
1121  const uint32_t key_width) {
1122  switch (key_width) {
1123  case 4:
1124  return get_matching_group_value_columnar_slot(groups_buffer,
1125  entry_count,
1126  h,
1127  reinterpret_cast<const int32_t*>(key),
1128  key_count);
1129  case 8:
1130  return get_matching_group_value_columnar_slot(
1131  groups_buffer, entry_count, h, key, key_count);
1132  default:
1133  return -1;
1134  }
1135  return -1;
1136 }
1137 
1138 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_columnar(
1139  int64_t* groups_buffer,
1140  const uint32_t h,
1141  const int64_t* key,
1142  const uint32_t key_qw_count,
1143  const size_t entry_count) {
1144  auto off = h;
1145  if (groups_buffer[off] == EMPTY_KEY_64) {
1146  for (size_t i = 0; i < key_qw_count; ++i) {
1147  groups_buffer[off] = key[i];
1148  off += entry_count;
1149  }
1150  return &groups_buffer[off];
1151  }
1152  off = h;
1153  for (size_t i = 0; i < key_qw_count; ++i) {
1154  if (groups_buffer[off] != key[i]) {
1155  return nullptr;
1156  }
1157  off += entry_count;
1158  }
1159  return &groups_buffer[off];
1160 }
1161 
1162 /*
1163  * For a particular hashed_index, returns the row-wise offset
1164  * to the first matching agg column in memory.
1165  * It also checks the corresponding group columns and, if the entry is still
1166  * empty, initializes all of the key slots (it is assumed all group columns are
1167  * 64-bit wide).
1168  *
1169  * Memory layout:
1170  *
1171  * | prepended group columns (64-bit each) | agg columns |
1172  */
1172  */
1173 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash(
1174  int64_t* groups_buffer,
1175  const uint32_t hashed_index,
1176  const int64_t* key,
1177  const uint32_t key_count,
1178  const uint32_t row_size_quad) {
1179  uint32_t off = hashed_index * row_size_quad;
1180  if (groups_buffer[off] == EMPTY_KEY_64) {
1181  for (uint32_t i = 0; i < key_count; ++i) {
1182  groups_buffer[off + i] = key[i];
1183  }
1184  }
1185  return groups_buffer + off + key_count;
1186 }
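// Worked example (editorial): with row_size_quad = 4 and key_count = 2, a group that
// hashes to hashed_index = 3 starts at slot off = 12; slots 12-13 hold the 64-bit
// group key columns, and the returned pointer, groups_buffer + 14, is where that
// group's agg columns begin.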
1187 
1188 /*
1189  * For a particular hashed_index, returns the row-wise offset of the group
1190  * in the output buffer. Since it is intended for keyless hash use, it assumes
1191  * there are no group columns prepended to the output buffer.
1192  */
1193 extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash_keyless(
1194  int64_t* groups_buffer,
1195  int64_t* groups_buffer,
1196  const uint32_t hashed_index,
1197  const uint32_t row_size_quad) {
1198  return groups_buffer + row_size_quad * hashed_index;
1199 }
1200 
1201 /*
1202  * For a particular hashed_index, find and initialize (if necessary) all the group
1203  * columns corresponding to a key. It is assumed that all group columns are 64-bit wide.
1204  */
1205 extern "C" ALWAYS_INLINE void set_matching_group_value_perfect_hash_columnar(
1206  int64_t* groups_buffer,
1207  const uint32_t hashed_index,
1208  const int64_t* key,
1209  const uint32_t key_count,
1210  const uint32_t entry_count) {
1211  if (groups_buffer[hashed_index] == EMPTY_KEY_64) {
1212  for (uint32_t i = 0; i < key_count; i++) {
1213  groups_buffer[i * entry_count + hashed_index] = key[i];
1214  }
1215  }
1216 }
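// Worked example (editorial): in the columnar layout each key column is a contiguous
// array of entry_count slots, so key i of the group at hashed_index lives at
// groups_buffer[i * entry_count + hashed_index]; with entry_count = 1000 and
// hashed_index = 3, a two-column key occupies slots 3 and 1003.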
1217 
1218 #include "GeoOpsRuntime.cpp"
1219 #include "GroupByRuntime.cpp"
1221 
1222 extern "C" ALWAYS_INLINE int64_t* get_group_value_fast_keyless(
1223  int64_t* groups_buffer,
1224  const int64_t key,
1225  const int64_t min_key,
1226  const int64_t /* bucket */,
1227  const uint32_t row_size_quad) {
1228  return groups_buffer + row_size_quad * (key - min_key);
1229 }
1230 
1231 extern "C" ALWAYS_INLINE int64_t* get_group_value_fast_keyless_semiprivate(
1232  int64_t* groups_buffer,
1233  const int64_t key,
1234  const int64_t min_key,
1235  const int64_t /* bucket */,
1236  const uint32_t row_size_quad,
1237  const uint8_t thread_warp_idx,
1238  const uint8_t warp_size) {
1239  return groups_buffer + row_size_quad * (warp_size * (key - min_key) + thread_warp_idx);
1240 }
1241 
1242 extern "C" ALWAYS_INLINE int8_t* extract_str_ptr(const uint64_t str_and_len) {
1243  return reinterpret_cast<int8_t*>(str_and_len & 0xffffffffffff);
1244 }
1245 
1246 extern "C" ALWAYS_INLINE int32_t extract_str_len(const uint64_t str_and_len) {
1247  return static_cast<int64_t>(str_and_len) >> 48;
1248 }
1249 
1250 extern "C" NEVER_INLINE int8_t* extract_str_ptr_noinline(const uint64_t str_and_len) {
1251  return extract_str_ptr(str_and_len);
1252 }
1253 
1254 extern "C" NEVER_INLINE int32_t extract_str_len_noinline(const uint64_t str_and_len) {
1255  return extract_str_len(str_and_len);
1256 }
1257 
1258 extern "C" ALWAYS_INLINE uint64_t string_pack(const int8_t* ptr, const int32_t len) {
1259  return (reinterpret_cast<const uint64_t>(ptr) & 0xffffffffffff) |
1260  (static_cast<const uint64_t>(len) << 48);
1261 }
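// Editorial note: string_pack / extract_str_ptr / extract_str_len use the common
// 48-bit pointer packing trick: the pointer occupies the low 48 bits of a uint64_t
// and the length the high 16 bits, so extract_str_ptr masks with 0xffffffffffff and
// extract_str_len shifts right by 48. For lengths that fit in 16 bits,
// string_pack(p, n) followed by the two extractors round-trips (p, n).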
1262 
1263 #ifdef __clang__
1264 #include "../Utils/StringLike.cpp"
1265 #endif
1266 
1267 #ifndef __CUDACC__
1268 #include "TopKRuntime.cpp"
1269 #endif
1270 
1271 extern "C" ALWAYS_INLINE DEVICE int32_t char_length(const char* str,
1272  const int32_t str_len) {
1273  return str_len;
1274 }
1275 
1276 extern "C" ALWAYS_INLINE DEVICE int32_t char_length_nullable(const char* str,
1277  const int32_t str_len,
1278  const int32_t int_null) {
1279  if (!str) {
1280  return int_null;
1281  }
1282  return str_len;
1283 }
1284 
1285 extern "C" ALWAYS_INLINE DEVICE int32_t key_for_string_encoded(const int32_t str_id) {
1286  return str_id;
1287 }
1288 
1289 extern "C" ALWAYS_INLINE DEVICE bool sample_ratio(const double proportion,
1290  const int64_t row_offset) {
1291  const int64_t threshold = 4294967296 * proportion;
1292  return (row_offset * 2654435761) % 4294967296 < threshold;
1293 }
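// Editorial note: sample_ratio is a deterministic Bernoulli filter. The multiplier
// 2654435761 is the well-known multiplicative-hash constant (a prime close to
// 2^32 / phi), so (row_offset * 2654435761) % 4294967296 scatters row offsets roughly
// uniformly over [0, 2^32), and comparing against threshold = 2^32 * proportion keeps
// approximately that fraction of the rows.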
1294 
1295 extern "C" ALWAYS_INLINE DEVICE double width_bucket(const double target_value,
1296  const double lower_bound,
1297  const double upper_bound,
1298  const double scale_factor,
1299  const int32_t partition_count) {
1300  if (target_value < lower_bound) {
1301  return 0;
1302  } else if (target_value >= upper_bound) {
1303  return partition_count + 1;
1304  }
1305  return ((target_value - lower_bound) * scale_factor) + 1;
1306 }
1307 
1308 extern "C" ALWAYS_INLINE DEVICE double width_bucket_reversed(
1309  const double target_value,
1310  const double lower_bound,
1311  const double upper_bound,
1312  const double scale_factor,
1313  const int32_t partition_count) {
1314  if (target_value > lower_bound) {
1315  return 0;
1316  } else if (target_value <= upper_bound) {
1317  return partition_count + 1;
1318  }
1319  return ((lower_bound - target_value) * scale_factor) + 1;
1320 }
1321 
1322 extern "C" ALWAYS_INLINE double width_bucket_nullable(const double target_value,
1323  const double lower_bound,
1324  const double upper_bound,
1325  const double scale_factor,
1326  const int32_t partition_count,
1327  const double null_val) {
1328  if (target_value == null_val) {
1329  return INT32_MIN;
1330  }
1331  return width_bucket(
1332  target_value, lower_bound, upper_bound, scale_factor, partition_count);
1333 }
1334 
1335 extern "C" ALWAYS_INLINE double width_bucket_reversed_nullable(
1336  const double target_value,
1337  const double lower_bound,
1338  const double upper_bound,
1339  const double scale_factor,
1340  const int32_t partition_count,
1341  const double null_val) {
1342  if (target_value == null_val) {
1343  return INT32_MIN;
1344  }
1345  return width_bucket_reversed(
1346  target_value, lower_bound, upper_bound, scale_factor, partition_count);
1347 }
1348 
1349 // A version of width_bucket with no out-of-bounds check, which can be called
1350 // if we can ensure the input target_value expression always resides in the
1351 // valid range (so we can also avoid null checking).
1352 extern "C" ALWAYS_INLINE DEVICE double width_bucket_no_oob_check(
1353  const double target_value,
1354  const double lower_bound,
1355  const double scale_factor) {
1356  return ((target_value - lower_bound) * scale_factor) + 1;
1357 }
1358 
1359 extern "C" ALWAYS_INLINE DEVICE double width_bucket_reversed_no_oob_check(
1360  const double target_value,
1361  const double lower_bound,
1362  const double scale_factor) {
1363  return ((lower_bound - target_value) * scale_factor) + 1;
1364 }
1365 
1366 extern "C" ALWAYS_INLINE DEVICE double width_bucket_expr(const double target_value,
1367  const bool reversed,
1368  const double lower_bound,
1369  const double upper_bound,
1370  const int32_t partition_count) {
1371  if (reversed) {
1372  return width_bucket_reversed(target_value,
1373  lower_bound,
1374  upper_bound,
1375  partition_count / (lower_bound - upper_bound),
1376  partition_count);
1377  }
1378  return width_bucket(target_value,
1379  lower_bound,
1380  upper_bound,
1381  partition_count / (upper_bound - lower_bound),
1382  partition_count);
1383 }
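// Worked example (editorial): width_bucket_expr(7.0, false, 0.0, 10.0, 5) computes
// scale_factor = 5 / (10 - 0) = 0.5 and returns (7 - 0) * 0.5 + 1 = 4.5, a fractional
// bucket number that the calling code presumably truncates to bucket 4; inputs below
// the lower bound map to 0 and inputs at or above the upper bound map to
// partition_count + 1 (here 6).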
1384 
1385 extern "C" ALWAYS_INLINE DEVICE double width_bucket_expr_nullable(
1386  const double target_value,
1387  const bool reversed,
1388  const double lower_bound,
1389  const double upper_bound,
1390  const int32_t partition_count,
1391  const double null_val) {
1392  if (target_value == null_val) {
1393  return INT32_MIN;
1394  }
1395  return width_bucket_expr(
1396  target_value, reversed, lower_bound, upper_bound, partition_count);
1397 }
1398 
1399 extern "C" ALWAYS_INLINE DEVICE double width_bucket_expr_no_oob_check(
1400  const double target_value,
1401  const bool reversed,
1402  const double lower_bound,
1403  const double upper_bound,
1404  const int32_t partition_count) {
1405  if (reversed) {
1406  return width_bucket_reversed_no_oob_check(
1407  target_value, lower_bound, partition_count / (lower_bound - upper_bound));
1408  }
1409  return width_bucket_no_oob_check(
1410  target_value, lower_bound, partition_count / (upper_bound - lower_bound));
1411 }
1412 
1413 extern "C" ALWAYS_INLINE int64_t row_number_window_func(const int64_t output_buff,
1414  const int64_t pos) {
1415  return reinterpret_cast<const int64_t*>(output_buff)[pos];
1416 }
1417 
1418 extern "C" ALWAYS_INLINE double percent_window_func(const int64_t output_buff,
1419  const int64_t pos) {
1420  return reinterpret_cast<const double*>(output_buff)[pos];
1421 }
1422 
1423 extern "C" ALWAYS_INLINE double load_double(const int64_t* agg) {
1424  return *reinterpret_cast<const double*>(may_alias_ptr(agg));
1425 }
1426 
1427 extern "C" ALWAYS_INLINE float load_float(const int32_t* agg) {
1428  return *reinterpret_cast<const float*>(may_alias_ptr(agg));
1429 }
1430 
1431 extern "C" ALWAYS_INLINE double load_avg_int(const int64_t* sum,
1432  const int64_t* count,
1433  const double null_val) {
1434  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
1435 }
1436 
1437 extern "C" ALWAYS_INLINE double load_avg_decimal(const int64_t* sum,
1438  const int64_t* count,
1439  const double null_val,
1440  const uint32_t scale) {
1441  return *count != 0 ? (static_cast<double>(*sum) / pow(10, scale)) / *count : null_val;
1442 }
1443 
1444 extern "C" ALWAYS_INLINE double load_avg_double(const int64_t* agg,
1445  const int64_t* count,
1446  const double null_val) {
1447  return *count != 0 ? *reinterpret_cast<const double*>(may_alias_ptr(agg)) / *count
1448  : null_val;
1449 }
1450 
1451 extern "C" ALWAYS_INLINE double load_avg_float(const int32_t* agg,
1452  const int32_t* count,
1453  const double null_val) {
1454  return *count != 0 ? *reinterpret_cast<const float*>(may_alias_ptr(agg)) / *count
1455  : null_val;
1456 }
1457 
1458 extern "C" NEVER_INLINE void linear_probabilistic_count(uint8_t* bitmap,
1459  const uint32_t bitmap_bytes,
1460  const uint8_t* key_bytes,
1461  const uint32_t key_len) {
1462  const uint32_t bit_pos = MurmurHash3(key_bytes, key_len, 0) % (bitmap_bytes * 8);
1463  const uint32_t word_idx = bit_pos / 32;
1464  const uint32_t bit_idx = bit_pos % 32;
1465  reinterpret_cast<uint32_t*>(bitmap)[word_idx] |= 1 << bit_idx;
1466 }
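// Worked example (editorial): for a 1 KB bitmap (bitmap_bytes = 1024) the hash is
// reduced modulo 8192 bit positions; a bit_pos of 77 lands in 32-bit word
// 77 / 32 == 2 with bit index 77 % 32 == 13, so the function sets
// reinterpret_cast<uint32_t*>(bitmap)[2] |= 1u << 13.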
1467 
1468 extern "C" NEVER_INLINE void query_stub_hoisted_literals(const int8_t** col_buffers,
1469  const int8_t* literals,
1470  const int64_t* num_rows,
1471  const uint64_t* frag_row_offsets,
1472  const int32_t* max_matched,
1473  const int64_t* init_agg_value,
1474  int64_t** out,
1475  uint32_t frag_idx,
1476  const int64_t* join_hash_tables,
1477  int32_t* error_code,
1478  int32_t* total_matched) {
1479 #ifndef _WIN32
1480  assert(col_buffers || literals || num_rows || frag_row_offsets || max_matched ||
1481  init_agg_value || out || frag_idx || error_code || join_hash_tables ||
1482  total_matched);
1483 #endif
1484 }
1485 
1486 extern "C" void multifrag_query_hoisted_literals(const int8_t*** col_buffers,
1487  const uint64_t* num_fragments,
1488  const int8_t* literals,
1489  const int64_t* num_rows,
1490  const uint64_t* frag_row_offsets,
1491  const int32_t* max_matched,
1492  int32_t* total_matched,
1493  const int64_t* init_agg_value,
1494  int64_t** out,
1495  int32_t* error_code,
1496  const uint32_t* num_tables_ptr,
1497  const int64_t* join_hash_tables) {
1498  for (uint32_t i = 0; i < *num_fragments; ++i) {
1499  query_stub_hoisted_literals(col_buffers ? col_buffers[i] : nullptr,
1500  literals,
1501  &num_rows[i * (*num_tables_ptr)],
1502  &frag_row_offsets[i * (*num_tables_ptr)],
1503  max_matched,
1504  init_agg_value,
1505  out,
1506  i,
1507  join_hash_tables,
1508  total_matched,
1509  error_code);
1510  }
1511 }
1512 
1513 extern "C" NEVER_INLINE void query_stub(const int8_t** col_buffers,
1514  const int64_t* num_rows,
1515  const uint64_t* frag_row_offsets,
1516  const int32_t* max_matched,
1517  const int64_t* init_agg_value,
1518  int64_t** out,
1519  uint32_t frag_idx,
1520  const int64_t* join_hash_tables,
1521  int32_t* error_code,
1522  int32_t* total_matched) {
1523 #ifndef _WIN32
1524  assert(col_buffers || num_rows || frag_row_offsets || max_matched || init_agg_value ||
1525  out || frag_idx || error_code || join_hash_tables || total_matched);
1526 #endif
1527 }
1528 
1529 extern "C" void multifrag_query(const int8_t*** col_buffers,
1530  const uint64_t* num_fragments,
1531  const int64_t* num_rows,
1532  const uint64_t* frag_row_offsets,
1533  const int32_t* max_matched,
1534  int32_t* total_matched,
1535  const int64_t* init_agg_value,
1536  int64_t** out,
1537  int32_t* error_code,
1538  const uint32_t* num_tables_ptr,
1539  const int64_t* join_hash_tables) {
1540  for (uint32_t i = 0; i < *num_fragments; ++i) {
1541  query_stub(col_buffers ? col_buffers[i] : nullptr,
1542  &num_rows[i * (*num_tables_ptr)],
1543  &frag_row_offsets[i * (*num_tables_ptr)],
1544  max_matched,
1545  init_agg_value,
1546  out,
1547  i,
1548  join_hash_tables,
1549  total_matched,
1550  error_code);
1551  }
1552 }
1553 
1554 extern "C" bool check_interrupt() {
1555  if (check_interrupt_init(static_cast<unsigned>(INT_CHECK))) {
1556  return true;
1557  }
1558  return false;
1559 }
1560 
1561 extern "C" bool check_interrupt_init(unsigned command) {
1562  static std::atomic_bool runtime_interrupt_flag{false};
1563 
1564  if (command == static_cast<unsigned>(INT_CHECK)) {
1565  if (runtime_interrupt_flag.load()) {
1566  return true;
1567  }
1568  return false;
1569  }
1570  if (command == static_cast<unsigned>(INT_ABORT)) {
1571  runtime_interrupt_flag.store(true);
1572  return false;
1573  }
1574  if (command == static_cast<unsigned>(INT_RESET)) {
1575  runtime_interrupt_flag.store(false);
1576  return false;
1577  }
1578  return false;
1579 }
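// Usage sketch (editorial, hypothetical call sites): the same entry point serves both
// as the flag holder and as a small check/abort/reset protocol, e.g.
//
//   check_interrupt_init(static_cast<unsigned>(INT_ABORT));  // request an abort
//   if (check_interrupt()) { /* running kernel bails out */ }
//   check_interrupt_init(static_cast<unsigned>(INT_RESET));  // clear before next run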
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
__device__ void sync_warp_protected(int64_t thread_pos, int64_t row_count)
ALWAYS_INLINE void agg_sum_float(int32_t *agg, const float val)
ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t *agg, const int64_t val, const int64_t skip_val)
#define DEF_UMINUS_NULLABLE(type, null_type)
ALWAYS_INLINE void record_error_code(const int32_t err_code, int32_t *error_codes)
GPU_RT_STUB int32_t checked_single_agg_id_int32_shared(int32_t *agg, const int32_t val, const int32_t null_val)
__device__ void agg_count_distinct_bitmap_skip_val_gpu(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t skip_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
#define DEF_CHECKED_SINGLE_AGG_ID_INT(n)
__device__ int64_t * get_matching_group_value_columnar(int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
ALWAYS_INLINE uint32_t agg_count_float(uint32_t *agg, const float val)
#define EMPTY_KEY_64
__device__ void write_back_nop(int64_t *dest, int64_t *src, const int32_t sz)
Definition: cuda_mapd_rt.cu:50
#define GPU_RT_STUB
__device__ void agg_sum_float_skip_val_shared(int32_t *agg, const float val, const float skip_val)
void agg_min_int32(int32_t *agg, const int32_t val)
ALWAYS_INLINE int64_t row_number_window_func(const int64_t output_buff, const int64_t pos)
#define DEF_CAST_NULLABLE_BIDIR(type1, type2)
ALWAYS_INLINE double load_avg_float(const int32_t *agg, const int32_t *count, const double null_val)
ALWAYS_INLINE void agg_max_float(int32_t *agg, const float val)
ALWAYS_INLINE int64_t * get_group_value_fast_keyless_semiprivate(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t, const uint32_t row_size_quad, const uint8_t thread_warp_idx, const uint8_t warp_size)
ALWAYS_INLINE int32_t agg_sum_int32_skip_val(int32_t *agg, const int32_t val, const int32_t skip_val)
ALWAYS_INLINE uint64_t agg_count(uint64_t *agg, const int64_t)
FORCE_INLINE uint8_t get_rank(uint64_t x, uint32_t b)
ALWAYS_INLINE int64_t scale_decimal_down_not_nullable(const int64_t operand, const int64_t scale, const int64_t null_val)
__device__ int8_t thread_warp_idx(const int8_t warp_sz)
Definition: cuda_mapd_rt.cu:40
__global__ void init_group_by_buffer_gpu(int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const bool keyless, const int8_t warp_size)
ALWAYS_INLINE double load_avg_double(const int64_t *agg, const int64_t *count, const double null_val)
#define DEF_CAST_NULLABLE(from_type, to_type)
ALWAYS_INLINE int32_t checked_single_agg_id_double(int64_t *agg, const double val, const double null_val)
__device__ int64_t get_thread_index()
Definition: cuda_mapd_rt.cu:20
RUNTIME_EXPORT NEVER_INLINE DEVICE uint64_t MurmurHash64A(const void *key, int len, uint64_t seed)
Definition: MurmurHash.cpp:27
#define const
ALWAYS_INLINE DEVICE double width_bucket_expr_nullable(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count, const double null_val)
ALWAYS_INLINE DEVICE double width_bucket_reversed_no_oob_check(const double target_value, const double lower_bound, const double scale_factor)
__device__ int32_t pos_step_impl()
Definition: cuda_mapd_rt.cu:36
__device__ void write_back_non_grouped_agg(int64_t *input_buffer, int64_t *output_buffer, const int32_t agg_idx)
ALWAYS_INLINE double load_double(const int64_t *agg)
__device__ int32_t checked_single_agg_id_double_shared(int64_t *agg, const double val, const double null_val)
__device__ const int64_t * init_shared_mem_nop(const int64_t *groups_buffer, const int32_t groups_buffer_size)
Definition: cuda_mapd_rt.cu:44
ALWAYS_INLINE DEVICE double width_bucket_expr_no_oob_check(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count)
#define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym)
ALWAYS_INLINE void agg_count_distinct_bitmap_skip_val(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t skip_val)
ALWAYS_INLINE int64_t scale_decimal_down_nullable(const int64_t operand, const int64_t scale, const int64_t null_val)
#define DEF_AGG_MAX_INT(n)
void multifrag_query(const int8_t ***col_buffers, const uint64_t *num_fragments, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, int32_t *total_matched, const int64_t *init_agg_value, int64_t **out, int32_t *error_code, const uint32_t *num_tables_ptr, const int64_t *join_hash_tables)
ALWAYS_INLINE int32_t extract_str_len(const uint64_t str_and_len)
__device__ int32_t checked_single_agg_id_float_shared(int32_t *agg, const float val, const float null_val)
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
ALWAYS_INLINE int32_t checked_single_agg_id(int64_t *agg, const int64_t val, const int64_t null_val)
ALWAYS_INLINE void set_matching_group_value_perfect_hash_columnar(int64_t *groups_buffer, const uint32_t hashed_index, const int64_t *key, const uint32_t key_count, const uint32_t entry_count)
#define DEF_CAST_SCALED_NULLABLE(from_type, to_type)
void agg_max_int16(int16_t *agg, const int16_t val)
ALWAYS_INLINE int64_t floor_div_nullable_lhs(const int64_t dividend, const int64_t divisor, const int64_t null_val)
void agg_min_int8(int8_t *agg, const int8_t val)
__device__ int64_t agg_sum_shared(int64_t *agg, const int64_t val)
ALWAYS_INLINE int64_t * get_group_value_fast_keyless(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t, const uint32_t row_size_quad)
__device__ void agg_id_double_shared_slow(int64_t *agg, const double *val)
ALWAYS_INLINE uint32_t agg_count_int32(uint32_t *agg, const int32_t)
ALWAYS_INLINE void agg_id_double(int64_t *agg, const double val)
ALWAYS_INLINE uint64_t string_pack(const int8_t *ptr, const int32_t len)
__device__ int8_t * agg_id_varlen_shared(int8_t *varlen_buffer, const int64_t offset, const int8_t *value, const int64_t size_bytes)
__device__ int64_t * declare_dynamic_shared_memory()
Definition: cuda_mapd_rt.cu:57
__device__ int32_t agg_sum_int32_shared(int32_t *agg, const int32_t val)
__device__ int64_t agg_sum_skip_val_shared(int64_t *agg, const int64_t val, const int64_t skip_val)
__device__ void agg_sum_float_shared(int32_t *agg, const float val)
ALWAYS_INLINE void agg_sum_double(int64_t *agg, const double val)
NEVER_INLINE int8_t * extract_str_ptr_noinline(const uint64_t str_and_len)
#define DEVICE
ALWAYS_INLINE int8_t * extract_str_ptr(const uint64_t str_and_len)
#define DEF_SKIP_AGG(base_agg_func)
__device__ int64_t get_block_index()
Definition: cuda_mapd_rt.cu:24
__device__ bool check_interrupt()
ALWAYS_INLINE double width_bucket_reversed_nullable(const double target_value, const double lower_bound, const double upper_bound, const double scale_factor, const int32_t partition_count, const double null_val)
#define DEF_WRITE_PROJECTION_INT(n)
ALWAYS_INLINE void agg_id_float(int32_t *agg, const float val)
ALWAYS_INLINE uint32_t agg_count_float_skip_val(uint32_t *agg, const float val, const float skip_val)
GPU_RT_STUB int32_t checked_single_agg_id_int8_shared(int8_t *agg, const int8_t val, const int8_t null_val)
ALWAYS_INLINE uint32_t agg_count_int32_skip_val(uint32_t *agg, const int32_t val, const int32_t skip_val)
__device__ int32_t agg_sum_int32_skip_val_shared(int32_t *agg, const int32_t val, const int32_t skip_val)
ALWAYS_INLINE void agg_min_double(int64_t *agg, const double val)
ALWAYS_INLINE int32_t agg_sum_int32(int32_t *agg, const int32_t val)
__device__ void linear_probabilistic_count(uint8_t *bitmap, const uint32_t bitmap_bytes, const uint8_t *key_bytes, const uint32_t key_len)
ALWAYS_INLINE DEVICE int32_t char_length(const char *str, const int32_t str_len)
__device__ void agg_count_distinct_bitmap_gpu(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
#define DEF_SHARED_AGG_RET_STUBS(base_agg_func)
#define INT32_MIN
ALWAYS_INLINE int64_t * get_matching_group_value_perfect_hash_keyless(int64_t *groups_buffer, const uint32_t hashed_index, const uint32_t row_size_quad)
ALWAYS_INLINE DEVICE double width_bucket_reversed(const double target_value, const double lower_bound, const double upper_bound, const double scale_factor, const int32_t partition_count)
__device__ void agg_sum_double_shared(int64_t *agg, const double val)
int count
void agg_min_int16(int16_t *agg, const int16_t val)
#define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym)
ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend, const int64_t divisor)
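floor_div_lhs (and its _nullable variant) implements floor division, which rounds toward negative infinity instead of C++'s round-toward-zero, so that for example -1 divided by 2 buckets to -1 rather than 0. A minimal sketch, assuming a positive divisor:

#include <cassert>
#include <cstdint>

// Floor division for a positive divisor: shift negative dividends down so the
// truncating C++ division lands on the floor instead of rounding toward zero.
int64_t floor_div_sketch(const int64_t dividend, const int64_t divisor) {
  return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
}

int main() {
  assert(floor_div_sketch(7, 2) == 3);
  assert(floor_div_sketch(-1, 2) == -1);   // plain C++ -1 / 2 would give 0
  assert(floor_div_sketch(-4, 2) == -2);
}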
#define DEF_AGG_MIN_INT(n)
ALWAYS_INLINE void agg_max_double(int64_t *agg, const double val)
__device__ int32_t pos_start_impl(const int32_t *row_index_resume) (Definition: cuda_mapd_rt.cu:28)
ALWAYS_INLINE uint64_t agg_count_double(uint64_t *agg, const double val)
void multifrag_query_hoisted_literals(const int8_t ***col_buffers, const uint64_t *num_fragments, const int8_t *literals, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, int32_t *total_matched, const int64_t *init_agg_value, int64_t **out, int32_t *error_code, const uint32_t *num_tables_ptr, const int64_t *join_hash_tables)
__device__ int32_t runtime_interrupt_flag (Definition: cuda_mapd_rt.cu:96)
__device__ void agg_approximate_count_distinct_gpu(int64_t *agg, const int64_t key, const uint32_t b, const int64_t base_dev_addr, const int64_t base_host_addr)
__device__ void sync_warp()
ALWAYS_INLINE double load_avg_decimal(const int64_t *sum, const int64_t *count, const double null_val, const uint32_t scale)
NEVER_INLINE int32_t extract_str_len_noinline(const uint64_t str_and_len)
ALWAYS_INLINE int32_t checked_single_agg_id_float(int32_t *agg, const float val, const float null_val)
NEVER_INLINE void query_stub(const int8_t **col_buffers, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, const int64_t *init_agg_value, int64_t **out, uint32_t frag_idx, const int64_t *join_hash_tables, int32_t *error_code, int32_t *total_matched)
ALWAYS_INLINE int64_t scale_decimal_up(const int64_t operand, const uint64_t scale, const int64_t operand_null_val, const int64_t result_null_val)
ALWAYS_INLINE int64_t agg_sum(int64_t *agg, const int64_t val)
__device__ void agg_sum_double_skip_val_shared(int64_t *agg, const double val, const double skip_val)
ALWAYS_INLINE DEVICE double width_bucket_no_oob_check(const double target_value, const double lower_bound, const double scale_factor)
__device__ void agg_max_int8_skip_val_shared(int8_t *agg, const int8_t val, const int8_t skip_val)
ALWAYS_INLINE void agg_min(int64_t *agg, const int64_t val)
ALWAYS_INLINE int64_t decimal_floor(const int64_t x, const int64_t scale)
DEVICE auto lower_bound(ARGS &&...args) (Definition: gpu_enabled.h:78)
__device__ void agg_max_int16_skip_val_shared(int16_t *agg, const int16_t val, const int16_t skip_val)
RUNTIME_EXPORT NEVER_INLINE DEVICE uint32_t MurmurHash3(const void *key, int len, const uint32_t seed) (Definition: MurmurHash.cpp:33)
ALWAYS_INLINE int64_t * get_matching_group_value_perfect_hash(int64_t *groups_buffer, const uint32_t hashed_index, const int64_t *key, const uint32_t key_count, const uint32_t row_size_quad)
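The perfect-hash group-by helpers treat groups_buffer as a row-major array of fixed-width rows: each row starts with key_count 64-bit key slots followed by the aggregate columns, and row_size_quad is the total row width in 64-bit words. A minimal sketch of the addressing, with EMPTY_KEY_64 standing in for whatever empty-slot sentinel the real buffer uses:

#include <cstdint>
#include <limits>

const int64_t EMPTY_KEY_64 = std::numeric_limits<int64_t>::max();  // assumed empty-slot sentinel

// Sketch: locate (and lazily initialize) the row for hashed_index, then return a
// pointer to the aggregate slots that follow the key columns.
int64_t* get_matching_group_value_perfect_hash_sketch(int64_t* groups_buffer,
                                                      const uint32_t hashed_index,
                                                      const int64_t* key,
                                                      const uint32_t key_count,
                                                      const uint32_t row_size_quad) {
  int64_t* row = groups_buffer + static_cast<uint64_t>(row_size_quad) * hashed_index;
  if (row[0] == EMPTY_KEY_64) {             // first visit: write the key in place
    for (uint32_t i = 0; i < key_count; ++i) {
      row[i] = key[i];
    }
  }
  return row + key_count;                   // aggregates live right after the key
}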
ALWAYS_INLINE DEVICE int32_t key_for_string_encoded(const int32_t str_id)
NEVER_INLINE void init_group_by_buffer_impl(int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_qw_count, const uint32_t agg_col_count, const bool keyless, const int8_t warp_size)
__device__ const int64_t * init_shared_mem(const int64_t *global_groups_buffer, const int32_t groups_buffer_size) (Definition: cuda_mapd_rt.cu:67)
void agg_max_int32(int32_t *agg, const int32_t val)
ALWAYS_INLINE int8_t logical_or(const int8_t lhs, const int8_t rhs, const int8_t null_val)
GPU_RT_STUB int32_t checked_single_agg_id_int16_shared(int16_t *agg, const int16_t val, const int16_t null_val)
#define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type)
ALWAYS_INLINE int32_t get_error_code(int32_t *error_codes)
bool check_interrupt_init(unsigned command)
#define NEVER_INLINE
ALWAYS_INLINE void agg_max(int64_t *agg, const int64_t val)
#define DEF_ARITH_NULLABLE(type, null_type, opname, opsym)
ALWAYS_INLINE float load_float(const int32_t *agg)
NEVER_INLINE void query_stub_hoisted_literals(const int8_t **col_buffers, const int8_t *literals, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, const int64_t *init_agg_value, int64_t **out, uint32_t frag_idx, const int64_t *join_hash_tables, int32_t *error_code, int32_t *total_matched)
__device__ void agg_min_int16_skip_val_shared(int16_t *agg, const int16_t val, const int16_t skip_val)
__device__ void sync_threadblock()
ALWAYS_INLINE uint64_t agg_count_skip_val(uint64_t *agg, const int64_t val, const int64_t skip_val)
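The *_skip_val aggregate variants share one pattern: skip_val is the column's null sentinel, and a null input leaves the accumulator untouched, so COUNT, SUM, MIN, and MAX only ever see real values. A minimal sketch of the pattern for the 64-bit count:

#include <cstdint>

// Sketch of the skip_val pattern: fold val into the accumulator only when it is
// not the null sentinel for the column.
uint64_t agg_count_skip_val_sketch(uint64_t* agg, const int64_t val, const int64_t skip_val) {
  if (val != skip_val) {
    ++*agg;                // the count aggregate just counts non-null rows
  }
  return *agg;
}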
__device__ void agg_min_int8_skip_val_shared(int8_t *agg, const int8_t val, const int8_t skip_val)
ALWAYS_INLINE int8_t * agg_id_varlen(int8_t *varlen_buffer, const int64_t offset, const int8_t *value, const int64_t size_bytes)
ALWAYS_INLINE double load_avg_int(const int64_t *sum, const int64_t *count, const double null_val)
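load_avg_int and load_avg_decimal finish an AVG aggregate from its two accumulators, the running sum and the row count, returning null_val for an empty group; the decimal flavor additionally divides by 10^scale to undo the fixed-point scaling of the stored sum. A minimal sketch:

#include <cmath>
#include <cstdint>

double load_avg_int_sketch(const int64_t* sum, const int64_t* count, const double null_val) {
  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
}

double load_avg_decimal_sketch(const int64_t* sum,
                               const int64_t* count,
                               const double null_val,
                               const uint32_t scale) {
  // The sum is stored as a scaled integer (e.g. 12.34 with scale 2 is 1234),
  // so divide by 10^scale to recover the decimal value.
  return *count != 0 ? (static_cast<double>(*sum) / std::pow(10, scale)) / *count
                     : null_val;
}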
ALWAYS_INLINE DEVICE int32_t char_length_nullable(const char *str, const int32_t str_len, const int32_t int_null)
ALWAYS_INLINE DEVICE bool sample_ratio(const double proportion, const int64_t row_offset)
void agg_max_int8(int8_t *agg, const int8_t val)
#define DEF_SHARED_AGG_STUBS(base_agg_func)
ALWAYS_INLINE int8_t bit_is_set(const int64_t bitset, const int64_t val, const int64_t min_val, const int64_t max_val, const int64_t null_val, const int8_t null_bool_val)
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
__device__ int32_t checked_single_agg_id_shared(int64_t *agg, const int64_t val, const int64_t null_val)
#define ALWAYS_INLINE
#define DEF_AGG_ID_INT(n)
NEVER_INLINE void agg_approximate_count_distinct(int64_t *agg, const int64_t key, const uint32_t b)
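agg_approximate_count_distinct updates a HyperLogLog register array: the key is hashed, the top b bits of the hash select one of the 2^b registers, and the register keeps the maximum rank (leading-zero count plus one) seen in the remaining bits. A minimal sketch of the register update, using std::hash in place of the MurmurHash the runtime uses, and assuming 0 < b < 64:

#include <cstdint>
#include <functional>
#include <vector>

// Sketch of a HyperLogLog register update; M has (1 << b) entries, all starting at 0.
void hll_update_sketch(std::vector<uint8_t>& M, const int64_t key, const uint32_t b) {
  const uint64_t hash = std::hash<int64_t>{}(key);     // the runtime hashes with MurmurHash
  const uint32_t index = hash >> (64 - b);             // top b bits select a register
  const uint64_t rest = hash << b;                     // remaining bits feed the rank
  uint8_t rank = 1;
  for (uint64_t bit = 1ULL << 63; bit != 0 && !(rest & bit); bit >>= 1) {
    ++rank;                                            // count leading zeros, plus one
  }
  if (rank > M[index]) {
    M[index] = rank;                                   // registers keep the maximum rank
  }
}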
ALWAYS_INLINE uint64_t agg_count_double_skip_val(uint64_t *agg, const double val, const double skip_val)
ALWAYS_INLINE void agg_id(int64_t *agg, const int64_t val)
ALWAYS_INLINE int64_t decimal_ceil(const int64_t x, const int64_t scale)
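decimal_floor and decimal_ceil round a fixed-point value stored as a scaled integer down or up to a multiple of scale (itself a power of ten); flooring 12345 (123.45 with two decimal digits) by scale 100 gives 12300. A minimal sketch:

#include <cassert>
#include <cstdint>

int64_t decimal_floor_sketch(const int64_t x, const int64_t scale) {
  if (x >= 0) {
    return x / scale * scale;          // truncation already floors non-negatives
  }
  if (x % scale == 0) {
    return x;                          // already on a multiple of scale
  }
  return x / scale * scale - scale;    // step one multiple further down
}

int64_t decimal_ceil_sketch(const int64_t x, const int64_t scale) {
  return -decimal_floor_sketch(-x, scale);   // ceil(x) == -floor(-x)
}

int main() {
  assert(decimal_floor_sketch(12345, 100) == 12300);
  assert(decimal_floor_sketch(-12345, 100) == -12400);
  assert(decimal_ceil_sketch(12345, 100) == 12400);
}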
ALWAYS_INLINE double width_bucket_nullable(const double target_value, const double lower_bound, const double upper_bound, const double scale_factor, const int32_t partition_count, const double null_val)
ALWAYS_INLINE void agg_count_distinct_bitmap(int64_t *agg, const int64_t val, const int64_t min_val)
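agg_count_distinct_bitmap and bit_is_set (above) are the two halves of the exact COUNT DISTINCT bitmap: each value maps to the bit at offset val - min_val, the aggregate sets that bit, and bit_is_set later tests it, returning the null boolean for a null input and false for values outside the bitmap's range. A minimal sketch of the pair on a caller-allocated bitmap (the runtime passes the bitmap address through an int64_t rather than a pointer):

#include <cassert>
#include <cstdint>
#include <vector>

void count_distinct_bitmap_set_sketch(int8_t* bitmap, const int64_t val, const int64_t min_val) {
  const uint64_t idx = static_cast<uint64_t>(val - min_val);
  bitmap[idx >> 3] |= static_cast<int8_t>(1u << (idx & 7));   // set bit idx
}

int8_t bit_is_set_sketch(const int8_t* bitmap, const int64_t val, const int64_t min_val,
                         const int64_t max_val, const int64_t null_val,
                         const int8_t null_bool_val) {
  if (val == null_val) {
    return null_bool_val;                                     // null input yields the null boolean
  }
  if (val < min_val || val > max_val) {
    return 0;                                                 // outside the bitmap's range
  }
  const uint64_t idx = static_cast<uint64_t>(val - min_val);
  return (bitmap[idx >> 3] & (1 << (idx & 7))) ? 1 : 0;
}

int main() {
  std::vector<int8_t> bitmap(16, 0);                          // 128 bits covering values 100..227
  count_distinct_bitmap_set_sketch(bitmap.data(), 105, 100);
  assert(bit_is_set_sketch(bitmap.data(), 105, 100, 227, -1, -128) == 1);
  assert(bit_is_set_sketch(bitmap.data(), 106, 100, 227, -1, -128) == 0);
}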
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
ALWAYS_INLINE int8_t logical_not(const int8_t operand, const int8_t null_val)
ALWAYS_INLINE DEVICE double width_bucket_expr(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count)
ALWAYS_INLINE int8_t logical_and(const int8_t lhs, const int8_t rhs, const int8_t null_val)
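The logical_* helpers implement SQL's three-valued boolean logic on int8_t, with null_val as the unknown state: AND is false whenever either side is false, OR is true whenever either side is true, and otherwise an unknown operand makes the result unknown. A minimal sketch:

#include <cstdint>

int8_t logical_not_sketch(const int8_t operand, const int8_t null_val) {
  return operand == null_val ? null_val : (operand ? 0 : 1);
}

int8_t logical_and_sketch(const int8_t lhs, const int8_t rhs, const int8_t null_val) {
  if (lhs == null_val) {
    return rhs == 0 ? 0 : null_val;      // unknown AND false is false, else unknown
  }
  if (rhs == null_val) {
    return lhs == 0 ? 0 : null_val;
  }
  return (lhs && rhs) ? 1 : 0;
}

int8_t logical_or_sketch(const int8_t lhs, const int8_t rhs, const int8_t null_val) {
  if (lhs == null_val) {
    return rhs == 0 ? null_val : 1;      // unknown OR true is true, else unknown
  }
  if (rhs == null_val) {
    return lhs == 0 ? null_val : 1;
  }
  return (lhs || rhs) ? 1 : 0;
}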
ALWAYS_INLINE void agg_min_float(int32_t *agg, const float val)
__device__ void force_sync()
ALWAYS_INLINE double percent_window_func(const int64_t output_buff, const int64_t pos)
NEVER_INLINE void init_columnar_group_by_buffer_gpu(int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_qw_count, const uint32_t agg_col_count, const bool keyless, const bool blocks_share_memory, const int32_t frag_idx)
ALWAYS_INLINE DEVICE double width_bucket(const double target_value, const double lower_bound, const double upper_bound, const double scale_factor, const int32_t partition_count)
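width_bucket follows SQL WIDTH_BUCKET semantics: the interval [lower_bound, upper_bound) is split into partition_count equal buckets, values below the range fall into bucket 0, values at or above the range fall into bucket partition_count + 1, and scale_factor is assumed here to be the precomputed partition_count / (upper_bound - lower_bound). A minimal sketch of the forward (non-reversed, non-nullable) case:

#include <cassert>
#include <cmath>
#include <cstdint>

// Sketch of SQL WIDTH_BUCKET over an ascending range.
int32_t width_bucket_sketch(const double target_value,
                            const double lower_bound,
                            const double upper_bound,
                            const double scale_factor,
                            const int32_t partition_count) {
  if (target_value < lower_bound) {
    return 0;                             // below the range
  }
  if (target_value >= upper_bound) {
    return partition_count + 1;           // at or above the range
  }
  return static_cast<int32_t>(std::floor((target_value - lower_bound) * scale_factor)) + 1;
}

int main() {
  // Ten buckets over [0, 100): 25.0 lands in bucket 3, 100.0 overflows to 11.
  const double scale = 10.0 / (100.0 - 0.0);
  assert(width_bucket_sketch(25.0, 0.0, 100.0, scale, 10) == 3);
  assert(width_bucket_sketch(100.0, 0.0, 100.0, scale, 10) == 11);
  assert(width_bucket_sketch(-5.0, 0.0, 100.0, scale, 10) == 0);
}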
__device__ int32_t group_buff_idx_impl() (Definition: cuda_mapd_rt.cu:32)