OmniSciDB  ca0c39ec8f
RuntimeFunctions.cpp
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef __CUDACC__
18 #error This code is not intended to be compiled with a CUDA C++ compiler
19 #endif // __CUDACC__
20 
21 #include "RuntimeFunctions.h"
22 #include "../Shared/Datum.h"
23 #include "../Shared/funcannotations.h"
24 #include "BufferCompaction.h"
25 #include "HyperLogLogRank.h"
26 #include "MurmurHash.h"
27 #include "Shared/quantile.h"
28 #include "TypePunning.h"
29 #include "Utils/SegmentTreeUtils.h"
30 
31 #include <algorithm>
32 #include <atomic>
33 #include <chrono>
34 #include <cmath>
35 #include <cstdio>
36 #include <cstring>
37 #include <thread>
38 #include <tuple>
39 
40 // decoder implementations
41 
42 #include "DecodersImpl.h"
43 
44 // arithmetic operator implementations
45 
46 #define DEF_ARITH_NULLABLE(type, null_type, opname, opsym) \
47  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable( \
48  const type lhs, const type rhs, const null_type null_val) { \
49  if (lhs != null_val && rhs != null_val) { \
50  return lhs opsym rhs; \
51  } \
52  return null_val; \
53  }
54 
55 #define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym) \
56  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_lhs( \
57  const type lhs, const type rhs, const null_type null_val) { \
58  if (lhs != null_val) { \
59  return lhs opsym rhs; \
60  } \
61  return null_val; \
62  }
63 
64 #define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym) \
65  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_rhs( \
66  const type lhs, const type rhs, const null_type null_val) { \
67  if (rhs != null_val) { \
68  return lhs opsym rhs; \
69  } \
70  return null_val; \
71  }
72 
73 #define DEF_CMP_NULLABLE(type, null_type, opname, opsym) \
74  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable( \
75  const type lhs, \
76  const type rhs, \
77  const null_type null_val, \
78  const int8_t null_bool_val) { \
79  if (lhs != null_val && rhs != null_val) { \
80  return lhs opsym rhs; \
81  } \
82  return null_bool_val; \
83  }
84 
85 #define DEF_CMP_NULLABLE_LHS(type, null_type, opname, opsym) \
86  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_lhs( \
87  const type lhs, \
88  const type rhs, \
89  const null_type null_val, \
90  const int8_t null_bool_val) { \
91  if (lhs != null_val) { \
92  return lhs opsym rhs; \
93  } \
94  return null_bool_val; \
95  }
96 
97 #define DEF_CMP_NULLABLE_RHS(type, null_type, opname, opsym) \
98  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_rhs( \
99  const type lhs, \
100  const type rhs, \
101  const null_type null_val, \
102  const int8_t null_bool_val) { \
103  if (rhs != null_val) { \
104  return lhs opsym rhs; \
105  } \
106  return null_bool_val; \
107  }
108 
109 #define DEF_SAFE_DIV_NULLABLE(type, null_type, opname) \
110  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type safe_div_##type( \
111  const type lhs, const type rhs, const null_type null_val) { \
112  if (lhs != null_val && rhs != null_val && rhs != 0) { \
113  return lhs / rhs; \
114  } \
115  return null_val; \
116  }
117 
118 #define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type) \
119  DEF_ARITH_NULLABLE(type, null_type, add, +) \
120  DEF_ARITH_NULLABLE(type, null_type, sub, -) \
121  DEF_ARITH_NULLABLE(type, null_type, mul, *) \
122  DEF_ARITH_NULLABLE(type, null_type, div, /) \
123  DEF_SAFE_DIV_NULLABLE(type, null_type, safe_div) \
124  DEF_ARITH_NULLABLE_LHS(type, null_type, add, +) \
125  DEF_ARITH_NULLABLE_LHS(type, null_type, sub, -) \
126  DEF_ARITH_NULLABLE_LHS(type, null_type, mul, *) \
127  DEF_ARITH_NULLABLE_LHS(type, null_type, div, /) \
128  DEF_ARITH_NULLABLE_RHS(type, null_type, add, +) \
129  DEF_ARITH_NULLABLE_RHS(type, null_type, sub, -) \
130  DEF_ARITH_NULLABLE_RHS(type, null_type, mul, *) \
131  DEF_ARITH_NULLABLE_RHS(type, null_type, div, /) \
132  DEF_CMP_NULLABLE(type, null_type, eq, ==) \
133  DEF_CMP_NULLABLE(type, null_type, ne, !=) \
134  DEF_CMP_NULLABLE(type, null_type, lt, <) \
135  DEF_CMP_NULLABLE(type, null_type, gt, >) \
136  DEF_CMP_NULLABLE(type, null_type, le, <=) \
137  DEF_CMP_NULLABLE(type, null_type, ge, >=) \
138  DEF_CMP_NULLABLE_LHS(type, null_type, eq, ==) \
139  DEF_CMP_NULLABLE_LHS(type, null_type, ne, !=) \
140  DEF_CMP_NULLABLE_LHS(type, null_type, lt, <) \
141  DEF_CMP_NULLABLE_LHS(type, null_type, gt, >) \
142  DEF_CMP_NULLABLE_LHS(type, null_type, le, <=) \
143  DEF_CMP_NULLABLE_LHS(type, null_type, ge, >=) \
144  DEF_CMP_NULLABLE_RHS(type, null_type, eq, ==) \
145  DEF_CMP_NULLABLE_RHS(type, null_type, ne, !=) \
146  DEF_CMP_NULLABLE_RHS(type, null_type, lt, <) \
147  DEF_CMP_NULLABLE_RHS(type, null_type, gt, >) \
148  DEF_CMP_NULLABLE_RHS(type, null_type, le, <=) \
149  DEF_CMP_NULLABLE_RHS(type, null_type, ge, >=)
150 
151 DEF_BINARY_NULLABLE_ALL_OPS(int8_t, int64_t)
152 DEF_BINARY_NULLABLE_ALL_OPS(int16_t, int64_t)
153 DEF_BINARY_NULLABLE_ALL_OPS(int32_t, int64_t)
154 DEF_BINARY_NULLABLE_ALL_OPS(int64_t, int64_t)
155 DEF_BINARY_NULLABLE_ALL_OPS(float, float)
156 DEF_BINARY_NULLABLE_ALL_OPS(double, double)
157 DEF_ARITH_NULLABLE(int8_t, int64_t, mod, %)
158 DEF_ARITH_NULLABLE(int16_t, int64_t, mod, %)
159 DEF_ARITH_NULLABLE(int32_t, int64_t, mod, %)
160 DEF_ARITH_NULLABLE(int64_t, int64_t, mod, %)
161 DEF_ARITH_NULLABLE_LHS(int8_t, int64_t, mod, %)
162 DEF_ARITH_NULLABLE_LHS(int16_t, int64_t, mod, %)
163 DEF_ARITH_NULLABLE_LHS(int32_t, int64_t, mod, %)
164 DEF_ARITH_NULLABLE_LHS(int64_t, int64_t, mod, %)
165 DEF_ARITH_NULLABLE_RHS(int8_t, int64_t, mod, %)
166 DEF_ARITH_NULLABLE_RHS(int16_t, int64_t, mod, %)
167 DEF_ARITH_NULLABLE_RHS(int32_t, int64_t, mod, %)
168 DEF_ARITH_NULLABLE_RHS(int64_t, int64_t, mod, %)
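// Example (illustrative): DEF_ARITH_NULLABLE(int32_t, int64_t, add, +) expands,
// modulo formatting, to
//
//   extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t add_int32_t_nullable(
//       const int32_t lhs, const int32_t rhs, const int64_t null_val) {
//     if (lhs != null_val && rhs != null_val) {
//       return lhs + rhs;
//     }
//     return null_val;
//   }
//
// so, with a hypothetical sentinel kNullInt, add_int32_t_nullable(7, 3, kNullInt)
// returns 10 while add_int32_t_nullable(7, kNullInt, kNullInt) returns kNullInt.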
169 
170 #undef DEF_BINARY_NULLABLE_ALL_OPS
171 #undef DEF_SAFE_DIV_NULLABLE
172 #undef DEF_CMP_NULLABLE_RHS
173 #undef DEF_CMP_NULLABLE_LHS
174 #undef DEF_CMP_NULLABLE
175 #undef DEF_ARITH_NULLABLE_RHS
176 #undef DEF_ARITH_NULLABLE_LHS
177 #undef DEF_ARITH_NULLABLE
178 
179 #define DEF_MAP_STRING_TO_DATUM(value_type, value_name) \
180  extern "C" ALWAYS_INLINE DEVICE value_type map_string_to_datum_##value_name( \
181  const int32_t string_id, \
182  const int64_t translation_map_handle, \
183  const int32_t min_source_id) { \
184  const Datum* translation_map = \
185  reinterpret_cast<const Datum*>(translation_map_handle); \
186  const Datum& out_datum = translation_map[string_id - min_source_id]; \
187  return out_datum.value_name##val; \
188  }
189 
190 DEF_MAP_STRING_TO_DATUM(int8_t, bool)
191 DEF_MAP_STRING_TO_DATUM(int8_t, tinyint)
192 DEF_MAP_STRING_TO_DATUM(int16_t, smallint)
193 DEF_MAP_STRING_TO_DATUM(int32_t, int)
194 DEF_MAP_STRING_TO_DATUM(int64_t, bigint)
195 DEF_MAP_STRING_TO_DATUM(float, float)
196 DEF_MAP_STRING_TO_DATUM(double, double)
197 
198 #undef DEF_MAP_STRING_TO_DATUM
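// Example (illustrative; translation_map_ptr is a hypothetical variable): the
// generated map_string_to_datum_int() translates a dictionary-encoded string id
// into its mapped value by indexing an array of Datum entries, here reading
// translation_map[42 - 40].intval:
//
//   const int64_t handle = reinterpret_cast<int64_t>(translation_map_ptr);
//   const int32_t mapped_value =
//       map_string_to_datum_int(/*string_id=*/42, handle, /*min_source_id=*/40);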
199 
200 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
201 scale_decimal_up(const int64_t operand,
202  const uint64_t scale,
203  const int64_t operand_null_val,
204  const int64_t result_null_val) {
205  return operand != operand_null_val ? operand * scale : result_null_val;
206 }
207 
208 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
209 scale_decimal_down_nullable(const int64_t operand,
210  const int64_t scale,
211  const int64_t null_val) {
212  // rounded scale down of a decimal
213  if (operand == null_val) {
214  return null_val;
215  }
216 
217  int64_t tmp = scale >> 1;
218  tmp = operand >= 0 ? operand + tmp : operand - tmp;
219  return tmp / scale;
220 }
221 
222 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
223 scale_decimal_down_not_nullable(const int64_t operand,
224  const int64_t scale,
225  const int64_t null_val) {
226  int64_t tmp = scale >> 1;
227  tmp = operand >= 0 ? operand + tmp : operand - tmp;
228  return tmp / scale;
229 }
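// Worked example, assuming two decimal digits of scale (scale = 100), so the
// value 123.45 is stored as the integer 12345:
//   scale_decimal_up(123, 100, null_val, null_val)      ->  12300  (123 -> 123.00)
//   scale_decimal_down_nullable(12345, 100, null_val)   ->  123    (12345 + 50 = 12395, then / 100)
//   scale_decimal_down_nullable(-12355, 100, null_val)  -> -124    (-12355 - 50 = -12405, then / 100)
// i.e. the down-scaling rounds half away from zero before truncating.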
230 
231 // Return floor(dividend / divisor).
232 // Assumes 0 < divisor.
233 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend,
234  const int64_t divisor) {
235  return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
236 }
237 
238 // Return floor(dividend / divisor) or NULL if dividend IS NULL.
239 // Assumes 0 < divisor.
240 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
241 floor_div_nullable_lhs(const int64_t dividend,
242  const int64_t divisor,
243  const int64_t null_val) {
244  return dividend == null_val ? null_val : floor_div_lhs(dividend, divisor);
245 }
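// Example: unlike C++ integer division, which truncates toward zero, this
// rounds toward negative infinity:
//   floor_div_lhs(7, 2)  ->  3
//   floor_div_lhs(-7, 2) -> -4   (plain -7 / 2 yields -3)
//   floor_div_nullable_lhs(null_val, 2, null_val) -> null_val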
246 
247 #define DEF_UMINUS_NULLABLE(type, null_type) \
248  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type uminus_##type##_nullable( \
249  const type operand, const null_type null_val) { \
250  return operand == null_val ? null_val : -operand; \
251  }
252 
253 DEF_UMINUS_NULLABLE(int8_t, int8_t)
254 DEF_UMINUS_NULLABLE(int16_t, int16_t)
255 DEF_UMINUS_NULLABLE(int32_t, int32_t)
256 DEF_UMINUS_NULLABLE(int64_t, int64_t)
257 DEF_UMINUS_NULLABLE(float, float)
258 DEF_UMINUS_NULLABLE(double, double)
259 
260 #undef DEF_UMINUS_NULLABLE
261 
262 #define DEF_CAST_NULLABLE(from_type, to_type) \
263  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
264  cast_##from_type##_to_##to_type##_nullable(const from_type operand, \
265  const from_type from_null_val, \
266  const to_type to_null_val) { \
267  return operand == from_null_val ? to_null_val : operand; \
268  }
269 
270 #define DEF_CAST_SCALED_NULLABLE(from_type, to_type) \
271  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
272  cast_##from_type##_to_##to_type##_scaled_nullable(const from_type operand, \
273  const from_type from_null_val, \
274  const to_type to_null_val, \
275  const to_type multiplier) { \
276  return operand == from_null_val ? to_null_val : multiplier * operand; \
277  }
278 
279 #define DEF_CAST_NULLABLE_BIDIR(type1, type2) \
280  DEF_CAST_NULLABLE(type1, type2) \
281  DEF_CAST_NULLABLE(type2, type1)
282 
283 #define DEF_ROUND_NULLABLE(from_type, to_type) \
284  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
285  cast_##from_type##_to_##to_type##_nullable(const from_type operand, \
286  const from_type from_null_val, \
287  const to_type to_null_val) { \
288  return operand == from_null_val \
289  ? to_null_val \
290  : static_cast<to_type>(operand + (operand < from_type(0) \
291  ? from_type(-0.5) \
292  : from_type(0.5))); \
293  }
294 
295 DEF_CAST_NULLABLE_BIDIR(int8_t, int16_t)
296 DEF_CAST_NULLABLE_BIDIR(int8_t, int32_t)
297 DEF_CAST_NULLABLE_BIDIR(int8_t, int64_t)
298 DEF_CAST_NULLABLE_BIDIR(int16_t, int32_t)
299 DEF_CAST_NULLABLE_BIDIR(int16_t, int64_t)
300 DEF_CAST_NULLABLE_BIDIR(int32_t, int64_t)
301 DEF_CAST_NULLABLE_BIDIR(float, double)
302 
303 DEF_CAST_NULLABLE(int8_t, float)
304 DEF_CAST_NULLABLE(int16_t, float)
305 DEF_CAST_NULLABLE(int32_t, float)
306 DEF_CAST_NULLABLE(int64_t, float)
307 DEF_CAST_NULLABLE(int8_t, double)
308 DEF_CAST_NULLABLE(int16_t, double)
309 DEF_CAST_NULLABLE(int32_t, double)
310 DEF_CAST_NULLABLE(int64_t, double)
311 
312 DEF_ROUND_NULLABLE(float, int8_t)
313 DEF_ROUND_NULLABLE(float, int16_t)
314 DEF_ROUND_NULLABLE(float, int32_t)
315 DEF_ROUND_NULLABLE(float, int64_t)
316 DEF_ROUND_NULLABLE(double, int8_t)
317 DEF_ROUND_NULLABLE(double, int16_t)
318 DEF_ROUND_NULLABLE(double, int32_t)
319 DEF_ROUND_NULLABLE(double, int64_t)
320 
321 DEF_CAST_NULLABLE(uint8_t, int32_t)
322 DEF_CAST_NULLABLE(uint16_t, int32_t)
323 DEF_CAST_SCALED_NULLABLE(int64_t, float)
324 DEF_CAST_SCALED_NULLABLE(int64_t, double)
325 
326 #undef DEF_ROUND_NULLABLE
327 #undef DEF_CAST_NULLABLE_BIDIR
328 #undef DEF_CAST_SCALED_NULLABLE
329 #undef DEF_CAST_NULLABLE
330 
331 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_not(const int8_t operand,
332  const int8_t null_val) {
333  return operand == null_val ? operand : (operand ? 0 : 1);
334 }
335 
336 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_and(const int8_t lhs,
337  const int8_t rhs,
338  const int8_t null_val) {
339  if (lhs == null_val) {
340  return rhs == 0 ? rhs : null_val;
341  }
342  if (rhs == null_val) {
343  return lhs == 0 ? lhs : null_val;
344  }
345  return (lhs && rhs) ? 1 : 0;
346 }
347 
348 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_or(const int8_t lhs,
349  const int8_t rhs,
350  const int8_t null_val) {
351  if (lhs == null_val) {
352  return rhs == 0 ? null_val : rhs;
353  }
354  if (rhs == null_val) {
355  return lhs == 0 ? null_val : lhs;
356  }
357  return (lhs || rhs) ? 1 : 0;
358 }
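// These three functions implement SQL three-valued logic with 0 = FALSE,
// 1 = TRUE and null_val = NULL. For example:
//   logical_and(null_val, 0, null_val) -> 0         (FALSE AND NULL is FALSE)
//   logical_and(null_val, 1, null_val) -> null_val  (TRUE AND NULL is NULL)
//   logical_or(null_val, 1, null_val)  -> 1         (TRUE OR NULL is TRUE)
//   logical_or(null_val, 0, null_val)  -> null_val  (FALSE OR NULL is NULL)
//   logical_not(null_val, null_val)    -> null_val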
359 
360 // aggregator implementations
361 
362 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count(uint64_t* agg, const int64_t) {
363  return (*agg)++;
364 }
365 
366 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void
367 agg_count_distinct_bitmap(int64_t* agg, const int64_t val, const int64_t min_val) {
368  const uint64_t bitmap_idx = val - min_val;
369  reinterpret_cast<int8_t*>(*agg)[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
370 }
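// Example: *agg points to a bitmap with one bit per possible value in the
// column's [min_val, max_val] range. For val = 19 and min_val = 10,
// bitmap_idx = 9, so byte 9 >> 3 = 1 is OR'ed with 1 << (9 & 7) = 0x02,
// marking the value 19 as seen regardless of how often it occurs.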
371 
372 #ifdef _MSC_VER
373 #define GPU_RT_STUB NEVER_INLINE
374 #else
375 #define GPU_RT_STUB NEVER_INLINE __attribute__((optnone))
376 #endif
377 
378 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_gpu(int64_t*,
379  const int64_t,
380  const int64_t,
381  const int64_t,
382  const int64_t,
383  const uint64_t,
384  const uint64_t) {}
385 
386 extern "C" RUNTIME_EXPORT NEVER_INLINE void
387 agg_approximate_count_distinct(int64_t* agg, const int64_t key, const uint32_t b) {
388  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
389  const uint32_t index = hash >> (64 - b);
390  const uint8_t rank = get_rank(hash << b, 64 - b);
391  uint8_t* M = reinterpret_cast<uint8_t*>(*agg);
392  M[index] = std::max(M[index], rank);
393 }
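// HyperLogLog register update: the top b bits of the 64-bit MurmurHash select
// one of 2^b one-byte registers, and the rank of the remaining 64 - b bits
// (roughly, the number of leading zeros plus one) is kept as a running maximum.
// The cardinality estimate itself is computed from the registers elsewhere.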
394 
395 extern "C" GPU_RT_STUB void agg_approximate_count_distinct_gpu(int64_t*,
396  const int64_t,
397  const uint32_t,
398  const int64_t,
399  const int64_t) {}
400 
401 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t bit_is_set(const int64_t bitset,
402  const int64_t val,
403  const int64_t min_val,
404  const int64_t max_val,
405  const int64_t null_val,
406  const int8_t null_bool_val) {
407  if (val == null_val) {
408  return null_bool_val;
409  }
410  if (val < min_val || val > max_val) {
411  return 0;
412  }
413  if (!bitset) {
414  return 0;
415  }
416  const uint64_t bitmap_idx = val - min_val;
417  return (reinterpret_cast<const int8_t*>(bitset))[bitmap_idx >> 3] &
418  (1 << (bitmap_idx & 7))
419  ? 1
420  : 0;
421 }
422 
423 #define DEF_COMPUTE_LOWER_BOUND(value_type) \
424  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_##value_type##_lower_bound( \
425  const int64_t entry_cnt, \
426  const value_type target_value, \
427  const value_type* col_buf) { \
428  int64_t l = 0; \
429  int64_t h = entry_cnt - 1; \
430  while (l < h) { \
431  int64_t mid = l + (h - l) / 2; \
432  if (target_value < col_buf[mid]) { \
433  h = mid; \
434  } else { \
435  l = mid + 1; \
436  } \
437  } \
438  return l; \
439  }
446 #undef DEF_COMPUTE_LOWER_BOUND
447 
448 #define DEF_COMPUTE_UPPER_BOUND(value_type) \
449  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_##value_type##_upper_bound( \
450  const int64_t entry_cnt, \
451  const value_type target_value, \
452  const value_type* col_buf) { \
453  int64_t l = 0; \
454  int64_t h = entry_cnt - 1; \
455  while (l < h) { \
456  int64_t mid = l + (h - l) / 2; \
457  if (target_value >= col_buf[mid]) { \
458  h = mid; \
459  } else { \
460  l = mid + 1; \
461  } \
462  } \
463  return l; \
464  }
471 #undef DEF_COMPUTE_UPPER_BOUND
472 
473 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
474 get_valid_buf_start_pos(const int64_t null_start_pos, const int64_t null_end_pos) {
475  return null_start_pos == 0 ? null_end_pos + 1 : 0;
476 }
477 
478 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
479 get_valid_buf_end_pos(const int64_t num_elems,
480  const int64_t null_start_pos,
481  const int64_t null_end_pos) {
482  return null_end_pos == num_elems ? null_start_pos : num_elems;
483 }
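// Helpers for window frames over a sorted partition whose NULL rows form one
// contiguous run at either end of the ordering: when the run is ordered first
// (null_start_pos == 0) the valid range begins just past it, and when it is
// ordered last (null_end_pos == num_elems) the valid range stops at
// null_start_pos.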
484 
485 #define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(value_type) \
486  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
487  compute_##value_type##_current_row_idx_in_frame( \
488  const int64_t num_elems, \
489  const int64_t cur_row_idx, \
490  const value_type* col_buf, \
491  const int32_t* partition_rowid_buf, \
492  const int64_t* ordered_index_buf, \
493  const value_type null_val, \
494  const int64_t null_start_pos, \
495  const int64_t null_end_pos) { \
496  const auto target_value = col_buf[cur_row_idx]; \
497  if (target_value == null_val) { \
498  for (int64_t target_offset = null_start_pos; target_offset < null_end_pos; \
499  target_offset++) { \
500  const auto candidate_offset = \
501  partition_rowid_buf[ordered_index_buf[target_offset]]; \
502  if (candidate_offset == cur_row_idx) { \
503  return target_offset; \
504  } \
505  } \
506  } \
507  int64_t l = get_valid_buf_start_pos(null_start_pos, null_end_pos); \
508  int64_t h = get_valid_buf_end_pos(num_elems, null_start_pos, null_end_pos); \
509  while (l <= h) { \
510  int64_t mid = l + (h - l) / 2; \
511  auto row_idx_in_frame = partition_rowid_buf[ordered_index_buf[mid]]; \
512  auto cur_value = col_buf[row_idx_in_frame]; \
513  if (cur_value == target_value) { \
514  return mid; \
515  } else if (cur_value < target_value) { \
516  l = mid + 1; \
517  } else { \
518  h = mid - 1; \
519  } \
520  } \
521  return -1; \
522  }
529 #undef DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME
530 
531 #define DEF_COMPUTE_LOWER_BOUND_FROM_ORDERED_INDEX(value_type) \
532  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
533  compute_##value_type##_lower_bound_from_ordered_index( \
534  const int64_t num_elems, \
535  const value_type target_value, \
536  const value_type* col_buf, \
537  const int32_t* partition_rowid_buf, \
538  const int64_t* ordered_index_buf, \
539  const value_type null_val, \
540  const int64_t null_start_pos, \
541  const int64_t null_end_pos) { \
542  if (target_value == null_val) { \
543  return null_start_pos; \
544  } \
545  int64_t l = get_valid_buf_start_pos(null_start_pos, null_end_pos); \
546  int64_t h = get_valid_buf_end_pos(num_elems, null_start_pos, null_end_pos); \
547  while (l < h) { \
548  int64_t mid = l + (h - l) / 2; \
549  if (target_value <= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) { \
550  h = mid; \
551  } else { \
552  l = mid + 1; \
553  } \
554  } \
555  return h; \
556  }
563 #undef DEF_COMPUTE_LOWER_BOUND_FROM_ORDERED_INDEX
564 
565 #define DEF_COMPUTE_LOWER_BOUND_FROM_ORDERED_INDEX_FOR_TIMEINTERVAL(value_type) \
566  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
567  compute_##value_type##_lower_bound_from_ordered_index_for_timeinterval( \
568  const int64_t num_elems, \
569  const int64_t target_value, \
570  const value_type* col_buf, \
571  const int32_t* partition_rowid_buf, \
572  const int64_t* ordered_index_buf, \
573  const int64_t null_val, \
574  const int64_t null_start_pos, \
575  const int64_t null_end_pos) { \
576  if (target_value == null_val) { \
577  return null_start_pos; \
578  } \
579  int64_t l = get_valid_buf_start_pos(null_start_pos, null_end_pos); \
580  int64_t h = get_valid_buf_end_pos(num_elems, null_start_pos, null_end_pos); \
581  while (l < h) { \
582  int64_t mid = l + (h - l) / 2; \
583  if (target_value <= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) { \
584  h = mid; \
585  } else { \
586  l = mid + 1; \
587  } \
588  } \
589  return h; \
590  }
594 #undef DEF_COMPUTE_LOWER_BOUND_FROM_ORDERED_INDEX_FOR_TIMEINTERVAL
595 
596 #define DEF_COMPUTE_UPPER_BOUND_FROM_ORDERED_INDEX(value_type) \
597  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
598  compute_##value_type##_upper_bound_from_ordered_index( \
599  const int64_t num_elems, \
600  const value_type target_value, \
601  const value_type* col_buf, \
602  const int32_t* partition_rowid_buf, \
603  const int64_t* ordered_index_buf, \
604  const value_type null_val, \
605  const int64_t null_start_pos, \
606  const int64_t null_end_pos) { \
607  if (target_value == null_val) { \
608  return null_end_pos; \
609  } \
610  int64_t l = get_valid_buf_start_pos(null_start_pos, null_end_pos); \
611  int64_t h = get_valid_buf_end_pos(num_elems, null_start_pos, null_end_pos); \
612  while (l < h) { \
613  int64_t mid = l + (h - l) / 2; \
614  if (target_value >= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) { \
615  l = mid + 1; \
616  } else { \
617  h = mid; \
618  } \
619  } \
620  return l; \
621  }
628 #undef DEF_COMPUTE_UPPER_BOUND_FROM_ORDERED_INDEX
629 
630 #define DEF_COMPUTE_UPPER_BOUND_FROM_ORDERED_INDEX_FOR_TIMEINTERVAL(value_type) \
631  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
632  compute_##value_type##_upper_bound_from_ordered_index_for_timeinterval( \
633  const int64_t num_elems, \
634  const int64_t target_value, \
635  const value_type* col_buf, \
636  const int32_t* partition_rowid_buf, \
637  const int64_t* ordered_index_buf, \
638  const int64_t null_val, \
639  const int64_t null_start_pos, \
640  const int64_t null_end_pos) { \
641  if (target_value == null_val) { \
642  return null_end_pos; \
643  } \
644  int64_t l = get_valid_buf_start_pos(null_start_pos, null_end_pos); \
645  int64_t h = get_valid_buf_end_pos(num_elems, null_start_pos, null_end_pos); \
646  while (l < h) { \
647  int64_t mid = l + (h - l) / 2; \
648  if (target_value >= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) { \
649  l = mid + 1; \
650  } else { \
651  h = mid; \
652  } \
653  } \
654  return l; \
655  }
659 #undef DEF_COMPUTE_UPPER_BOUND_FROM_ORDERED_INDEX_FOR_TIMEINTERVAL
660 
661 #define DEF_GET_VALUE_IN_FRAME(value_type, logical_type) \
662  extern "C" RUNTIME_EXPORT ALWAYS_INLINE logical_type \
663  get_##value_type##_value_##logical_type##_type_in_frame( \
664  const int64_t target_row_idx_in_frame, \
665  const int64_t frame_start_offset, \
666  const int64_t frame_end_offset, \
667  const value_type* col_buf, \
668  const int32_t* partition_rowid_buf, \
669  const int64_t* ordered_index_buf, \
670  const logical_type logical_null_val, \
671  const logical_type col_null_val) { \
672  if (target_row_idx_in_frame < frame_start_offset || \
673  target_row_idx_in_frame >= frame_end_offset) { \
674  return logical_null_val; \
675  } \
676  const auto target_offset = \
677  partition_rowid_buf[ordered_index_buf[target_row_idx_in_frame]]; \
678  logical_type target_val = col_buf[target_offset]; \
679  if (target_val == col_null_val) { \
680  target_val = logical_null_val; \
681  } \
682  return target_val; \
683  }
684 DEF_GET_VALUE_IN_FRAME(int8_t, int8_t)
685 DEF_GET_VALUE_IN_FRAME(int8_t, int16_t)
686 DEF_GET_VALUE_IN_FRAME(int8_t, int32_t)
687 DEF_GET_VALUE_IN_FRAME(int8_t, int64_t)
688 DEF_GET_VALUE_IN_FRAME(int16_t, int16_t)
689 DEF_GET_VALUE_IN_FRAME(int16_t, int32_t)
690 DEF_GET_VALUE_IN_FRAME(int16_t, int64_t)
691 DEF_GET_VALUE_IN_FRAME(int32_t, int32_t)
692 DEF_GET_VALUE_IN_FRAME(int32_t, int64_t)
693 DEF_GET_VALUE_IN_FRAME(int64_t, int64_t)
694 DEF_GET_VALUE_IN_FRAME(float, float)
695 DEF_GET_VALUE_IN_FRAME(double, double)
696 #undef DEF_GET_VALUE_IN_FRAME
697 
698 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t encode_date(int64_t decoded_val,
699  int64_t null_val,
700  int64_t multiplier) {
701  return decoded_val == null_val ? decoded_val : decoded_val * multiplier;
702 }
703 
704 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
705 compute_row_mode_start_index_sub(int64_t candidate_index,
706  int64_t current_partition_start_offset,
707  int64_t frame_bound) {
708  int64_t index = candidate_index - current_partition_start_offset - frame_bound;
709  return index < 0 ? 0 : index;
710 }
711 
712 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
713 compute_row_mode_start_index_add(int64_t candidate_index,
714  int64_t current_partition_start_offset,
715  int64_t frame_bound,
716  int64_t num_current_partition_elem) {
717  int64_t index = candidate_index - current_partition_start_offset + frame_bound;
718  return index >= num_current_partition_elem ? num_current_partition_elem : index;
719 }
720 
721 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
722 compute_row_mode_end_index_sub(int64_t candidate_index,
723  int64_t current_partition_start_offset,
724  int64_t frame_bound) {
725  int64_t index = candidate_index - current_partition_start_offset - frame_bound;
726  return index < 0 ? 0 : index + 1;
727 }
728 
729 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
730 compute_row_mode_end_index_add(int64_t candidate_index,
731  int64_t current_partition_start_offset,
732  int64_t frame_bound,
733  int64_t num_current_partition_elem) {
734  int64_t index = candidate_index - current_partition_start_offset + frame_bound;
735  return index >= num_current_partition_elem ? num_current_partition_elem : index + 1;
736 }
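// Example: for ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING over a partition that
// starts at global offset 100 and holds 8 rows, the row at global index 105
// (partition-relative index 5) gets
//   compute_row_mode_start_index_sub(105, 100, 2)  -> 3   (frame start, clamped at 0)
//   compute_row_mode_end_index_add(105, 100, 1, 8) -> 7   (one past the frame end)
// so the frame covers partition-relative rows [3, 7).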
737 
738 #define DEF_RANGE_MODE_FRAME_LOWER_BOUND(value_type, opname, opsym) \
739  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
740  range_mode_##value_type##_##opname##_frame_lower_bound( \
741  const int64_t num_elems, \
742  const value_type target_value, \
743  const value_type* col_buf, \
744  const int32_t* partition_rowid_buf, \
745  const int64_t* ordered_index_buf, \
746  const int64_t frame_bound_val, \
747  const value_type null_val, \
748  const int64_t null_start_pos, \
749  const int64_t null_end_pos) { \
750  if (target_value == null_val) { \
751  return null_start_pos; \
752  } else { \
753  value_type new_val = target_value opsym frame_bound_val; \
754  return compute_##value_type##_lower_bound_from_ordered_index(num_elems, \
755  new_val, \
756  col_buf, \
757  partition_rowid_buf, \
758  ordered_index_buf, \
759  null_val, \
760  null_start_pos, \
761  null_end_pos); \
762  } \
763  }
764 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int8_t, add, +)
765 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int8_t, sub, -)
766 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, add, +)
767 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, sub, -)
768 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, add, +)
769 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, sub, -)
770 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, add, +)
771 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, sub, -)
772 DEF_RANGE_MODE_FRAME_LOWER_BOUND(float, add, +)
773 DEF_RANGE_MODE_FRAME_LOWER_BOUND(float, sub, -)
774 DEF_RANGE_MODE_FRAME_LOWER_BOUND(double, add, +)
775 DEF_RANGE_MODE_FRAME_LOWER_BOUND(double, sub, -)
776 #undef DEF_RANGE_MODE_FRAME_LOWER_BOUND
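// Example: for a frame such as RANGE BETWEEN 3 PRECEDING AND CURRENT ROW over
// an ascending int32 ordering column, the generated
// range_mode_int32_t_sub_frame_lower_bound() computes new_val = target_value - 3
// and then binary-searches the ordered index for the first row whose ordering
// value is >= new_val; that position becomes the frame's lower bound.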
777 
778 #define DEF_RANGE_MODE_FRAME_UPPER_BOUND(value_type, opname, opsym) \
779  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
780  range_mode_##value_type##_##opname##_frame_upper_bound( \
781  const int64_t num_elems, \
782  const value_type target_value, \
783  const value_type* col_buf, \
784  const int32_t* partition_rowid_buf, \
785  const int64_t* ordered_index_buf, \
786  const int64_t frame_bound_val, \
787  const value_type null_val, \
788  const int64_t null_start_pos, \
789  const int64_t null_end_pos) { \
790  if (target_value == null_val) { \
791  return null_end_pos; \
792  } else { \
793  value_type new_val = target_value opsym frame_bound_val; \
794  return compute_##value_type##_upper_bound_from_ordered_index(num_elems, \
795  new_val, \
796  col_buf, \
797  partition_rowid_buf, \
798  ordered_index_buf, \
799  null_val, \
800  null_start_pos, \
801  null_end_pos); \
802  } \
803  }
804 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int8_t, add, +)
805 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int8_t, sub, -)
806 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, add, +)
807 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, sub, -)
808 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, add, +)
809 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, sub, -)
810 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, add, +)
811 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, sub, -)
812 DEF_RANGE_MODE_FRAME_UPPER_BOUND(float, add, +)
813 DEF_RANGE_MODE_FRAME_UPPER_BOUND(float, sub, -)
814 DEF_RANGE_MODE_FRAME_UPPER_BOUND(double, add, +)
815 DEF_RANGE_MODE_FRAME_UPPER_BOUND(double, sub, -)
816 #undef DEF_RANGE_MODE_FRAME_UPPER_BOUND
817 
818 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_integer_aggregation_tree(
819  int64_t** aggregation_trees,
820  size_t partition_idx) {
821  return aggregation_trees[partition_idx];
822 }
823 
824 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double* get_double_aggregation_tree(
825  int64_t** aggregation_trees,
826  size_t partition_idx) {
827  double** casted_aggregation_trees = reinterpret_cast<double**>(aggregation_trees);
828  return casted_aggregation_trees[partition_idx];
829 }
830 
831 extern "C" RUNTIME_EXPORT ALWAYS_INLINE SumAndCountPair<int64_t>*
832 get_integer_derived_aggregation_tree(int64_t** aggregation_trees, size_t partition_idx) {
833  SumAndCountPair<int64_t>** casted_aggregation_trees =
834  reinterpret_cast<SumAndCountPair<int64_t>**>(aggregation_trees);
835  return casted_aggregation_trees[partition_idx];
836 }
837 
838 extern "C" RUNTIME_EXPORT ALWAYS_INLINE SumAndCountPair<double>*
839 get_double_derived_aggregation_tree(int64_t** aggregation_trees, size_t partition_idx) {
840  SumAndCountPair<double>** casted_aggregation_trees =
841  reinterpret_cast<SumAndCountPair<double>**>(aggregation_trees);
842  return casted_aggregation_trees[partition_idx];
843 }
844 
845 extern "C" RUNTIME_EXPORT ALWAYS_INLINE size_t
846 getStartOffsetForSegmentTreeTraversal(size_t level, size_t tree_fanout) {
847  size_t offset = 0;
848  for (size_t i = 0; i < level; i++) {
849  offset += pow(tree_fanout, i);
850  }
851  return offset;
852 }
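// The aggregation tree is an implicit tree_fanout-ary segment tree stored in
// level order, so a level starts after all nodes of the shallower levels.
// Example with tree_fanout = 4: level 0 starts at offset 0, level 1 at 1,
// level 2 at 1 + 4 = 5, and level 3 at 1 + 4 + 16 = 21.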
853 
854 #define DEF_COMPUTE_FRAME_MIN(agg_value_type) \
855  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_value_type \
856  compute_##agg_value_type##_frame_min( \
857  agg_value_type* aggregation_tree_for_partition, \
858  size_t query_range_start_idx, \
859  size_t query_range_end_idx, \
860  size_t leaf_level, \
861  size_t tree_fanout, \
862  agg_value_type invalid_val, \
863  agg_value_type null_val) { \
864  size_t leaf_start_idx = \
865  getStartOffsetForSegmentTreeTraversal(leaf_level, tree_fanout); \
866  size_t begin = leaf_start_idx + query_range_start_idx; \
867  size_t end = leaf_start_idx + query_range_end_idx; \
868  agg_value_type res = std::numeric_limits<agg_value_type>::max(); \
869  bool all_nulls = true; \
870  for (size_t level = leaf_level; level >= 0; level--) { \
871  size_t parentBegin = begin / tree_fanout; \
872  size_t parentEnd = (end - 1) / tree_fanout; \
873  if (parentBegin == parentEnd) { \
874  for (size_t pos = begin; pos < end; pos++) { \
875  if (aggregation_tree_for_partition[pos] != null_val) { \
876  all_nulls = false; \
877  res = std::min(res, aggregation_tree_for_partition[pos]); \
878  } \
879  } \
880  return all_nulls ? null_val : res; \
881  } else if (parentBegin > parentEnd) { \
882  return null_val; \
883  } \
884  size_t group_begin = (parentBegin * tree_fanout) + 1; \
885  if (begin != group_begin) { \
886  size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1; \
887  for (size_t pos = begin; pos < limit; pos++) { \
888  if (aggregation_tree_for_partition[pos] != null_val) { \
889  all_nulls = false; \
890  res = std::min(res, aggregation_tree_for_partition[pos]); \
891  } \
892  } \
893  parentBegin++; \
894  } \
895  size_t group_end = (parentEnd * tree_fanout) + 1; \
896  if (end != group_end) { \
897  for (size_t pos = group_end; pos < end; pos++) { \
898  if (aggregation_tree_for_partition[pos] != null_val) { \
899  all_nulls = false; \
900  res = std::min(res, aggregation_tree_for_partition[pos]); \
901  } \
902  } \
903  } \
904  begin = parentBegin; \
905  end = parentEnd; \
906  } \
907  return invalid_val; \
908  }
909 DEF_COMPUTE_FRAME_MIN(int64_t)
910 DEF_COMPUTE_FRAME_MIN(double)
911 #undef DEF_COMPUTE_FRAME_MIN
912 
913 #define DEF_COMPUTE_FRAME_MAX(agg_value_type) \
914  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_value_type \
915  compute_##agg_value_type##_frame_max( \
916  agg_value_type* aggregation_tree_for_partition, \
917  size_t query_range_start_idx, \
918  size_t query_range_end_idx, \
919  size_t leaf_level, \
920  size_t tree_fanout, \
921  agg_value_type invalid_val, \
922  agg_value_type null_val) { \
923  size_t leaf_start_idx = \
924  getStartOffsetForSegmentTreeTraversal(leaf_level, tree_fanout); \
925  size_t begin = leaf_start_idx + query_range_start_idx; \
926  size_t end = leaf_start_idx + query_range_end_idx; \
927  agg_value_type res = std::numeric_limits<agg_value_type>::lowest(); \
928  bool all_nulls = true; \
929  for (size_t level = leaf_level; level >= 0; level--) { \
930  size_t parentBegin = begin / tree_fanout; \
931  size_t parentEnd = (end - 1) / tree_fanout; \
932  if (parentBegin == parentEnd) { \
933  for (size_t pos = begin; pos < end; pos++) { \
934  if (aggregation_tree_for_partition[pos] != null_val) { \
935  all_nulls = false; \
936  res = std::max(res, aggregation_tree_for_partition[pos]); \
937  } \
938  } \
939  return all_nulls ? null_val : res; \
940  } else if (parentBegin > parentEnd) { \
941  return null_val; \
942  } \
943  size_t group_begin = (parentBegin * tree_fanout) + 1; \
944  if (begin != group_begin) { \
945  size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1; \
946  for (size_t pos = begin; pos < limit; pos++) { \
947  if (aggregation_tree_for_partition[pos] != null_val) { \
948  all_nulls = false; \
949  res = std::max(res, aggregation_tree_for_partition[pos]); \
950  } \
951  } \
952  parentBegin++; \
953  } \
954  size_t group_end = (parentEnd * tree_fanout) + 1; \
955  if (end != group_end) { \
956  for (size_t pos = group_end; pos < end; pos++) { \
957  if (aggregation_tree_for_partition[pos] != null_val) { \
958  all_nulls = false; \
959  res = std::max(res, aggregation_tree_for_partition[pos]); \
960  } \
961  } \
962  } \
963  begin = parentBegin; \
964  end = parentEnd; \
965  } \
966  return invalid_val; \
967  }
968 DEF_COMPUTE_FRAME_MAX(int64_t)
969 DEF_COMPUTE_FRAME_MAX(double)
970 #undef DEF_COMPUTE_FRAME_MAX
971 
972 #define DEF_COMPUTE_FRAME_SUM(agg_value_type) \
973  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_value_type \
974  compute_##agg_value_type##_frame_sum( \
975  agg_value_type* aggregation_tree_for_partition, \
976  size_t query_range_start_idx, \
977  size_t query_range_end_idx, \
978  size_t leaf_level, \
979  size_t tree_fanout, \
980  agg_value_type invalid_val, \
981  agg_value_type null_val) { \
982  size_t leaf_start_idx = \
983  getStartOffsetForSegmentTreeTraversal(leaf_level, tree_fanout); \
984  size_t begin = leaf_start_idx + query_range_start_idx; \
985  size_t end = leaf_start_idx + query_range_end_idx; \
986  agg_value_type res = 0; \
987  bool all_nulls = true; \
988  for (size_t level = leaf_level; level >= 0; level--) { \
989  size_t parentBegin = begin / tree_fanout; \
990  size_t parentEnd = (end - 1) / tree_fanout; \
991  if (parentBegin == parentEnd) { \
992  for (size_t pos = begin; pos < end; pos++) { \
993  if (aggregation_tree_for_partition[pos] != null_val) { \
994  all_nulls = false; \
995  res += aggregation_tree_for_partition[pos]; \
996  } \
997  } \
998  return all_nulls ? null_val : res; \
999  } else if (parentBegin > parentEnd) { \
1000  return null_val; \
1001  } \
1002  size_t group_begin = (parentBegin * tree_fanout) + 1; \
1003  if (begin != group_begin) { \
1004  size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1; \
1005  for (size_t pos = begin; pos < limit; pos++) { \
1006  if (aggregation_tree_for_partition[pos] != null_val) { \
1007  all_nulls = false; \
1008  res += aggregation_tree_for_partition[pos]; \
1009  } \
1010  } \
1011  parentBegin++; \
1012  } else if (parentBegin > parentEnd) { \
1013  return null_val; \
1014  } \
1015  size_t group_end = (parentEnd * tree_fanout) + 1; \
1016  if (end != group_end) { \
1017  for (size_t pos = group_end; pos < end; pos++) { \
1018  if (aggregation_tree_for_partition[pos] != null_val) { \
1019  all_nulls = false; \
1020  res += aggregation_tree_for_partition[pos]; \
1021  } \
1022  } \
1023  } \
1024  begin = parentBegin; \
1025  end = parentEnd; \
1026  } \
1027  return invalid_val; \
1028  }
1029 DEF_COMPUTE_FRAME_SUM(int64_t)
1030 DEF_COMPUTE_FRAME_SUM(double)
1031 #undef DEF_COMPUTE_FRAME_SUM
1032 
1033 #define DEF_COMPUTE_SUM_AND_COUNT_FRAME_AGGREGATES(agg_value_type) \
1034  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void \
1035  compute_##agg_value_type##_sum_and_count_frame_aggregates( \
1036  SumAndCountPair<agg_value_type>* aggregation_tree_for_partition, \
1037  SumAndCountPair<agg_value_type>& res, \
1038  size_t query_range_start_idx, \
1039  size_t query_range_end_idx, \
1040  size_t leaf_level, \
1041  size_t tree_fanout, \
1042  agg_value_type invalid_val, \
1043  agg_value_type null_val) { \
1044  size_t leaf_start_idx = \
1045  getStartOffsetForSegmentTreeTraversal(leaf_level, tree_fanout); \
1046  size_t begin = leaf_start_idx + query_range_start_idx; \
1047  size_t end = leaf_start_idx + query_range_end_idx; \
1048  bool all_nulls = true; \
1049  for (size_t level = leaf_level; level >= 0; level--) { \
1050  size_t parentBegin = begin / tree_fanout; \
1051  size_t parentEnd = (end - 1) / tree_fanout; \
1052  if (parentBegin == parentEnd) { \
1053  for (size_t pos = begin; pos < end; pos++) { \
1054  if (aggregation_tree_for_partition[pos].sum != null_val) { \
1055  all_nulls = false; \
1056  res.sum += aggregation_tree_for_partition[pos].sum; \
1057  res.count += aggregation_tree_for_partition[pos].count; \
1058  } \
1059  } \
1060  if (all_nulls) { \
1061  res.sum = null_val; \
1062  res.count = 0; \
1063  } \
1064  return; \
1065  } else if (parentBegin > parentEnd) { \
1066  res.sum = null_val; \
1067  res.count = 0; \
1068  return; \
1069  } \
1070  size_t group_begin = (parentBegin * tree_fanout) + 1; \
1071  if (begin != group_begin) { \
1072  size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1; \
1073  for (size_t pos = begin; pos < limit; pos++) { \
1074  if (aggregation_tree_for_partition[pos].sum != null_val) { \
1075  all_nulls = false; \
1076  res.sum += aggregation_tree_for_partition[pos].sum; \
1077  res.count += aggregation_tree_for_partition[pos].count; \
1078  } \
1079  } \
1080  parentBegin++; \
1081  } \
1082  size_t group_end = (parentEnd * tree_fanout) + 1; \
1083  if (end != group_end) { \
1084  for (size_t pos = group_end; pos < end; pos++) { \
1085  if (aggregation_tree_for_partition[pos].sum != null_val) { \
1086  all_nulls = false; \
1087  res.sum += aggregation_tree_for_partition[pos].sum; \
1088  res.count += aggregation_tree_for_partition[pos].count; \
1089  } \
1090  } \
1091  } \
1092  begin = parentBegin; \
1093  end = parentEnd; \
1094  } \
1095  res.sum = invalid_val; \
1096  res.count = 0; \
1097  return; \
1098  }
1101 #undef DEF_COMPUTE_SUM_AND_COUNT_FRAME_AGGREGATES
1102 
1103 #define DEF_SEARCH_AGGREGATION_TREE(agg_value_type) \
1104  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_value_type \
1105  search_##agg_value_type##_aggregation_tree( \
1106  agg_value_type* aggregated_tree_for_partition, \
1107  size_t query_range_start_idx, \
1108  size_t query_range_end_idx, \
1109  size_t leaf_level, \
1110  size_t tree_fanout, \
1111  bool decimal_type, \
1112  size_t scale, \
1113  agg_value_type invalid_val, \
1114  agg_value_type null_val, \
1115  int32_t agg_type) { \
1116  if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) { \
1117  return null_val; \
1118  } \
1119  switch (agg_type) { \
1120  case 1: { \
1121  return compute_##agg_value_type##_frame_min(aggregated_tree_for_partition, \
1122  query_range_start_idx, \
1123  query_range_end_idx, \
1124  leaf_level, \
1125  tree_fanout, \
1126  invalid_val, \
1127  null_val); \
1128  } \
1129  case 2: { \
1130  return compute_##agg_value_type##_frame_max(aggregated_tree_for_partition, \
1131  query_range_start_idx, \
1132  query_range_end_idx, \
1133  leaf_level, \
1134  tree_fanout, \
1135  invalid_val, \
1136  null_val); \
1137  } \
1138  default: { \
1139  return compute_##agg_value_type##_frame_sum(aggregated_tree_for_partition, \
1140  query_range_start_idx, \
1141  query_range_end_idx, \
1142  leaf_level, \
1143  tree_fanout, \
1144  invalid_val, \
1145  null_val); \
1146  } \
1147  } \
1148  }
1149 
1152 #undef DEF_SEARCH_AGGREGATION_TREE
1153 
1154 #define DEF_SEARCH_DERIVED_AGGREGATION_TREE(agg_value_type) \
1155  extern "C" RUNTIME_EXPORT ALWAYS_INLINE double \
1156  search_##agg_value_type##_derived_aggregation_tree( \
1157  SumAndCountPair<agg_value_type>* aggregated_tree_for_partition, \
1158  size_t query_range_start_idx, \
1159  size_t query_range_end_idx, \
1160  size_t leaf_level, \
1161  size_t tree_fanout, \
1162  bool decimal_type, \
1163  size_t scale, \
1164  agg_value_type invalid_val, \
1165  agg_value_type null_val, \
1166  int32_t agg_type) { \
1167  if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) { \
1168  return null_val; \
1169  } \
1170  SumAndCountPair<agg_value_type> res{0, 0}; \
1171  compute_##agg_value_type##_sum_and_count_frame_aggregates( \
1172  aggregated_tree_for_partition, \
1173  res, \
1174  query_range_start_idx, \
1175  query_range_end_idx, \
1176  leaf_level, \
1177  tree_fanout, \
1178  invalid_val, \
1179  null_val); \
1180  if (res.sum == null_val) { \
1181  return null_val; \
1182  } else if (res.count > 0) { \
1183  if (decimal_type) { \
1184  return (static_cast<double>(res.sum) / pow(10, scale)) / res.count; \
1185  } \
1186  return (static_cast<double>(res.sum)) / res.count; \
1187  } else { \
1188  return invalid_val; \
1189  } \
1190  }
1191 
1194 #undef DEF_SEARCH_DERIVED_AGGREGATION_TREE
1195 
1196 #define DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG(agg_type, null_type) \
1197  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_type \
1198  handle_null_val_##agg_type##_##null_type##_window_framing_agg( \
1199  agg_type res, null_type agg_null_val, agg_type input_col_null_val) { \
1200  if (res == agg_null_val) { \
1201  return input_col_null_val; \
1202  } \
1203  return res; \
1204  }
1208 #undef DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG
1209 
1210 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum(int64_t* agg, const int64_t val) {
1211  const auto old = *agg;
1212  *agg += val;
1213  return old;
1214 }
1215 
1216 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max(int64_t* agg, const int64_t val) {
1217  *agg = std::max(*agg, val);
1218 }
1219 
1220 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min(int64_t* agg, const int64_t val) {
1221  *agg = std::min(*agg, val);
1222 }
1223 
1224 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id(int64_t* agg, const int64_t val) {
1225  *agg = val;
1226 }
1227 
1228 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t* agg_id_varlen(int8_t* varlen_buffer,
1229  const int64_t offset,
1230  const int8_t* value,
1231  const int64_t size_bytes) {
1232  for (auto i = 0; i < size_bytes; i++) {
1233  varlen_buffer[offset + i] = value[i];
1234  }
1235  return &varlen_buffer[offset];
1236 }
1237 
1238 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1239 checked_single_agg_id(int64_t* agg, const int64_t val, const int64_t null_val) {
1240  if (val == null_val) {
1241  return 0;
1242  }
1243 
1244  if (*agg == val) {
1245  return 0;
1246  } else if (*agg == null_val) {
1247  *agg = val;
1248  return 0;
1249  } else {
1250  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
1251  return 15;
1252  }
1253 }
1254 
1255 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_count_distinct_bitmap_skip_val(
1256  int64_t* agg,
1257  const int64_t val,
1258  const int64_t min_val,
1259  const int64_t skip_val) {
1260  if (val != skip_val) {
1261  agg_count_distinct_bitmap(agg, val, min_val);
1262  }
1263 }
1264 
1265 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_skip_val_gpu(int64_t*,
1266  const int64_t,
1267  const int64_t,
1268  const int64_t,
1269  const int64_t,
1270  const int64_t,
1271  const uint64_t,
1272  const uint64_t) {}
1273 
1274 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_int32(uint32_t* agg,
1275  const int32_t) {
1276  return (*agg)++;
1277 }
1278 
1279 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_int32(int32_t* agg,
1280  const int32_t val) {
1281  const auto old = *agg;
1282  *agg += val;
1283  return old;
1284 }
1285 
1286 #define DEF_AGG_MAX_INT(n) \
1287  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_int##n(int##n##_t* agg, \
1288  const int##n##_t val) { \
1289  *agg = std::max(*agg, val); \
1290  }
1291 
1292 DEF_AGG_MAX_INT(32)
1293 DEF_AGG_MAX_INT(16)
1294 DEF_AGG_MAX_INT(8)
1295 #undef DEF_AGG_MAX_INT
1296 
1297 #define DEF_AGG_MIN_INT(n) \
1298  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_int##n(int##n##_t* agg, \
1299  const int##n##_t val) { \
1300  *agg = std::min(*agg, val); \
1301  }
1302 
1303 DEF_AGG_MIN_INT(32)
1304 DEF_AGG_MIN_INT(16)
1305 DEF_AGG_MIN_INT(8)
1306 #undef DEF_AGG_MIN_INT
1307 
1308 #define DEF_AGG_ID_INT(n) \
1309  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_int##n(int##n##_t* agg, \
1310  const int##n##_t val) { \
1311  *agg = val; \
1312  }
1313 
1314 #define DEF_CHECKED_SINGLE_AGG_ID_INT(n) \
1315  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id_int##n( \
1316  int##n##_t* agg, const int##n##_t val, const int##n##_t null_val) { \
1317  if (val == null_val) { \
1318  return 0; \
1319  } \
1320  if (*agg == val) { \
1321  return 0; \
1322  } else if (*agg == null_val) { \
1323  *agg = val; \
1324  return 0; \
1325  } else { \
1326  /* see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES*/ \
1327  return 15; \
1328  } \
1329  }
1330 
1331 DEF_AGG_ID_INT(32)
1332 DEF_AGG_ID_INT(16)
1333 DEF_AGG_ID_INT(8)
1334 
1335 DEF_CHECKED_SINGLE_AGG_ID_INT(32)
1336 DEF_CHECKED_SINGLE_AGG_ID_INT(16)
1337 DEF_CHECKED_SINGLE_AGG_ID_INT(8)
1338 
1339 #undef DEF_AGG_ID_INT
1340 #undef DEF_CHECKED_SINGLE_AGG_ID_INT
1341 
1342 #define DEF_WRITE_PROJECTION_INT(n) \
1343  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void write_projection_int##n( \
1344  int8_t* slot_ptr, const int##n##_t val, const int64_t init_val) { \
1345  if (val != init_val) { \
1346  *reinterpret_cast<int##n##_t*>(slot_ptr) = val; \
1347  } \
1348  }
1349 
1352 #undef DEF_WRITE_PROJECTION_INT
1353 
1354 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t* agg,
1355  const int64_t val,
1356  const int64_t skip_val) {
1357  const auto old = *agg;
1358  if (val != skip_val) {
1359  if (old != skip_val) {
1360  return agg_sum(agg, val);
1361  } else {
1362  *agg = val;
1363  }
1364  }
1365  return old;
1366 }
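// Example: the accumulator starts out holding skip_val (the NULL sentinel), so
// the first non-skip value overwrites it and later values are added normally,
// while rows equal to skip_val leave it untouched. Summing {skip, 5, skip, 2}
// therefore yields 7 rather than a sentinel-polluted total.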
1367 
1368 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1369 agg_sum_int32_skip_val(int32_t* agg, const int32_t val, const int32_t skip_val) {
1370  const auto old = *agg;
1371  if (val != skip_val) {
1372  if (old != skip_val) {
1373  return agg_sum_int32(agg, val);
1374  } else {
1375  *agg = val;
1376  }
1377  }
1378  return old;
1379 }
1380 
1381 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t
1382 agg_count_skip_val(uint64_t* agg, const int64_t val, const int64_t skip_val) {
1383  if (val != skip_val) {
1384  return agg_count(agg, val);
1385  }
1386  return *agg;
1387 }
1388 
1389 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t
1390 agg_count_int32_skip_val(uint32_t* agg, const int32_t val, const int32_t skip_val) {
1391  if (val != skip_val) {
1392  return agg_count_int32(agg, val);
1393  }
1394  return *agg;
1395 }
1396 
1397 #define DEF_SKIP_AGG_ADD(base_agg_func) \
1398  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1399  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
1400  if (val != skip_val) { \
1401  base_agg_func(agg, val); \
1402  } \
1403  }
1404 
1405 #define DEF_SKIP_AGG(base_agg_func) \
1406  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1407  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
1408  if (val != skip_val) { \
1409  const DATA_T old_agg = *agg; \
1410  if (old_agg != skip_val) { \
1411  base_agg_func(agg, val); \
1412  } else { \
1413  *agg = val; \
1414  } \
1415  } \
1416  }
1417 
1418 #define DATA_T int64_t
1421 #undef DATA_T
1422 
1423 #define DATA_T int32_t
1426 #undef DATA_T
1427 
1428 #define DATA_T int16_t
1431 #undef DATA_T
1432 
1433 #define DATA_T int8_t
1436 #undef DATA_T
1437 
1438 #undef DEF_SKIP_AGG_ADD
1439 #undef DEF_SKIP_AGG
1440 
1441 // TODO(alex): fix signature
1442 
1443 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_double(uint64_t* agg,
1444  const double val) {
1445  return (*agg)++;
1446 }
1447 
1448 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_double(int64_t* agg,
1449  const double val) {
1450  const auto r = *reinterpret_cast<const double*>(agg) + val;
1451  *agg = *reinterpret_cast<const int64_t*>(may_alias_ptr(&r));
1452 }
1453 
1454 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_double(int64_t* agg,
1455  const double val) {
1456  const auto r = std::max(*reinterpret_cast<const double*>(agg), val);
1457  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
1458 }
1459 
1460 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_double(int64_t* agg,
1461  const double val) {
1462  const auto r = std::min(*reinterpret_cast<const double*>(agg), val);
1463  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
1464 }
1465 
1466 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_double(int64_t* agg,
1467  const double val) {
1468  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
1469 }
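// The double-typed aggregates reuse 64-bit integer slots: the slot is
// reinterpreted as a double, updated, and the resulting bit pattern is written
// back through may_alias_ptr() so the type punning stays safe under strict
// aliasing. agg_id_double() simply stores the bit pattern of val.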
1470 
1471 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1472 checked_single_agg_id_double(int64_t* agg, const double val, const double null_val) {
1473  if (val == null_val) {
1474  return 0;
1475  }
1476 
1477  if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)))) {
1478  return 0;
1479  } else if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&null_val)))) {
1480  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
1481  return 0;
1482  } else {
1483  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
1484  return 15;
1485  }
1486 }
1487 
1488 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_float(uint32_t* agg,
1489  const float val) {
1490  return (*agg)++;
1491 }
1492 
1493 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_float(int32_t* agg,
1494  const float val) {
1495  const auto r = *reinterpret_cast<const float*>(agg) + val;
1496  *agg = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
1497 }
1498 
1499 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_float(int32_t* agg,
1500  const float val) {
1501  const auto r = std::max(*reinterpret_cast<const float*>(agg), val);
1502  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
1503 }
1504 
1505 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_float(int32_t* agg,
1506  const float val) {
1507  const auto r = std::min(*reinterpret_cast<const float*>(agg), val);
1508  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
1509 }
1510 
1511 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_float(int32_t* agg, const float val) {
1512  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
1513 }
1514 
1515 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1516 checked_single_agg_id_float(int32_t* agg, const float val, const float null_val) {
1517  if (val == null_val) {
1518  return 0;
1519  }
1520 
1521  if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)))) {
1522  return 0;
1523  } else if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&null_val)))) {
1524  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
1525  return 0;
1526  } else {
1527  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
1528  return 15;
1529  }
1530 }
1531 
1532 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t
1533 agg_count_double_skip_val(uint64_t* agg, const double val, const double skip_val) {
1534  if (val != skip_val) {
1535  return agg_count_double(agg, val);
1536  }
1537  return *agg;
1538 }
1539 
1540 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t
1541 agg_count_float_skip_val(uint32_t* agg, const float val, const float skip_val) {
1542  if (val != skip_val) {
1543  return agg_count_float(agg, val);
1544  }
1545  return *agg;
1546 }
1547 
1548 #define DEF_SKIP_AGG_ADD(base_agg_func) \
1549  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1550  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
1551  if (val != skip_val) { \
1552  base_agg_func(agg, val); \
1553  } \
1554  }
1555 
1556 #define DEF_SKIP_AGG(base_agg_func) \
1557  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1558  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
1559  if (val != skip_val) { \
1560  const ADDR_T old_agg = *agg; \
1561  if (old_agg != *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&skip_val))) { \
1562  base_agg_func(agg, val); \
1563  } else { \
1564  *agg = *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&val)); \
1565  } \
1566  } \
1567  }
1568 
1569 #define DATA_T double
1570 #define ADDR_T int64_t
1574 #undef ADDR_T
1575 #undef DATA_T
1576 
1577 #define DATA_T float
1578 #define ADDR_T int32_t
1582 #undef ADDR_T
1583 #undef DATA_T
1584 
1585 #undef DEF_SKIP_AGG_ADD
1586 #undef DEF_SKIP_AGG
1587 
1588 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_floor(const int64_t x,
1589  const int64_t scale) {
1590  if (x >= 0) {
1591  return x / scale * scale;
1592  }
1593  if (!(x % scale)) {
1594  return x;
1595  }
1596  return x / scale * scale - scale;
1597 }
1598 
1599 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_ceil(const int64_t x,
1600  const int64_t scale) {
1601  return decimal_floor(x, scale) + (x % scale ? scale : 0);
1602 }
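// Worked example with scale = 100 (two decimal digits):
//   decimal_floor( 1234, 100) ->  1200
//   decimal_floor(-1234, 100) -> -1300
//   decimal_ceil ( 1234, 100) ->  1300
//   decimal_ceil (-1234, 100) -> -1200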
1603 
1604 // Shared memory aggregators. Should never be called,
1605 // real implementations are in cuda_mapd_rt.cu.
1606 #define DEF_SHARED_AGG_RET_STUBS(base_agg_func) \
1607  extern "C" GPU_RT_STUB uint64_t base_agg_func##_shared(uint64_t* agg, \
1608  const int64_t val) { \
1609  return 0; \
1610  } \
1611  \
1612  extern "C" GPU_RT_STUB uint64_t base_agg_func##_skip_val_shared( \
1613  uint64_t* agg, const int64_t val, const int64_t skip_val) { \
1614  return 0; \
1615  } \
1616  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_shared(uint32_t* agg, \
1617  const int32_t val) { \
1618  return 0; \
1619  } \
1620  \
1621  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_skip_val_shared( \
1622  uint32_t* agg, const int32_t val, const int32_t skip_val) { \
1623  return 0; \
1624  } \
1625  \
1626  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_shared(uint64_t* agg, \
1627  const double val) { \
1628  return 0; \
1629  } \
1630  \
1631  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_skip_val_shared( \
1632  uint64_t* agg, const double val, const double skip_val) { \
1633  return 0; \
1634  } \
1635  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_shared(uint32_t* agg, \
1636  const float val) { \
1637  return 0; \
1638  } \
1639  \
1640  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_skip_val_shared( \
1641  uint32_t* agg, const float val, const float skip_val) { \
1642  return 0; \
1643  }
1644 
1645 #define DEF_SHARED_AGG_STUBS(base_agg_func) \
1646  extern "C" GPU_RT_STUB void base_agg_func##_shared(int64_t* agg, const int64_t val) {} \
1647  \
1648  extern "C" GPU_RT_STUB void base_agg_func##_skip_val_shared( \
1649  int64_t* agg, const int64_t val, const int64_t skip_val) {} \
1650  extern "C" GPU_RT_STUB void base_agg_func##_int32_shared(int32_t* agg, \
1651  const int32_t val) {} \
1652  extern "C" GPU_RT_STUB void base_agg_func##_int16_shared(int16_t* agg, \
1653  const int16_t val) {} \
1654  extern "C" GPU_RT_STUB void base_agg_func##_int8_shared(int8_t* agg, \
1655  const int8_t val) {} \
1656  \
1657  extern "C" GPU_RT_STUB void base_agg_func##_int32_skip_val_shared( \
1658  int32_t* agg, const int32_t val, const int32_t skip_val) {} \
1659  \
1660  extern "C" GPU_RT_STUB void base_agg_func##_double_shared(int64_t* agg, \
1661  const double val) {} \
1662  \
1663  extern "C" GPU_RT_STUB void base_agg_func##_double_skip_val_shared( \
1664  int64_t* agg, const double val, const double skip_val) {} \
1665  extern "C" GPU_RT_STUB void base_agg_func##_float_shared(int32_t* agg, \
1666  const float val) {} \
1667  \
1668  extern "C" GPU_RT_STUB void base_agg_func##_float_skip_val_shared( \
1669  int32_t* agg, const float val, const float skip_val) {}
1670 
1671 DEF_SHARED_AGG_RET_STUBS(agg_count)
1672 DEF_SHARED_AGG_STUBS(agg_max)
1673 DEF_SHARED_AGG_STUBS(agg_min)
1674 DEF_SHARED_AGG_STUBS(agg_id)
1675 
1676 extern "C" GPU_RT_STUB int8_t* agg_id_varlen_shared(int8_t* varlen_buffer,
1677  const int64_t offset,
1678  const int8_t* value,
1679  const int64_t size_bytes) {
1680  return nullptr;
1681 }
1682 
1683 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_shared(int64_t* agg,
1684  const int64_t val,
1685  const int64_t null_val) {
1686  return 0;
1687 }
1688 
1689 extern "C" GPU_RT_STUB int32_t
1690 checked_single_agg_id_int32_shared(int32_t* agg,
1691  const int32_t val,
1692  const int32_t null_val) {
1693  return 0;
1694 }
1695 extern "C" GPU_RT_STUB int32_t
1696 checked_single_agg_id_int16_shared(int16_t* agg,
1697  const int16_t val,
1698  const int16_t null_val) {
1699  return 0;
1700 }
1701 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_int8_shared(int8_t* agg,
1702  const int8_t val,
1703  const int8_t null_val) {
1704  return 0;
1705 }
1706 
1707 extern "C" GPU_RT_STUB int32_t
1708 checked_single_agg_id_double_shared(int64_t* agg,
1709  const double val,
1710  const double null_val) {
1711  return 0;
1712 }
1713 
1714 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_float_shared(int32_t* agg,
1715  const float val,
1716  const float null_val) {
1717  return 0;
1718 }
1719 
1720 extern "C" GPU_RT_STUB void agg_max_int16_skip_val_shared(int16_t* agg,
1721  const int16_t val,
1722  const int16_t skip_val) {}
1723 
1724 extern "C" GPU_RT_STUB void agg_max_int8_skip_val_shared(int8_t* agg,
1725  const int8_t val,
1726  const int8_t skip_val) {}
1727 
1728 extern "C" GPU_RT_STUB void agg_min_int16_skip_val_shared(int16_t* agg,
1729  const int16_t val,
1730  const int16_t skip_val) {}
1731 
1732 extern "C" GPU_RT_STUB void agg_min_int8_skip_val_shared(int8_t* agg,
1733  const int8_t val,
1734  const int8_t skip_val) {}
1735 
1736 extern "C" GPU_RT_STUB void agg_id_double_shared_slow(int64_t* agg, const double* val) {}
1737 
1738 extern "C" GPU_RT_STUB int64_t agg_sum_shared(int64_t* agg, const int64_t val) {
1739  return 0;
1740 }
1741 
1742 extern "C" GPU_RT_STUB int64_t agg_sum_skip_val_shared(int64_t* agg,
1743  const int64_t val,
1744  const int64_t skip_val) {
1745  return 0;
1746 }
1747 extern "C" GPU_RT_STUB int32_t agg_sum_int32_shared(int32_t* agg, const int32_t val) {
1748  return 0;
1749 }
1750 
1751 extern "C" GPU_RT_STUB int32_t agg_sum_int32_skip_val_shared(int32_t* agg,
1752  const int32_t val,
1753  const int32_t skip_val) {
1754  return 0;
1755 }
1756 
1757 extern "C" GPU_RT_STUB void agg_sum_double_shared(int64_t* agg, const double val) {}
1758 
1759 extern "C" GPU_RT_STUB void agg_sum_double_skip_val_shared(int64_t* agg,
1760  const double val,
1761  const double skip_val) {}
1762 extern "C" GPU_RT_STUB void agg_sum_float_shared(int32_t* agg, const float val) {}
1763 
1764 extern "C" GPU_RT_STUB void agg_sum_float_skip_val_shared(int32_t* agg,
1765  const float val,
1766  const float skip_val) {}
1767 
1768 extern "C" GPU_RT_STUB void force_sync() {}
1769 
1770 extern "C" GPU_RT_STUB void sync_warp() {}
1771 extern "C" GPU_RT_STUB void sync_warp_protected(int64_t thread_pos, int64_t row_count) {}
1772 extern "C" GPU_RT_STUB void sync_threadblock() {}
1773 
1774 extern "C" GPU_RT_STUB void write_back_non_grouped_agg(int64_t* input_buffer,
1775  int64_t* output_buffer,
1776  const int32_t num_agg_cols){};
1777 // x64 stride functions
1778 
1779 extern "C" RUNTIME_EXPORT NEVER_INLINE int32_t pos_start_impl(int32_t* error_code) {
1780  int32_t row_index_resume{0};
1781  if (error_code) {
1782  row_index_resume = error_code[0];
1783  error_code[0] = 0;
1784  }
1785  return row_index_resume;
1786 }
1787 
1788 extern "C" RUNTIME_EXPORT NEVER_INLINE int32_t group_buff_idx_impl() {
1789  return pos_start_impl(nullptr);
1790 }
1791 
1792 extern "C" RUNTIME_EXPORT NEVER_INLINE int32_t pos_step_impl() {
1793  return 1;
1794 }
1795 
1796 extern "C" GPU_RT_STUB int8_t thread_warp_idx(const int8_t warp_sz) {
1797  return 0;
1798 }
1799 
1800 extern "C" GPU_RT_STUB int64_t get_thread_index() {
1801  return 0;
1802 }
1803 
1804 extern "C" GPU_RT_STUB int64_t* declare_dynamic_shared_memory() {
1805  return nullptr;
1806 }
1807 
1808 extern "C" GPU_RT_STUB int64_t get_block_index() {
1809  return 0;
1810 }
1811 
1812 #undef GPU_RT_STUB
1813 
1814 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void record_error_code(const int32_t err_code,
1815  int32_t* error_codes) {
1816  // NB: never override persistent error codes (with code greater than zero).
1817  // On GPU, a projection query with a limit can run out of slots without it
1818  // being an actual error if the limit has been hit. If a persistent error
1819  // (division by zero, for example) occurs before running out of slots, we
1820  // have to avoid overriding it, because there's a risk that the query would
1821  // go through if we override with a potentially benign out-of-slots code.
1822  if (err_code && error_codes[pos_start_impl(nullptr)] <= 0) {
1823  error_codes[pos_start_impl(nullptr)] = err_code;
1824  }
1825 }
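A minimal sketch of the "never override a persistent error" rule described above, using a hypothetical single-slot error buffer and made-up numeric codes (the real buffer is indexed per thread position via pos_start_impl):

#include <cassert>
#include <cstdint>

void record_error_code_sketch(int32_t err_code, int32_t* error_codes) {
  // Only write if nothing persistent (code > 0) has been recorded yet.
  if (err_code && error_codes[0] <= 0) {
    error_codes[0] = err_code;
  }
}

int main() {
  int32_t slot[1] = {0};
  record_error_code_sketch(7, slot);   // hypothetical persistent error (e.g. div by zero)
  record_error_code_sketch(-3, slot);  // hypothetical benign out-of-slots code
  assert(slot[0] == 7);                // the persistent code is preserved
  return 0;
}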
1826 
1827 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t get_error_code(int32_t* error_codes) {
1828  return error_codes[pos_start_impl(nullptr)];
1829 }
1830 
1831 // group by helpers
1832 
1833 extern "C" RUNTIME_EXPORT const int64_t* init_shared_mem_nop(
1834  const int64_t* groups_buffer,
1835  const int32_t groups_buffer_size) {
1836  return groups_buffer;
1837 }
1838 
1839 extern "C" RUNTIME_EXPORT void write_back_nop(int64_t* dest,
1840  int64_t* src,
1841  const int32_t sz) {
1842 #ifndef _WIN32
1843  // the body is not really needed, just make sure the call is not optimized away
1844  assert(dest);
1845 #endif
1846 }
1847 
1848 extern "C" RUNTIME_EXPORT int64_t* init_shared_mem(const int64_t* global_groups_buffer,
1849  const int32_t groups_buffer_size) {
1850  return nullptr;
1851 }
1852 
1853 extern "C" RUNTIME_EXPORT NEVER_INLINE void init_group_by_buffer_gpu(
1854  int64_t* groups_buffer,
1855  const int64_t* init_vals,
1856  const uint32_t groups_buffer_entry_count,
1857  const uint32_t key_qw_count,
1858  const uint32_t agg_col_count,
1859  const bool keyless,
1860  const int8_t warp_size) {
1861 #ifndef _WIN32
1862  // the body is not really needed, just make sure the call is not optimized away
1863  assert(groups_buffer);
1864 #endif
1865 }
1866 
1867 extern "C" RUNTIME_EXPORT NEVER_INLINE void init_columnar_group_by_buffer_gpu(
1868  int64_t* groups_buffer,
1869  const int64_t* init_vals,
1870  const uint32_t groups_buffer_entry_count,
1871  const uint32_t key_qw_count,
1872  const uint32_t agg_col_count,
1873  const bool keyless,
1874  const bool blocks_share_memory,
1875  const int32_t frag_idx) {
1876 #ifndef _WIN32
1877  // the body is not really needed, just make sure the call is not optimized away
1878  assert(groups_buffer);
1879 #endif
1880 }
1881 
1882 extern "C" RUNTIME_EXPORT NEVER_INLINE void init_group_by_buffer_impl(
1883  int64_t* groups_buffer,
1884  const int64_t* init_vals,
1885  const uint32_t groups_buffer_entry_count,
1886  const uint32_t key_qw_count,
1887  const uint32_t agg_col_count,
1888  const bool keyless,
1889  const int8_t warp_size) {
1890 #ifndef _WIN32
1891  // the body is not really needed, just make sure the call is not optimized away
1892  assert(groups_buffer);
1893 #endif
1894 }
1895 
1896 template <typename T>
1897 ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
1898  const uint32_t h,
1899  const T* key,
1900  const uint32_t key_count,
1901  const uint32_t row_size_quad) {
1902  auto off = h * row_size_quad;
1903  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
1904  if (*row_ptr == get_empty_key<T>()) {
1905  memcpy(row_ptr, key, key_count * sizeof(T));
1906  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1907  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1908  }
1909  if (memcmp(row_ptr, key, key_count * sizeof(T)) == 0) {
1910  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1911  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1912  }
1913  return nullptr;
1914 }
1915 
1916 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value(
1917  int64_t* groups_buffer,
1918  const uint32_t h,
1919  const int64_t* key,
1920  const uint32_t key_count,
1921  const uint32_t key_width,
1922  const uint32_t row_size_quad) {
1923  switch (key_width) {
1924  case 4:
1925  return get_matching_group_value(groups_buffer,
1926  h,
1927  reinterpret_cast<const int32_t*>(key),
1928  key_count,
1929  row_size_quad);
1930  case 8:
1931  return get_matching_group_value(groups_buffer, h, key, key_count, row_size_quad);
1932  default:;
1933  }
1934  return nullptr;
1935 }
1936 
1937 template <typename T>
1938 ALWAYS_INLINE int32_t get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1939  const uint32_t entry_count,
1940  const uint32_t h,
1941  const T* key,
1942  const uint32_t key_count) {
1943  auto off = h;
1944  auto key_buffer = reinterpret_cast<T*>(groups_buffer);
1945  if (key_buffer[off] == get_empty_key<T>()) {
1946  for (size_t i = 0; i < key_count; ++i) {
1947  key_buffer[off] = key[i];
1948  off += entry_count;
1949  }
1950  return h;
1951  }
1952  off = h;
1953  for (size_t i = 0; i < key_count; ++i) {
1954  if (key_buffer[off] != key[i]) {
1955  return -1;
1956  }
1957  off += entry_count;
1958  }
1959  return h;
1960 }
1961 
1962 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1963 get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1964  const uint32_t entry_count,
1965  const uint32_t h,
1966  const int64_t* key,
1967  const uint32_t key_count,
1968  const uint32_t key_width) {
1969  switch (key_width) {
1970  case 4:
1971  return get_matching_group_value_columnar_slot(groups_buffer,
1972  entry_count,
1973  h,
1974  reinterpret_cast<const int32_t*>(key),
1975  key_count);
1976  case 8:
1977  return get_matching_group_value_columnar_slot(
1978  groups_buffer, entry_count, h, key, key_count);
1979  default:
1980  return -1;
1981  }
1982  return -1;
1983 }
1984 
1985 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value_columnar(
1986  int64_t* groups_buffer,
1987  const uint32_t h,
1988  const int64_t* key,
1989  const uint32_t key_qw_count,
1990  const size_t entry_count) {
1991  auto off = h;
1992  if (groups_buffer[off] == EMPTY_KEY_64) {
1993  for (size_t i = 0; i < key_qw_count; ++i) {
1994  groups_buffer[off] = key[i];
1995  off += entry_count;
1996  }
1997  return &groups_buffer[off];
1998  }
1999  off = h;
2000  for (size_t i = 0; i < key_qw_count; ++i) {
2001  if (groups_buffer[off] != key[i]) {
2002  return nullptr;
2003  }
2004  off += entry_count;
2005  }
2006  return &groups_buffer[off];
2007 }
2008 
2009 /*
2010  * For a particular hashed_index, returns the row-wise offset
2011  * to the first matching agg column in memory.
2012  * It also checks the corresponding group column and initializes all of its
2013  * key slots if the entry is still empty (it is assumed all group columns are
2014  * 64-bit wide).
2015  *
2016  * Memory layout:
2017  *
2018  * | prepended group columns (64-bit each) | agg columns |
2019  */
2020 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash(
2021  int64_t* groups_buffer,
2022  const uint32_t hashed_index,
2023  const int64_t* key,
2024  const uint32_t key_count,
2025  const uint32_t row_size_quad) {
2026  uint32_t off = hashed_index * row_size_quad;
2027  if (groups_buffer[off] == EMPTY_KEY_64) {
2028  for (uint32_t i = 0; i < key_count; ++i) {
2029  groups_buffer[off + i] = key[i];
2030  }
2031  }
2032  return groups_buffer + off + key_count;
2033 }
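The following self-contained sketch mirrors the row-wise layout documented above; the buffer sizes and key values are made up for illustration, and kEmptyKey stands in for EMPTY_KEY_64:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  constexpr int64_t kEmptyKey = INT64_MIN;  // stand-in for EMPTY_KEY_64
  constexpr uint32_t key_count = 2;         // two 64-bit group-by keys
  constexpr uint32_t agg_col_count = 3;     // three 64-bit aggregate slots
  constexpr uint32_t row_size_quad = key_count + agg_col_count;
  constexpr uint32_t entry_count = 4;
  std::vector<int64_t> buf(entry_count * row_size_quad, kEmptyKey);

  const uint32_t hashed_index = 1;
  const int64_t key[key_count] = {42, 7};

  // Same steps as above: initialize the key slots if the row is empty,
  // then point just past them at the aggregate columns.
  const uint32_t off = hashed_index * row_size_quad;
  if (buf[off] == kEmptyKey) {
    for (uint32_t i = 0; i < key_count; ++i) {
      buf[off + i] = key[i];
    }
  }
  int64_t* agg_cols = buf.data() + off + key_count;
  assert(buf[off] == 42 && buf[off + 1] == 7);  // key stored in-row
  assert(agg_cols - buf.data() == 7);           // aggregates follow the key
  return 0;
}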
2034 
2041 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t*
2042 get_matching_group_value_perfect_hash_keyless(int64_t* groups_buffer,
2043  const uint32_t hashed_index,
2044  const uint32_t row_size_quad) {
2045  return groups_buffer + row_size_quad * hashed_index;
2046 }
2047 
2048 /*
2049  * For a particular hashed_index, find and initialize (if necessary) all the group
2050  * columns corresponding to a key. It is assumed that all group columns are 64-bit wide.
2051  */
2052 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void
2053 set_matching_group_value_perfect_hash_columnar(int64_t* groups_buffer,
2054  const uint32_t hashed_index,
2055  const int64_t* key,
2056  const uint32_t key_count,
2057  const uint32_t entry_count) {
2058  if (groups_buffer[hashed_index] == EMPTY_KEY_64) {
2059  for (uint32_t i = 0; i < key_count; i++) {
2060  groups_buffer[i * entry_count + hashed_index] = key[i];
2061  }
2062  }
2063 }
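For contrast with the row-wise case, a brief sketch of the columnar key layout used here: group column i of entry hashed_index lives at slot i * entry_count + hashed_index. Sizes are again illustrative and kEmptyKey stands in for EMPTY_KEY_64:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  constexpr int64_t kEmptyKey = INT64_MIN;  // stand-in for EMPTY_KEY_64
  constexpr uint32_t key_count = 2;
  constexpr uint32_t entry_count = 8;
  std::vector<int64_t> buf(key_count * entry_count, kEmptyKey);

  const uint32_t hashed_index = 3;
  const int64_t key[key_count] = {10, 20};
  if (buf[hashed_index] == kEmptyKey) {
    for (uint32_t i = 0; i < key_count; ++i) {
      buf[i * entry_count + hashed_index] = key[i];  // column-major stride
    }
  }
  assert(buf[hashed_index] == 10);                // column 0, entry 3
  assert(buf[entry_count + hashed_index] == 20);  // column 1, entry 3
  return 0;
}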
2064 
2065 #include "GeoOpsRuntime.cpp"
2066 #include "GroupByRuntime.cpp"
2068 
2069 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_group_value_fast_keyless(
2070  int64_t* groups_buffer,
2071  const int64_t key,
2072  const int64_t min_key,
2073  const int64_t /* bucket */,
2074  const uint32_t row_size_quad) {
2075  return groups_buffer + row_size_quad * (key - min_key);
2076 }
2077 
2078 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_group_value_fast_keyless_semiprivate(
2079  int64_t* groups_buffer,
2080  const int64_t key,
2081  const int64_t min_key,
2082  const int64_t /* bucket */,
2083  const uint32_t row_size_quad,
2084  const uint8_t thread_warp_idx,
2085  const uint8_t warp_size) {
2086  return groups_buffer + row_size_quad * (warp_size * (key - min_key) + thread_warp_idx);
2087 }
2088 
2089 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t* extract_str_ptr(
2090  const uint64_t str_and_len) {
2091  return reinterpret_cast<int8_t*>(str_and_len & 0xffffffffffff);
2092 }
2093 
2094 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
2095 extract_str_len(const uint64_t str_and_len) {
2096  return static_cast<int64_t>(str_and_len) >> 48;
2097 }
2098 
2099 extern "C" RUNTIME_EXPORT NEVER_INLINE int8_t* extract_str_ptr_noinline(
2100  const uint64_t str_and_len) {
2101  return extract_str_ptr(str_and_len);
2102 }
2103 
2104 extern "C" RUNTIME_EXPORT NEVER_INLINE int32_t
2105 extract_str_len_noinline(const uint64_t str_and_len) {
2106  return extract_str_len(str_and_len);
2107 }
2108 
2109 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t string_pack(const int8_t* ptr,
2110  const int32_t len) {
2111  return (reinterpret_cast<const uint64_t>(ptr) & 0xffffffffffff) |
2112  (static_cast<const uint64_t>(len) << 48);
2113 }
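A round-trip sketch of the pointer/length packing above. It assumes, as the packed form does, that user-space pointers fit in 48 bits (true on typical x86-64 and AArch64 layouts) and that the length fits in the top 16 bits:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const char* s = "hello";
  const int32_t len = 5;

  // Pack: pointer in the low 48 bits, length in the high 16 bits.
  const uint64_t packed = (reinterpret_cast<uint64_t>(s) & 0xffffffffffffULL) |
                          (static_cast<uint64_t>(len) << 48);

  // Unpack with the same mask/shift used by extract_str_ptr / extract_str_len.
  const char* ptr_back = reinterpret_cast<const char*>(packed & 0xffffffffffffULL);
  const int32_t len_back = static_cast<int64_t>(packed) >> 48;

  assert(ptr_back == s);  // holds only while the pointer really fits in 48 bits
  assert(len_back == len);
  assert(std::memcmp(ptr_back, "hello", len_back) == 0);
  return 0;
}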
2114 
2115 #ifdef __clang__
2116 #include "../Utils/StringLike.cpp"
2117 #endif
2118 
2119 #ifndef __CUDACC__
2120 #include "TopKRuntime.cpp"
2121 #endif
2122 
2123 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2124 char_length(const char* str, const int32_t str_len) {
2125  return str_len;
2126 }
2127 
2128 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2129 char_length_nullable(const char* str, const int32_t str_len, const int32_t int_null) {
2130  if (!str) {
2131  return int_null;
2132  }
2133  return str_len;
2134 }
2135 
2136 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2137 key_for_string_encoded(const int32_t str_id) {
2138  return str_id;
2139 }
2140 
2141 extern "C" ALWAYS_INLINE DEVICE int32_t
2142 map_string_dict_id(const int32_t string_id,
2143  const int64_t translation_map_handle,
2144  const int32_t min_source_id) {
2145  const int32_t* translation_map =
2146  reinterpret_cast<const int32_t*>(translation_map_handle);
2147  return translation_map[string_id - min_source_id];
2148 }
2149 
2150 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE bool sample_ratio(
2151  const double proportion,
2152  const int64_t row_offset) {
2153  const int64_t threshold = 4294967296 * proportion;
2154  return (row_offset * 2654435761) % 4294967296 < threshold;
2155 }
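The predicate above scrambles the row offset with Knuth's multiplicative constant 2654435761 and reduces it modulo 2^32, so comparing against proportion * 2^32 keeps roughly a `proportion` fraction of rows. A small standalone check of that behaviour (names are illustrative):

#include <cstdint>
#include <cstdio>

bool sample_ratio_sketch(double proportion, int64_t row_offset) {
  const int64_t threshold = 4294967296 * proportion;
  return (row_offset * 2654435761) % 4294967296 < threshold;
}

int main() {
  int64_t kept = 0;
  const int64_t n = 1000000;
  for (int64_t row = 0; row < n; ++row) {
    kept += sample_ratio_sketch(0.1, row) ? 1 : 0;
  }
  // Expect roughly 10% of the rows to pass the predicate.
  std::printf("kept %lld of %lld rows\n", static_cast<long long>(kept),
              static_cast<long long>(n));
  return 0;
}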
2156 
2157 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2158 width_bucket(const double target_value,
2159  const double lower_bound,
2160  const double upper_bound,
2161  const double scale_factor,
2162  const int32_t partition_count) {
2163  if (target_value < lower_bound) {
2164  return 0;
2165  } else if (target_value >= upper_bound) {
2166  return partition_count + 1;
2167  }
2168  return ((target_value - lower_bound) * scale_factor) + 1;
2169 }
2170 
2171 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2172 width_bucket_reversed(const double target_value,
2173  const double lower_bound,
2174  const double upper_bound,
2175  const double scale_factor,
2176  const int32_t partition_count) {
2177  if (target_value > lower_bound) {
2178  return 0;
2179  } else if (target_value <= upper_bound) {
2180  return partition_count + 1;
2181  }
2182  return ((lower_bound - target_value) * scale_factor) + 1;
2183 }
2184 
2185 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
2186 width_bucket_nullable(const double target_value,
2187  const double lower_bound,
2188  const double upper_bound,
2189  const double scale_factor,
2190  const int32_t partition_count,
2191  const double null_val) {
2192  if (target_value == null_val) {
2193  return INT32_MIN;
2194  }
2195  return width_bucket(
2196  target_value, lower_bound, upper_bound, scale_factor, partition_count);
2197 }
2198 
2199 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
2200 width_bucket_reversed_nullable(const double target_value,
2201  const double lower_bound,
2202  const double upper_bound,
2203  const double scale_factor,
2204  const int32_t partition_count,
2205  const double null_val) {
2206  if (target_value == null_val) {
2207  return INT32_MIN;
2208  }
2209  return width_bucket_reversed(
2210  target_value, lower_bound, upper_bound, scale_factor, partition_count);
2211 }
2212 
2213 // A version of width_bucket without the out-of-bound check. It can be called
2214 // when the input target_value expression is guaranteed to reside in the valid
2215 // range (so null checking can be skipped as well).
2216 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2217 width_bucket_no_oob_check(const double target_value,
2218  const double lower_bound,
2219  const double scale_factor) {
2220  return ((target_value - lower_bound) * scale_factor) + 1;
2221 }
2222 
2223 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2224 width_bucket_reversed_no_oob_check(const double target_value,
2225  const double lower_bound,
2226  const double scale_factor) {
2227  return ((lower_bound - target_value) * scale_factor) + 1;
2228 }
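A small numeric check of the no-out-of-bound path described above, assuming the caller has already proven the target lies inside the bucket range: with 10 partitions over [0, 100) the scale factor is 10 / 100 = 0.1, so a target of 42.0 lands in bucket 5 (buckets are 1-based; 0 and partition_count + 1 are reserved for out-of-range values):

#include <cassert>

int main() {
  const double lower_bound = 0.0;
  const double upper_bound = 100.0;
  const int partition_count = 10;
  const double scale_factor = partition_count / (upper_bound - lower_bound);
  const double target_value = 42.0;  // known to lie inside [lower_bound, upper_bound)

  const int bucket = static_cast<int>((target_value - lower_bound) * scale_factor) + 1;
  assert(bucket == 5);
  return 0;
}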
2229 
2230 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2231 width_bucket_expr(const double target_value,
2232  const bool reversed,
2233  const double lower_bound,
2234  const double upper_bound,
2235  const int32_t partition_count) {
2236  if (reversed) {
2237  return width_bucket_reversed(target_value,
2238  lower_bound,
2239  upper_bound,
2240  partition_count / (lower_bound - upper_bound),
2241  partition_count);
2242  }
2243  return width_bucket(target_value,
2244  lower_bound,
2245  upper_bound,
2246  partition_count / (upper_bound - lower_bound),
2247  partition_count);
2248 }
2249 
2250 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2251 width_bucket_expr_nullable(const double target_value,
2252  const bool reversed,
2253  const double lower_bound,
2254  const double upper_bound,
2255  const int32_t partition_count,
2256  const double null_val) {
2257  if (target_value == null_val) {
2258  return INT32_MIN;
2259  }
2260  return width_bucket_expr(
2261  target_value, reversed, lower_bound, upper_bound, partition_count);
2262 }
2263 
2264 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2265 width_bucket_expr_no_oob_check(const double target_value,
2266  const bool reversed,
2267  const double lower_bound,
2268  const double upper_bound,
2269  const int32_t partition_count) {
2270  if (reversed) {
2271  return width_bucket_reversed_no_oob_check(
2272  target_value, lower_bound, partition_count / (lower_bound - upper_bound));
2273  }
2274  return width_bucket_no_oob_check(
2275  target_value, lower_bound, partition_count / (upper_bound - lower_bound));
2276 }
2277 
2278 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
2279 row_number_window_func(const int64_t output_buff, const int64_t pos) {
2280  return reinterpret_cast<const int64_t*>(output_buff)[pos];
2281 }
2282 
2283 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double percent_window_func(
2284  const int64_t output_buff,
2285  const int64_t pos) {
2286  return reinterpret_cast<const double*>(output_buff)[pos];
2287 }
2288 
2289 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_double(const int64_t* agg) {
2290  return *reinterpret_cast<const double*>(may_alias_ptr(agg));
2291 }
2292 
2293 extern "C" RUNTIME_EXPORT ALWAYS_INLINE float load_float(const int32_t* agg) {
2294  return *reinterpret_cast<const float*>(may_alias_ptr(agg));
2295 }
2296 
2297 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_int(const int64_t* sum,
2298  const int64_t* count,
2299  const double null_val) {
2300  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
2301 }
2302 
2303 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_decimal(const int64_t* sum,
2304  const int64_t* count,
2305  const double null_val,
2306  const uint32_t scale) {
2307  return *count != 0 ? (static_cast<double>(*sum) / pow(10, scale)) / *count : null_val;
2308 }
2309 
2310 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_double(const int64_t* agg,
2311  const int64_t* count,
2312  const double null_val) {
2313  return *count != 0 ? *reinterpret_cast<const double*>(may_alias_ptr(agg)) / *count
2314  : null_val;
2315 }
2316 
2317 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_float(const int32_t* agg,
2318  const int32_t* count,
2319  const double null_val) {
2320  return *count != 0 ? *reinterpret_cast<const float*>(may_alias_ptr(agg)) / *count
2321  : null_val;
2322 }
2323 
2324 extern "C" RUNTIME_EXPORT NEVER_INLINE void linear_probabilistic_count(
2325  uint8_t* bitmap,
2326  const uint32_t bitmap_bytes,
2327  const uint8_t* key_bytes,
2328  const uint32_t key_len) {
2329  const uint32_t bit_pos = MurmurHash3(key_bytes, key_len, 0) % (bitmap_bytes * 8);
2330  const uint32_t word_idx = bit_pos / 32;
2331  const uint32_t bit_idx = bit_pos % 32;
2332  reinterpret_cast<uint32_t*>(bitmap)[word_idx] |= 1 << bit_idx;
2333 }
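The bit-setting step above, restated as a standalone sketch with a stand-in hash value (the real code hashes the key bytes with MurmurHash3 from this repository):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const uint32_t bitmap_bytes = 64;
  std::vector<uint8_t> bitmap(bitmap_bytes, 0);

  const uint32_t hash = 12345;  // stand-in for MurmurHash3(key_bytes, key_len, 0)
  const uint32_t bit_pos = hash % (bitmap_bytes * 8);
  const uint32_t word_idx = bit_pos / 32;
  const uint32_t bit_idx = bit_pos % 32;
  reinterpret_cast<uint32_t*>(bitmap.data())[word_idx] |= 1u << bit_idx;

  assert(reinterpret_cast<uint32_t*>(bitmap.data())[word_idx] & (1u << bit_idx));
  return 0;
}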
2334 
2335 extern "C" RUNTIME_EXPORT NEVER_INLINE void query_stub_hoisted_literals(
2336  const int8_t** col_buffers,
2337  const int8_t* literals,
2338  const int64_t* num_rows,
2339  const uint64_t* frag_row_offsets,
2340  const int32_t* max_matched,
2341  const int64_t* init_agg_value,
2342  int64_t** out,
2343  uint32_t frag_idx,
2344  const int64_t* join_hash_tables,
2345  int32_t* error_code,
2346  int32_t* total_matched) {
2347 #ifndef _WIN32
2348  assert(col_buffers || literals || num_rows || frag_row_offsets || max_matched ||
2349  init_agg_value || out || frag_idx || error_code || join_hash_tables ||
2350  total_matched);
2351 #endif
2352 }
2353 
2354 extern "C" RUNTIME_EXPORT void multifrag_query_hoisted_literals(
2355  const int8_t*** col_buffers,
2356  const uint64_t* num_fragments,
2357  const int8_t* literals,
2358  const int64_t* num_rows,
2359  const uint64_t* frag_row_offsets,
2360  const int32_t* max_matched,
2361  int32_t* total_matched,
2362  const int64_t* init_agg_value,
2363  int64_t** out,
2364  int32_t* error_code,
2365  const uint32_t* num_tables_ptr,
2366  const int64_t* join_hash_tables) {
2367  for (uint32_t i = 0; i < *num_fragments; ++i) {
2368  query_stub_hoisted_literals(col_buffers ? col_buffers[i] : nullptr,
2369  literals,
2370  &num_rows[i * (*num_tables_ptr)],
2371  &frag_row_offsets[i * (*num_tables_ptr)],
2372  max_matched,
2373  init_agg_value,
2374  out,
2375  i,
2376  join_hash_tables,
2377  total_matched,
2378  error_code);
2379  }
2380 }
2381 
2382 extern "C" RUNTIME_EXPORT NEVER_INLINE void query_stub(const int8_t** col_buffers,
2383  const int64_t* num_rows,
2384  const uint64_t* frag_row_offsets,
2385  const int32_t* max_matched,
2386  const int64_t* init_agg_value,
2387  int64_t** out,
2388  uint32_t frag_idx,
2389  const int64_t* join_hash_tables,
2390  int32_t* error_code,
2391  int32_t* total_matched) {
2392 #ifndef _WIN32
2393  assert(col_buffers || num_rows || frag_row_offsets || max_matched || init_agg_value ||
2394  out || frag_idx || error_code || join_hash_tables || total_matched);
2395 #endif
2396 }
2397 
2398 extern "C" RUNTIME_EXPORT void multifrag_query(const int8_t*** col_buffers,
2399  const uint64_t* num_fragments,
2400  const int64_t* num_rows,
2401  const uint64_t* frag_row_offsets,
2402  const int32_t* max_matched,
2403  int32_t* total_matched,
2404  const int64_t* init_agg_value,
2405  int64_t** out,
2406  int32_t* error_code,
2407  const uint32_t* num_tables_ptr,
2408  const int64_t* join_hash_tables) {
2409  for (uint32_t i = 0; i < *num_fragments; ++i) {
2410  query_stub(col_buffers ? col_buffers[i] : nullptr,
2411  &num_rows[i * (*num_tables_ptr)],
2412  &frag_row_offsets[i * (*num_tables_ptr)],
2413  max_matched,
2414  init_agg_value,
2415  out,
2416  i,
2417  join_hash_tables,
2418  total_matched,
2419  error_code);
2420  }
2421 }
2422 
2423 extern "C" RUNTIME_EXPORT bool check_interrupt() {
2424  if (check_interrupt_init(static_cast<unsigned>(INT_CHECK))) {
2425  return true;
2426  }
2427  return false;
2428 }
2429 
2430 extern "C" RUNTIME_EXPORT bool check_interrupt_init(unsigned command) {
2431  static std::atomic_bool runtime_interrupt_flag{false};
2432 
2433  if (command == static_cast<unsigned>(INT_CHECK)) {
2434  if (runtime_interrupt_flag.load()) {
2435  return true;
2436  }
2437  return false;
2438  }
2439  if (command == static_cast<unsigned>(INT_ABORT)) {
2440  runtime_interrupt_flag.store(true);
2441  return false;
2442  }
2443  if (command == static_cast<unsigned>(INT_RESET)) {
2444  runtime_interrupt_flag.store(false);
2445  return false;
2446  }
2447  return false;
2448 }
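The same command-driven flag pattern, as a self-contained sketch: one thread raises the flag, the polling side observes it, and a reset clears it before the next query. CHECK/ABORT/RESET are stand-ins for INT_CHECK/INT_ABORT/INT_RESET:

#include <atomic>
#include <cassert>

enum Command { CHECK, ABORT, RESET };  // stand-ins for INT_CHECK / INT_ABORT / INT_RESET

bool interrupt_flag_sketch(Command command) {
  static std::atomic_bool flag{false};
  switch (command) {
    case CHECK:
      return flag.load();
    case ABORT:
      flag.store(true);
      return false;
    case RESET:
      flag.store(false);
      return false;
  }
  return false;
}

int main() {
  assert(!interrupt_flag_sketch(CHECK));  // nothing pending
  interrupt_flag_sketch(ABORT);           // e.g. another thread requests an interrupt
  assert(interrupt_flag_sketch(CHECK));   // the polling side now observes it
  interrupt_flag_sketch(RESET);           // cleared before the next query
  assert(!interrupt_flag_sketch(CHECK));
  return 0;
}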