OmniSciDB  c1a53651b2
RuntimeFunctions.cpp
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef __CUDACC__
18 #error This code is not intended to be compiled with a CUDA C++ compiler
19 #endif // __CUDACC__
20 
21 #include "RuntimeFunctions.h"
22 #include "BufferCompaction.h"
23 #include "HyperLogLogRank.h"
24 #include "MurmurHash.h"
25 #include "Shared/Datum.h"
26 #include "Shared/quantile.h"
27 #include "TypePunning.h"
28 #include "Utils/SegmentTreeUtils.h"
29 
30 #include <atomic>
31 #include <chrono>
32 #include <cmath>
33 #include <cstring>
34 #include <functional>
35 #include <thread>
36 #include <tuple>
37 
38 // decoder implementations
39 
40 #include "DecodersImpl.h"
41 
42 // arithmetic operator implementations
43 
44 #define DEF_ARITH_NULLABLE(type, null_type, opname, opsym) \
45  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable( \
46  const type lhs, const type rhs, const null_type null_val) { \
47  if (lhs != null_val && rhs != null_val) { \
48  return lhs opsym rhs; \
49  } \
50  return null_val; \
51  }
52 
53 #define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym) \
54  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_lhs( \
55  const type lhs, const type rhs, const null_type null_val) { \
56  if (lhs != null_val) { \
57  return lhs opsym rhs; \
58  } \
59  return null_val; \
60  }
61 
62 #define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym) \
63  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_rhs( \
64  const type lhs, const type rhs, const null_type null_val) { \
65  if (rhs != null_val) { \
66  return lhs opsym rhs; \
67  } \
68  return null_val; \
69  }
70 
71 #define DEF_CMP_NULLABLE(type, null_type, opname, opsym) \
72  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable( \
73  const type lhs, \
74  const type rhs, \
75  const null_type null_val, \
76  const int8_t null_bool_val) { \
77  if (lhs != null_val && rhs != null_val) { \
78  return lhs opsym rhs; \
79  } \
80  return null_bool_val; \
81  }
82 
83 #define DEF_CMP_NULLABLE_LHS(type, null_type, opname, opsym) \
84  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_lhs( \
85  const type lhs, \
86  const type rhs, \
87  const null_type null_val, \
88  const int8_t null_bool_val) { \
89  if (lhs != null_val) { \
90  return lhs opsym rhs; \
91  } \
92  return null_bool_val; \
93  }
94 
95 #define DEF_CMP_NULLABLE_RHS(type, null_type, opname, opsym) \
96  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_rhs( \
97  const type lhs, \
98  const type rhs, \
99  const null_type null_val, \
100  const int8_t null_bool_val) { \
101  if (rhs != null_val) { \
102  return lhs opsym rhs; \
103  } \
104  return null_bool_val; \
105  }
106 
107 #define DEF_SAFE_DIV_NULLABLE(type, null_type, opname) \
108  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type safe_div_##type( \
109  const type lhs, const type rhs, const null_type null_val) { \
110  if (lhs != null_val && rhs != null_val && rhs != 0) { \
111  return lhs / rhs; \
112  } \
113  return null_val; \
114  }
115 
116 #define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type) \
117  DEF_ARITH_NULLABLE(type, null_type, add, +) \
118  DEF_ARITH_NULLABLE(type, null_type, sub, -) \
119  DEF_ARITH_NULLABLE(type, null_type, mul, *) \
120  DEF_ARITH_NULLABLE(type, null_type, div, /) \
121  DEF_SAFE_DIV_NULLABLE(type, null_type, safe_div) \
122  DEF_ARITH_NULLABLE_LHS(type, null_type, add, +) \
123  DEF_ARITH_NULLABLE_LHS(type, null_type, sub, -) \
124  DEF_ARITH_NULLABLE_LHS(type, null_type, mul, *) \
125  DEF_ARITH_NULLABLE_LHS(type, null_type, div, /) \
126  DEF_ARITH_NULLABLE_RHS(type, null_type, add, +) \
127  DEF_ARITH_NULLABLE_RHS(type, null_type, sub, -) \
128  DEF_ARITH_NULLABLE_RHS(type, null_type, mul, *) \
129  DEF_ARITH_NULLABLE_RHS(type, null_type, div, /) \
130  DEF_CMP_NULLABLE(type, null_type, eq, ==) \
131  DEF_CMP_NULLABLE(type, null_type, ne, !=) \
132  DEF_CMP_NULLABLE(type, null_type, lt, <) \
133  DEF_CMP_NULLABLE(type, null_type, gt, >) \
134  DEF_CMP_NULLABLE(type, null_type, le, <=) \
135  DEF_CMP_NULLABLE(type, null_type, ge, >=) \
136  DEF_CMP_NULLABLE_LHS(type, null_type, eq, ==) \
137  DEF_CMP_NULLABLE_LHS(type, null_type, ne, !=) \
138  DEF_CMP_NULLABLE_LHS(type, null_type, lt, <) \
139  DEF_CMP_NULLABLE_LHS(type, null_type, gt, >) \
140  DEF_CMP_NULLABLE_LHS(type, null_type, le, <=) \
141  DEF_CMP_NULLABLE_LHS(type, null_type, ge, >=) \
142  DEF_CMP_NULLABLE_RHS(type, null_type, eq, ==) \
143  DEF_CMP_NULLABLE_RHS(type, null_type, ne, !=) \
144  DEF_CMP_NULLABLE_RHS(type, null_type, lt, <) \
145  DEF_CMP_NULLABLE_RHS(type, null_type, gt, >) \
146  DEF_CMP_NULLABLE_RHS(type, null_type, le, <=) \
147  DEF_CMP_NULLABLE_RHS(type, null_type, ge, >=)
148 
149 DEF_BINARY_NULLABLE_ALL_OPS(int8_t, int64_t)
150 DEF_BINARY_NULLABLE_ALL_OPS(int16_t, int64_t)
151 DEF_BINARY_NULLABLE_ALL_OPS(int32_t, int64_t)
152 DEF_BINARY_NULLABLE_ALL_OPS(int64_t, int64_t)
153 DEF_BINARY_NULLABLE_ALL_OPS(float, float)
154 DEF_BINARY_NULLABLE_ALL_OPS(double, double)
155 DEF_ARITH_NULLABLE(int8_t, int64_t, mod, %)
156 DEF_ARITH_NULLABLE(int16_t, int64_t, mod, %)
157 DEF_ARITH_NULLABLE(int32_t, int64_t, mod, %)
158 DEF_ARITH_NULLABLE(int64_t, int64_t, mod, %)
159 DEF_ARITH_NULLABLE_LHS(int8_t, int64_t, mod, %)
160 DEF_ARITH_NULLABLE_LHS(int16_t, int64_t, mod, %)
161 DEF_ARITH_NULLABLE_LHS(int32_t, int64_t, mod, %)
162 DEF_ARITH_NULLABLE_LHS(int64_t, int64_t, mod, %)
163 DEF_ARITH_NULLABLE_RHS(int8_t, int64_t, mod, %)
164 DEF_ARITH_NULLABLE_RHS(int16_t, int64_t, mod, %)
165 DEF_ARITH_NULLABLE_RHS(int32_t, int64_t, mod, %)
166 DEF_ARITH_NULLABLE_RHS(int64_t, int64_t, mod, %)
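// Illustrative expansion (sketch, derived mechanically from the macro above):
// DEF_ARITH_NULLABLE(int32_t, int64_t, add, +) produces a function equivalent to
//
//   extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t add_int32_t_nullable(
//       const int32_t lhs, const int32_t rhs, const int64_t null_val) {
//     if (lhs != null_val && rhs != null_val) {
//       return lhs + rhs;
//     }
//     return null_val;
//   }
//
// The comparison macros follow the same pattern but return null_bool_val when
// either operand equals the null sentinel.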
167 
168 #undef DEF_BINARY_NULLABLE_ALL_OPS
169 #undef DEF_SAFE_DIV_NULLABLE
170 #undef DEF_CMP_NULLABLE_RHS
171 #undef DEF_CMP_NULLABLE_LHS
172 #undef DEF_CMP_NULLABLE
173 #undef DEF_ARITH_NULLABLE_RHS
174 #undef DEF_ARITH_NULLABLE_LHS
175 #undef DEF_ARITH_NULLABLE
176 
177 #define DEF_MAP_STRING_TO_DATUM(value_type, value_name) \
178  extern "C" ALWAYS_INLINE DEVICE value_type map_string_to_datum_##value_name( \
179  const int32_t string_id, \
180  const int64_t translation_map_handle, \
181  const int32_t min_source_id) { \
182  const Datum* translation_map = \
183  reinterpret_cast<const Datum*>(translation_map_handle); \
184  const Datum& out_datum = translation_map[string_id - min_source_id]; \
185  return out_datum.value_name##val; \
186  }
187 
188 DEF_MAP_STRING_TO_DATUM(int8_t, bool)
189 DEF_MAP_STRING_TO_DATUM(int8_t, tinyint)
190 DEF_MAP_STRING_TO_DATUM(int16_t, smallint)
191 DEF_MAP_STRING_TO_DATUM(int32_t, int)
192 DEF_MAP_STRING_TO_DATUM(int64_t, bigint)
193 DEF_MAP_STRING_TO_DATUM(float, float)
194 DEF_MAP_STRING_TO_DATUM(double, double)
195 
196 #undef DEF_MAP_STRING_TO_DATUM
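// Illustrative expansion (sketch): DEF_MAP_STRING_TO_DATUM(int32_t, int) above
// generates map_string_to_datum_int(), which reinterprets translation_map_handle
// as a Datum array, indexes it with string_id - min_source_id, and returns the
// looked-up Datum's intval member.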
197 
198 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
199 scale_decimal_up(const int64_t operand,
200  const uint64_t scale,
201  const int64_t operand_null_val,
202  const int64_t result_null_val) {
203  return operand != operand_null_val ? operand * scale : result_null_val;
204 }
205 
206 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
207 scale_decimal_down_nullable(const int64_t operand,
208  const int64_t scale,
209  const int64_t null_val) {
210  // rounded scale down of a decimal
211  if (operand == null_val) {
212  return null_val;
213  }
214 
215  int64_t tmp = scale >> 1;
216  tmp = operand >= 0 ? operand + tmp : operand - tmp;
217  return tmp / scale;
218 }
219 
220 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
221 scale_decimal_down_not_nullable(const int64_t operand,
222  const int64_t scale,
223  const int64_t null_val) {
224  int64_t tmp = scale >> 1;
225  tmp = operand >= 0 ? operand + tmp : operand - tmp;
226  return tmp / scale;
227 }
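// Worked example of the rounded scale-down: with operand = 1250 (12.50 at
// scale 100) and scale = 100, tmp = 50 and (1250 + 50) / 100 == 13, while
// operand = -1250 yields (-1250 - 50) / 100 == -13.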
228 
229 // Return floor(dividend / divisor).
230 // Assumes 0 < divisor.
231 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend,
232  const int64_t divisor) {
233  return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
234 }
235 
236 // Return floor(dividend / divisor) or NULL if dividend IS NULL.
237 // Assumes 0 < divisor.
238 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
239 floor_div_nullable_lhs(const int64_t dividend,
240  const int64_t divisor,
241  const int64_t null_val) {
242  return dividend == null_val ? null_val : floor_div_lhs(dividend, divisor);
243 }
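// Worked example: floor_div_lhs(-7, 2) computes (-7 - (2 - 1)) / 2 == -4,
// i.e. floor(-3.5), whereas plain integer division would truncate to -3.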
244 
245 #define DEF_UMINUS_NULLABLE(type, null_type) \
246  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type uminus_##type##_nullable( \
247  const type operand, const null_type null_val) { \
248  return operand == null_val ? null_val : -operand; \
249  }
250 
251 DEF_UMINUS_NULLABLE(int8_t, int8_t)
252 DEF_UMINUS_NULLABLE(int16_t, int16_t)
253 DEF_UMINUS_NULLABLE(int32_t, int32_t)
254 DEF_UMINUS_NULLABLE(int64_t, int64_t)
255 DEF_UMINUS_NULLABLE(float, float)
256 DEF_UMINUS_NULLABLE(double, double)
257 
258 #undef DEF_UMINUS_NULLABLE
259 
260 #define DEF_CAST_NULLABLE(from_type, to_type) \
261  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
262  cast_##from_type##_to_##to_type##_nullable(const from_type operand, \
263  const from_type from_null_val, \
264  const to_type to_null_val) { \
265  return operand == from_null_val ? to_null_val : operand; \
266  }
267 
268 #define DEF_CAST_SCALED_NULLABLE(from_type, to_type) \
269  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
270  cast_##from_type##_to_##to_type##_scaled_nullable(const from_type operand, \
271  const from_type from_null_val, \
272  const to_type to_null_val, \
273  const to_type multiplier) { \
274  return operand == from_null_val ? to_null_val : multiplier * operand; \
275  }
276 
277 #define DEF_CAST_NULLABLE_BIDIR(type1, type2) \
278  DEF_CAST_NULLABLE(type1, type2) \
279  DEF_CAST_NULLABLE(type2, type1)
280 
281 #define DEF_ROUND_NULLABLE(from_type, to_type) \
282  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
283  cast_##from_type##_to_##to_type##_nullable(const from_type operand, \
284  const from_type from_null_val, \
285  const to_type to_null_val) { \
286  return operand == from_null_val \
287  ? to_null_val \
288  : static_cast<to_type>(operand + (operand < from_type(0) \
289  ? from_type(-0.5) \
290  : from_type(0.5))); \
291  }
292 
293 DEF_CAST_NULLABLE_BIDIR(int8_t, int16_t)
294 DEF_CAST_NULLABLE_BIDIR(int8_t, int32_t)
295 DEF_CAST_NULLABLE_BIDIR(int8_t, int64_t)
296 DEF_CAST_NULLABLE_BIDIR(int16_t, int32_t)
297 DEF_CAST_NULLABLE_BIDIR(int16_t, int64_t)
298 DEF_CAST_NULLABLE_BIDIR(int32_t, int64_t)
299 DEF_CAST_NULLABLE_BIDIR(float, double)
300 
301 DEF_CAST_NULLABLE(int8_t, float)
302 DEF_CAST_NULLABLE(int16_t, float)
303 DEF_CAST_NULLABLE(int32_t, float)
304 DEF_CAST_NULLABLE(int64_t, float)
305 DEF_CAST_NULLABLE(int8_t, double)
306 DEF_CAST_NULLABLE(int16_t, double)
307 DEF_CAST_NULLABLE(int32_t, double)
308 DEF_CAST_NULLABLE(int64_t, double)
309 
310 DEF_ROUND_NULLABLE(float, int8_t)
311 DEF_ROUND_NULLABLE(float, int16_t)
312 DEF_ROUND_NULLABLE(float, int32_t)
313 DEF_ROUND_NULLABLE(float, int64_t)
314 DEF_ROUND_NULLABLE(double, int8_t)
315 DEF_ROUND_NULLABLE(double, int16_t)
316 DEF_ROUND_NULLABLE(double, int32_t)
317 DEF_ROUND_NULLABLE(double, int64_t)
318 
319 DEF_CAST_NULLABLE(uint8_t, int32_t)
320 DEF_CAST_NULLABLE(uint16_t, int32_t)
321 DEF_CAST_SCALED_NULLABLE(int64_t, float)
322 DEF_CAST_SCALED_NULLABLE(int64_t, double)
323 
324 #undef DEF_ROUND_NULLABLE
325 #undef DEF_CAST_NULLABLE_BIDIR
326 #undef DEF_CAST_SCALED_NULLABLE
327 #undef DEF_CAST_NULLABLE
328 
329 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_not(const int8_t operand,
330  const int8_t null_val) {
331  return operand == null_val ? operand : (operand ? 0 : 1);
332 }
333 
334 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_and(const int8_t lhs,
335  const int8_t rhs,
336  const int8_t null_val) {
337  if (lhs == null_val) {
338  return rhs == 0 ? rhs : null_val;
339  }
340  if (rhs == null_val) {
341  return lhs == 0 ? lhs : null_val;
342  }
343  return (lhs && rhs) ? 1 : 0;
344 }
345 
346 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_or(const int8_t lhs,
347  const int8_t rhs,
348  const int8_t null_val) {
349  if (lhs == null_val) {
350  return rhs == 0 ? null_val : rhs;
351  }
352  if (rhs == null_val) {
353  return lhs == 0 ? null_val : lhs;
354  }
355  return (lhs || rhs) ? 1 : 0;
356 }
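// These implement SQL three-valued logic with null_val as the sentinel, e.g.
// logical_and(null_val, 0, null_val) == 0 and logical_or(null_val, 1, null_val)
// == 1, while logical_and(null_val, 1, null_val) and
// logical_or(null_val, 0, null_val) both remain null_val.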
357 
358 // aggregator implementations
359 
360 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count(uint64_t* agg, const int64_t) {
361  return (*agg)++;
362 }
363 
364 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void
365 agg_count_distinct_bitmap(int64_t* agg, const int64_t val, const int64_t min_val) {
366  const uint64_t bitmap_idx = val - min_val;
367  reinterpret_cast<int8_t*>(*agg)[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
368 }
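// Example: with min_val == 10, a value of 25 maps to bitmap_idx == 15 and sets
// bit 7 of byte 1 (15 >> 3 == 1, 15 & 7 == 7) in the bitmap addressed by *agg.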
369 
370 #ifdef _MSC_VER
371 #define GPU_RT_STUB NEVER_INLINE
372 #else
373 #define GPU_RT_STUB NEVER_INLINE __attribute__((optnone))
374 #endif
375 
377  const int64_t,
378  const int64_t,
379  const int64_t,
380  const int64_t,
381  const uint64_t,
382  const uint64_t) {}
383 
384 extern "C" RUNTIME_EXPORT NEVER_INLINE void
385 agg_approximate_count_distinct(int64_t* agg, const int64_t key, const uint32_t b) {
386  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
387  const uint32_t index = hash >> (64 - b);
388  const uint8_t rank = get_rank(hash << b, 64 - b);
389  uint8_t* M = reinterpret_cast<uint8_t*>(*agg);
390  M[index] = std::max(M[index], rank);
391 }
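// This is the HyperLogLog update step: the top b bits of the 64-bit MurmurHash
// select one of 2^b registers, and each register keeps the maximum rank
// (position of the first set bit) observed in the remaining 64 - b hash bits.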
392 
394  const int64_t,
395  const uint32_t,
396  const int64_t,
397  const int64_t) {}
398 
399 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t bit_is_set(const int64_t bitset,
400  const int64_t val,
401  const int64_t min_val,
402  const int64_t max_val,
403  const int64_t null_val,
404  const int8_t null_bool_val) {
405  if (val == null_val) {
406  return null_bool_val;
407  }
408  if (val < min_val || val > max_val) {
409  return 0;
410  }
411  if (!bitset) {
412  return 0;
413  }
414  const uint64_t bitmap_idx = val - min_val;
415  return (reinterpret_cast<const int8_t*>(bitset))[bitmap_idx >> 3] &
416  (1 << (bitmap_idx & 7))
417  ? 1
418  : 0;
419 }
420 
421 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
422 compute_int64_t_lower_bound(const int64_t entry_cnt,
423  const int64_t target_value,
424  const int64_t* col_buf) {
425  int64_t l = 0;
426  int64_t h = entry_cnt - 1;
427  while (l < h) {
428  int64_t mid = l + (h - l) / 2;
429  if (target_value < col_buf[mid]) {
430  h = mid;
431  } else {
432  l = mid + 1;
433  }
434  }
435  return l;
436 }
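// Example: for a sorted col_buf of {1, 3, 3, 5} and target_value == 3, the
// binary search converges on index 3 (the slot holding 5), the first position
// past the run of equal values within [0, entry_cnt - 1].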
437 
438 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
439 get_valid_buf_start_pos(const int64_t null_start_pos, const int64_t null_end_pos) {
440  return null_start_pos == 0 ? null_end_pos + 1 : 0;
441 }
442 
443 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
444 get_valid_buf_end_pos(const int64_t num_elems,
445  const int64_t null_start_pos,
446  const int64_t null_end_pos) {
447  return null_end_pos == num_elems ? null_start_pos : num_elems;
448 }
449 
450 template <typename T, typename Comparator>
451 inline int64_t compute_current_row_idx_in_frame(const int64_t num_elems,
452  const int64_t cur_row_idx,
453  const T* col_buf,
454  const int32_t* partition_rowid_buf,
455  const int64_t* ordered_index_buf,
456  const T null_val,
457  const bool nulls_first,
458  const int64_t null_start_pos,
459  const int64_t null_end_pos,
460  Comparator cmp) {
461  const auto target_value = col_buf[cur_row_idx];
462  if (target_value == null_val) {
463  for (int64_t target_offset = null_start_pos; target_offset < null_end_pos;
464  target_offset++) {
465  const auto candidate_offset = partition_rowid_buf[ordered_index_buf[target_offset]];
466  if (candidate_offset == cur_row_idx) {
467  return target_offset;
468  }
469  }
470  }
471  auto const modified_null_end_pos = nulls_first ? null_end_pos - 1 : null_end_pos;
472  int64_t l = get_valid_buf_start_pos(null_start_pos, modified_null_end_pos);
473  int64_t h = get_valid_buf_end_pos(num_elems, null_start_pos, modified_null_end_pos);
474  while (l < h) {
475  int64_t mid = l + (h - l) / 2;
476  auto const target_row_idx = partition_rowid_buf[ordered_index_buf[mid]];
477  auto const cur_value = col_buf[target_row_idx];
478  if (cmp(target_value, cur_value)) {
479  h = mid;
480  } else {
481  l = mid + 1;
482  }
483  }
484  int64_t target_offset = l;
485  int64_t candidate_row_idx = partition_rowid_buf[ordered_index_buf[target_offset]];
486  while (col_buf[candidate_row_idx] == target_value) {
487  if (candidate_row_idx == cur_row_idx) {
488  return target_offset;
489  }
490  candidate_row_idx = partition_rowid_buf[ordered_index_buf[++target_offset]];
491  }
492  return -1;
493 }
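// In short: a null target value is located by scanning the
// [null_start_pos, null_end_pos) range directly, while a non-null value is
// found by a binary search over the ordered index followed by a forward scan
// through ties until the physical row id matches cur_row_idx (-1 otherwise).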
494 
495 #define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(value_type, oper_name) \
496  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
497  compute_##value_type##_##oper_name##_current_row_idx_in_frame( \
498  const int64_t num_elems, \
499  const int64_t cur_row_idx, \
500  const value_type* col_buf, \
501  const int32_t* partition_rowid_buf, \
502  const int64_t* ordered_index_buf, \
503  const value_type null_val, \
504  const bool nulls_first, \
505  const int64_t null_start_pos, \
506  const int64_t null_end_pos) { \
507  return compute_current_row_idx_in_frame<value_type>(num_elems, \
508  cur_row_idx, \
509  col_buf, \
510  partition_rowid_buf, \
511  ordered_index_buf, \
512  null_val, \
513  nulls_first, \
514  null_start_pos, \
515  null_end_pos, \
516  std::oper_name<value_type>{}); \
517  }
518 #define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES(oper_name) \
519  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int8_t, oper_name) \
520  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int16_t, oper_name) \
521  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int32_t, oper_name) \
522  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int64_t, oper_name) \
523  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(float, oper_name) \
524  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(double, oper_name)
525 
528 
529 #undef DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES
530 #undef DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME
531 
532 template <typename TARGET_VAL_TYPE, typename COL_TYPE, typename NULL_TYPE>
533 inline int64_t compute_lower_bound_from_ordered_partition_index(
534  const int64_t num_elems,
535  const TARGET_VAL_TYPE target_val,
536  const COL_TYPE* col_buf,
537  const int32_t* partition_rowid_buf,
538  const int64_t* ordered_index_buf,
539  const NULL_TYPE null_val,
540  const bool nulls_first,
541  const int64_t null_start_offset,
542  const int64_t null_end_offset) {
543  if (target_val == null_val) {
544  return null_start_offset;
545  }
546  auto const modified_null_end_pos = nulls_first ? null_end_offset - 1 : null_end_offset;
547  int64_t l = get_valid_buf_start_pos(null_start_offset, modified_null_end_pos);
548  int64_t h = get_valid_buf_end_pos(num_elems, null_start_offset, modified_null_end_pos);
549  while (l < h) {
550  int64_t mid = l + (h - l) / 2;
551  if (target_val <= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) {
552  h = mid;
553  } else {
554  l = mid + 1;
555  }
556  }
557  return l;
558 }
559 
560 #define DEF_RANGE_MODE_FRAME_LOWER_BOUND( \
561  target_val_type, col_type, null_type, opname, opsym) \
562  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
563  range_mode_##target_val_type##_##col_type##_##null_type##_##opname##_frame_lower_bound( \
564  const int64_t num_elems, \
565  const target_val_type target_value, \
566  const col_type* col_buf, \
567  const int32_t* partition_rowid_buf, \
568  const int64_t* ordered_index_buf, \
569  const int64_t frame_bound_val, \
570  const null_type null_val, \
571  const bool nulls_first, \
572  const int64_t null_start_pos, \
573  const int64_t null_end_pos) { \
574  if (target_value == null_val) { \
575  return null_start_pos; \
576  } \
577  target_val_type new_val = target_value opsym frame_bound_val; \
578  return compute_lower_bound_from_ordered_partition_index<target_val_type, \
579  col_type, \
580  null_type>( \
581  num_elems, \
582  new_val, \
583  col_buf, \
584  partition_rowid_buf, \
585  ordered_index_buf, \
586  null_val, \
587  nulls_first, \
588  null_start_pos, \
589  null_end_pos); \
590  }
591 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int8_t, int8_t, int8_t, add, +)
592 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int8_t, int8_t, int8_t, sub, -)
593 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, int16_t, int16_t, add, +)
594 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, int16_t, int16_t, sub, -)
595 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, int16_t, int64_t, add, +)
596 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, int16_t, int64_t, sub, -)
597 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, int32_t, int32_t, add, +)
598 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, int32_t, int32_t, sub, -)
599 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, int32_t, int64_t, add, +)
600 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, int32_t, int64_t, sub, -)
601 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int16_t, int64_t, add, +)
602 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int16_t, int64_t, sub, -)
603 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int32_t, int64_t, add, +)
604 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int32_t, int64_t, sub, -)
605 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int64_t, int64_t, add, +)
606 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int64_t, int64_t, sub, -)
607 DEF_RANGE_MODE_FRAME_LOWER_BOUND(float, float, float, add, +)
608 DEF_RANGE_MODE_FRAME_LOWER_BOUND(float, float, float, sub, -)
609 DEF_RANGE_MODE_FRAME_LOWER_BOUND(double, double, double, add, +)
610 DEF_RANGE_MODE_FRAME_LOWER_BOUND(double, double, double, sub, -)
611 #undef DEF_RANGE_MODE_FRAME_LOWER_BOUND
612 
613 template <typename TARGET_VAL_TYPE, typename COL_TYPE, typename NULL_TYPE>
614 inline int64_t compute_upper_bound_from_ordered_partition_index(
615  const int64_t num_elems,
616  const TARGET_VAL_TYPE target_val,
617  const COL_TYPE* col_buf,
618  const int32_t* partition_rowid_buf,
619  const int64_t* ordered_index_buf,
620  const NULL_TYPE null_val,
621  const bool nulls_first,
622  const int64_t null_start_offset,
623  const int64_t null_end_offset) {
624  if (target_val == null_val) {
625  return null_end_offset;
626  }
627  auto const modified_null_end_pos = nulls_first ? null_end_offset - 1 : null_end_offset;
628  int64_t l = get_valid_buf_start_pos(null_start_offset, modified_null_end_pos);
629  int64_t h = get_valid_buf_end_pos(num_elems, null_start_offset, modified_null_end_pos);
630  while (l < h) {
631  int64_t mid = l + (h - l) / 2;
632  if (target_val >= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) {
633  l = mid + 1;
634  } else {
635  h = mid;
636  }
637  }
638  return l;
639 }
640 
641 #define DEF_RANGE_MODE_FRAME_UPPER_BOUND( \
642  target_val_type, col_type, null_type, opname, opsym) \
643  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
644  range_mode_##target_val_type##_##col_type##_##null_type##_##opname##_frame_upper_bound( \
645  const int64_t num_elems, \
646  const target_val_type target_value, \
647  const col_type* col_buf, \
648  const int32_t* partition_rowid_buf, \
649  const int64_t* ordered_index_buf, \
650  const int64_t frame_bound_val, \
651  const null_type null_val, \
652  const bool nulls_first, \
653  const int64_t null_start_pos, \
654  const int64_t null_end_pos) { \
655  if (target_value == null_val) { \
656  return null_end_pos; \
657  } \
658  target_val_type new_val = target_value opsym frame_bound_val; \
659  return compute_upper_bound_from_ordered_partition_index<target_val_type, \
660  col_type, \
661  null_type>( \
662  num_elems, \
663  new_val, \
664  col_buf, \
665  partition_rowid_buf, \
666  ordered_index_buf, \
667  null_val, \
668  nulls_first, \
669  null_start_pos, \
670  null_end_pos); \
671  }
672 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int8_t, int8_t, int8_t, add, +)
673 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int8_t, int8_t, int8_t, sub, -)
674 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, int16_t, int16_t, add, +)
675 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, int16_t, int16_t, sub, -)
676 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, int16_t, int64_t, add, +)
677 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, int16_t, int64_t, sub, -)
678 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, int32_t, int32_t, add, +)
679 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, int32_t, int32_t, sub, -)
680 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, int32_t, int64_t, add, +)
681 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, int32_t, int64_t, sub, -)
682 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int16_t, int64_t, add, +)
683 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int16_t, int64_t, sub, -)
684 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int32_t, int64_t, add, +)
685 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int32_t, int64_t, sub, -)
686 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int64_t, int64_t, add, +)
687 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int64_t, int64_t, sub, -)
688 DEF_RANGE_MODE_FRAME_UPPER_BOUND(float, float, float, add, +)
689 DEF_RANGE_MODE_FRAME_UPPER_BOUND(float, float, float, sub, -)
690 DEF_RANGE_MODE_FRAME_UPPER_BOUND(double, double, double, add, +)
691 DEF_RANGE_MODE_FRAME_UPPER_BOUND(double, double, double, sub, -)
692 #undef DEF_RANGE_MODE_FRAME_UPPER_BOUND
693 
694 template <typename COL_TYPE, typename LOGICAL_TYPE>
695 inline LOGICAL_TYPE get_value_in_window_frame(const int64_t target_row_idx_in_frame,
696  const int64_t frame_start_offset,
697  const int64_t frame_end_offset,
698  const COL_TYPE* col_buf,
699  const int32_t* partition_rowid_buf,
700  const int64_t* ordered_index_buf,
701  const LOGICAL_TYPE logical_null_val,
702  const LOGICAL_TYPE col_null_val) {
703  if (target_row_idx_in_frame < frame_start_offset ||
704  target_row_idx_in_frame > frame_end_offset) {
705  return logical_null_val;
706  }
707  const auto target_offset =
708  partition_rowid_buf[ordered_index_buf[target_row_idx_in_frame]];
709  LOGICAL_TYPE target_val = col_buf[target_offset];
710  if (target_val == col_null_val) {
711  return logical_null_val;
712  }
713  return target_val;
714 }
715 
716 #define DEF_GET_VALUE_IN_FRAME(col_type, logical_type) \
717  extern "C" RUNTIME_EXPORT ALWAYS_INLINE logical_type \
718  get_##col_type##_value_##logical_type##_type_in_frame( \
719  const int64_t target_row_idx_in_frame, \
720  const int64_t frame_start_offset, \
721  const int64_t frame_end_offset, \
722  const col_type* col_buf, \
723  const int32_t* partition_rowid_buf, \
724  const int64_t* ordered_index_buf, \
725  const logical_type logical_null_val, \
726  const logical_type col_null_val) { \
727  return get_value_in_window_frame<col_type, logical_type>(target_row_idx_in_frame, \
728  frame_start_offset, \
729  frame_end_offset, \
730  col_buf, \
731  partition_rowid_buf, \
732  ordered_index_buf, \
733  logical_null_val, \
734  col_null_val); \
735  }
736 DEF_GET_VALUE_IN_FRAME(int8_t, int8_t)
737 DEF_GET_VALUE_IN_FRAME(int8_t, int16_t)
738 DEF_GET_VALUE_IN_FRAME(int8_t, int32_t)
739 DEF_GET_VALUE_IN_FRAME(int8_t, int64_t)
740 DEF_GET_VALUE_IN_FRAME(int16_t, int16_t)
741 DEF_GET_VALUE_IN_FRAME(int16_t, int32_t)
742 DEF_GET_VALUE_IN_FRAME(int16_t, int64_t)
743 DEF_GET_VALUE_IN_FRAME(int32_t, int32_t)
744 DEF_GET_VALUE_IN_FRAME(int32_t, int64_t)
745 DEF_GET_VALUE_IN_FRAME(int64_t, int64_t)
746 DEF_GET_VALUE_IN_FRAME(float, float)
747 DEF_GET_VALUE_IN_FRAME(double, double)
748 #undef DEF_GET_VALUE_IN_FRAME
749 
750 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t encode_date(int64_t decoded_val,
751  int64_t null_val,
752  int64_t multiplier) {
753  return decoded_val == null_val ? decoded_val : decoded_val * multiplier;
754 }
755 
756 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
757 compute_row_mode_start_index_sub(int64_t candidate_index,
758  int64_t current_partition_start_offset,
759  int64_t frame_bound) {
760  int64_t index = candidate_index - current_partition_start_offset - frame_bound;
761  return index < 0 ? 0 : index;
762 }
763 
764 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
765 compute_row_mode_start_index_add(int64_t candidate_index,
766  int64_t current_partition_start_offset,
767  int64_t frame_bound,
768  int64_t num_current_partition_elem) {
769  int64_t index = candidate_index - current_partition_start_offset + frame_bound;
770  return index >= num_current_partition_elem ? num_current_partition_elem : index;
771 }
772 
773 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
774 compute_row_mode_end_index_sub(int64_t candidate_index,
775  int64_t current_partition_start_offset,
776  int64_t frame_bound) {
777  int64_t index = candidate_index - current_partition_start_offset - frame_bound;
778  return index < 0 ? 0 : index + 1;
779 }
780 
781 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
782 compute_row_mode_end_index_add(int64_t candidate_index,
783  int64_t current_partition_start_offset,
784  int64_t frame_bound,
785  int64_t num_current_partition_elem) {
786  int64_t index = candidate_index - current_partition_start_offset + frame_bound;
787  return index >= num_current_partition_elem ? num_current_partition_elem : index + 1;
788 }
789 
791  int64_t** aggregation_trees,
792  size_t partition_idx) {
793  return aggregation_trees[partition_idx];
794 }
795 
797  int64_t** aggregation_trees,
798  size_t partition_idx) {
799  double** casted_aggregation_trees = reinterpret_cast<double**>(aggregation_trees);
800  return casted_aggregation_trees[partition_idx];
801 }
802 
803 extern "C" RUNTIME_EXPORT ALWAYS_INLINE SumAndCountPair<int64_t>*
804 get_integer_derived_aggregation_tree(int64_t** aggregation_trees, size_t partition_idx) {
805  SumAndCountPair<int64_t>** casted_aggregation_trees =
806  reinterpret_cast<SumAndCountPair<int64_t>**>(aggregation_trees);
807  return casted_aggregation_trees[partition_idx];
808 }
809 
810 extern "C" RUNTIME_EXPORT ALWAYS_INLINE SumAndCountPair<double>*
811 get_double_derived_aggregation_tree(int64_t** aggregation_trees, size_t partition_idx) {
812  SumAndCountPair<double>** casted_aggregation_trees =
813  reinterpret_cast<SumAndCountPair<double>**>(aggregation_trees);
814  return casted_aggregation_trees[partition_idx];
815 }
816 
817 extern "C" RUNTIME_EXPORT ALWAYS_INLINE size_t
818 getStartOffsetForSegmentTreeTraversal(size_t level, size_t tree_fanout) {
819  size_t offset = 0;
820  for (size_t i = 0; i < level; i++) {
821  offset += pow(tree_fanout, i);
822  }
823  return offset;
824 }
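// Example: with tree_fanout == 2 and level == 3 the traversal starts at offset
// 1 + 2 + 4 == 7, i.e. just past the 7 nodes stored in levels 0 through 2.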
825 namespace {
826 enum class AggFuncType { MIN, MAX, SUM };
827 
828 template <AggFuncType AGG_FUNC_TYPE, typename AGG_TYPE>
829 inline AGG_TYPE agg_func(AGG_TYPE const lhs, AGG_TYPE const rhs) {
830  if constexpr (AGG_FUNC_TYPE == AggFuncType::MIN) {
831  return std::min(lhs, rhs);
832  } else if constexpr (AGG_FUNC_TYPE == AggFuncType::MAX) {
833  return std::max(lhs, rhs);
834  } else {
835  return lhs + rhs;
836  }
837 }
838 } // namespace
839 
840 template <AggFuncType AGG_FUNC_TYPE, typename AGG_TYPE>
841 inline AGG_TYPE compute_window_func_via_aggregation_tree(
842  AGG_TYPE* aggregation_tree_for_partition,
843  size_t query_range_start_idx,
844  size_t query_range_end_idx,
845  size_t leaf_level,
846  size_t tree_fanout,
847  AGG_TYPE init_val,
848  AGG_TYPE invalid_val,
849  AGG_TYPE null_val) {
850  size_t leaf_start_idx = getStartOffsetForSegmentTreeTraversal(leaf_level, tree_fanout);
851  size_t begin = leaf_start_idx + query_range_start_idx;
852  size_t end = leaf_start_idx + query_range_end_idx;
853  AGG_TYPE res = init_val;
854  bool all_nulls = true;
855  for (int level = leaf_level; level >= 0; level--) {
856  size_t parentBegin = begin / tree_fanout;
857  size_t parentEnd = (end - 1) / tree_fanout;
858  if (parentBegin == parentEnd) {
859  for (size_t pos = begin; pos < end; pos++) {
860  if (aggregation_tree_for_partition[pos] != null_val) {
861  all_nulls = false;
862  res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
863  }
864  }
865  return all_nulls ? null_val : res;
866  } else if (parentBegin > parentEnd) {
867  return null_val;
868  }
869  size_t group_begin = (parentBegin * tree_fanout) + 1;
870  if (begin != group_begin) {
871  size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1;
872  for (size_t pos = begin; pos < limit; pos++) {
873  if (aggregation_tree_for_partition[pos] != null_val) {
874  all_nulls = false;
875  res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
876  }
877  }
878  parentBegin++;
879  }
880  size_t group_end = (parentEnd * tree_fanout) + 1;
881  if (end != group_end) {
882  for (size_t pos = group_end; pos < end; pos++) {
883  if (aggregation_tree_for_partition[pos] != null_val) {
884  all_nulls = false;
885  res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
886  }
887  }
888  }
889  begin = parentBegin;
890  end = parentEnd;
891  }
892  return invalid_val;
893 }
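// The loop above is, in effect, a fanout-ary segment-tree range query: at each
// level it folds the nodes that stick out of the parent-aligned groups on both
// ends, then climbs to the parent range, so a frame is aggregated in roughly
// O(tree_fanout * log(num_rows)) node visits rather than by scanning every row.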
894 
895 #define DEF_SEARCH_AGGREGATION_TREE(agg_value_type) \
896  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_value_type \
897  search_##agg_value_type##_aggregation_tree( \
898  agg_value_type* aggregated_tree_for_partition, \
899  size_t query_range_start_idx, \
900  size_t query_range_end_idx, \
901  size_t leaf_level, \
902  size_t tree_fanout, \
903  bool decimal_type, \
904  size_t scale, \
905  agg_value_type invalid_val, \
906  agg_value_type null_val, \
907  int32_t agg_type) { \
908  if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) { \
909  return null_val; \
910  } \
911  switch (agg_type) { \
912  case 1: { \
913  return compute_window_func_via_aggregation_tree<AggFuncType::MIN>( \
914  aggregated_tree_for_partition, \
915  query_range_start_idx, \
916  query_range_end_idx, \
917  leaf_level, \
918  tree_fanout, \
919  std::numeric_limits<agg_value_type>::max(), \
920  invalid_val, \
921  null_val); \
922  } \
923  case 2: { \
924  return compute_window_func_via_aggregation_tree<AggFuncType::MAX>( \
925  aggregated_tree_for_partition, \
926  query_range_start_idx, \
927  query_range_end_idx, \
928  leaf_level, \
929  tree_fanout, \
930  std::numeric_limits<agg_value_type>::lowest(), \
931  invalid_val, \
932  null_val); \
933  } \
934  default: { \
935  return compute_window_func_via_aggregation_tree<AggFuncType::SUM>( \
936  aggregated_tree_for_partition, \
937  query_range_start_idx, \
938  query_range_end_idx, \
939  leaf_level, \
940  tree_fanout, \
941  static_cast<agg_value_type>(0), \
942  invalid_val, \
943  null_val); \
944  } \
945  } \
946  }
947 
950 #undef DEF_SEARCH_AGGREGATION_TREE
951 
952 template <typename AGG_VALUE_TYPE>
953 inline void compute_derived_aggregates(
954  SumAndCountPair<AGG_VALUE_TYPE>* aggregation_tree_for_partition,
955  SumAndCountPair<AGG_VALUE_TYPE>& res,
956  size_t query_range_start_idx,
957  size_t query_range_end_idx,
958  size_t leaf_level,
959  size_t tree_fanout,
960  AGG_VALUE_TYPE invalid_val,
961  AGG_VALUE_TYPE null_val) {
962  size_t leaf_start_idx = getStartOffsetForSegmentTreeTraversal(leaf_level, tree_fanout);
963  size_t begin = leaf_start_idx + query_range_start_idx;
964  size_t end = leaf_start_idx + query_range_end_idx;
965  SumAndCountPair<AGG_VALUE_TYPE> null_res{null_val, 0};
966  SumAndCountPair<AGG_VALUE_TYPE> invalid_res{invalid_val, 0};
967  bool all_nulls = true;
968  for (int level = leaf_level; level >= 0; level--) {
969  size_t parentBegin = begin / tree_fanout;
970  size_t parentEnd = (end - 1) / tree_fanout;
971  if (parentBegin == parentEnd) {
972  for (size_t pos = begin; pos < end; pos++) {
973  if (aggregation_tree_for_partition[pos].sum != null_val) {
974  all_nulls = false;
975  res.sum += aggregation_tree_for_partition[pos].sum;
976  res.count += aggregation_tree_for_partition[pos].count;
977  }
978  }
979  if (all_nulls) {
980  res = null_res;
981  }
982  return;
983  } else if (parentBegin > parentEnd) {
984  res = null_res;
985  return;
986  }
987  size_t group_begin = (parentBegin * tree_fanout) + 1;
988  if (begin != group_begin) {
989  size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1;
990  for (size_t pos = begin; pos < limit; pos++) {
991  if (aggregation_tree_for_partition[pos].sum != null_val) {
992  all_nulls = false;
993  res.sum += aggregation_tree_for_partition[pos].sum;
994  res.count += aggregation_tree_for_partition[pos].count;
995  }
996  }
997  parentBegin++;
998  }
999  size_t group_end = (parentEnd * tree_fanout) + 1;
1000  if (end != group_end) {
1001  for (size_t pos = group_end; pos < end; pos++) {
1002  if (aggregation_tree_for_partition[pos].sum != null_val) {
1003  all_nulls = false;
1004  res.sum += aggregation_tree_for_partition[pos].sum;
1005  res.count += aggregation_tree_for_partition[pos].count;
1006  }
1007  }
1008  }
1009  begin = parentBegin;
1010  end = parentEnd;
1011  }
1012  res = invalid_res;
1013  return;
1014 }
1015 
1016 #define DEF_SEARCH_DERIVED_AGGREGATION_TREE(agg_value_type) \
1017  extern "C" RUNTIME_EXPORT ALWAYS_INLINE double \
1018  search_##agg_value_type##_derived_aggregation_tree( \
1019  SumAndCountPair<agg_value_type>* aggregated_tree_for_partition, \
1020  size_t query_range_start_idx, \
1021  size_t query_range_end_idx, \
1022  size_t leaf_level, \
1023  size_t tree_fanout, \
1024  bool decimal_type, \
1025  size_t scale, \
1026  agg_value_type invalid_val, \
1027  agg_value_type null_val, \
1028  int32_t agg_type) { \
1029  if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) { \
1030  return null_val; \
1031  } \
1032  SumAndCountPair<agg_value_type> res{0, 0}; \
1033  compute_derived_aggregates<agg_value_type>(aggregated_tree_for_partition, \
1034  res, \
1035  query_range_start_idx, \
1036  query_range_end_idx, \
1037  leaf_level, \
1038  tree_fanout, \
1039  invalid_val, \
1040  null_val); \
1041  if (res.sum == null_val) { \
1042  return null_val; \
1043  } else if (res.count > 0) { \
1044  if (decimal_type) { \
1045  return (static_cast<double>(res.sum) / pow(10, scale)) / res.count; \
1046  } \
1047  return (static_cast<double>(res.sum)) / res.count; \
1048  } else { \
1049  return invalid_val; \
1050  } \
1051  }
1052 
1055 #undef DEF_SEARCH_DERIVED_AGGREGATION_TREE
1056 
1057 #define DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG(agg_type, null_type) \
1058  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_type \
1059  handle_null_val_##agg_type##_##null_type##_window_framing_agg( \
1060  agg_type res, null_type agg_null_val, agg_type input_col_null_val) { \
1061  if (res == agg_null_val) { \
1062  return input_col_null_val; \
1063  } \
1064  return res; \
1065  }
1069 #undef DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG
1070 
1071 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum(int64_t* agg, const int64_t val) {
1072  const auto old = *agg;
1073  *agg += val;
1074  return old;
1075 }
1076 
1077 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_if(int64_t* agg,
1078  const int64_t val,
1079  const int8_t cond) {
1080  return cond ? agg_sum(agg, val) : *agg;
1081 }
1082 
1083 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max(int64_t* agg, const int64_t val) {
1084  *agg = std::max(*agg, val);
1085 }
1086 
1087 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min(int64_t* agg, const int64_t val) {
1088  *agg = std::min(*agg, val);
1089 }
1090 
1091 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id(int64_t* agg, const int64_t val) {
1092  *agg = val;
1093 }
1094 
1095 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t* agg_id_varlen(int8_t* varlen_buffer,
1096  const int64_t offset,
1097  const int8_t* value,
1098  const int64_t size_bytes) {
1099  for (auto i = 0; i < size_bytes; i++) {
1100  varlen_buffer[offset + i] = value[i];
1101  }
1102  return &varlen_buffer[offset];
1103 }
1104 
1105 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1106 checked_single_agg_id(int64_t* agg, const int64_t val, const int64_t null_val) {
1107  if (val == null_val) {
1108  return 0;
1109  }
1110 
1111  if (*agg == val) {
1112  return 0;
1113  } else if (*agg == null_val) {
1114  *agg = val;
1115  return 0;
1116  } else {
1117  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
1118  return 15;
1119  }
1120 }
1121 
1123  int64_t* agg,
1124  const int64_t val,
1125  const int64_t min_val,
1126  const int64_t skip_val) {
1127  if (val != skip_val) {
1128  agg_count_distinct_bitmap(agg, val, min_val);
1129  }
1130 }
1131 
1133  const int64_t,
1134  const int64_t,
1135  const int64_t,
1136  const int64_t,
1137  const int64_t,
1138  const uint64_t,
1139  const uint64_t) {}
1140 
1141 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_int32(uint32_t* agg,
1142  const int32_t) {
1143  return (*agg)++;
1144 }
1145 
1146 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_if_int32(uint32_t* agg,
1147  const int32_t cond) {
1148  return cond ? (*agg)++ : *agg;
1149 }
1150 
1151 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_int32(int32_t* agg,
1152  const int32_t val) {
1153  const auto old = *agg;
1154  *agg += val;
1155  return old;
1156 }
1157 
1158 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_if_int32(int32_t* agg,
1159  const int32_t val,
1160  const int8_t cond) {
1161  return cond ? agg_sum_int32(agg, val) : *agg;
1162 }
1163 
1164 #define DEF_AGG_MAX_INT(n) \
1165  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_int##n(int##n##_t* agg, \
1166  const int##n##_t val) { \
1167  *agg = std::max(*agg, val); \
1168  }
1169 
1170 DEF_AGG_MAX_INT(32)
1171 DEF_AGG_MAX_INT(16)
1172 DEF_AGG_MAX_INT(8)
1173 #undef DEF_AGG_MAX_INT
1174 
1175 #define DEF_AGG_MIN_INT(n) \
1176  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_int##n(int##n##_t* agg, \
1177  const int##n##_t val) { \
1178  *agg = std::min(*agg, val); \
1179  }
1180 
1181 DEF_AGG_MIN_INT(32)
1182 DEF_AGG_MIN_INT(16)
1183 DEF_AGG_MIN_INT(8)
1184 #undef DEF_AGG_MIN_INT
1185 
1186 #define DEF_AGG_ID_INT(n) \
1187  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_int##n(int##n##_t* agg, \
1188  const int##n##_t val) { \
1189  *agg = val; \
1190  }
1191 
1192 #define DEF_CHECKED_SINGLE_AGG_ID_INT(n) \
1193  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id_int##n( \
1194  int##n##_t* agg, const int##n##_t val, const int##n##_t null_val) { \
1195  if (val == null_val) { \
1196  return 0; \
1197  } \
1198  if (*agg == val) { \
1199  return 0; \
1200  } else if (*agg == null_val) { \
1201  *agg = val; \
1202  return 0; \
1203  } else { \
1204  /* see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES*/ \
1205  return 15; \
1206  } \
1207  }
1208 
1209 DEF_AGG_ID_INT(32)
1210 DEF_AGG_ID_INT(16)
1211 DEF_AGG_ID_INT(8)
1212 
1213 DEF_CHECKED_SINGLE_AGG_ID_INT(32)
1214 DEF_CHECKED_SINGLE_AGG_ID_INT(16)
1215 DEF_CHECKED_SINGLE_AGG_ID_INT(8)
1216 
1217 #undef DEF_AGG_ID_INT
1218 #undef DEF_CHECKED_SINGLE_AGG_ID_INT
1219 
1220 #define DEF_WRITE_PROJECTION_INT(n) \
1221  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void write_projection_int##n( \
1222  int8_t* slot_ptr, const int##n##_t val, const int64_t init_val) { \
1223  if (val != init_val) { \
1224  *reinterpret_cast<int##n##_t*>(slot_ptr) = val; \
1225  } \
1226  }
1227 
1230 #undef DEF_WRITE_PROJECTION_INT
1231 
1232 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t* agg,
1233  const int64_t val,
1234  const int64_t skip_val) {
1235  const auto old = *agg;
1236  if (val != skip_val) {
1237  if (old != skip_val) {
1238  return agg_sum(agg, val);
1239  } else {
1240  *agg = val;
1241  }
1242  }
1243  return old;
1244 }
1245 
1246 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1247 agg_sum_int32_skip_val(int32_t* agg, const int32_t val, const int32_t skip_val) {
1248  const auto old = *agg;
1249  if (val != skip_val) {
1250  if (old != skip_val) {
1251  return agg_sum_int32(agg, val);
1252  } else {
1253  *agg = val;
1254  }
1255  }
1256  return old;
1257 }
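// skip_val doubles as the "empty accumulator" marker here: the first non-skip
// value overwrites it and later non-skip values are added on top, so with
// skip_val == -1 the input sequence -1, 5, -1, 2 leaves *agg == 7.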
1258 
1259 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
1260 agg_sum_if_skip_val(int64_t* agg,
1261  const int64_t val,
1262  const int64_t skip_val,
1263  const int8_t cond) {
1264  return cond ? agg_sum_skip_val(agg, val, skip_val) : *agg;
1265 }
1266 
1267 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1268 agg_sum_if_int32_skip_val(int32_t* agg,
1269  const int32_t val,
1270  const int32_t skip_val,
1271  const int8_t cond) {
1272  return cond ? agg_sum_int32_skip_val(agg, val, skip_val) : *agg;
1273 }
1274 
1275 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_if(uint64_t* agg,
1276  const int64_t cond) {
1277  return cond ? (*agg)++ : *agg;
1278 }
1279 
1280 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t
1281 agg_count_skip_val(uint64_t* agg, const int64_t val, const int64_t skip_val) {
1282  if (val != skip_val) {
1283  return agg_count(agg, val);
1284  }
1285  return *agg;
1286 }
1287 
1288 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t
1289 agg_count_if_skip_val(uint64_t* agg, const int64_t cond, const int64_t skip_val) {
1290  if (cond != skip_val) {
1291  return agg_count_if(agg, cond);
1292  }
1293  return *agg;
1294 }
1295 
1296 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t
1297 agg_count_int32_skip_val(uint32_t* agg, const int32_t val, const int32_t skip_val) {
1298  if (val != skip_val) {
1299  return agg_count_int32(agg, val);
1300  }
1301  return *agg;
1302 }
1303 
1304 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t
1305 agg_count_if_int32_skip_val(uint32_t* agg, const int32_t cond, const int32_t skip_val) {
1306  if (cond != skip_val) {
1307  return agg_count_if_int32(agg, cond);
1308  }
1309  return *agg;
1310 }
1311 
1312 #define DEF_SKIP_AGG_ADD(base_agg_func) \
1313  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1314  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
1315  if (val != skip_val) { \
1316  base_agg_func(agg, val); \
1317  } \
1318  }
1319 
1320 #define DEF_SKIP_AGG(base_agg_func) \
1321  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1322  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
1323  if (val != skip_val) { \
1324  const DATA_T old_agg = *agg; \
1325  if (old_agg != skip_val) { \
1326  base_agg_func(agg, val); \
1327  } else { \
1328  *agg = val; \
1329  } \
1330  } \
1331  }
1332 
1333 #define DATA_T int64_t
1336 #undef DATA_T
1337 
1338 #define DATA_T int32_t
1341 #undef DATA_T
1342 
1343 #define DATA_T int16_t
1346 #undef DATA_T
1347 
1348 #define DATA_T int8_t
1351 #undef DATA_T
1352 
1353 #undef DEF_SKIP_AGG_ADD
1354 #undef DEF_SKIP_AGG
1355 
1356 // TODO(alex): fix signature
1357 
1358 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_double(uint64_t* agg,
1359  const double val) {
1360  return (*agg)++;
1361 }
1362 
1363 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_double(int64_t* agg,
1364  const double val) {
1365  const auto r = *reinterpret_cast<const double*>(agg) + val;
1366  *agg = *reinterpret_cast<const int64_t*>(may_alias_ptr(&r));
1367 }
1368 
1369 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_if_double(int64_t* agg,
1370  const double val,
1371  const int8_t cond) {
1372  if (cond) {
1373  agg_sum_double(agg, val);
1374  }
1375 }
1376 
1377 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_double(int64_t* agg,
1378  const double val) {
1379  const auto r = std::max(*reinterpret_cast<const double*>(agg), val);
1380  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
1381 }
1382 
1383 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_double(int64_t* agg,
1384  const double val) {
1385  const auto r = std::min(*reinterpret_cast<const double*>(agg), val);
1386  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
1387 }
1388 
1389 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_double(int64_t* agg,
1390  const double val) {
1391  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
1392 }
1393 
1394 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1395 checked_single_agg_id_double(int64_t* agg, const double val, const double null_val) {
1396  if (val == null_val) {
1397  return 0;
1398  }
1399 
1400  if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)))) {
1401  return 0;
1402  } else if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&null_val)))) {
1403  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
1404  return 0;
1405  } else {
1406  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
1407  return 15;
1408  }
1409 }
1410 
1411 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_float(uint32_t* agg,
1412  const float val) {
1413  return (*agg)++;
1414 }
1415 
1416 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_float(int32_t* agg,
1417  const float val) {
1418  const auto r = *reinterpret_cast<const float*>(agg) + val;
1419  *agg = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
1420 }
1421 
1422 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_if_sum_float(int32_t* agg,
1423  const float val,
1424  const int8_t cond) {
1425  if (cond) {
1426  agg_sum_float(agg, val);
1427  }
1428 }
1429 
1430 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_float(int32_t* agg,
1431  const float val) {
1432  const auto r = std::max(*reinterpret_cast<const float*>(agg), val);
1433  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
1434 }
1435 
1436 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_float(int32_t* agg,
1437  const float val) {
1438  const auto r = std::min(*reinterpret_cast<const float*>(agg), val);
1439  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
1440 }
1441 
1442 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_float(int32_t* agg, const float val) {
1443  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
1444 }
1445 
1446 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1447 checked_single_agg_id_float(int32_t* agg, const float val, const float null_val) {
1448  if (val == null_val) {
1449  return 0;
1450  }
1451 
1452  if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)))) {
1453  return 0;
1454  } else if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&null_val)))) {
1455  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
1456  return 0;
1457  } else {
1458  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
1459  return 15;
1460  }
1461 }
1462 
1463 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t
1464 agg_count_double_skip_val(uint64_t* agg, const double val, const double skip_val) {
1465  if (val != skip_val) {
1466  return agg_count_double(agg, val);
1467  }
1468  return *agg;
1469 }
1470 
1471 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t
1472 agg_count_float_skip_val(uint32_t* agg, const float val, const float skip_val) {
1473  if (val != skip_val) {
1474  return agg_count_float(agg, val);
1475  }
1476  return *agg;
1477 }
1478 
1479 #define DEF_SKIP_AGG(base_agg_func) \
1480  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1481  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
1482  if (val != skip_val) { \
1483  const ADDR_T old_agg = *agg; \
1484  if (old_agg != *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&skip_val))) { \
1485  base_agg_func(agg, val); \
1486  } else { \
1487  *agg = *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&val)); \
1488  } \
1489  } \
1490  }
1491 
1492 #define DEF_SKIP_IF_AGG(skip_agg_func, base_agg_func) \
1493  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void skip_agg_func##_skip_val( \
1494  ADDR_T* agg, const DATA_T val, const DATA_T skip_val, const int8_t cond) { \
1495  if (cond) { \
1496  base_agg_func##_skip_val(agg, val, skip_val); \
1497  } \
1498  }
1499 
1500 #define DATA_T double
1501 #define ADDR_T int64_t
1506 #undef ADDR_T
1507 #undef DATA_T
1508 
1509 #define DATA_T float
1510 #define ADDR_T int32_t
1515 #undef ADDR_T
1516 #undef DATA_T
1517 
1518 #undef DEF_SKIP_AGG
1519 #undef DEF_SKIP_IF_AGG
1520 
1521 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_floor(const int64_t x,
1522  const int64_t scale) {
1523  if (x >= 0) {
1524  return x / scale * scale;
1525  }
1526  if (!(x % scale)) {
1527  return x;
1528  }
1529  return x / scale * scale - scale;
1530 }
1531 
1532 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_ceil(const int64_t x,
1533  const int64_t scale) {
1534  return decimal_floor(x, scale) + (x % scale ? scale : 0);
1535 }
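// Worked example at scale 100 (two decimal digits): decimal_floor(-1250, 100)
// == -1300 and decimal_ceil(250, 100) == 300, matching floor(-12.5) and
// ceil(2.5) in the scaled decimal domain.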
1536 
1537 // Shared memory aggregators. Should never be called,
1538 // real implementations are in cuda_mapd_rt.cu.
1539 #define DEF_SHARED_AGG_RET_STUBS(base_agg_func) \
1540  extern "C" GPU_RT_STUB uint64_t base_agg_func##_shared(uint64_t* agg, \
1541  const int64_t val) { \
1542  return 0; \
1543  } \
1544  \
1545  extern "C" GPU_RT_STUB uint64_t base_agg_func##_skip_val_shared( \
1546  uint64_t* agg, const int64_t val, const int64_t skip_val) { \
1547  return 0; \
1548  } \
1549  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_shared(uint32_t* agg, \
1550  const int32_t val) { \
1551  return 0; \
1552  } \
1553  \
1554  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_skip_val_shared( \
1555  uint32_t* agg, const int32_t val, const int32_t skip_val) { \
1556  return 0; \
1557  } \
1558  \
1559  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_shared(uint64_t* agg, \
1560  const double val) { \
1561  return 0; \
1562  } \
1563  \
1564  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_skip_val_shared( \
1565  uint64_t* agg, const double val, const double skip_val) { \
1566  return 0; \
1567  } \
1568  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_shared(uint32_t* agg, \
1569  const float val) { \
1570  return 0; \
1571  } \
1572  \
1573  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_skip_val_shared( \
1574  uint32_t* agg, const float val, const float skip_val) { \
1575  return 0; \
1576  }
1577 
1578 #define DEF_SHARED_AGG_STUBS(base_agg_func) \
1579  extern "C" GPU_RT_STUB void base_agg_func##_shared(int64_t* agg, const int64_t val) {} \
1580  \
1581  extern "C" GPU_RT_STUB void base_agg_func##_skip_val_shared( \
1582  int64_t* agg, const int64_t val, const int64_t skip_val) {} \
1583  extern "C" GPU_RT_STUB void base_agg_func##_int32_shared(int32_t* agg, \
1584  const int32_t val) {} \
1585  extern "C" GPU_RT_STUB void base_agg_func##_int16_shared(int16_t* agg, \
1586  const int16_t val) {} \
1587  extern "C" GPU_RT_STUB void base_agg_func##_int8_shared(int8_t* agg, \
1588  const int8_t val) {} \
1589  \
1590  extern "C" GPU_RT_STUB void base_agg_func##_int32_skip_val_shared( \
1591  int32_t* agg, const int32_t val, const int32_t skip_val) {} \
1592  \
1593  extern "C" GPU_RT_STUB void base_agg_func##_double_shared(int64_t* agg, \
1594  const double val) {} \
1595  \
1596  extern "C" GPU_RT_STUB void base_agg_func##_double_skip_val_shared( \
1597  int64_t* agg, const double val, const double skip_val) {} \
1598  extern "C" GPU_RT_STUB void base_agg_func##_float_shared(int32_t* agg, \
1599  const float val) {} \
1600  \
1601  extern "C" GPU_RT_STUB void base_agg_func##_float_skip_val_shared( \
1602  int32_t* agg, const float val, const float skip_val) {}
1603 
1609 
1610 extern "C" GPU_RT_STUB int8_t* agg_id_varlen_shared(int8_t* varlen_buffer,
1611  const int64_t offset,
1612  const int8_t* value,
1613  const int64_t size_bytes) {
1614  return nullptr;
1615 }
1616 
1617 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_shared(int64_t* agg,
1618  const int64_t val,
1619  const int64_t null_val) {
1620  return 0;
1621 }
1622 
1623 extern "C" GPU_RT_STUB int32_t
1624 checked_single_agg_id_int32_shared(int32_t* agg,
1625  const int32_t val,
1626  const int32_t null_val) {
1627  return 0;
1628 }
1629 extern "C" GPU_RT_STUB int32_t
1630 checked_single_agg_id_int16_shared(int16_t* agg,
1631  const int16_t val,
1632  const int16_t null_val) {
1633  return 0;
1634 }
1635 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_int8_shared(int8_t* agg,
1636  const int8_t val,
1637  const int8_t null_val) {
1638  return 0;
1639 }
1640 
1641 extern "C" GPU_RT_STUB int32_t
1642 checked_single_agg_id_double_shared(int64_t* agg,
1643  const double val,
1644  const double null_val) {
1645  return 0;
1646 }
1647 
1648 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_float_shared(int32_t* agg,
1649  const float val,
1650  const float null_val) {
1651  return 0;
1652 }
1653 
1654 extern "C" GPU_RT_STUB void agg_max_int16_skip_val_shared(int16_t* agg,
1655  const int16_t val,
1656  const int16_t skip_val) {}
1657 
1658 extern "C" GPU_RT_STUB void agg_max_int8_skip_val_shared(int8_t* agg,
1659  const int8_t val,
1660  const int8_t skip_val) {}
1661 
1662 extern "C" GPU_RT_STUB void agg_min_int16_skip_val_shared(int16_t* agg,
1663  const int16_t val,
1664  const int16_t skip_val) {}
1665 
1666 extern "C" GPU_RT_STUB void agg_min_int8_skip_val_shared(int8_t* agg,
1667  const int8_t val,
1668  const int8_t skip_val) {}
1669 
1670 extern "C" GPU_RT_STUB void agg_id_double_shared_slow(int64_t* agg, const double* val) {}
1671 
1672 extern "C" GPU_RT_STUB int64_t agg_sum_shared(int64_t* agg, const int64_t val) {
1673  return 0;
1674 }
1675 
1676 extern "C" GPU_RT_STUB int64_t agg_sum_if_shared(int64_t* agg,
1677  const int64_t val,
1678  const int8_t cond) {
1679  return 0;
1680 }
1681 
1682 extern "C" GPU_RT_STUB int64_t agg_sum_skip_val_shared(int64_t* agg,
1683  const int64_t val,
1684  const int64_t skip_val) {
1685  return 0;
1686 }
1687 
1688 extern "C" GPU_RT_STUB int64_t agg_sum_if_skip_val_shared(int64_t* agg,
1689  const int64_t val,
1690  const int64_t skip_val,
1691  const int8_t cond) {
1692  return 0;
1693 }
1694 extern "C" GPU_RT_STUB int32_t agg_sum_int32_shared(int32_t* agg, const int32_t val) {
1695  return 0;
1696 }
1697 
1698 extern "C" GPU_RT_STUB int32_t agg_sum_int32_skip_val_shared(int32_t* agg,
1699  const int32_t val,
1700  const int32_t skip_val) {
1701  return 0;
1702 }
1703 
1704 extern "C" GPU_RT_STUB void agg_sum_double_shared(int64_t* agg, const double val) {}
1705 
1706 extern "C" GPU_RT_STUB void agg_sum_double_skip_val_shared(int64_t* agg,
1707  const double val,
1708  const double skip_val) {}
1709 extern "C" GPU_RT_STUB void agg_sum_float_shared(int32_t* agg, const float val) {}
1710 
1711 extern "C" GPU_RT_STUB void agg_sum_float_skip_val_shared(int32_t* agg,
1712  const float val,
1713  const float skip_val) {}
1714 
1715 extern "C" GPU_RT_STUB int32_t agg_sum_if_int32_shared(int32_t* agg,
1716  const int32_t val,
1717  const int8_t cond) {
1718  return 0;
1719 }
1720 
1721 extern "C" GPU_RT_STUB int32_t agg_sum_if_int32_skip_val_shared(int32_t* agg,
1722  const int32_t val,
1723  const int32_t skip_val,
1724  const int8_t cond) {
1725  return 0;
1726 }
1727 
1728 extern "C" GPU_RT_STUB void agg_sum_if_double_shared(int64_t* agg,
1729  const double val,
1730  const int8_t cond) {}
1731 
1732 extern "C" GPU_RT_STUB void agg_sum_if_double_skip_val_shared(int64_t* agg,
1733  const double val,
1734  const double skip_val,
1735  const int8_t cond) {}
1736 extern "C" GPU_RT_STUB void agg_sum_if_float_shared(int32_t* agg,
1737  const float val,
1738  const int8_t cond) {}
1739 
1740 extern "C" GPU_RT_STUB void agg_sum_if_float_skip_val_shared(int32_t* agg,
1741  const float val,
1742  const float skip_val,
1743  const int8_t cond) {}
1744 
1745 extern "C" GPU_RT_STUB void force_sync() {}
1746 
1747 extern "C" GPU_RT_STUB void sync_warp() {}
1748 extern "C" GPU_RT_STUB void sync_warp_protected(int64_t thread_pos, int64_t row_count) {}
1749 extern "C" GPU_RT_STUB void sync_threadblock() {}
1750 
1751 extern "C" GPU_RT_STUB void write_back_non_grouped_agg(int64_t* input_buffer,
1752  int64_t* output_buffer,
1753  const int32_t num_agg_cols){};
1754 // x64 stride functions
1755 
1756 extern "C" RUNTIME_EXPORT NEVER_INLINE int32_t pos_start_impl(int32_t* error_code) {
1757  int32_t row_index_resume{0};
1758  if (error_code) {
1759  row_index_resume = error_code[0];
1760  error_code[0] = 0;
1761  }
1762  return row_index_resume;
1763 }
1764 
1765 extern "C" RUNTIME_EXPORT NEVER_INLINE int32_t group_buff_idx_impl() {
1766  return pos_start_impl(nullptr);
1767 }
1768 
1769 extern "C" RUNTIME_EXPORT NEVER_INLINE int32_t pos_step_impl() {
1770  return 1;
1771 }
1772 
1773 extern "C" GPU_RT_STUB int8_t thread_warp_idx(const int8_t warp_sz) {
1774  return 0;
1775 }
1776 
1777 extern "C" GPU_RT_STUB int64_t get_thread_index() {
1778  return 0;
1779 }
1780 
1781 extern "C" GPU_RT_STUB int64_t* declare_dynamic_shared_memory() {
1782  return nullptr;
1783 }
1784 
1785 extern "C" GPU_RT_STUB int64_t get_block_index() {
1786  return 0;
1787 }
1788 
1789 #undef GPU_RT_STUB
1790 
1791 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void record_error_code(const int32_t err_code,
1792  int32_t* error_codes) {
1793  // NB: never override persistent error codes (with code greater than zero).
1794  // On GPU, a projection query with a limit can run out of slots without it
1795  // being an actual error if the limit has been hit. If a persistent error
1796  // (division by zero, for example) occurs before running out of slots, we
1797  // have to avoid overriding it, because there's a risk that the query would
1798  // go through if we override with a potentially benign out-of-slots code.
1799  if (err_code && error_codes[pos_start_impl(nullptr)] <= 0) {
1800  error_codes[pos_start_impl(nullptr)] = err_code;
1801  }
1802 }
1803 
1804 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t get_error_code(int32_t* error_codes) {
1805  return error_codes[pos_start_impl(nullptr)];
1806 }
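The convention above can be illustrated with a short sketch (an editorial example, not part of this file; the error-code values are hypothetical): once a positive, persistent code has been recorded for a thread's slot, later calls leave it in place.

  int32_t error_codes[1] = {0};        // one slot per thread; the CPU path uses slot 0
  record_error_code(7, error_codes);   // hypothetical persistent error (> 0), e.g. division by zero
  record_error_code(-3, error_codes);  // hypothetical benign code reported later
  // get_error_code(error_codes) still returns 7: the persistent code is never overridden.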
1807 
1808 // group by helpers
1809 
1810 extern "C" RUNTIME_EXPORT NEVER_INLINE const int64_t* init_shared_mem_nop(
1811  const int64_t* groups_buffer,
1812  const int32_t groups_buffer_size) {
1813  return groups_buffer;
1814 }
1815 
1816 extern "C" RUNTIME_EXPORT NEVER_INLINE void write_back_nop(int64_t* dest,
1817  int64_t* src,
1818  const int32_t sz) {
1819 #ifndef _WIN32
1820  // the body is not really needed, just make sure the call is not optimized away
1821  assert(dest);
1822 #endif
1823 }
1824 
1825 extern "C" RUNTIME_EXPORT int64_t* init_shared_mem(const int64_t* global_groups_buffer,
1826  const int32_t groups_buffer_size) {
1827  return nullptr;
1828 }
1829 
1830 extern "C" RUNTIME_EXPORT NEVER_INLINE void init_group_by_buffer_gpu(
1831  int64_t* groups_buffer,
1832  const int64_t* init_vals,
1833  const uint32_t groups_buffer_entry_count,
1834  const uint32_t key_qw_count,
1835  const uint32_t agg_col_count,
1836  const bool keyless,
1837  const int8_t warp_size) {
1838 #ifndef _WIN32
1839  // the body is not really needed, just make sure the call is not optimized away
1840  assert(groups_buffer);
1841 #endif
1842 }
1843 
1844 extern "C" RUNTIME_EXPORT NEVER_INLINE void init_columnar_group_by_buffer_gpu(
1845  int64_t* groups_buffer,
1846  const int64_t* init_vals,
1847  const uint32_t groups_buffer_entry_count,
1848  const uint32_t key_qw_count,
1849  const uint32_t agg_col_count,
1850  const bool keyless,
1851  const bool blocks_share_memory,
1852  const int32_t frag_idx) {
1853 #ifndef _WIN32
1854  // the body is not really needed, just make sure the call is not optimized away
1855  assert(groups_buffer);
1856 #endif
1857 }
1858 
1859 extern "C" RUNTIME_EXPORT NEVER_INLINE void init_group_by_buffer_impl(
1860  int64_t* groups_buffer,
1861  const int64_t* init_vals,
1862  const uint32_t groups_buffer_entry_count,
1863  const uint32_t key_qw_count,
1864  const uint32_t agg_col_count,
1865  const bool keyless,
1866  const int8_t warp_size) {
1867 #ifndef _WIN32
1868  // the body is not really needed, just make sure the call is not optimized away
1869  assert(groups_buffer);
1870 #endif
1871 }
1872 
1873 template <typename T>
1874 ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
1875  const uint32_t h,
1876  const T* key,
1877  const uint32_t key_count,
1878  const uint32_t row_size_quad) {
1879  auto off = h * row_size_quad;
1880  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
1881  if (*row_ptr == get_empty_key<T>()) {
1882  memcpy(row_ptr, key, key_count * sizeof(T));
1883  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1884  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1885  }
1886  if (memcmp(row_ptr, key, key_count * sizeof(T)) == 0) {
1887  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1888  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1889  }
1890  return nullptr;
1891 }
1892 
1893 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value(
1894  int64_t* groups_buffer,
1895  const uint32_t h,
1896  const int64_t* key,
1897  const uint32_t key_count,
1898  const uint32_t key_width,
1899  const uint32_t row_size_quad) {
1900  switch (key_width) {
1901  case 4:
1902  return get_matching_group_value(groups_buffer,
1903  h,
1904  reinterpret_cast<const int32_t*>(key),
1905  key_count,
1906  row_size_quad);
1907  case 8:
1908  return get_matching_group_value(groups_buffer, h, key, key_count, row_size_quad);
1909  default:;
1910  }
1911  return nullptr;
1912 }
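As a rough illustration of how this helper is meant to be used (a hedged sketch only; the engine's real probing loops live in GroupByRuntime.cpp, included further down, and probe_row is a hypothetical name): a caller hashes the key, then probes linearly until the function either claims an empty slot or finds the slot that already holds this key.

  // Hypothetical caller-side probing loop over `entry_count` row-wise slots,
  // assuming 64-bit keys (key_width == 8).
  int64_t* probe_row(int64_t* groups_buffer,
                     const uint32_t entry_count,
                     const int64_t* key,
                     const uint32_t key_count,
                     const uint32_t row_size_quad) {
    const auto hash = MurmurHash64A(key, static_cast<int>(key_count * sizeof(int64_t)), 0);
    for (uint32_t i = 0; i < entry_count; ++i) {
      const auto slot = static_cast<uint32_t>((hash + i) % entry_count);
      if (auto* agg_cols = get_matching_group_value(
              groups_buffer, slot, key, key_count, /*key_width=*/8, row_size_quad)) {
        return agg_cols;  // slot was empty (now claimed) or already holds this key
      }
    }
    return nullptr;  // buffer is full
  }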
1913 
1914 template <typename T>
1915 ALWAYS_INLINE int32_t get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1916  const uint32_t entry_count,
1917  const uint32_t h,
1918  const T* key,
1919  const uint32_t key_count) {
1920  auto off = h;
1921  auto key_buffer = reinterpret_cast<T*>(groups_buffer);
1922  if (key_buffer[off] == get_empty_key<T>()) {
1923  for (size_t i = 0; i < key_count; ++i) {
1924  key_buffer[off] = key[i];
1925  off += entry_count;
1926  }
1927  return h;
1928  }
1929  off = h;
1930  for (size_t i = 0; i < key_count; ++i) {
1931  if (key_buffer[off] != key[i]) {
1932  return -1;
1933  }
1934  off += entry_count;
1935  }
1936  return h;
1937 }
1938 
1939 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1940 get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1941  const uint32_t entry_count,
1942  const uint32_t h,
1943  const int64_t* key,
1944  const uint32_t key_count,
1945  const uint32_t key_width) {
1946  switch (key_width) {
1947  case 4:
1948  return get_matching_group_value_columnar_slot(groups_buffer,
1949  entry_count,
1950  h,
1951  reinterpret_cast<const int32_t*>(key),
1952  key_count);
1953  case 8:
1954  return get_matching_group_value_columnar_slot(
1955  groups_buffer, entry_count, h, key, key_count);
1956  default:
1957  return -1;
1958  }
1959  return -1;
1960 }
1961 
1962 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value_columnar(
1963  int64_t* groups_buffer,
1964  const uint32_t h,
1965  const int64_t* key,
1966  const uint32_t key_qw_count,
1967  const size_t entry_count) {
1968  auto off = h;
1969  if (groups_buffer[off] == EMPTY_KEY_64) {
1970  for (size_t i = 0; i < key_qw_count; ++i) {
1971  groups_buffer[off] = key[i];
1972  off += entry_count;
1973  }
1974  return &groups_buffer[off];
1975  }
1976  off = h;
1977  for (size_t i = 0; i < key_qw_count; ++i) {
1978  if (groups_buffer[off] != key[i]) {
1979  return nullptr;
1980  }
1981  off += entry_count;
1982  }
1983  return &groups_buffer[off];
1984 }
1985 
1986 /*
1987  * For a particular hashed_index, returns the row-wise offset
1988  * to the first matching agg column in memory.
1989  * It also checks the corresponding group column and, if the entry is still empty,
1990  * initializes all of its key slots (it is assumed all group columns are
1991  * 64-bit wide).
1992  *
1993  * Memory layout:
1994  *
1995  * | prepended group columns (64-bit each) | agg columns |
1996  */
1997 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash(
1998  int64_t* groups_buffer,
1999  const uint32_t hashed_index,
2000  const int64_t* key,
2001  const uint32_t key_count,
2002  const uint32_t row_size_quad) {
2003  uint32_t off = hashed_index * row_size_quad;
2004  if (groups_buffer[off] == EMPTY_KEY_64) {
2005  for (uint32_t i = 0; i < key_count; ++i) {
2006  groups_buffer[off + i] = key[i];
2007  }
2008  }
2009  return groups_buffer + off + key_count;
2010 }
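A worked example of the layout described in the comment above (an editorial sketch with assumed sizes; groups_buffer is a hypothetical, EMPTY_KEY_64-initialized buffer): with two 64-bit key columns and a row width of six quadwords, the aggregate columns of entry 5 start two slots past the beginning of its row.

  int64_t key[2] = {42, 7};
  int64_t* agg_cols = get_matching_group_value_perfect_hash(
      groups_buffer, /*hashed_index=*/5, key, /*key_count=*/2, /*row_size_quad=*/6);
  // agg_cols == groups_buffer + 5 * 6 + 2; the first two slots of the row now hold {42, 7}.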
2011 
2018 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t*
2019 get_matching_group_value_perfect_hash_keyless(int64_t* groups_buffer,
2020  const uint32_t hashed_index,
2021  const uint32_t row_size_quad) {
2022  return groups_buffer + row_size_quad * hashed_index;
2023 }
2024 
2025 /*
2026  * For a particular hashed_index, find and initialize (if necessary) all the group
2027  * columns corresponding to a key. It is assumed that all group columns are 64-bit wide.
2028  */
2029 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void
2031  const uint32_t hashed_index,
2032  const int64_t* key,
2033  const uint32_t key_count,
2034  const uint32_t entry_count) {
2035  if (groups_buffer[hashed_index] == EMPTY_KEY_64) {
2036  for (uint32_t i = 0; i < key_count; i++) {
2037  groups_buffer[i * entry_count + hashed_index] = key[i];
2038  }
2039  }
2040 }
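The columnar variant can be pictured the same way (an editorial sketch with assumed sizes; groups_buffer is again a hypothetical, pre-initialized buffer): with entry_count = 1024, key component i of entry hashed_index lives at groups_buffer[i * 1024 + hashed_index], i.e. the group columns are laid out column by column rather than row by row.

  int64_t key[2] = {42, 7};
  set_matching_group_value_perfect_hash_columnar(
      groups_buffer, /*hashed_index=*/5, key, /*key_count=*/2, /*entry_count=*/1024);
  // If entry 5 was empty: groups_buffer[5] == 42 and groups_buffer[1024 + 5] == 7.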
2041 
2042 #include "GeoOpsRuntime.cpp"
2043 #include "GroupByRuntime.cpp"
2045 
2046 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_group_value_fast_keyless(
2047  int64_t* groups_buffer,
2048  const int64_t key,
2049  const int64_t min_key,
2050  const int64_t /* bucket */,
2051  const uint32_t row_size_quad) {
2052  return groups_buffer + row_size_quad * (key - min_key);
2053 }
2054 
2055 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_group_value_fast_keyless_semiprivate(
2056  int64_t* groups_buffer,
2057  const int64_t key,
2058  const int64_t min_key,
2059  const int64_t /* bucket */,
2060  const uint32_t row_size_quad,
2061  const uint8_t thread_warp_idx,
2062  const uint8_t warp_size) {
2063  return groups_buffer + row_size_quad * (warp_size * (key - min_key) + thread_warp_idx);
2064 }
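A short worked example of the two keyless fast paths (assumed values, editorial sketch; groups_buffer is hypothetical): with min_key = 100 and row_size_quad = 4, key 103 selects row 3, and the semiprivate variant gives each of the warp_size lanes its own copy of that row.

  int64_t* row = get_group_value_fast_keyless(
      groups_buffer, /*key=*/103, /*min_key=*/100, /*bucket=*/0, /*row_size_quad=*/4);
  // row == groups_buffer + 4 * 3
  int64_t* lane_row = get_group_value_fast_keyless_semiprivate(
      groups_buffer, 103, 100, 0, 4, /*thread_warp_idx=*/2, /*warp_size=*/8);
  // lane_row == groups_buffer + 4 * (8 * 3 + 2)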
2065 
2066 extern "C" RUNTIME_EXPORT ALWAYS_INLINE StringView string_pack(const int8_t* ptr,
2067  const int32_t len) {
2068  return {reinterpret_cast<char const*>(ptr), static_cast<uint64_t>(len)};
2069 }
2070 
2071 #ifdef __clang__
2072 #include "../Utils/StringLike.cpp"
2073 #endif
2074 
2075 #ifndef __CUDACC__
2076 #include "TopKRuntime.cpp"
2077 #endif
2078 
2079 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2080 char_length(const char* str, const int32_t str_len) {
2081  return str_len;
2082 }
2083 
2084 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2085 char_length_nullable(const char* str, const int32_t str_len, const int32_t int_null) {
2086  if (!str) {
2087  return int_null;
2088  }
2089  return str_len;
2090 }
2091 
2092 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2093 key_for_string_encoded(const int32_t str_id) {
2094  return str_id;
2095 }
2096 
2097 extern "C" ALWAYS_INLINE DEVICE int32_t
2098 map_string_dict_id(const int32_t string_id,
2099  const int64_t translation_map_handle,
2100  const int32_t min_source_id) {
2101  const int32_t* translation_map =
2102  reinterpret_cast<const int32_t*>(translation_map_handle);
2103  return translation_map[string_id - min_source_id];
2104 }
2105 
2106 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE bool sample_ratio(
2107  const double proportion,
2108  const int64_t row_offset) {
2109  const int64_t threshold = 4294967296 * proportion;
2110  return (row_offset * 2654435761) % 4294967296 < threshold;
2111 }
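For context (an editorial note, with an assumed proportion): 2654435761 is the Knuth multiplicative-hash constant, roughly 2^32 divided by the golden ratio, so the scrambled row offsets are spread roughly uniformly over [0, 2^32) and about `proportion` of them fall below the threshold, giving a deterministic, order-independent sample.

  const double proportion = 0.25;  // keep roughly a quarter of the rows
  bool keep = sample_ratio(proportion, /*row_offset=*/12345);
  // The same row_offset always yields the same decision, so the sample is repeatable.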
2112 
2113 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2114 width_bucket(const double target_value,
2115  const double lower_bound,
2116  const double upper_bound,
2117  const double scale_factor,
2118  const int32_t partition_count) {
2119  if (target_value < lower_bound) {
2120  return 0;
2121  } else if (target_value >= upper_bound) {
2122  return partition_count + 1;
2123  }
2124  return ((target_value - lower_bound) * scale_factor) + 1;
2125 }
2126 
2127 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2128 width_bucket_reversed(const double target_value,
2129  const double lower_bound,
2130  const double upper_bound,
2131  const double scale_factor,
2132  const int32_t partition_count) {
2133  if (target_value > lower_bound) {
2134  return 0;
2135  } else if (target_value <= upper_bound) {
2136  return partition_count + 1;
2137  }
2138  return ((lower_bound - target_value) * scale_factor) + 1;
2139 }
2140 
2141 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
2142 width_bucket_nullable(const double target_value,
2143  const double lower_bound,
2144  const double upper_bound,
2145  const double scale_factor,
2146  const int32_t partition_count,
2147  const double null_val) {
2148  if (target_value == null_val) {
2149  return INT32_MIN;
2150  }
2151  return width_bucket(
2152  target_value, lower_bound, upper_bound, scale_factor, partition_count);
2153 }
2154 
2155 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
2156 width_bucket_reversed_nullable(const double target_value,
2157  const double lower_bound,
2158  const double upper_bound,
2159  const double scale_factor,
2160  const int32_t partition_count,
2161  const double null_val) {
2162  if (target_value == null_val) {
2163  return INT32_MIN;
2164  }
2165  return width_bucket_reversed(
2166  target_value, lower_bound, upper_bound, scale_factor, partition_count);
2167 }
2168 
2169 // Version of width_bucket without the out-of-bounds check; it can be called when the
2170 // input target_value expression is guaranteed to lie within the valid range
2171 // (which also lets us skip the null check)
2172 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2173 width_bucket_no_oob_check(const double target_value,
2174  const double lower_bound,
2175  const double scale_factor) {
2176  return ((target_value - lower_bound) * scale_factor) + 1;
2177 }
2178 
2179 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2180 width_bucket_reversed_no_oob_check(const double target_value,
2181  const double lower_bound,
2182  const double scale_factor) {
2183  return ((lower_bound - target_value) * scale_factor) + 1;
2184 }
2185 
2186 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2187 width_bucket_expr(const double target_value,
2188  const bool reversed,
2189  const double lower_bound,
2190  const double upper_bound,
2191  const int32_t partition_count) {
2192  if (reversed) {
2193  return width_bucket_reversed(target_value,
2194  lower_bound,
2195  upper_bound,
2196  partition_count / (lower_bound - upper_bound),
2197  partition_count);
2198  }
2199  return width_bucket(target_value,
2200  lower_bound,
2201  upper_bound,
2202  partition_count / (upper_bound - lower_bound),
2203  partition_count);
2204 }
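A worked example of the bucket arithmetic (assumed values, editorial sketch), matching SQL WIDTH_BUCKET semantics: five equal buckets over [0, 10) give scale_factor = 5 / (10 - 0) = 0.5.

  int32_t b1 = width_bucket_expr(7.3, /*reversed=*/false, 0.0, 10.0, 5);  // (7.3 * 0.5) + 1 -> 4
  int32_t b2 = width_bucket_expr(-1.0, false, 0.0, 10.0, 5);              // below the range -> 0
  int32_t b3 = width_bucket_expr(10.0, false, 0.0, 10.0, 5);              // at or above upper -> 6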
2205 
2206 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2207 width_bucket_expr_nullable(const double target_value,
2208  const bool reversed,
2209  const double lower_bound,
2210  const double upper_bound,
2211  const int32_t partition_count,
2212  const double null_val) {
2213  if (target_value == null_val) {
2214  return INT32_MIN;
2215  }
2216  return width_bucket_expr(
2217  target_value, reversed, lower_bound, upper_bound, partition_count);
2218 }
2219 
2220 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2221 width_bucket_expr_no_oob_check(const double target_value,
2222  const bool reversed,
2223  const double lower_bound,
2224  const double upper_bound,
2225  const int32_t partition_count) {
2226  if (reversed) {
2227  return width_bucket_reversed_no_oob_check(
2228  target_value, lower_bound, partition_count / (lower_bound - upper_bound));
2229  }
2230  return width_bucket_no_oob_check(
2231  target_value, lower_bound, partition_count / (upper_bound - lower_bound));
2232 }
2233 
2234 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
2235 row_number_window_func(const int64_t output_buff, const int64_t pos) {
2236  return reinterpret_cast<const int64_t*>(output_buff)[pos];
2237 }
2238 
2239 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double percent_window_func(
2240  const int64_t output_buff,
2241  const int64_t pos) {
2242  return reinterpret_cast<const double*>(output_buff)[pos];
2243 }
2244 
2245 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_double(const int64_t* agg) {
2246  return *reinterpret_cast<const double*>(may_alias_ptr(agg));
2247 }
2248 
2249 extern "C" RUNTIME_EXPORT ALWAYS_INLINE float load_float(const int32_t* agg) {
2250  return *reinterpret_cast<const float*>(may_alias_ptr(agg));
2251 }
2252 
2253 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_int(const int64_t* sum,
2254  const int64_t* count,
2255  const double null_val) {
2256  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
2257 }
2258 
2259 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_decimal(const int64_t* sum,
2260  const int64_t* count,
2261  const double null_val,
2262  const uint32_t scale) {
2263  return *count != 0 ? (static_cast<double>(*sum) / pow(10, scale)) / *count : null_val;
2264 }
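A quick arithmetic check of the decimal average (assumed values, editorial sketch): a DECIMAL column with scale 2 stores values multiplied by 10^2, so a running sum of 12345 over 10 rows averages to 12.345.

  int64_t sum = 12345;
  int64_t count = 10;
  double avg = load_avg_decimal(&sum, &count, /*null_val=*/-1.0, /*scale=*/2);
  // avg == (12345 / 100.0) / 10 == 12.345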
2265 
2266 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_double(const int64_t* agg,
2267  const int64_t* count,
2268  const double null_val) {
2269  return *count != 0 ? *reinterpret_cast<const double*>(may_alias_ptr(agg)) / *count
2270  : null_val;
2271 }
2272 
2273 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_float(const int32_t* agg,
2274  const int32_t* count,
2275  const double null_val) {
2276  return *count != 0 ? *reinterpret_cast<const float*>(may_alias_ptr(agg)) / *count
2277  : null_val;
2278 }
2279 
2280 extern "C" RUNTIME_EXPORT NEVER_INLINE void linear_probabilistic_count(
2281  uint8_t* bitmap,
2282  const uint32_t bitmap_bytes,
2283  const uint8_t* key_bytes,
2284  const uint32_t key_len) {
2285  const uint32_t bit_pos = MurmurHash3(key_bytes, key_len, 0) % (bitmap_bytes * 8);
2286  const uint32_t word_idx = bit_pos / 32;
2287  const uint32_t bit_idx = bit_pos % 32;
2288  reinterpret_cast<uint32_t*>(bitmap)[word_idx] |= 1 << bit_idx;
2289 }
2290 
2291 extern "C" RUNTIME_EXPORT NEVER_INLINE void query_stub_hoisted_literals(
2292  const int8_t** col_buffers,
2293  const int8_t* literals,
2294  const int64_t* num_rows,
2295  const uint64_t* frag_row_offsets,
2296  const int32_t* max_matched,
2297  const int64_t* init_agg_value,
2298  int64_t** out,
2299  uint32_t frag_idx,
2300  const int64_t* join_hash_tables,
2301  int32_t* error_code,
2302  int32_t* total_matched,
2303  const int8_t* row_func_mgr) {
2304 #ifndef _WIN32
2305  assert(col_buffers || literals || num_rows || frag_row_offsets || max_matched ||
2306  init_agg_value || out || frag_idx || error_code || join_hash_tables ||
2307  total_matched || row_func_mgr);
2308 #endif
2309 }
2310 
2311 extern "C" RUNTIME_EXPORT void multifrag_query_hoisted_literals(
2312  const int8_t*** col_buffers,
2313  const uint64_t* num_fragments,
2314  const int8_t* literals,
2315  const int64_t* num_rows,
2316  const uint64_t* frag_row_offsets,
2317  const int32_t* max_matched,
2318  int32_t* total_matched,
2319  const int64_t* init_agg_value,
2320  int64_t** out,
2321  int32_t* error_code,
2322  const uint32_t* num_tables_ptr,
2323  const int64_t* join_hash_tables,
2324  const int8_t* row_func_mgr) {
2325  for (uint32_t i = 0; i < *num_fragments; ++i) {
2326  query_stub_hoisted_literals(col_buffers ? col_buffers[i] : nullptr,
2327  literals,
2328  &num_rows[i * (*num_tables_ptr)],
2329  &frag_row_offsets[i * (*num_tables_ptr)],
2330  max_matched,
2331  init_agg_value,
2332  out,
2333  i,
2334  join_hash_tables,
2335  total_matched,
2336  error_code,
2337  row_func_mgr);
2338  }
2339 }
2340 
2341 extern "C" RUNTIME_EXPORT NEVER_INLINE void query_stub(const int8_t** col_buffers,
2342  const int64_t* num_rows,
2343  const uint64_t* frag_row_offsets,
2344  const int32_t* max_matched,
2345  const int64_t* init_agg_value,
2346  int64_t** out,
2347  uint32_t frag_idx,
2348  const int64_t* join_hash_tables,
2349  int32_t* error_code,
2350  int32_t* total_matched,
2351  const int8_t* row_func_mgr) {
2352 #ifndef _WIN32
2353  assert(col_buffers || num_rows || frag_row_offsets || max_matched || init_agg_value ||
2354  out || frag_idx || error_code || join_hash_tables || total_matched ||
2355  row_func_mgr);
2356 #endif
2357 }
2358 
2359 extern "C" RUNTIME_EXPORT void multifrag_query(const int8_t*** col_buffers,
2360  const uint64_t* num_fragments,
2361  const int64_t* num_rows,
2362  const uint64_t* frag_row_offsets,
2363  const int32_t* max_matched,
2364  int32_t* total_matched,
2365  const int64_t* init_agg_value,
2366  int64_t** out,
2367  int32_t* error_code,
2368  const uint32_t* num_tables_ptr,
2369  const int64_t* join_hash_tables,
2370  const int8_t* row_func_mgr) {
2371  for (uint32_t i = 0; i < *num_fragments; ++i) {
2372  query_stub(col_buffers ? col_buffers[i] : nullptr,
2373  &num_rows[i * (*num_tables_ptr)],
2374  &frag_row_offsets[i * (*num_tables_ptr)],
2375  max_matched,
2376  init_agg_value,
2377  out,
2378  i,
2379  join_hash_tables,
2380  total_matched,
2381  error_code,
2382  row_func_mgr);
2383  }
2384 }
2385 
2386 extern "C" RUNTIME_EXPORT bool check_interrupt() {
2387  if (check_interrupt_init(static_cast<unsigned>(INT_CHECK))) {
2388  return true;
2389  }
2390  return false;
2391 }
2392 
2393 extern "C" RUNTIME_EXPORT bool check_interrupt_init(unsigned command) {
2394  static std::atomic_bool runtime_interrupt_flag{false};
2395 
2396  if (command == static_cast<unsigned>(INT_CHECK)) {
2397  if (runtime_interrupt_flag.load()) {
2398  return true;
2399  }
2400  return false;
2401  }
2402  if (command == static_cast<unsigned>(INT_ABORT)) {
2403  runtime_interrupt_flag.store(true);
2404  return false;
2405  }
2406  if (command == static_cast<unsigned>(INT_RESET)) {
2407  runtime_interrupt_flag.store(false);
2408  return false;
2409  }
2410  return false;
2411 }
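The command protocol can be summarized with a small usage sketch (an editorial example; the surrounding executor code is not shown): the flag is cleared before a query starts, long-running loops poll it, and another thread requests cancellation by issuing INT_ABORT.

  check_interrupt_init(static_cast<unsigned>(INT_RESET));  // clear the flag before the query starts
  // ... inside a long-running scan loop:
  if (check_interrupt()) {
    // stop work and report an interrupt error to the caller
  }
  // ... from another thread, to cancel the query:
  check_interrupt_init(static_cast<unsigned>(INT_ABORT));  // sets the atomic flag; the next poll sees it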