OmniSciDB  72c90bc290
RuntimeFunctions.cpp
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef __CUDACC__
18 #error This code is not intended to be compiled with a CUDA C++ compiler
19 #endif // __CUDACC__
20 
21 #include "RuntimeFunctions.h"
22 #include "BufferCompaction.h"
23 #include "DecisionTreeEntry.h"
24 #include "HyperLogLogRank.h"
25 #include "MurmurHash.h"
26 #include "Shared/Datum.h"
27 #include "Shared/quantile.h"
28 #include "TypePunning.h"
29 #include "Utils/SegmentTreeUtils.h"
30 
31 #include <atomic>
32 #include <chrono>
33 #include <cmath>
34 #include <cstring>
35 #include <functional>
36 #include <thread>
37 #include <tuple>
38 
39 // decoder implementations
40 
41 #include "DecodersImpl.h"
42 
43 // arithmetic operator implementations
44 
45 #define DEF_ARITH_NULLABLE(type, null_type, opname, opsym) \
46  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable( \
47  const type lhs, const type rhs, const null_type null_val) { \
48  if (lhs != null_val && rhs != null_val) { \
49  return lhs opsym rhs; \
50  } \
51  return null_val; \
52  }
53 
54 #define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym) \
55  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_lhs( \
56  const type lhs, const type rhs, const null_type null_val) { \
57  if (lhs != null_val) { \
58  return lhs opsym rhs; \
59  } \
60  return null_val; \
61  }
62 
63 #define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym) \
64  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_rhs( \
65  const type lhs, const type rhs, const null_type null_val) { \
66  if (rhs != null_val) { \
67  return lhs opsym rhs; \
68  } \
69  return null_val; \
70  }
71 
72 #define DEF_CMP_NULLABLE(type, null_type, opname, opsym) \
73  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable( \
74  const type lhs, \
75  const type rhs, \
76  const null_type null_val, \
77  const int8_t null_bool_val) { \
78  if (lhs != null_val && rhs != null_val) { \
79  return lhs opsym rhs; \
80  } \
81  return null_bool_val; \
82  }
83 
84 #define DEF_CMP_NULLABLE_LHS(type, null_type, opname, opsym) \
85  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_lhs( \
86  const type lhs, \
87  const type rhs, \
88  const null_type null_val, \
89  const int8_t null_bool_val) { \
90  if (lhs != null_val) { \
91  return lhs opsym rhs; \
92  } \
93  return null_bool_val; \
94  }
95 
96 #define DEF_CMP_NULLABLE_RHS(type, null_type, opname, opsym) \
97  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_rhs( \
98  const type lhs, \
99  const type rhs, \
100  const null_type null_val, \
101  const int8_t null_bool_val) { \
102  if (rhs != null_val) { \
103  return lhs opsym rhs; \
104  } \
105  return null_bool_val; \
106  }
107 
108 #define DEF_SAFE_DIV_NULLABLE(type, null_type, opname) \
109  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type safe_div_##type( \
110  const type lhs, const type rhs, const null_type null_val) { \
111  if (lhs != null_val && rhs != null_val && rhs != 0) { \
112  return lhs / rhs; \
113  } \
114  return null_val; \
115  }
116 
117 #define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type) \
118  DEF_ARITH_NULLABLE(type, null_type, add, +) \
119  DEF_ARITH_NULLABLE(type, null_type, sub, -) \
120  DEF_ARITH_NULLABLE(type, null_type, mul, *) \
121  DEF_ARITH_NULLABLE(type, null_type, div, /) \
122  DEF_SAFE_DIV_NULLABLE(type, null_type, safe_div) \
123  DEF_ARITH_NULLABLE_LHS(type, null_type, add, +) \
124  DEF_ARITH_NULLABLE_LHS(type, null_type, sub, -) \
125  DEF_ARITH_NULLABLE_LHS(type, null_type, mul, *) \
126  DEF_ARITH_NULLABLE_LHS(type, null_type, div, /) \
127  DEF_ARITH_NULLABLE_RHS(type, null_type, add, +) \
128  DEF_ARITH_NULLABLE_RHS(type, null_type, sub, -) \
129  DEF_ARITH_NULLABLE_RHS(type, null_type, mul, *) \
130  DEF_ARITH_NULLABLE_RHS(type, null_type, div, /) \
131  DEF_CMP_NULLABLE(type, null_type, eq, ==) \
132  DEF_CMP_NULLABLE(type, null_type, ne, !=) \
133  DEF_CMP_NULLABLE(type, null_type, lt, <) \
134  DEF_CMP_NULLABLE(type, null_type, gt, >) \
135  DEF_CMP_NULLABLE(type, null_type, le, <=) \
136  DEF_CMP_NULLABLE(type, null_type, ge, >=) \
137  DEF_CMP_NULLABLE_LHS(type, null_type, eq, ==) \
138  DEF_CMP_NULLABLE_LHS(type, null_type, ne, !=) \
139  DEF_CMP_NULLABLE_LHS(type, null_type, lt, <) \
140  DEF_CMP_NULLABLE_LHS(type, null_type, gt, >) \
141  DEF_CMP_NULLABLE_LHS(type, null_type, le, <=) \
142  DEF_CMP_NULLABLE_LHS(type, null_type, ge, >=) \
143  DEF_CMP_NULLABLE_RHS(type, null_type, eq, ==) \
144  DEF_CMP_NULLABLE_RHS(type, null_type, ne, !=) \
145  DEF_CMP_NULLABLE_RHS(type, null_type, lt, <) \
146  DEF_CMP_NULLABLE_RHS(type, null_type, gt, >) \
147  DEF_CMP_NULLABLE_RHS(type, null_type, le, <=) \
148  DEF_CMP_NULLABLE_RHS(type, null_type, ge, >=)
149 
150 DEF_BINARY_NULLABLE_ALL_OPS(int8_t, int64_t)
151 DEF_BINARY_NULLABLE_ALL_OPS(int16_t, int64_t)
152 DEF_BINARY_NULLABLE_ALL_OPS(int32_t, int64_t)
153 DEF_BINARY_NULLABLE_ALL_OPS(int64_t, int64_t)
154 DEF_BINARY_NULLABLE_ALL_OPS(float, float)
155 DEF_BINARY_NULLABLE_ALL_OPS(double, double)
156 DEF_ARITH_NULLABLE(int8_t, int64_t, mod, %)
157 DEF_ARITH_NULLABLE(int16_t, int64_t, mod, %)
158 DEF_ARITH_NULLABLE(int32_t, int64_t, mod, %)
159 DEF_ARITH_NULLABLE(int64_t, int64_t, mod, %)
160 DEF_ARITH_NULLABLE_LHS(int8_t, int64_t, mod, %)
161 DEF_ARITH_NULLABLE_LHS(int16_t, int64_t, mod, %)
162 DEF_ARITH_NULLABLE_LHS(int32_t, int64_t, mod, %)
163 DEF_ARITH_NULLABLE_LHS(int64_t, int64_t, mod, %)
164 DEF_ARITH_NULLABLE_RHS(int8_t, int64_t, mod, %)
165 DEF_ARITH_NULLABLE_RHS(int16_t, int64_t, mod, %)
166 DEF_ARITH_NULLABLE_RHS(int32_t, int64_t, mod, %)
167 DEF_ARITH_NULLABLE_RHS(int64_t, int64_t, mod, %)
168 
169 #undef DEF_BINARY_NULLABLE_ALL_OPS
170 #undef DEF_SAFE_DIV_NULLABLE
171 #undef DEF_CMP_NULLABLE_RHS
172 #undef DEF_CMP_NULLABLE_LHS
173 #undef DEF_CMP_NULLABLE
174 #undef DEF_ARITH_NULLABLE_RHS
175 #undef DEF_ARITH_NULLABLE_LHS
176 #undef DEF_ARITH_NULLABLE
177 
178 #define DEF_MAP_STRING_TO_DATUM(value_type, value_name) \
179  extern "C" ALWAYS_INLINE DEVICE value_type map_string_to_datum_##value_name( \
180  const int32_t string_id, \
181  const int64_t translation_map_handle, \
182  const int32_t min_source_id) { \
183  const Datum* translation_map = \
184  reinterpret_cast<const Datum*>(translation_map_handle); \
185  const Datum& out_datum = translation_map[string_id - min_source_id]; \
186  return out_datum.value_name##val; \
187  }
188 
189 DEF_MAP_STRING_TO_DATUM(int8_t, bool)
190 DEF_MAP_STRING_TO_DATUM(int8_t, tinyint)
191 DEF_MAP_STRING_TO_DATUM(int16_t, smallint)
192 DEF_MAP_STRING_TO_DATUM(int32_t, int)
193 DEF_MAP_STRING_TO_DATUM(int64_t, bigint)
194 DEF_MAP_STRING_TO_DATUM(float, float)
195 DEF_MAP_STRING_TO_DATUM(double, double)
196 
197 #undef DEF_MAP_STRING_TO_DATUM
198 
199 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
200 scale_decimal_up(const int64_t operand,
201  const uint64_t scale,
202  const int64_t operand_null_val,
203  const int64_t result_null_val) {
204  return operand != operand_null_val ? operand * scale : result_null_val;
205 }
206 
207 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
208 scale_decimal_down_nullable(const int64_t operand,
209  const int64_t scale,
210  const int64_t null_val) {
211  // rounded scale down of a decimal
212  if (operand == null_val) {
213  return null_val;
214  }
215 
216  int64_t tmp = scale >> 1;
217  tmp = operand >= 0 ? operand + tmp : operand - tmp;
218  return tmp / scale;
219 }
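// Worked example of the rounded scale-down above: with operand = 1234567 and
// scale = 10000, tmp becomes 1234567 + 5000 = 1239567, and 1239567 / 10000 == 123,
// i.e. the value is rounded half-away-from-zero rather than truncated.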
220 
221 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
222 scale_decimal_down_not_nullable(const int64_t operand,
223  const int64_t scale,
224  const int64_t null_val) {
225  int64_t tmp = scale >> 1;
226  tmp = operand >= 0 ? operand + tmp : operand - tmp;
227  return tmp / scale;
228 }
229 
230 // Return floor(dividend / divisor).
231 // Assumes 0 < divisor.
232 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend,
233  const int64_t divisor) {
234  return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
235 }
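// Example: plain integer division truncates toward zero (-7 / 2 == -3), while
// floor_div_lhs(-7, 2) first biases the negative dividend to -7 - (2 - 1) = -8,
// giving -8 / 2 == -4 == floor(-7 / 2.0), as required for a positive divisor.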
236 
237 // Return floor(dividend / divisor) or NULL if dividend IS NULL.
238 // Assumes 0 < divisor.
239 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
240 floor_div_nullable_lhs(const int64_t dividend,
241  const int64_t divisor,
242  const int64_t null_val) {
243  return dividend == null_val ? null_val : floor_div_lhs(dividend, divisor);
244 }
245 
246 #define DEF_UMINUS_NULLABLE(type, null_type) \
247  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type uminus_##type##_nullable( \
248  const type operand, const null_type null_val) { \
249  return operand == null_val ? null_val : -operand; \
250  }
251 
252 DEF_UMINUS_NULLABLE(int8_t, int8_t)
253 DEF_UMINUS_NULLABLE(int16_t, int16_t)
254 DEF_UMINUS_NULLABLE(int32_t, int32_t)
255 DEF_UMINUS_NULLABLE(int64_t, int64_t)
256 DEF_UMINUS_NULLABLE(float, float)
257 DEF_UMINUS_NULLABLE(double, double)
258 
259 #undef DEF_UMINUS_NULLABLE
260 
261 #define DEF_CAST_NULLABLE(from_type, to_type) \
262  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
263  cast_##from_type##_to_##to_type##_nullable(const from_type operand, \
264  const from_type from_null_val, \
265  const to_type to_null_val) { \
266  return operand == from_null_val ? to_null_val : operand; \
267  }
268 
269 #define DEF_CAST_SCALED_NULLABLE(from_type, to_type) \
270  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
271  cast_##from_type##_to_##to_type##_scaled_nullable(const from_type operand, \
272  const from_type from_null_val, \
273  const to_type to_null_val, \
274  const to_type divider) { \
275  return operand == from_null_val ? to_null_val : operand / divider; \
276  }
277 
278 #define DEF_CAST_NULLABLE_BIDIR(type1, type2) \
279  DEF_CAST_NULLABLE(type1, type2) \
280  DEF_CAST_NULLABLE(type2, type1)
281 
282 #define DEF_ROUND_NULLABLE(from_type, to_type) \
283  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
284  cast_##from_type##_to_##to_type##_nullable(const from_type operand, \
285  const from_type from_null_val, \
286  const to_type to_null_val) { \
287  return operand == from_null_val \
288  ? to_null_val \
289  : static_cast<to_type>(operand + (operand < from_type(0) \
290  ? from_type(-0.5) \
291  : from_type(0.5))); \
292  }
293 
294 DEF_CAST_NULLABLE_BIDIR(int8_t, int16_t)
295 DEF_CAST_NULLABLE_BIDIR(int8_t, int32_t)
296 DEF_CAST_NULLABLE_BIDIR(int8_t, int64_t)
297 DEF_CAST_NULLABLE_BIDIR(int16_t, int32_t)
298 DEF_CAST_NULLABLE_BIDIR(int16_t, int64_t)
299 DEF_CAST_NULLABLE_BIDIR(int32_t, int64_t)
300 DEF_CAST_NULLABLE_BIDIR(float, double)
301 
302 DEF_CAST_NULLABLE(int8_t, float)
303 DEF_CAST_NULLABLE(int16_t, float)
304 DEF_CAST_NULLABLE(int32_t, float)
305 DEF_CAST_NULLABLE(int64_t, float)
306 DEF_CAST_NULLABLE(int8_t, double)
307 DEF_CAST_NULLABLE(int16_t, double)
308 DEF_CAST_NULLABLE(int32_t, double)
309 DEF_CAST_NULLABLE(int64_t, double)
310 
311 DEF_ROUND_NULLABLE(float, int8_t)
312 DEF_ROUND_NULLABLE(float, int16_t)
313 DEF_ROUND_NULLABLE(float, int32_t)
314 DEF_ROUND_NULLABLE(float, int64_t)
315 DEF_ROUND_NULLABLE(double, int8_t)
316 DEF_ROUND_NULLABLE(double, int16_t)
317 DEF_ROUND_NULLABLE(double, int32_t)
318 DEF_ROUND_NULLABLE(double, int64_t)
319 
320 DEF_CAST_NULLABLE(uint8_t, int32_t)
321 DEF_CAST_NULLABLE(uint16_t, int32_t)
322 DEF_CAST_SCALED_NULLABLE(int64_t, float)
323 DEF_CAST_SCALED_NULLABLE(int64_t, double)
324 
325 #undef DEF_ROUND_NULLABLE
326 #undef DEF_CAST_NULLABLE_BIDIR
327 #undef DEF_CAST_SCALED_NULLABLE
328 #undef DEF_CAST_NULLABLE
329 
330 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_not(const int8_t operand,
331  const int8_t null_val) {
332  return operand == null_val ? operand : (operand ? 0 : 1);
333 }
334 
335 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_and(const int8_t lhs,
336  const int8_t rhs,
337  const int8_t null_val) {
338  if (lhs == null_val) {
339  return rhs == 0 ? rhs : null_val;
340  }
341  if (rhs == null_val) {
342  return lhs == 0 ? lhs : null_val;
343  }
344  return (lhs && rhs) ? 1 : 0;
345 }
346 
347 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_or(const int8_t lhs,
348  const int8_t rhs,
349  const int8_t null_val) {
350  if (lhs == null_val) {
351  return rhs == 0 ? null_val : rhs;
352  }
353  if (rhs == null_val) {
354  return lhs == 0 ? null_val : lhs;
355  }
356  return (lhs || rhs) ? 1 : 0;
357 }
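// logical_and and logical_or above implement SQL three-valued logic:
// NULL AND FALSE == FALSE, NULL AND TRUE == NULL, NULL OR TRUE == TRUE,
// NULL OR FALSE == NULL. A null operand therefore only propagates when the other
// operand cannot decide the result on its own.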
358 
359 // aggregator implementations
360 
361 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count(uint64_t* agg, const int64_t) {
362  return (*agg)++;
363 }
364 
365 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_count_distinct_bitmap(
366  int64_t* agg,
367  const int64_t val,
368  const int64_t min_val,
369  const int64_t bucket_size) {
370  uint64_t bitmap_idx = val - min_val;
371  if (1 < bucket_size) {
372  bitmap_idx /= static_cast<uint64_t>(bucket_size);
373  }
374  reinterpret_cast<int8_t*>(*agg)[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
375 }
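// Example: with min_val = 10 and bucket_size = 1, val = 25 gives bitmap_idx = 15,
// which sets bit 7 of byte 1 (15 >> 3 == 1, 15 & 7 == 7) in the bitmap whose
// address is stored in *agg.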
376 
377 #ifdef _MSC_VER
378 #define GPU_RT_STUB NEVER_INLINE
379 #else
380 #define GPU_RT_STUB NEVER_INLINE __attribute__((optnone))
381 #endif
382 
383 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_gpu(int64_t*,
384  const int64_t,
385  const int64_t,
386  const int64_t,
387  const int64_t,
388  const int64_t,
389  const uint64_t,
390  const uint64_t) {}
391 
392 extern "C" RUNTIME_EXPORT NEVER_INLINE void
393 agg_approximate_count_distinct(int64_t* agg, const int64_t key, const uint32_t b) {
394  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
395  const uint32_t index = hash >> (64 - b);
396  const uint8_t rank = get_rank(hash << b, 64 - b);
397  uint8_t* M = reinterpret_cast<uint8_t*>(*agg);
398  M[index] = std::max(M[index], rank);
399 }
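// HyperLogLog register update: the top b bits of the Murmur hash select one of 2^b
// registers, and each register keeps the maximum rank (position of the leading 1 bit)
// seen in the remaining 64 - b hash bits; the estimator later combines the registers.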
400 
401 extern "C" GPU_RT_STUB void agg_approximate_count_distinct_gpu(int64_t*,
402  const int64_t,
403  const uint32_t,
404  const int64_t,
405  const int64_t) {}
406 
407 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t bit_is_set(const int8_t* bitset,
408  const int64_t val,
409  const int64_t min_val,
410  const int64_t max_val,
411  const int64_t null_val,
412  const int8_t null_bool_val) {
413  if (val == null_val) {
414  return null_bool_val;
415  }
416  if (val < min_val || val > max_val) {
417  return 0;
418  }
419  if (!bitset) {
420  return 0;
421  }
422  const uint64_t bitmap_idx = val - min_val;
423  return bitset[bitmap_idx >> 3] & (1 << (bitmap_idx & 7)) ? 1 : 0;
424 }
425 
426 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
427 compute_int64_t_lower_bound(const int64_t entry_cnt,
428  const int64_t target_value,
429  const int64_t* col_buf) {
430  int64_t l = 0;
431  int64_t h = entry_cnt - 1;
432  while (l < h) {
433  int64_t mid = l + (h - l) / 2;
434  if (target_value < col_buf[mid]) {
435  h = mid;
436  } else {
437  l = mid + 1;
438  }
439  }
440  return l;
441 }
442 
443 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
444 get_valid_buf_start_pos(const int64_t null_start_pos, const int64_t null_end_pos) {
445  return null_start_pos == 0 ? null_end_pos + 1 : 0;
446 }
447 
448 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
449 get_valid_buf_end_pos(const int64_t num_elems,
450  const int64_t null_start_pos,
451  const int64_t null_end_pos) {
452  return null_end_pos == num_elems ? null_start_pos : num_elems;
453 }
454 
455 template <typename T, typename Comparator>
456 inline int64_t compute_current_row_idx_in_frame(const int64_t num_elems,
457  const int64_t cur_row_idx,
458  const T* col_buf,
459  const int32_t* partition_rowid_buf,
460  const int64_t* ordered_index_buf,
461  const T null_val,
462  const bool nulls_first,
463  const int64_t null_start_pos,
464  const int64_t null_end_pos,
465  Comparator cmp) {
466  const auto target_value = col_buf[cur_row_idx];
467  if (target_value == null_val) {
468  for (int64_t target_offset = null_start_pos; target_offset < null_end_pos;
469  target_offset++) {
470  const auto candidate_offset = partition_rowid_buf[ordered_index_buf[target_offset]];
471  if (candidate_offset == cur_row_idx) {
472  return target_offset;
473  }
474  }
475  }
476  auto const modified_null_end_pos = nulls_first ? null_end_pos - 1 : null_end_pos;
477  int64_t l = get_valid_buf_start_pos(null_start_pos, modified_null_end_pos);
478  int64_t h = get_valid_buf_end_pos(num_elems, null_start_pos, modified_null_end_pos);
479  while (l < h) {
480  int64_t mid = l + (h - l) / 2;
481  auto const target_row_idx = partition_rowid_buf[ordered_index_buf[mid]];
482  auto const cur_value = col_buf[target_row_idx];
483  if (cmp(target_value, cur_value)) {
484  h = mid;
485  } else {
486  l = mid + 1;
487  }
488  }
489  int64_t target_offset = l;
490  int64_t candidate_row_idx = partition_rowid_buf[ordered_index_buf[target_offset]];
491  while (col_buf[candidate_row_idx] == target_value && target_offset < num_elems) {
492  if (candidate_row_idx == cur_row_idx) {
493  return target_offset;
494  }
495  candidate_row_idx = partition_rowid_buf[ordered_index_buf[++target_offset]];
496  }
497  return -1;
498 }
499 
500 #define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(value_type, oper_name) \
501  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
502  compute_##value_type##_##oper_name##_current_row_idx_in_frame( \
503  const int64_t num_elems, \
504  const int64_t cur_row_idx, \
505  const value_type* col_buf, \
506  const int32_t* partition_rowid_buf, \
507  const int64_t* ordered_index_buf, \
508  const value_type null_val, \
509  const bool nulls_first, \
510  const int64_t null_start_pos, \
511  const int64_t null_end_pos) { \
512  return compute_current_row_idx_in_frame<value_type>(num_elems, \
513  cur_row_idx, \
514  col_buf, \
515  partition_rowid_buf, \
516  ordered_index_buf, \
517  null_val, \
518  nulls_first, \
519  null_start_pos, \
520  null_end_pos, \
521  std::oper_name<value_type>{}); \
522  }
523 #define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES(oper_name) \
524  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int8_t, oper_name) \
525  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int16_t, oper_name) \
526  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int32_t, oper_name) \
527  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int64_t, oper_name) \
528  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(float, oper_name) \
529  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(double, oper_name)
530 
531 DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES(greater_equal)
532 DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES(less)
533 
534 #undef DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES
535 #undef DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME
536 
537 template <typename TARGET_VAL_TYPE, typename COL_TYPE, typename NULL_TYPE>
538 inline int64_t compute_lower_bound_from_ordered_partition_index(
539  const int64_t num_elems,
540  const TARGET_VAL_TYPE target_val,
541  const COL_TYPE* col_buf,
542  const int32_t* partition_rowid_buf,
543  const int64_t* ordered_index_buf,
544  const NULL_TYPE null_val,
545  const bool nulls_first,
546  const int64_t null_start_offset,
547  const int64_t null_end_offset) {
548  if (target_val == null_val) {
549  return null_start_offset;
550  }
551  auto const modified_null_end_pos = nulls_first ? null_end_offset - 1 : null_end_offset;
552  int64_t l = get_valid_buf_start_pos(null_start_offset, modified_null_end_pos);
553  int64_t h = get_valid_buf_end_pos(num_elems, null_start_offset, modified_null_end_pos);
554  while (l < h) {
555  int64_t mid = l + (h - l) / 2;
556  if (target_val <= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) {
557  h = mid;
558  } else {
559  l = mid + 1;
560  }
561  }
562  return l;
563 }
564 
565 #define DEF_RANGE_MODE_FRAME_LOWER_BOUND( \
566  target_val_type, col_type, null_type, opname, opsym) \
567  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
568  range_mode_##target_val_type##_##col_type##_##null_type##_##opname##_frame_lower_bound( \
569  const int64_t num_elems, \
570  const target_val_type target_value, \
571  const col_type* col_buf, \
572  const int32_t* partition_rowid_buf, \
573  const int64_t* ordered_index_buf, \
574  const int64_t frame_bound_val, \
575  const null_type null_val, \
576  const bool nulls_first, \
577  const int64_t null_start_pos, \
578  const int64_t null_end_pos) { \
579  if (target_value == null_val) { \
580  return null_start_pos; \
581  } \
582  target_val_type new_val = target_value opsym frame_bound_val; \
583  return compute_lower_bound_from_ordered_partition_index<target_val_type, \
584  col_type, \
585  null_type>( \
586  num_elems, \
587  new_val, \
588  col_buf, \
589  partition_rowid_buf, \
590  ordered_index_buf, \
591  null_val, \
592  nulls_first, \
593  null_start_pos, \
594  null_end_pos); \
595  }
596 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int8_t, int8_t, int8_t, add, +)
597 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int8_t, int8_t, int8_t, sub, -)
598 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, int16_t, int16_t, add, +)
599 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, int16_t, int16_t, sub, -)
600 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, int16_t, int64_t, add, +)
601 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int16_t, int16_t, int64_t, sub, -)
602 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, int32_t, int32_t, add, +)
603 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, int32_t, int32_t, sub, -)
604 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, int32_t, int64_t, add, +)
605 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int32_t, int32_t, int64_t, sub, -)
606 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int16_t, int64_t, add, +)
607 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int16_t, int64_t, sub, -)
608 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int32_t, int64_t, add, +)
609 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int32_t, int64_t, sub, -)
610 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int64_t, int64_t, add, +)
611 DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int64_t, int64_t, sub, -)
612 DEF_RANGE_MODE_FRAME_LOWER_BOUND(float, float, float, add, +)
613 DEF_RANGE_MODE_FRAME_LOWER_BOUND(float, float, float, sub, -)
614 DEF_RANGE_MODE_FRAME_LOWER_BOUND(double, double, double, add, +)
615 DEF_RANGE_MODE_FRAME_LOWER_BOUND(double, double, double, sub, -)
616 #undef DEF_RANGE_MODE_FRAME_LOWER_BOUND
617 
618 template <typename TARGET_VAL_TYPE, typename COL_TYPE, typename NULL_TYPE>
619 inline int64_t compute_upper_bound_from_ordered_partition_index(
620  const int64_t num_elems,
621  const TARGET_VAL_TYPE target_val,
622  const COL_TYPE* col_buf,
623  const int32_t* partition_rowid_buf,
624  const int64_t* ordered_index_buf,
625  const NULL_TYPE null_val,
626  const bool nulls_first,
627  const int64_t null_start_offset,
628  const int64_t null_end_offset) {
629  if (target_val == null_val) {
630  return null_end_offset;
631  }
632  auto const modified_null_end_pos = nulls_first ? null_end_offset - 1 : null_end_offset;
633  int64_t l = get_valid_buf_start_pos(null_start_offset, modified_null_end_pos);
634  int64_t h = get_valid_buf_end_pos(num_elems, null_start_offset, modified_null_end_pos);
635  while (l < h) {
636  int64_t mid = l + (h - l) / 2;
637  if (target_val >= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) {
638  l = mid + 1;
639  } else {
640  h = mid;
641  }
642  }
643  return l;
644 }
645 
646 #define DEF_RANGE_MODE_FRAME_UPPER_BOUND( \
647  target_val_type, col_type, null_type, opname, opsym) \
648  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
649  range_mode_##target_val_type##_##col_type##_##null_type##_##opname##_frame_upper_bound( \
650  const int64_t num_elems, \
651  const target_val_type target_value, \
652  const col_type* col_buf, \
653  const int32_t* partition_rowid_buf, \
654  const int64_t* ordered_index_buf, \
655  const int64_t frame_bound_val, \
656  const null_type null_val, \
657  const bool nulls_first, \
658  const int64_t null_start_pos, \
659  const int64_t null_end_pos) { \
660  if (target_value == null_val) { \
661  return null_end_pos; \
662  } \
663  target_val_type new_val = target_value opsym frame_bound_val; \
664  return compute_upper_bound_from_ordered_partition_index<target_val_type, \
665  col_type, \
666  null_type>( \
667  num_elems, \
668  new_val, \
669  col_buf, \
670  partition_rowid_buf, \
671  ordered_index_buf, \
672  null_val, \
673  nulls_first, \
674  null_start_pos, \
675  null_end_pos); \
676  }
677 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int8_t, int8_t, int8_t, add, +)
678 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int8_t, int8_t, int8_t, sub, -)
679 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, int16_t, int16_t, add, +)
680 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, int16_t, int16_t, sub, -)
681 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, int16_t, int64_t, add, +)
682 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int16_t, int16_t, int64_t, sub, -)
683 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, int32_t, int32_t, add, +)
684 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, int32_t, int32_t, sub, -)
685 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, int32_t, int64_t, add, +)
686 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int32_t, int32_t, int64_t, sub, -)
687 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int16_t, int64_t, add, +)
688 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int16_t, int64_t, sub, -)
689 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int32_t, int64_t, add, +)
690 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int32_t, int64_t, sub, -)
691 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int64_t, int64_t, add, +)
692 DEF_RANGE_MODE_FRAME_UPPER_BOUND(int64_t, int64_t, int64_t, sub, -)
693 DEF_RANGE_MODE_FRAME_UPPER_BOUND(float, float, float, add, +)
694 DEF_RANGE_MODE_FRAME_UPPER_BOUND(float, float, float, sub, -)
695 DEF_RANGE_MODE_FRAME_UPPER_BOUND(double, double, double, add, +)
696 DEF_RANGE_MODE_FRAME_UPPER_BOUND(double, double, double, sub, -)
697 #undef DEF_RANGE_MODE_FRAME_UPPER_BOUND
698 
699 template <typename COL_TYPE, typename LOGICAL_TYPE>
700 inline LOGICAL_TYPE get_value_in_window_frame(const int64_t target_row_idx_in_frame,
701  const int64_t frame_start_offset,
702  const int64_t frame_end_offset,
703  const COL_TYPE* col_buf,
704  const int32_t* partition_rowid_buf,
705  const int64_t* ordered_index_buf,
706  const LOGICAL_TYPE logical_null_val,
707  const LOGICAL_TYPE col_null_val) {
708  if (target_row_idx_in_frame < frame_start_offset ||
709  target_row_idx_in_frame > frame_end_offset) {
710  return logical_null_val;
711  }
712  const auto target_offset =
713  partition_rowid_buf[ordered_index_buf[target_row_idx_in_frame]];
714  LOGICAL_TYPE target_val = col_buf[target_offset];
715  if (target_val == col_null_val) {
716  return logical_null_val;
717  }
718  return target_val;
719 }
720 
721 #define DEF_GET_VALUE_IN_FRAME(col_type, logical_type) \
722  extern "C" RUNTIME_EXPORT ALWAYS_INLINE logical_type \
723  get_##col_type##_value_##logical_type##_type_in_frame( \
724  const int64_t target_row_idx_in_frame, \
725  const int64_t frame_start_offset, \
726  const int64_t frame_end_offset, \
727  const col_type* col_buf, \
728  const int32_t* partition_rowid_buf, \
729  const int64_t* ordered_index_buf, \
730  const logical_type logical_null_val, \
731  const logical_type col_null_val) { \
732  return get_value_in_window_frame<col_type, logical_type>(target_row_idx_in_frame, \
733  frame_start_offset, \
734  frame_end_offset, \
735  col_buf, \
736  partition_rowid_buf, \
737  ordered_index_buf, \
738  logical_null_val, \
739  col_null_val); \
740  }
741 DEF_GET_VALUE_IN_FRAME(int8_t, int8_t)
742 DEF_GET_VALUE_IN_FRAME(int8_t, int16_t)
743 DEF_GET_VALUE_IN_FRAME(int8_t, int32_t)
744 DEF_GET_VALUE_IN_FRAME(int8_t, int64_t)
745 DEF_GET_VALUE_IN_FRAME(int16_t, int16_t)
746 DEF_GET_VALUE_IN_FRAME(int16_t, int32_t)
747 DEF_GET_VALUE_IN_FRAME(int16_t, int64_t)
748 DEF_GET_VALUE_IN_FRAME(int32_t, int32_t)
749 DEF_GET_VALUE_IN_FRAME(int32_t, int64_t)
750 DEF_GET_VALUE_IN_FRAME(int64_t, int64_t)
751 DEF_GET_VALUE_IN_FRAME(float, float)
752 DEF_GET_VALUE_IN_FRAME(double, double)
753 #undef DEF_GET_VALUE_IN_FRAME
754 
755 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t encode_date(int64_t decoded_val,
756  int64_t null_val,
757  int64_t multiplier) {
758  return decoded_val == null_val ? decoded_val : decoded_val * multiplier;
759 }
760 
761 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
762 compute_row_mode_start_index_sub(int64_t candidate_index,
763  int64_t current_partition_start_offset,
764  int64_t frame_bound) {
765  int64_t index = candidate_index - current_partition_start_offset - frame_bound;
766  return index < 0 ? 0 : index;
767 }
768 
769 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
770 compute_row_mode_start_index_add(int64_t candidate_index,
771  int64_t current_partition_start_offset,
772  int64_t frame_bound,
773  int64_t num_current_partition_elem) {
774  int64_t index = candidate_index - current_partition_start_offset + frame_bound;
775  return index >= num_current_partition_elem ? num_current_partition_elem : index;
776 }
777 
778 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
779 compute_row_mode_end_index_sub(int64_t candidate_index,
780  int64_t current_partition_start_offset,
781  int64_t frame_bound) {
782  int64_t index = candidate_index - current_partition_start_offset - frame_bound;
783  return index < 0 ? 0 : index + 1;
784 }
785 
786 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
787 compute_row_mode_end_index_add(int64_t candidate_index,
788  int64_t current_partition_start_offset,
789  int64_t frame_bound,
790  int64_t num_current_partition_elem) {
791  int64_t index = candidate_index - current_partition_start_offset + frame_bound;
792  return index >= num_current_partition_elem ? num_current_partition_elem : index + 1;
793 }
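// Rough example of the ROWS-mode helpers above (assuming candidate_index is the row's
// global position and current_partition_start_offset is the partition's first row):
// for the row at position 12 in a partition starting at 10, "5 PRECEDING" gives
// compute_row_mode_start_index_sub(12, 10, 5) == max(12 - 10 - 5, 0) == 0, and
// "3 FOLLOWING" over a 20-row partition gives
// compute_row_mode_end_index_add(12, 10, 3, 20) == (12 - 10 + 3) + 1 == 6,
// with both indices clamped to the partition bounds.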
794 
795 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_integer_aggregation_tree(
796  int64_t** aggregation_trees,
797  size_t partition_idx) {
798  return aggregation_trees[partition_idx];
799 }
800 
801 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double* get_double_aggregation_tree(
802  int64_t** aggregation_trees,
803  size_t partition_idx) {
804  double** casted_aggregation_trees = reinterpret_cast<double**>(aggregation_trees);
805  return casted_aggregation_trees[partition_idx];
806 }
807 
808 extern "C" RUNTIME_EXPORT ALWAYS_INLINE SumAndCountPair<int64_t>*
809 get_integer_derived_aggregation_tree(int64_t** aggregation_trees, size_t partition_idx) {
810  SumAndCountPair<int64_t>** casted_aggregation_trees =
811  reinterpret_cast<SumAndCountPair<int64_t>**>(aggregation_trees);
812  return casted_aggregation_trees[partition_idx];
813 }
814 
815 extern "C" RUNTIME_EXPORT ALWAYS_INLINE SumAndCountPair<double>*
816 get_double_derived_aggregation_tree(int64_t** aggregation_trees, size_t partition_idx) {
817  SumAndCountPair<double>** casted_aggregation_trees =
818  reinterpret_cast<SumAndCountPair<double>**>(aggregation_trees);
819  return casted_aggregation_trees[partition_idx];
820 }
821 
822 extern "C" RUNTIME_EXPORT ALWAYS_INLINE size_t
823 getStartOffsetForSegmentTreeTraversal(size_t level, size_t tree_fanout) {
824  size_t offset = 0;
825  for (size_t i = 0; i < level; i++) {
826  offset += pow(tree_fanout, i);
827  }
828  return offset;
829 }
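// The loop above evaluates sum_{i=0}^{level-1} tree_fanout^i, the number of nodes on
// all levels above `level` in a complete tree laid out level by level; e.g. with
// tree_fanout = 4, level 2 starts at offset 1 + 4 = 5 in the flattened array.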
830 namespace {
831 enum class AggFuncType { MIN, MAX, SUM };
832 
833 template <AggFuncType AGG_FUNC_TYPE, typename AGG_TYPE>
834 inline AGG_TYPE agg_func(AGG_TYPE const lhs, AGG_TYPE const rhs) {
835  if constexpr (AGG_FUNC_TYPE == AggFuncType::MIN) {
836  return std::min(lhs, rhs);
837  } else if constexpr (AGG_FUNC_TYPE == AggFuncType::MAX) {
838  return std::max(lhs, rhs);
839  } else {
840  return lhs + rhs;
841  }
842 }
843 } // namespace
844 
845 template <AggFuncType AGG_FUNC_TYPE, typename AGG_TYPE>
846 inline AGG_TYPE compute_window_func_via_aggregation_tree(
847  AGG_TYPE* aggregation_tree_for_partition,
848  size_t query_range_start_idx,
849  size_t query_range_end_idx,
850  size_t leaf_level,
851  size_t tree_fanout,
852  AGG_TYPE init_val,
853  AGG_TYPE invalid_val,
854  AGG_TYPE null_val) {
855  size_t leaf_start_idx = getStartOffsetForSegmentTreeTraversal(leaf_level, tree_fanout);
856  size_t begin = leaf_start_idx + query_range_start_idx;
857  size_t end = leaf_start_idx + query_range_end_idx;
858  AGG_TYPE res = init_val;
859  bool all_nulls = true;
860  for (int level = leaf_level; level >= 0; level--) {
861  size_t parentBegin = begin / tree_fanout;
862  size_t parentEnd = (end - 1) / tree_fanout;
863  if (parentBegin == parentEnd) {
864  for (size_t pos = begin; pos < end; pos++) {
865  if (aggregation_tree_for_partition[pos] != null_val) {
866  all_nulls = false;
867  res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
868  }
869  }
870  return all_nulls ? null_val : res;
871  } else if (parentBegin > parentEnd) {
872  return null_val;
873  }
874  size_t group_begin = (parentBegin * tree_fanout) + 1;
875  if (begin != group_begin) {
876  size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1;
877  for (size_t pos = begin; pos < limit; pos++) {
878  if (aggregation_tree_for_partition[pos] != null_val) {
879  all_nulls = false;
880  res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
881  }
882  }
883  parentBegin++;
884  }
885  size_t group_end = (parentEnd * tree_fanout) + 1;
886  if (end != group_end) {
887  for (size_t pos = group_end; pos < end; pos++) {
888  if (aggregation_tree_for_partition[pos] != null_val) {
889  all_nulls = false;
890  res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
891  }
892  }
893  }
894  begin = parentBegin;
895  end = parentEnd;
896  }
897  return invalid_val;
898 }
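// The traversal above is a bottom-up segment-tree range query: starting from the leaves
// bounding [query_range_start_idx, query_range_end_idx), it aggregates the cells of any
// partially covered parent group, then climbs one level and repeats until both endpoints
// share a parent. Leaves equal to null_val are skipped, so an all-NULL range yields
// null_val.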
899 
900 #define DEF_SEARCH_AGGREGATION_TREE(agg_value_type) \
901  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_value_type \
902  search_##agg_value_type##_aggregation_tree( \
903  agg_value_type* aggregated_tree_for_partition, \
904  size_t query_range_start_idx, \
905  size_t query_range_end_idx, \
906  size_t leaf_level, \
907  size_t tree_fanout, \
908  bool decimal_type, \
909  size_t scale, \
910  agg_value_type invalid_val, \
911  agg_value_type null_val, \
912  int32_t agg_type) { \
913  if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) { \
914  return null_val; \
915  } \
916  switch (agg_type) { \
917  case 1: { \
918  return compute_window_func_via_aggregation_tree<AggFuncType::MIN>( \
919  aggregated_tree_for_partition, \
920  query_range_start_idx, \
921  query_range_end_idx, \
922  leaf_level, \
923  tree_fanout, \
924  std::numeric_limits<agg_value_type>::max(), \
925  invalid_val, \
926  null_val); \
927  } \
928  case 2: { \
929  return compute_window_func_via_aggregation_tree<AggFuncType::MAX>( \
930  aggregated_tree_for_partition, \
931  query_range_start_idx, \
932  query_range_end_idx, \
933  leaf_level, \
934  tree_fanout, \
935  std::numeric_limits<agg_value_type>::lowest(), \
936  invalid_val, \
937  null_val); \
938  } \
939  default: { \
940  return compute_window_func_via_aggregation_tree<AggFuncType::SUM>( \
941  aggregated_tree_for_partition, \
942  query_range_start_idx, \
943  query_range_end_idx, \
944  leaf_level, \
945  tree_fanout, \
946  static_cast<agg_value_type>(0), \
947  invalid_val, \
948  null_val); \
949  } \
950  } \
951  }
952 
953 DEF_SEARCH_AGGREGATION_TREE(int64_t)
954 DEF_SEARCH_AGGREGATION_TREE(double)
955 #undef DEF_SEARCH_AGGREGATION_TREE
956 
957 template <typename AGG_VALUE_TYPE>
958 inline void compute_derived_aggregates(
959  SumAndCountPair<AGG_VALUE_TYPE>* aggregation_tree_for_partition,
960  SumAndCountPair<AGG_VALUE_TYPE>& res,
961  size_t query_range_start_idx,
962  size_t query_range_end_idx,
963  size_t leaf_level,
964  size_t tree_fanout,
965  AGG_VALUE_TYPE invalid_val,
966  AGG_VALUE_TYPE null_val) {
967  size_t leaf_start_idx = getStartOffsetForSegmentTreeTraversal(leaf_level, tree_fanout);
968  size_t begin = leaf_start_idx + query_range_start_idx;
969  size_t end = leaf_start_idx + query_range_end_idx;
970  SumAndCountPair<AGG_VALUE_TYPE> null_res{null_val, 0};
971  SumAndCountPair<AGG_VALUE_TYPE> invalid_res{invalid_val, 0};
972  bool all_nulls = true;
973  for (int level = leaf_level; level >= 0; level--) {
974  size_t parentBegin = begin / tree_fanout;
975  size_t parentEnd = (end - 1) / tree_fanout;
976  if (parentBegin == parentEnd) {
977  for (size_t pos = begin; pos < end; pos++) {
978  if (aggregation_tree_for_partition[pos].sum != null_val) {
979  all_nulls = false;
980  res.sum += aggregation_tree_for_partition[pos].sum;
981  res.count += aggregation_tree_for_partition[pos].count;
982  }
983  }
984  if (all_nulls) {
985  res = null_res;
986  }
987  return;
988  } else if (parentBegin > parentEnd) {
989  res = null_res;
990  return;
991  }
992  size_t group_begin = (parentBegin * tree_fanout) + 1;
993  if (begin != group_begin) {
994  size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1;
995  for (size_t pos = begin; pos < limit; pos++) {
996  if (aggregation_tree_for_partition[pos].sum != null_val) {
997  all_nulls = false;
998  res.sum += aggregation_tree_for_partition[pos].sum;
999  res.count += aggregation_tree_for_partition[pos].count;
1000  }
1001  }
1002  parentBegin++;
1003  }
1004  size_t group_end = (parentEnd * tree_fanout) + 1;
1005  if (end != group_end) {
1006  for (size_t pos = group_end; pos < end; pos++) {
1007  if (aggregation_tree_for_partition[pos].sum != null_val) {
1008  all_nulls = false;
1009  res.sum += aggregation_tree_for_partition[pos].sum;
1010  res.count += aggregation_tree_for_partition[pos].count;
1011  }
1012  }
1013  }
1014  begin = parentBegin;
1015  end = parentEnd;
1016  }
1017  res = invalid_res;
1018  return;
1019 }
1020 
1021 #define DEF_SEARCH_DERIVED_AGGREGATION_TREE(agg_value_type) \
1022  extern "C" RUNTIME_EXPORT ALWAYS_INLINE double \
1023  search_##agg_value_type##_derived_aggregation_tree( \
1024  SumAndCountPair<agg_value_type>* aggregated_tree_for_partition, \
1025  size_t query_range_start_idx, \
1026  size_t query_range_end_idx, \
1027  size_t leaf_level, \
1028  size_t tree_fanout, \
1029  bool decimal_type, \
1030  size_t scale, \
1031  agg_value_type invalid_val, \
1032  agg_value_type null_val, \
1033  int32_t agg_type) { \
1034  if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) { \
1035  return null_val; \
1036  } \
1037  SumAndCountPair<agg_value_type> res{0, 0}; \
1038  compute_derived_aggregates<agg_value_type>(aggregated_tree_for_partition, \
1039  res, \
1040  query_range_start_idx, \
1041  query_range_end_idx, \
1042  leaf_level, \
1043  tree_fanout, \
1044  invalid_val, \
1045  null_val); \
1046  if (res.sum == null_val) { \
1047  return null_val; \
1048  } else if (res.count > 0) { \
1049  if (decimal_type) { \
1050  return (static_cast<double>(res.sum) / pow(10, scale)) / res.count; \
1051  } \
1052  return (static_cast<double>(res.sum)) / res.count; \
1053  } else { \
1054  return invalid_val; \
1055  } \
1056  }
1057 
1058 DEF_SEARCH_DERIVED_AGGREGATION_TREE(int64_t)
1059 DEF_SEARCH_DERIVED_AGGREGATION_TREE(double)
1060 #undef DEF_SEARCH_DERIVED_AGGREGATION_TREE
1061 
1062 #define DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG(agg_type, null_type) \
1063  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_type \
1064  handle_null_val_##agg_type##_##null_type##_window_framing_agg( \
1065  agg_type res, null_type agg_null_val, agg_type input_col_null_val) { \
1066  if (res == agg_null_val) { \
1067  return input_col_null_val; \
1068  } \
1069  return res; \
1070  }
1074 #undef DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG
1075 
1076 template <typename T>
1077 T fill_missing_value(int64_t const cur_idx,
1078  T const null_val,
1079  T* const col_buf,
1080  int64_t const num_elems_in_partition,
1081  int32_t* const partition_rowid_buf,
1082  int64_t* const ordered_index_buf,
1083  bool const is_forward_fill) {
1084  T const cur_val = col_buf[partition_rowid_buf[ordered_index_buf[cur_idx]]];
1085  if (cur_val == null_val) {
1086  if (is_forward_fill) {
1087  for (int64_t cand_idx = cur_idx - 1; cand_idx >= 0; --cand_idx) {
1088  T const candidate_val = col_buf[partition_rowid_buf[ordered_index_buf[cand_idx]]];
1089  if (candidate_val != null_val) {
1090  return candidate_val;
1091  }
1092  }
1093  } else {
1094  for (int64_t cand_idx = cur_idx + 1; cand_idx < num_elems_in_partition;
1095  ++cand_idx) {
1096  T const candidate_val = col_buf[partition_rowid_buf[ordered_index_buf[cand_idx]]];
1097  if (candidate_val != null_val) {
1098  return candidate_val;
1099  }
1100  }
1101  }
1102  }
1103  return cur_val;
1104 }
1105 #define DEF_FILL_MISSING_VALUE(col_type) \
1106  extern "C" RUNTIME_EXPORT ALWAYS_INLINE col_type fill_##col_type##_missing_value( \
1107  int64_t const cur_row_idx_in_frame, \
1108  col_type const null_val, \
1109  col_type* const col_buf, \
1110  int64_t const num_elems_in_partition, \
1111  int32_t* const partition_rowid_buf, \
1112  int64_t* const ordered_index_buf, \
1113  bool const is_forward_fill) { \
1114  return fill_missing_value<col_type>(cur_row_idx_in_frame, \
1115  null_val, \
1116  col_buf, \
1117  num_elems_in_partition, \
1118  partition_rowid_buf, \
1119  ordered_index_buf, \
1120  is_forward_fill); \
1121  }
1122 DEF_FILL_MISSING_VALUE(int8_t)
1123 DEF_FILL_MISSING_VALUE(int16_t)
1124 DEF_FILL_MISSING_VALUE(int32_t)
1125 DEF_FILL_MISSING_VALUE(int64_t)
1126 DEF_FILL_MISSING_VALUE(float)
1127 DEF_FILL_MISSING_VALUE(double)
1128 #undef DEF_FILL_MISSING_VALUE
1129 
1130 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum(int64_t* agg, const int64_t val) {
1131  const auto old = *agg;
1132  *agg += val;
1133  return old;
1134 }
1135 
1136 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_if(int64_t* agg,
1137  const int64_t val,
1138  const int8_t cond) {
1139  return cond ? agg_sum(agg, val) : *agg;
1140 }
1141 
1142 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max(int64_t* agg, const int64_t val) {
1143  *agg = std::max(*agg, val);
1144 }
1145 
1146 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min(int64_t* agg, const int64_t val) {
1147  *agg = std::min(*agg, val);
1148 }
1149 
1150 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id(int64_t* agg, const int64_t val) {
1151  *agg = val;
1152 }
1153 
1154 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t* agg_id_varlen(int8_t* varlen_buffer,
1155  const int64_t offset,
1156  const int8_t* value,
1157  const int64_t size_bytes) {
1158  for (auto i = 0; i < size_bytes; i++) {
1159  varlen_buffer[offset + i] = value[i];
1160  }
1161  return &varlen_buffer[offset];
1162 }
1163 
1164 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1165 checked_single_agg_id(int64_t* agg, const int64_t val, const int64_t null_val) {
1166  if (val == null_val) {
1167  return 0;
1168  }
1169 
1170  if (*agg == val) {
1171  return 0;
1172  } else if (*agg == null_val) {
1173  *agg = val;
1174  return 0;
1175  } else {
1176  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
1177  return 15;
1178  }
1179 }
1180 
1181 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_count_distinct_bitmap_skip_val(
1182  int64_t* agg,
1183  const int64_t val,
1184  const int64_t min_val,
1185  const int64_t bucket_size,
1186  const int64_t skip_val) {
1187  if (val != skip_val) {
1188  agg_count_distinct_bitmap(agg, val, min_val, bucket_size);
1189  }
1190 }
1191 
1192 extern "C" GPU_RT_STUB void agg_count_distinct_bitmap_skip_val_gpu(int64_t*,
1193  const int64_t,
1194  const int64_t,
1195  const int64_t,
1196  const int64_t,
1197  const int64_t,
1198  const int64_t,
1199  const uint64_t,
1200  const uint64_t) {}
1201 
1202 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_int32(uint32_t* agg,
1203  const int32_t) {
1204  return (*agg)++;
1205 }
1206 
1207 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_if_int32(uint32_t* agg,
1208  const int32_t cond) {
1209  return cond ? (*agg)++ : *agg;
1210 }
1211 
1212 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_int32(int32_t* agg,
1213  const int32_t val) {
1214  const auto old = *agg;
1215  *agg += val;
1216  return old;
1217 }
1218 
1219 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_if_int32(int32_t* agg,
1220  const int32_t val,
1221  const int8_t cond) {
1222  return cond ? agg_sum_int32(agg, val) : *agg;
1223 }
1224 
1225 #define DEF_AGG_MAX_INT(n) \
1226  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_int##n(int##n##_t* agg, \
1227  const int##n##_t val) { \
1228  *agg = std::max(*agg, val); \
1229  }
1230 
1231 DEF_AGG_MAX_INT(32)
1232 DEF_AGG_MAX_INT(16)
1233 DEF_AGG_MAX_INT(8)
1234 #undef DEF_AGG_MAX_INT
1235 
1236 #define DEF_AGG_MIN_INT(n) \
1237  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_int##n(int##n##_t* agg, \
1238  const int##n##_t val) { \
1239  *agg = std::min(*agg, val); \
1240  }
1241 
1242 DEF_AGG_MIN_INT(32)
1243 DEF_AGG_MIN_INT(16)
1244 DEF_AGG_MIN_INT(8)
1245 #undef DEF_AGG_MIN_INT
1246 
1247 #define DEF_AGG_ID_INT(n) \
1248  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_int##n(int##n##_t* agg, \
1249  const int##n##_t val) { \
1250  *agg = val; \
1251  }
1252 
1253 #define DEF_CHECKED_SINGLE_AGG_ID_INT(n) \
1254  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id_int##n( \
1255  int##n##_t* agg, const int##n##_t val, const int##n##_t null_val) { \
1256  if (val == null_val) { \
1257  return 0; \
1258  } \
1259  if (*agg == val) { \
1260  return 0; \
1261  } else if (*agg == null_val) { \
1262  *agg = val; \
1263  return 0; \
1264  } else { \
1265  /* see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES*/ \
1266  return 15; \
1267  } \
1268  }
1269 
1270 DEF_AGG_ID_INT(32)
1271 DEF_AGG_ID_INT(16)
1272 DEF_AGG_ID_INT(8)
1273 
1274 DEF_CHECKED_SINGLE_AGG_ID_INT(32)
1275 DEF_CHECKED_SINGLE_AGG_ID_INT(16)
1276 DEF_CHECKED_SINGLE_AGG_ID_INT(8)
1277 
1278 #undef DEF_AGG_ID_INT
1279 #undef DEF_CHECKED_SINGLE_AGG_ID_INT
1280 
1281 #define DEF_WRITE_PROJECTION_INT(n) \
1282  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void write_projection_int##n( \
1283  int8_t* slot_ptr, const int##n##_t val, const int64_t init_val) { \
1284  if (val != init_val) { \
1285  *reinterpret_cast<int##n##_t*>(slot_ptr) = val; \
1286  } \
1287  }
1288 
1289 DEF_WRITE_PROJECTION_INT(64)
1290 DEF_WRITE_PROJECTION_INT(32)
1291 #undef DEF_WRITE_PROJECTION_INT
1292 
1293 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t* agg,
1294  const int64_t val,
1295  const int64_t skip_val) {
1296  const auto old = *agg;
1297  if (val != skip_val) {
1298  if (old != skip_val) {
1299  return agg_sum(agg, val);
1300  } else {
1301  *agg = val;
1302  }
1303  }
1304  return old;
1305 }
1306 
1307 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1308 agg_sum_int32_skip_val(int32_t* agg, const int32_t val, const int32_t skip_val) {
1309  const auto old = *agg;
1310  if (val != skip_val) {
1311  if (old != skip_val) {
1312  return agg_sum_int32(agg, val);
1313  } else {
1314  *agg = val;
1315  }
1316  }
1317  return old;
1318 }
1319 
1320 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
1321 agg_sum_if_skip_val(int64_t* agg,
1322  const int64_t val,
1323  const int64_t skip_val,
1324  const int8_t cond) {
1325  return cond ? agg_sum_skip_val(agg, val, skip_val) : *agg;
1326 }
1327 
1328 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1329 agg_sum_if_int32_skip_val(int32_t* agg,
1330  const int32_t val,
1331  const int32_t skip_val,
1332  const int8_t cond) {
1333  return cond ? agg_sum_int32_skip_val(agg, val, skip_val) : *agg;
1334 }
1335 
1336 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_if(uint64_t* agg,
1337  const int64_t cond) {
1338  return cond ? (*agg)++ : *agg;
1339 }
1340 
1341 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t
1342 agg_count_skip_val(uint64_t* agg, const int64_t val, const int64_t skip_val) {
1343  if (val != skip_val) {
1344  return agg_count(agg, val);
1345  }
1346  return *agg;
1347 }
1348 
1349 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t
1350 agg_count_if_skip_val(uint64_t* agg, const int64_t cond, const int64_t skip_val) {
1351  if (cond != skip_val) {
1352  return agg_count_if(agg, cond);
1353  }
1354  return *agg;
1355 }
1356 
1357 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t
1358 agg_count_int32_skip_val(uint32_t* agg, const int32_t val, const int32_t skip_val) {
1359  if (val != skip_val) {
1360  return agg_count_int32(agg, val);
1361  }
1362  return *agg;
1363 }
1364 
1365 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t
1366 agg_count_if_int32_skip_val(uint32_t* agg, const int32_t cond, const int32_t skip_val) {
1367  if (cond != skip_val) {
1368  return agg_count_if_int32(agg, cond);
1369  }
1370  return *agg;
1371 }
1372 
1373 #define DEF_SKIP_AGG_ADD(base_agg_func) \
1374  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1375  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
1376  if (val != skip_val) { \
1377  base_agg_func(agg, val); \
1378  } \
1379  }
1380 
1381 #define DEF_SKIP_AGG(base_agg_func) \
1382  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1383  DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
1384  if (val != skip_val) { \
1385  const DATA_T old_agg = *agg; \
1386  if (old_agg != skip_val) { \
1387  base_agg_func(agg, val); \
1388  } else { \
1389  *agg = val; \
1390  } \
1391  } \
1392  }
1393 
1394 #define DATA_T int64_t
1395 DEF_SKIP_AGG(agg_max)
1396 DEF_SKIP_AGG(agg_min)
1397 #undef DATA_T
1398 
1399 #define DATA_T int32_t
1400 DEF_SKIP_AGG(agg_max_int32)
1401 DEF_SKIP_AGG(agg_min_int32)
1402 #undef DATA_T
1403 
1404 #define DATA_T int16_t
1405 DEF_SKIP_AGG(agg_max_int16)
1406 DEF_SKIP_AGG(agg_min_int16)
1407 #undef DATA_T
1408 
1409 #define DATA_T int8_t
1410 DEF_SKIP_AGG(agg_max_int8)
1411 DEF_SKIP_AGG(agg_min_int8)
1412 #undef DATA_T
1413 
1414 #undef DEF_SKIP_AGG_ADD
1415 #undef DEF_SKIP_AGG
1416 
1417 // TODO(alex): fix signature
1418 
1419 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_double(uint64_t* agg,
1420  const double val) {
1421  return (*agg)++;
1422 }
1423 
1424 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_double(int64_t* agg,
1425  const double val) {
1426  const auto r = *reinterpret_cast<const double*>(agg) + val;
1427  *agg = *reinterpret_cast<const int64_t*>(may_alias_ptr(&r));
1428 }
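// The int64_t aggregate slot holds the raw bit pattern of the running double sum;
// may_alias_ptr (from TypePunning.h) lets the bits be reinterpreted without undefined
// behavior from strict-aliasing rules. The float aggregates below use the same trick
// with 32-bit slots.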
1429 
1430 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_if_double(int64_t* agg,
1431  const double val,
1432  const int8_t cond) {
1433  if (cond) {
1434  agg_sum_double(agg, val);
1435  }
1436 }
1437 
1438 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_double(int64_t* agg,
1439  const double val) {
1440  const auto r = std::max(*reinterpret_cast<const double*>(agg), val);
1441  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
1442 }
1443 
1444 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_double(int64_t* agg,
1445  const double val) {
1446  const auto r = std::min(*reinterpret_cast<const double*>(agg), val);
1447  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
1448 }
1449 
1450 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_double(int64_t* agg,
1451  const double val) {
1452  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
1453 }
1454 
1455 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1456 checked_single_agg_id_double(int64_t* agg, const double val, const double null_val) {
1457  if (val == null_val) {
1458  return 0;
1459  }
1460 
1461  if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)))) {
1462  return 0;
1463  } else if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&null_val)))) {
1464  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
1465  return 0;
1466  } else {
1467  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
1468  return 15;
1469  }
1470 }
1471 
1472 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_float(uint32_t* agg,
1473  const float val) {
1474  return (*agg)++;
1475 }
1476 
1477 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_float(int32_t* agg,
1478  const float val) {
1479  const auto r = *reinterpret_cast<const float*>(agg) + val;
1480  *agg = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
1481 }
1482 
1483 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_if_sum_float(int32_t* agg,
1484  const float val,
1485  const int8_t cond) {
1486  if (cond) {
1487  agg_sum_float(agg, val);
1488  }
1489 }
1490 
1491 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_float(int32_t* agg,
1492  const float val) {
1493  const auto r = std::max(*reinterpret_cast<const float*>(agg), val);
1494  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
1495 }
1496 
1497 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_float(int32_t* agg,
1498  const float val) {
1499  const auto r = std::min(*reinterpret_cast<const float*>(agg), val);
1500  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
1501 }
1502 
1503 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_float(int32_t* agg, const float val) {
1504  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
1505 }
1506 
1507 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1508 checked_single_agg_id_float(int32_t* agg, const float val, const float null_val) {
1509  if (val == null_val) {
1510  return 0;
1511  }
1512 
1513  if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)))) {
1514  return 0;
1515  } else if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&null_val)))) {
1516  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
1517  return 0;
1518  } else {
1519  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
1520  return 15;
1521  }
1522 }
1523 
1524 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t
1525 agg_count_double_skip_val(uint64_t* agg, const double val, const double skip_val) {
1526  if (val != skip_val) {
1527  return agg_count_double(agg, val);
1528  }
1529  return *agg;
1530 }
1531 
1532 extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t
1533 agg_count_float_skip_val(uint32_t* agg, const float val, const float skip_val) {
1534  if (val != skip_val) {
1535  return agg_count_float(agg, val);
1536  }
1537  return *agg;
1538 }
1539 
1540 #define DEF_SKIP_AGG(base_agg_func) \
1541  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1542  ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
1543  if (val != skip_val) { \
1544  const ADDR_T old_agg = *agg; \
1545  if (old_agg != *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&skip_val))) { \
1546  base_agg_func(agg, val); \
1547  } else { \
1548  *agg = *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&val)); \
1549  } \
1550  } \
1551  }
1552 
1553 #define DEF_SKIP_IF_AGG(skip_agg_func, base_agg_func) \
1554  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void skip_agg_func##_skip_val( \
1555  ADDR_T* agg, const DATA_T val, const DATA_T skip_val, const int8_t cond) { \
1556  if (cond) { \
1557  base_agg_func##_skip_val(agg, val, skip_val); \
1558  } \
1559  }
1560 
1561 #define DATA_T double
1562 #define ADDR_T int64_t
1563 DEF_SKIP_AGG(agg_sum_double)
1564 DEF_SKIP_IF_AGG(agg_sum_if_double, agg_sum_double)
1565 DEF_SKIP_AGG(agg_max_double)
1566 DEF_SKIP_AGG(agg_min_double)
1567 #undef ADDR_T
1568 #undef DATA_T
1569 
1570 #define DATA_T float
1571 #define ADDR_T int32_t
1572 DEF_SKIP_AGG(agg_sum_float)
1573 DEF_SKIP_IF_AGG(agg_if_sum_float, agg_sum_float)
1574 DEF_SKIP_AGG(agg_max_float)
1575 DEF_SKIP_AGG(agg_min_float)
1576 #undef ADDR_T
1577 #undef DATA_T
1578 
1579 #undef DEF_SKIP_AGG
1580 #undef DEF_SKIP_IF_AGG
1581 
1582 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_floor(const int64_t x,
1583  const int64_t scale) {
1584  if (x >= 0) {
1585  return x / scale * scale;
1586  }
1587  if (!(x % scale)) {
1588  return x;
1589  }
1590  return x / scale * scale - scale;
1591 }
1592 
1593 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_ceil(const int64_t x,
1594  const int64_t scale) {
1595  return decimal_floor(x, scale) + (x % scale ? scale : 0);
1596 }
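// Example with scale = 100 (two decimal digits): decimal_floor(250, 100) == 200 and
// decimal_floor(-250, 100) == -300 (rounding toward negative infinity), while
// decimal_ceil(-250, 100) == -200 (rounding toward positive infinity).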
1597 
1598 // Shared memory aggregators. Should never be called,
1599 // real implementations are in cuda_mapd_rt.cu.
1600 #define DEF_SHARED_AGG_RET_STUBS(base_agg_func) \
1601  extern "C" GPU_RT_STUB uint64_t base_agg_func##_shared(uint64_t* agg, \
1602  const int64_t val) { \
1603  return 0; \
1604  } \
1605  \
1606  extern "C" GPU_RT_STUB uint64_t base_agg_func##_skip_val_shared( \
1607  uint64_t* agg, const int64_t val, const int64_t skip_val) { \
1608  return 0; \
1609  } \
1610  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_shared(uint32_t* agg, \
1611  const int32_t val) { \
1612  return 0; \
1613  } \
1614  \
1615  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_skip_val_shared( \
1616  uint32_t* agg, const int32_t val, const int32_t skip_val) { \
1617  return 0; \
1618  } \
1619  \
1620  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_shared(uint64_t* agg, \
1621  const double val) { \
1622  return 0; \
1623  } \
1624  \
1625  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_skip_val_shared( \
1626  uint64_t* agg, const double val, const double skip_val) { \
1627  return 0; \
1628  } \
1629  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_shared(uint32_t* agg, \
1630  const float val) { \
1631  return 0; \
1632  } \
1633  \
1634  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_skip_val_shared( \
1635  uint32_t* agg, const float val, const float skip_val) { \
1636  return 0; \
1637  }
1638 
1639 #define DEF_SHARED_AGG_STUBS(base_agg_func) \
1640  extern "C" GPU_RT_STUB void base_agg_func##_shared(int64_t* agg, const int64_t val) {} \
1641  \
1642  extern "C" GPU_RT_STUB void base_agg_func##_skip_val_shared( \
1643  int64_t* agg, const int64_t val, const int64_t skip_val) {} \
1644  extern "C" GPU_RT_STUB void base_agg_func##_int32_shared(int32_t* agg, \
1645  const int32_t val) {} \
1646  extern "C" GPU_RT_STUB void base_agg_func##_int16_shared(int16_t* agg, \
1647  const int16_t val) {} \
1648  extern "C" GPU_RT_STUB void base_agg_func##_int8_shared(int8_t* agg, \
1649  const int8_t val) {} \
1650  \
1651  extern "C" GPU_RT_STUB void base_agg_func##_int32_skip_val_shared( \
1652  int32_t* agg, const int32_t val, const int32_t skip_val) {} \
1653  \
1654  extern "C" GPU_RT_STUB void base_agg_func##_double_shared(int64_t* agg, \
1655  const double val) {} \
1656  \
1657  extern "C" GPU_RT_STUB void base_agg_func##_double_skip_val_shared( \
1658  int64_t* agg, const double val, const double skip_val) {} \
1659  extern "C" GPU_RT_STUB void base_agg_func##_float_shared(int32_t* agg, \
1660  const float val) {} \
1661  \
1662  extern "C" GPU_RT_STUB void base_agg_func##_float_skip_val_shared( \
1663  int32_t* agg, const float val, const float skip_val) {}
1664 
1670 
1671 extern "C" GPU_RT_STUB int8_t* agg_id_varlen_shared(int8_t* varlen_buffer,
1672  const int64_t offset,
1673  const int8_t* value,
1674  const int64_t size_bytes) {
1675  return nullptr;
1676 }
1677 
1678 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_shared(int64_t* agg,
1679  const int64_t val,
1680  const int64_t null_val) {
1681  return 0;
1682 }
1683 
1684 extern "C" GPU_RT_STUB int32_t
1685 checked_single_agg_id_int32_shared(int32_t* agg,
1686  const int32_t val,
1687  const int32_t null_val) {
1688  return 0;
1689 }
1690 extern "C" GPU_RT_STUB int32_t
1691 checked_single_agg_id_int16_shared(int16_t* agg,
1692  const int16_t val,
1693  const int16_t null_val) {
1694  return 0;
1695 }
1696 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_int8_shared(int8_t* agg,
1697  const int8_t val,
1698  const int8_t null_val) {
1699  return 0;
1700 }
1701 
1702 extern "C" GPU_RT_STUB int32_t
1703 checked_single_agg_id_double_shared(int64_t* agg,
1704  const double val,
1705  const double null_val) {
1706  return 0;
1707 }
1708 
1709 extern "C" GPU_RT_STUB int32_t checked_single_agg_id_float_shared(int32_t* agg,
1710  const float val,
1711  const float null_val) {
1712  return 0;
1713 }
1714 
1715 extern "C" GPU_RT_STUB void agg_max_int16_skip_val_shared(int16_t* agg,
1716  const int16_t val,
1717  const int16_t skip_val) {}
1718 
1719 extern "C" GPU_RT_STUB void agg_max_int8_skip_val_shared(int8_t* agg,
1720  const int8_t val,
1721  const int8_t skip_val) {}
1722 
1723 extern "C" GPU_RT_STUB void agg_min_int16_skip_val_shared(int16_t* agg,
1724  const int16_t val,
1725  const int16_t skip_val) {}
1726 
1727 extern "C" GPU_RT_STUB void agg_min_int8_skip_val_shared(int8_t* agg,
1728  const int8_t val,
1729  const int8_t skip_val) {}
1730 
1731 extern "C" GPU_RT_STUB void agg_id_double_shared_slow(int64_t* agg, const double* val) {}
1732 
1733 extern "C" GPU_RT_STUB int64_t agg_sum_shared(int64_t* agg, const int64_t val) {
1734  return 0;
1735 }
1736 
1737 extern "C" GPU_RT_STUB int64_t agg_sum_if_shared(int64_t* agg,
1738  const int64_t val,
1739  const int8_t cond) {
1740  return 0;
1741 }
1742 
1743 extern "C" GPU_RT_STUB int64_t agg_sum_skip_val_shared(int64_t* agg,
1744  const int64_t val,
1745  const int64_t skip_val) {
1746  return 0;
1747 }
1748 
1749 extern "C" GPU_RT_STUB int64_t agg_sum_if_skip_val_shared(int64_t* agg,
1750  const int64_t val,
1751  const int64_t skip_val,
1752  const int8_t cond) {
1753  return 0;
1754 }
1755 extern "C" GPU_RT_STUB int32_t agg_sum_int32_shared(int32_t* agg, const int32_t val) {
1756  return 0;
1757 }
1758 
1759 extern "C" GPU_RT_STUB int32_t agg_sum_int32_skip_val_shared(int32_t* agg,
1760  const int32_t val,
1761  const int32_t skip_val) {
1762  return 0;
1763 }
1764 
1765 extern "C" GPU_RT_STUB void agg_sum_double_shared(int64_t* agg, const double val) {}
1766 
1767 extern "C" GPU_RT_STUB void agg_sum_double_skip_val_shared(int64_t* agg,
1768  const double val,
1769  const double skip_val) {}
1770 extern "C" GPU_RT_STUB void agg_sum_float_shared(int32_t* agg, const float val) {}
1771 
1772 extern "C" GPU_RT_STUB void agg_sum_float_skip_val_shared(int32_t* agg,
1773  const float val,
1774  const float skip_val) {}
1775 
1776 extern "C" GPU_RT_STUB int32_t agg_sum_if_int32_shared(int32_t* agg,
1777  const int32_t val,
1778  const int8_t cond) {
1779  return 0;
1780 }
1781 
1782 extern "C" GPU_RT_STUB int32_t agg_sum_if_int32_skip_val_shared(int32_t* agg,
1783  const int32_t val,
1784  const int32_t skip_val,
1785  const int8_t cond) {
1786  return 0;
1787 }
1788 
1789 extern "C" GPU_RT_STUB void agg_sum_if_double_shared(int64_t* agg,
1790  const double val,
1791  const int8_t cond) {}
1792 
1793 extern "C" GPU_RT_STUB void agg_sum_if_double_skip_val_shared(int64_t* agg,
1794  const double val,
1795  const double skip_val,
1796  const int8_t cond) {}
1797 extern "C" GPU_RT_STUB void agg_sum_if_float_shared(int32_t* agg,
1798  const float val,
1799  const int8_t cond) {}
1800 
1801 extern "C" GPU_RT_STUB void agg_sum_if_float_skip_val_shared(int32_t* agg,
1802  const float val,
1803  const float skip_val,
1804  const int8_t cond) {}
1805 
1806 extern "C" GPU_RT_STUB void force_sync() {}
1807 
1808 extern "C" GPU_RT_STUB void sync_warp() {}
1809 extern "C" GPU_RT_STUB void sync_warp_protected(int64_t thread_pos, int64_t row_count) {}
1810 extern "C" GPU_RT_STUB void sync_threadblock() {}
1811 
1812 extern "C" GPU_RT_STUB void write_back_non_grouped_agg(int64_t* input_buffer,
1813  int64_t* output_buffer,
1814  const int32_t num_agg_cols){};
1815 // x64 stride functions
1816 
1817 extern "C" RUNTIME_EXPORT NEVER_INLINE int32_t
1818 pos_start_impl(int32_t const* row_index_resume) {
1819  return row_index_resume ? *row_index_resume : 0;
1820 }
1821 
1822 extern "C" GPU_RT_STUB int32_t group_buff_idx_impl() {
1823  return pos_start_impl(nullptr);
1824 }
1825 
1826 extern "C" GPU_RT_STUB int32_t pos_step_impl() {
1827  return 1;
1828 }
1829 
1830 extern "C" GPU_RT_STUB int8_t thread_warp_idx(const int8_t warp_sz) {
1831  return 0;
1832 }
1833 
1834 extern "C" GPU_RT_STUB int64_t get_thread_index() {
1835  return 0;
1836 }
1837 
1838 extern "C" GPU_RT_STUB int64_t* declare_dynamic_shared_memory() {
1839  return nullptr;
1840 }
1841 
1842 extern "C" GPU_RT_STUB int64_t get_block_index() {
1843  return 0;
1844 }
1845 
1846 #undef GPU_RT_STUB
1847 
1848 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void record_error_code(const int32_t err_code,
1849  int32_t* error_codes) {
1850  // NB: never override persistent error codes (with code greater than zero).
1851  // On GPU, a projection query with a limit can run out of slots without it
1852  // being an actual error if the limit has been hit. If a persistent error
1853  // (division by zero, for example) occurs before running out of slots, we
1854  // have to avoid overriding it, because there's a risk that the query would
1855  // go through if we override with a potentially benign out-of-slots code.
1856  if (err_code && error_codes[pos_start_impl(nullptr)] <= 0) {
1857  error_codes[pos_start_impl(nullptr)] = err_code;
1858  }
1859 }
1860 
1861 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t get_error_code(int32_t* error_codes) {
1862  return error_codes[pos_start_impl(nullptr)];
1863 }
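// Illustrative sketch (not part of the original source, with hypothetical codes): how
// the two helpers above interact. Once a positive (persistent) code is recorded for a
// thread slot, later codes are ignored:
//   record_error_code(7, error_codes);   // first persistent code: stored
//   record_error_code(3, error_codes);   // ignored, the slot already holds code 7
//   get_error_code(error_codes);         // still returns 7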
1864 
1865 // group by helpers
1866 
1867 extern "C" RUNTIME_EXPORT NEVER_INLINE const int64_t* init_shared_mem_nop(
1868  const int64_t* groups_buffer,
1869  const int32_t groups_buffer_size) {
1870  return groups_buffer;
1871 }
1872 
1873 extern "C" RUNTIME_EXPORT NEVER_INLINE void write_back_nop(int64_t* dest,
1874  int64_t* src,
1875  const int32_t sz) {
1876 #ifndef _WIN32
1877  // the body is not really needed, just make sure the call is not optimized away
1878  assert(dest);
1879 #endif
1880 }
1881 
1882 extern "C" RUNTIME_EXPORT int64_t* init_shared_mem(const int64_t* global_groups_buffer,
1883  const int32_t groups_buffer_size) {
1884  return nullptr;
1885 }
1886 
1887 extern "C" RUNTIME_EXPORT NEVER_INLINE void init_group_by_buffer_impl(
1888  int64_t* groups_buffer,
1889  const int64_t* init_vals,
1890  const uint32_t groups_buffer_entry_count,
1891  const uint32_t key_qw_count,
1892  const uint32_t agg_col_count,
1893  const bool keyless,
1894  const int8_t warp_size) {
1895 #ifndef _WIN32
1896  // the body is not really needed, just make sure the call is not optimized away
1897  assert(groups_buffer);
1898 #endif
1899 }
1900 
1901 extern "C" RUNTIME_EXPORT NEVER_INLINE void init_columnar_group_by_buffer_gpu(
1902  int64_t* groups_buffer,
1903  const int64_t* init_vals,
1904  const uint32_t groups_buffer_entry_count,
1905  const uint32_t key_qw_count,
1906  const uint32_t agg_col_count,
1907  const bool keyless,
1908  const bool blocks_share_memory,
1909  const int32_t frag_idx) {
1910 #ifndef _WIN32
1911  // the body is not really needed, just make sure the call is not optimized away
1912  assert(groups_buffer);
1913 #endif
1914 }
1915 
1916 extern "C" RUNTIME_EXPORT NEVER_INLINE void init_group_by_buffer_gpu(
1917  int64_t* groups_buffer,
1918  const int64_t* init_vals,
1919  const uint32_t groups_buffer_entry_count,
1920  const uint32_t key_qw_count,
1921  const uint32_t agg_col_count,
1922  const bool keyless,
1923  const int8_t warp_size) {
1924 #ifndef _WIN32
1925  // the body is not really needed, just make sure the call is not optimized away
1926  assert(groups_buffer);
1927 #endif
1928 }
1929 
1930 template <typename T>
1931 ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
1932  const uint32_t h,
1933  const T* key,
1934  const uint32_t key_count,
1935  const uint32_t row_size_quad) {
1936  auto off = h * row_size_quad;
1937  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
1938  if (*row_ptr == get_empty_key<T>()) {
1939  memcpy(row_ptr, key, key_count * sizeof(T));
1940  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1941  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1942  }
1943  if (memcmp(row_ptr, key, key_count * sizeof(T)) == 0) {
1944  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1945  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
1946  }
1947  return nullptr;
1948 }
1949 
1950 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value(
1951  int64_t* groups_buffer,
1952  const uint32_t h,
1953  const int64_t* key,
1954  const uint32_t key_count,
1955  const uint32_t key_width,
1956  const uint32_t row_size_quad) {
1957  switch (key_width) {
1958  case 4:
1959  return get_matching_group_value(groups_buffer,
1960  h,
1961  reinterpret_cast<const int32_t*>(key),
1962  key_count,
1963  row_size_quad);
1964  case 8:
1965  return get_matching_group_value(groups_buffer, h, key, key_count, row_size_quad);
1966  default:;
1967  }
1968  return nullptr;
1969 }
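// Illustrative note (not part of the original source): in the row-wise layout probed
// above, each hash slot h owns row_size_quad 64-bit words starting at
// groups_buffer + h * row_size_quad. The key is written first (as 4- or 8-byte values
// depending on key_width) and the returned pointer is the 8-byte-aligned position
// right after it, where the aggregate columns for that group live. For example, with
// key_width 8, key_count 2 and row_size_quad 4, slot h == 3 spans words [12, 16):
// words 12-13 hold the key and the returned aggregate pointer is groups_buffer + 14.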
1970 
1971 template <typename T>
1972 ALWAYS_INLINE int32_t get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1973  const uint32_t entry_count,
1974  const uint32_t h,
1975  const T* key,
1976  const uint32_t key_count) {
1977  auto off = h;
1978  auto key_buffer = reinterpret_cast<T*>(groups_buffer);
1979  if (key_buffer[off] == get_empty_key<T>()) {
1980  for (size_t i = 0; i < key_count; ++i) {
1981  key_buffer[off] = key[i];
1982  off += entry_count;
1983  }
1984  return h;
1985  }
1986  off = h;
1987  for (size_t i = 0; i < key_count; ++i) {
1988  if (key_buffer[off] != key[i]) {
1989  return -1;
1990  }
1991  off += entry_count;
1992  }
1993  return h;
1994 }
1995 
1996 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
1997 get_matching_group_value_columnar_slot(int64_t* groups_buffer,
1998  const uint32_t entry_count,
1999  const uint32_t h,
2000  const int64_t* key,
2001  const uint32_t key_count,
2002  const uint32_t key_width) {
2003  switch (key_width) {
2004  case 4:
2005  return get_matching_group_value_columnar_slot(groups_buffer,
2006  entry_count,
2007  h,
2008  reinterpret_cast<const int32_t*>(key),
2009  key_count);
2010  case 8:
2011  return get_matching_group_value_columnar_slot(
2012  groups_buffer, entry_count, h, key, key_count);
2013  default:
2014  return -1;
2015  }
2016  return -1;
2017 }
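// Illustrative note (not part of the original source): in the columnar layout probed
// above, key component i of entry h lives at key_buffer[i * entry_count + h], i.e.
// each key column is stored contiguously for all entries. With entry_count 1024 and
// key_count 2, entry h == 5 keeps its key parts at offsets 5 and 1029, and the
// function returns h itself (or -1 on a mismatch) rather than a payload pointer.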
2018 
2019 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value_columnar(
2020  int64_t* groups_buffer,
2021  const uint32_t h,
2022  const int64_t* key,
2023  const uint32_t key_qw_count,
2024  const size_t entry_count) {
2025  auto off = h;
2026  if (groups_buffer[off] == EMPTY_KEY_64) {
2027  for (size_t i = 0; i < key_qw_count; ++i) {
2028  groups_buffer[off] = key[i];
2029  off += entry_count;
2030  }
2031  return &groups_buffer[off];
2032  }
2033  off = h;
2034  for (size_t i = 0; i < key_qw_count; ++i) {
2035  if (groups_buffer[off] != key[i]) {
2036  return nullptr;
2037  }
2038  off += entry_count;
2039  }
2040  return &groups_buffer[off];
2041 }
2042 
2043 /*
2044  * For a particular hashed_index, returns the row-wise offset
2045  * to the first matching agg column in memory.
2046  * It also checks the corresponding group column, and initializes all
2047  * key columns if the entry is still empty (it is assumed all group columns are
2048  * 64-bit wide).
2049  *
2050  * Memory layout:
2051  *
2052  * | prepended group columns (64-bit each) | agg columns |
2053  */
2054 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash(
2055  int64_t* groups_buffer,
2056  const uint32_t hashed_index,
2057  const int64_t* key,
2058  const uint32_t key_count,
2059  const uint32_t row_size_quad) {
2060  uint32_t off = hashed_index * row_size_quad;
2061  if (groups_buffer[off] == EMPTY_KEY_64) {
2062  for (uint32_t i = 0; i < key_count; ++i) {
2063  groups_buffer[off + i] = key[i];
2064  }
2065  }
2066  return groups_buffer + off + key_count;
2067 }
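// Illustrative note (not part of the original source): with a perfect hash there are
// no collisions, so the slot for hashed_index is simply claimed by writing the key
// when it is still EMPTY_KEY_64. E.g. with key_count 1 and row_size_quad 3,
// hashed_index 2 maps to word 6: word 6 receives the key and the returned pointer
// groups_buffer + 7 is where that group's aggregates start.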
2068 
2075 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t*
2076 get_matching_group_value_perfect_hash_keyless(int64_t* groups_buffer,
2077  const uint32_t hashed_index,
2078  const uint32_t row_size_quad) {
2079  return groups_buffer + row_size_quad * hashed_index;
2080 }
2081 
2082 /*
2083  * For a particular hashed_index, find and initialize (if necessary) all the group
2084  * columns corresponding to a key. It is assumed that all group columns are 64-bit wide.
2085  */
2086 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void
2087 set_matching_group_value_perfect_hash_columnar(int64_t* groups_buffer,
2088  const uint32_t hashed_index,
2089  const int64_t* key,
2090  const uint32_t key_count,
2091  const uint32_t entry_count) {
2092  if (groups_buffer[hashed_index] == EMPTY_KEY_64) {
2093  for (uint32_t i = 0; i < key_count; i++) {
2094  groups_buffer[i * entry_count + hashed_index] = key[i];
2095  }
2096  }
2097 }
2098 
2099 #include "GeoOpsRuntime.cpp"
2100 #include "GroupByRuntime.cpp"
2102 
2103 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_group_value_fast_keyless(
2104  int64_t* groups_buffer,
2105  const int64_t key,
2106  const int64_t min_key,
2107  const int64_t /* bucket */,
2108  const uint32_t row_size_quad) {
2109  return groups_buffer + row_size_quad * (key - min_key);
2110 }
2111 
2112 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_group_value_fast_keyless_semiprivate(
2113  int64_t* groups_buffer,
2114  const int64_t key,
2115  const int64_t min_key,
2116  const int64_t /* bucket */,
2117  const uint32_t row_size_quad,
2118  const uint8_t thread_warp_idx,
2119  const uint8_t warp_size) {
2120  return groups_buffer + row_size_quad * (warp_size * (key - min_key) + thread_warp_idx);
2121 }
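// Illustrative note (not part of the original source): the keyless fast path above
// does not store the key at all; the group's row is addressed directly from the key
// value. E.g. with min_key 100 and row_size_quad 2, key 103 lands at
// groups_buffer + 6, and the semiprivate variant further multiplies by warp_size and
// adds thread_warp_idx so each warp lane aggregates into its own copy of the row.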
2122 
2123 extern "C" RUNTIME_EXPORT ALWAYS_INLINE StringView string_pack(const int8_t* ptr,
2124  const int32_t len) {
2125  return {reinterpret_cast<char const*>(ptr), static_cast<uint64_t>(len)};
2126 }
2127 
2128 #ifdef __clang__
2129 #include "../Utils/StringLike.cpp"
2130 #endif
2131 
2132 #ifndef __CUDACC__
2133 #include "TopKRuntime.cpp"
2134 #endif
2135 
2136 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2137 char_length(const char* str, const int32_t str_len) {
2138  return str_len;
2139 }
2140 
2141 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2142 char_length_nullable(const char* str, const int32_t str_len, const int32_t int_null) {
2143  if (!str) {
2144  return int_null;
2145  }
2146  return str_len;
2147 }
2148 
2149 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2150 key_for_string_encoded(const int32_t str_id) {
2151  return str_id;
2152 }
2153 
2154 extern "C" ALWAYS_INLINE DEVICE int32_t
2155 map_string_dict_id(const int32_t string_id,
2156  const int64_t translation_map_handle,
2157  const int32_t min_source_id) {
2158  const int32_t* translation_map =
2159  reinterpret_cast<const int32_t*>(translation_map_handle);
2160  return translation_map[string_id - min_source_id];
2161 }
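// Illustrative note (not part of the original source): the translation map above is a
// plain array indexed by (string_id - min_source_id). E.g. with min_source_id 10, the
// destination-dictionary id for source string id 12 is translation_map[2].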
2162 
2163 extern "C" ALWAYS_INLINE DEVICE double tree_model_reg_predict(
2164  const double* regressor_inputs,
2165  const int64_t decision_tree_table_handle,
2166  const int64_t decision_tree_offsets_handle,
2167  const int32_t num_regressors,
2168  const int32_t num_trees,
2169  const bool compute_avg,
2170  const double null_value) {
2171  for (int32_t regressor_idx = 0; regressor_idx < num_regressors; ++regressor_idx) {
2172  if (regressor_inputs[regressor_idx] == null_value) {
2173  return null_value;
2174  }
2175  }
2176  const DecisionTreeEntry* decision_tree_table =
2177  reinterpret_cast<const DecisionTreeEntry*>(decision_tree_table_handle);
2178  const int64_t* decision_tree_offsets =
2179  reinterpret_cast<const int64_t*>(decision_tree_offsets_handle);
2180  double sum_tree_results{0};
2181  for (int32_t tree_idx = 0; tree_idx < num_trees; ++tree_idx) {
2182  int64_t row_idx = decision_tree_offsets[tree_idx];
2183  while (true) {
2184  const DecisionTreeEntry& current_entry = decision_tree_table[row_idx];
2185  if (!current_entry.isSplitNode()) {
2186  sum_tree_results += current_entry.value;
2187  break;
2188  }
2189  const auto regressor_input = regressor_inputs[current_entry.feature_index];
2190  row_idx = regressor_input <= current_entry.value
2191  ? current_entry.left_child_row_idx
2192  : current_entry.right_child_row_idx;
2193  }
2194  }
2195  return compute_avg ? sum_tree_results / num_trees : sum_tree_results;
2196 }
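// Illustrative walk-through (not part of the original source): for each tree the loop
// above starts at that tree's offset and follows split nodes (left child when the
// input regressor is <= the split value, right child otherwise) until it reaches a
// leaf, whose value is added to the running sum. With num_trees == 2 and
// compute_avg == true the prediction is the mean of the two leaf values; any null
// regressor input short-circuits the whole prediction to null_value.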
2197 
2198 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE bool sample_ratio(
2199  const double proportion,
2200  const int64_t row_offset) {
2201  const int64_t threshold = 4294967296 * proportion;
2202  return (row_offset * 2654435761) % 4294967296 < threshold;
2203 }
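// Illustrative note (not part of the original source): sample_ratio implements a
// deterministic Bernoulli sample via multiplicative hashing. 2654435761 is Knuth's
// 32-bit multiplicative-hash constant, so (row_offset * 2654435761) % 2^32 behaves as
// a roughly uniform value in [0, 2^32); comparing it against proportion * 2^32 keeps
// approximately a `proportion` fraction of rows, and the same rows are selected on
// every run.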
2204 
2205 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2206 width_bucket(const double target_value,
2207  const double lower_bound,
2208  const double upper_bound,
2209  const double scale_factor,
2210  const int32_t partition_count) {
2211  if (target_value < lower_bound) {
2212  return 0;
2213  } else if (target_value >= upper_bound) {
2214  return partition_count + 1;
2215  }
2216  return ((target_value - lower_bound) * scale_factor) + 1;
2217 }
2218 
2219 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2220 width_bucket_reversed(const double target_value,
2221  const double lower_bound,
2222  const double upper_bound,
2223  const double scale_factor,
2224  const int32_t partition_count) {
2225  if (target_value > lower_bound) {
2226  return 0;
2227  } else if (target_value <= upper_bound) {
2228  return partition_count + 1;
2229  }
2230  return ((lower_bound - target_value) * scale_factor) + 1;
2231 }
2232 
2233 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
2234 width_bucket_nullable(const double target_value,
2235  const double lower_bound,
2236  const double upper_bound,
2237  const double scale_factor,
2238  const int32_t partition_count,
2239  const double null_val) {
2240  if (target_value == null_val) {
2241  return INT32_MIN;
2242  }
2243  return width_bucket(
2244  target_value, lower_bound, upper_bound, scale_factor, partition_count);
2245 }
2246 
2247 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t
2248 width_bucket_reversed_nullable(const double target_value,
2249  const double lower_bound,
2250  const double upper_bound,
2251  const double scale_factor,
2252  const int32_t partition_count,
2253  const double null_val) {
2254  if (target_value == null_val) {
2255  return INT32_MIN;
2256  }
2257  return width_bucket_reversed(
2258  target_value, lower_bound, upper_bound, scale_factor, partition_count);
2259 }
2260 
2261 // Version of width_bucket without the out-of-bounds check; it can be called
2262 // when the input target_value expression is guaranteed to lie in the valid range
2263 // (so the null check can be skipped as well).
2264 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2265 width_bucket_no_oob_check(const double target_value,
2266  const double lower_bound,
2267  const double scale_factor) {
2268  int32_t calc = (target_value - lower_bound) * scale_factor;
2269  return calc + 1;
2270 }
2271 
2272 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2273 width_bucket_reversed_no_oob_check(const double target_value,
2274  const double lower_bound,
2275  const double scale_factor) {
2276  int32_t calc = (lower_bound - target_value) * scale_factor;
2277  return calc + 1;
2278 }
2279 
2280 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2281 width_bucket_expr(const double target_value,
2282  const bool reversed,
2283  const double lower_bound,
2284  const double upper_bound,
2285  const int32_t partition_count) {
2286  if (reversed) {
2287  return width_bucket_reversed(target_value,
2288  lower_bound,
2289  upper_bound,
2290  partition_count / (lower_bound - upper_bound),
2291  partition_count);
2292  }
2293  return width_bucket(target_value,
2294  lower_bound,
2295  upper_bound,
2296  partition_count / (upper_bound - lower_bound),
2297  partition_count);
2298 }
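// Illustrative note (not part of the original source): width_bucket_expr derives the
// scale factor as partition_count / (upper_bound - lower_bound) and delegates to the
// bounded helpers above. E.g. WIDTH_BUCKET(7.3, 0.0, 10.0, 5): scale_factor == 0.5,
// so the result is int32_t((7.3 - 0.0) * 0.5 + 1) == 4; values below 0.0 return 0 and
// values at or above 10.0 return 6 (partition_count + 1).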
2299 
2300 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2301 width_bucket_expr_nullable(const double target_value,
2302  const bool reversed,
2303  const double lower_bound,
2304  const double upper_bound,
2305  const int32_t partition_count,
2306  const double null_val) {
2307  if (target_value == null_val) {
2308  return INT32_MIN;
2309  }
2310  return width_bucket_expr(
2311  target_value, reversed, lower_bound, upper_bound, partition_count);
2312 }
2313 
2314 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
2315 width_bucket_expr_no_oob_check(const double target_value,
2316  const bool reversed,
2317  const double lower_bound,
2318  const double upper_bound,
2319  const int32_t partition_count) {
2320  if (reversed) {
2321  return width_bucket_reversed_no_oob_check(
2322  target_value, lower_bound, partition_count / (lower_bound - upper_bound));
2323  }
2324  return width_bucket_no_oob_check(
2325  target_value, lower_bound, partition_count / (upper_bound - lower_bound));
2326 }
2327 
2328 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t
2329 row_number_window_func(const int64_t output_buff, const int64_t pos) {
2330  return reinterpret_cast<const int64_t*>(output_buff)[pos];
2331 }
2332 
2333 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double percent_window_func(
2334  const int64_t output_buff,
2335  const int64_t pos) {
2336  return reinterpret_cast<const double*>(output_buff)[pos];
2337 }
2338 
2339 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_double(const int64_t* agg) {
2340  return *reinterpret_cast<const double*>(may_alias_ptr(agg));
2341 }
2342 
2343 extern "C" RUNTIME_EXPORT ALWAYS_INLINE float load_float(const int32_t* agg) {
2344  return *reinterpret_cast<const float*>(may_alias_ptr(agg));
2345 }
2346 
2347 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_int(const int64_t* sum,
2348  const int64_t* count,
2349  const double null_val) {
2350  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
2351 }
2352 
2353 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_decimal(const int64_t* sum,
2354  const int64_t* count,
2355  const double null_val,
2356  const uint32_t scale) {
2357  return *count != 0 ? (static_cast<double>(*sum) / pow(10, scale)) / *count : null_val;
2358 }
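// Illustrative note (not part of the original source): load_avg_decimal rescales the
// fixed-point sum before dividing by the count. E.g. sum == 12345, count == 10,
// scale == 2 (two fractional digits): (12345 / 10^2) / 10 == 12.345; a zero count
// yields null_val instead.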
2359 
2360 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_double(const int64_t* agg,
2361  const int64_t* count,
2362  const double null_val) {
2363  return *count != 0 ? *reinterpret_cast<const double*>(may_alias_ptr(agg)) / *count
2364  : null_val;
2365 }
2366 
2367 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_float(const int32_t* agg,
2368  const int32_t* count,
2369  const double null_val) {
2370  return *count != 0 ? *reinterpret_cast<const float*>(may_alias_ptr(agg)) / *count
2371  : null_val;
2372 }
2373 
2374 extern "C" RUNTIME_EXPORT NEVER_INLINE void linear_probabilistic_count(
2375  uint8_t* bitmap,
2376  const uint32_t bitmap_bytes,
2377  const uint8_t* key_bytes,
2378  const uint32_t key_len) {
2379  const uint32_t bit_pos = MurmurHash3(key_bytes, key_len, 0) % (bitmap_bytes * 8);
2380  const uint32_t word_idx = bit_pos / 32;
2381  const uint32_t bit_idx = bit_pos % 32;
2382  reinterpret_cast<uint32_t*>(bitmap)[word_idx] |= 1 << bit_idx;
2383 }
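// Illustrative note (not part of the original source): the probabilistic counter
// above hashes the key with MurmurHash3, reduces it modulo the number of bits in the
// bitmap (bitmap_bytes * 8) and sets that single bit; the distinct-count estimate is
// later derived from the fraction of bits set. E.g. with bitmap_bytes == 8, a hash
// reducing to bit_pos 37 sets bit 5 of the second 32-bit word (37 / 32 == 1,
// 37 % 32 == 5).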
2384 
2385 // First 3 parameters are output, the rest are input.
2386 extern "C" RUNTIME_EXPORT NEVER_INLINE void query_stub_hoisted_literals(
2387  int32_t* error_code,
2388  int32_t* total_matched,
2389  int64_t** out,
2390  const uint32_t frag_idx,
2391  const uint32_t* row_index_resume,
2392  const int8_t** col_buffers,
2393  const int8_t* literals,
2394  const int64_t* num_rows,
2395  const uint64_t* frag_row_offsets,
2396  const int32_t* max_matched,
2397  const int64_t* init_agg_value,
2398  const int64_t* join_hash_tables,
2399  const int8_t* row_func_mgr) {
2400 #ifndef _WIN32
2401  assert(error_code || total_matched || out || frag_idx || row_index_resume ||
2402  col_buffers || literals || num_rows || frag_row_offsets || max_matched ||
2403  init_agg_value || join_hash_tables || row_func_mgr);
2404 #endif
2405 }
2406 
2407 // First 3 parameters are output, the rest are input.
2408 extern "C" RUNTIME_EXPORT void multifrag_query_hoisted_literals(
2409  int32_t* error_code,
2410  int32_t* total_matched,
2411  int64_t** out,
2412  const uint32_t* num_fragments_ptr,
2413  const uint32_t* num_tables_ptr,
2414  const uint32_t* row_index_resume, // aka start_rowid
2415  const int8_t*** col_buffers,
2416  const int8_t* literals,
2417  const int64_t* num_rows,
2418  const uint64_t* frag_row_offsets,
2419  const int32_t* max_matched,
2420  const int64_t* init_agg_value,
2421  const int64_t* join_hash_tables,
2422  const int8_t* row_func_mgr) {
2423  uint32_t const num_fragments = *num_fragments_ptr;
2424  uint32_t const num_tables = *num_tables_ptr;
2425  // num_fragments_ptr and num_tables_ptr are replaced by frag_idx when passed below.
2426  for (uint32_t frag_idx = 0; frag_idx < num_fragments; ++frag_idx) {
2427  query_stub_hoisted_literals(error_code,
2428  total_matched,
2429  out,
2430  frag_idx,
2431  row_index_resume,
2432  col_buffers ? col_buffers[frag_idx] : nullptr,
2433  literals,
2434  &num_rows[frag_idx * num_tables],
2435  &frag_row_offsets[frag_idx * num_tables],
2436  max_matched,
2437  init_agg_value,
2438  join_hash_tables,
2439  row_func_mgr);
2440  }
2441 }
2442 
2443 // First 3 parameters are output, the rest are input.
2444 extern "C" RUNTIME_EXPORT NEVER_INLINE void query_stub(int32_t* error_code,
2445  int32_t* total_matched,
2446  int64_t** out,
2447  const uint32_t frag_idx,
2448  const uint32_t* row_index_resume,
2449  const int8_t** col_buffers,
2450  const int64_t* num_rows,
2451  const uint64_t* frag_row_offsets,
2452  const int32_t* max_matched,
2453  const int64_t* init_agg_value,
2454  const int64_t* join_hash_tables,
2455  const int8_t* row_func_mgr) {
2456 #ifndef _WIN32
2457  assert(error_code || total_matched || out || frag_idx || row_index_resume ||
2458  col_buffers || num_rows || frag_row_offsets || max_matched || init_agg_value ||
2459  join_hash_tables || row_func_mgr);
2460 #endif
2461 }
2462 
2463 // First 3 parameters are output, the rest are input.
2464 extern "C" RUNTIME_EXPORT void multifrag_query(int32_t* error_code,
2465  int32_t* total_matched,
2466  int64_t** out,
2467  const uint32_t* num_fragments_ptr,
2468  const uint32_t* num_tables_ptr,
2469  const uint32_t* row_index_resume,
2470  const int8_t*** col_buffers,
2471  const int64_t* num_rows,
2472  const uint64_t* frag_row_offsets,
2473  const int32_t* max_matched,
2474  const int64_t* init_agg_value,
2475  const int64_t* join_hash_tables,
2476  const int8_t* row_func_mgr) {
2477  uint32_t const num_fragments = *num_fragments_ptr;
2478  uint32_t const num_tables = *num_tables_ptr;
2479  // num_fragments_ptr and num_tables_ptr are replaced by frag_idx when passed below.
2480  for (uint32_t frag_idx = 0; frag_idx < num_fragments; ++frag_idx) {
2481  query_stub(error_code,
2482  total_matched,
2483  out,
2484  frag_idx,
2485  row_index_resume,
2486  col_buffers ? col_buffers[frag_idx] : nullptr,
2487  &num_rows[frag_idx * num_tables],
2488  &frag_row_offsets[frag_idx * num_tables],
2489  max_matched,
2490  init_agg_value,
2491  join_hash_tables,
2492  row_func_mgr);
2493  }
2494 }
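// Illustrative note (not part of the original source): both multifrag drivers above
// simply loop over the fragments of the outer table and re-invoke the single-fragment
// stub. Per-fragment inputs are laid out fragment-major, so fragment f of a query
// touching num_tables tables reads its row counts from num_rows[f * num_tables] and
// its row offsets from frag_row_offsets[f * num_tables]; col_buffers[f] points to the
// column buffers of that fragment.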
2495 
2496 extern "C" RUNTIME_EXPORT bool check_interrupt() {
2497  if (check_interrupt_init(static_cast<unsigned>(INT_CHECK))) {
2498  return true;
2499  }
2500  return false;
2501 }
2502 
2503 extern "C" RUNTIME_EXPORT bool check_interrupt_init(unsigned command) {
2504  static std::atomic_bool runtime_interrupt_flag{false};
2505 
2506  if (command == static_cast<unsigned>(INT_CHECK)) {
2507  if (runtime_interrupt_flag.load()) {
2508  return true;
2509  }
2510  return false;
2511  }
2512  if (command == static_cast<unsigned>(INT_ABORT)) {
2513  runtime_interrupt_flag.store(true);
2514  return false;
2515  }
2516  if (command == static_cast<unsigned>(INT_RESET)) {
2517  runtime_interrupt_flag.store(false);
2518  return false;
2519  }
2520  return false;
2521 }
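// Illustrative usage sketch (not part of the original source): the interrupt flag is
// a process-wide atomic, so one thread can abort a query that another thread is
// executing. Hypothetical sequence:
//   check_interrupt_init(static_cast<unsigned>(INT_ABORT));  // requesting thread
//   ...
//   if (check_interrupt()) { /* executing thread unwinds and reports the error */ }
//   check_interrupt_init(static_cast<unsigned>(INT_RESET));  // clear before next run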