18 #error This code is not intended to be compiled with a CUDA C++ compiler
44 #define DEF_ARITH_NULLABLE(type, null_type, opname, opsym) \
45 extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable( \
46 const type lhs, const type rhs, const null_type null_val) { \
47 if (lhs != null_val && rhs != null_val) { \
48 return lhs opsym rhs; \
53 #define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym) \
54 extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_lhs( \
55 const type lhs, const type rhs, const null_type null_val) { \
56 if (lhs != null_val) { \
57 return lhs opsym rhs; \
62 #define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym) \
63 extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_rhs( \
64 const type lhs, const type rhs, const null_type null_val) { \
65 if (rhs != null_val) { \
66 return lhs opsym rhs; \
71 #define DEF_CMP_NULLABLE(type, null_type, opname, opsym) \
72 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable( \
75 const null_type null_val, \
76 const int8_t null_bool_val) { \
77 if (lhs != null_val && rhs != null_val) { \
78 return lhs opsym rhs; \
80 return null_bool_val; \
83 #define DEF_CMP_NULLABLE_LHS(type, null_type, opname, opsym) \
84 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_lhs( \
87 const null_type null_val, \
88 const int8_t null_bool_val) { \
89 if (lhs != null_val) { \
90 return lhs opsym rhs; \
92 return null_bool_val; \
95 #define DEF_CMP_NULLABLE_RHS(type, null_type, opname, opsym) \
96 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_rhs( \
99 const null_type null_val, \
100 const int8_t null_bool_val) { \
101 if (rhs != null_val) { \
102 return lhs opsym rhs; \
104 return null_bool_val; \
107 #define DEF_SAFE_DIV_NULLABLE(type, null_type, opname) \
108 extern "C" RUNTIME_EXPORT ALWAYS_INLINE type safe_div_##type( \
109 const type lhs, const type rhs, const null_type null_val) { \
110 if (lhs != null_val && rhs != null_val && rhs != 0) { \
116 #define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type) \
117 DEF_ARITH_NULLABLE(type, null_type, add, +) \
118 DEF_ARITH_NULLABLE(type, null_type, sub, -) \
119 DEF_ARITH_NULLABLE(type, null_type, mul, *) \
120 DEF_ARITH_NULLABLE(type, null_type, div, /) \
121 DEF_SAFE_DIV_NULLABLE(type, null_type, safe_div) \
122 DEF_ARITH_NULLABLE_LHS(type, null_type, add, +) \
123 DEF_ARITH_NULLABLE_LHS(type, null_type, sub, -) \
124 DEF_ARITH_NULLABLE_LHS(type, null_type, mul, *) \
125 DEF_ARITH_NULLABLE_LHS(type, null_type, div, /) \
126 DEF_ARITH_NULLABLE_RHS(type, null_type, add, +) \
127 DEF_ARITH_NULLABLE_RHS(type, null_type, sub, -) \
128 DEF_ARITH_NULLABLE_RHS(type, null_type, mul, *) \
129 DEF_ARITH_NULLABLE_RHS(type, null_type, div, /) \
130 DEF_CMP_NULLABLE(type, null_type, eq, ==) \
131 DEF_CMP_NULLABLE(type, null_type, ne, !=) \
132 DEF_CMP_NULLABLE(type, null_type, lt, <) \
133 DEF_CMP_NULLABLE(type, null_type, gt, >) \
134 DEF_CMP_NULLABLE(type, null_type, le, <=) \
135 DEF_CMP_NULLABLE(type, null_type, ge, >=) \
136 DEF_CMP_NULLABLE_LHS(type, null_type, eq, ==) \
137 DEF_CMP_NULLABLE_LHS(type, null_type, ne, !=) \
138 DEF_CMP_NULLABLE_LHS(type, null_type, lt, <) \
139 DEF_CMP_NULLABLE_LHS(type, null_type, gt, >) \
140 DEF_CMP_NULLABLE_LHS(type, null_type, le, <=) \
141 DEF_CMP_NULLABLE_LHS(type, null_type, ge, >=) \
142 DEF_CMP_NULLABLE_RHS(type, null_type, eq, ==) \
143 DEF_CMP_NULLABLE_RHS(type, null_type, ne, !=) \
144 DEF_CMP_NULLABLE_RHS(type, null_type, lt, <) \
145 DEF_CMP_NULLABLE_RHS(type, null_type, gt, >) \
146 DEF_CMP_NULLABLE_RHS(type, null_type, le, <=) \
147 DEF_CMP_NULLABLE_RHS(type, null_type, ge, >=)
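// Illustration only (not part of the build): each DEF_*_NULLABLE macro stamps out one
// extern "C" helper per type/operator pair, propagating the sentinel null value when an
// operand is null. Assuming the elided macro tail returns null_val, an instantiation such
// as DEF_ARITH_NULLABLE(int64_t, int64_t, add, +) expands to roughly:
//
//   extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t add_int64_t_nullable(
//       const int64_t lhs, const int64_t rhs, const int64_t null_val) {
//     if (lhs != null_val && rhs != null_val) {
//       return lhs + rhs;
//     }
//     return null_val;
//   }
//
// The _lhs/_rhs variants only test the operand that can legally be null, and the CMP
// variants return null_bool_val instead of null_val.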
168 #undef DEF_BINARY_NULLABLE_ALL_OPS
169 #undef DEF_SAFE_DIV_NULLABLE
170 #undef DEF_CMP_NULLABLE_RHS
171 #undef DEF_CMP_NULLABLE_LHS
172 #undef DEF_CMP_NULLABLE
173 #undef DEF_ARITH_NULLABLE_RHS
174 #undef DEF_ARITH_NULLABLE_LHS
175 #undef DEF_ARITH_NULLABLE
177 #define DEF_MAP_STRING_TO_DATUM(value_type, value_name) \
178 extern "C" ALWAYS_INLINE DEVICE value_type map_string_to_datum_##value_name( \
179 const int32_t string_id, \
180 const int64_t translation_map_handle, \
181 const int32_t min_source_id) { \
182 const Datum* translation_map = \
183 reinterpret_cast<const Datum*>(translation_map_handle); \
184 const Datum& out_datum = translation_map[string_id - min_source_id]; \
185 return out_datum.value_name##val; \
196 #undef DEF_MAP_STRING_TO_DATUM
200 const uint64_t scale,
201 const int64_t operand_null_val,
202 const int64_t result_null_val) {
203 return operand != operand_null_val ? operand * scale : result_null_val;
209 const int64_t null_val) {
211 if (operand == null_val) {
215 int64_t tmp = scale >> 1;
216 tmp = operand >= 0 ? operand + tmp : operand - tmp;
223 const int64_t null_val) {
224 int64_t tmp = scale >> 1;
225 tmp = operand >= 0 ? operand + tmp : operand - tmp;
232 const int64_t divisor) {
233 return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
240 const int64_t divisor,
241 const int64_t null_val) {
242 return dividend == null_val ? null_val : floor_div_lhs(dividend, divisor);
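// Worked example (illustration): floor_div_lhs rounds toward negative infinity rather than
// toward zero, so floor_div_lhs(7, 2) == 3 while floor_div_lhs(-7, 2) == (-7 - 1) / 2 == -4,
// whereas plain C++ integer division gives -7 / 2 == -3. The nullable wrapper simply forwards
// to it once the dividend is known not to be the null sentinel.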
245 #define DEF_UMINUS_NULLABLE(type, null_type) \
246 extern "C" RUNTIME_EXPORT ALWAYS_INLINE type uminus_##type##_nullable( \
247 const type operand, const null_type null_val) { \
248 return operand == null_val ? null_val : -operand; \
258 #undef DEF_UMINUS_NULLABLE
260 #define DEF_CAST_NULLABLE(from_type, to_type) \
261 extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
262 cast_##from_type##_to_##to_type##_nullable(const from_type operand, \
263 const from_type from_null_val, \
264 const to_type to_null_val) { \
265 return operand == from_null_val ? to_null_val : operand; \
268 #define DEF_CAST_SCALED_NULLABLE(from_type, to_type) \
269 extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
270 cast_##from_type##_to_##to_type##_scaled_nullable(const from_type operand, \
271 const from_type from_null_val, \
272 const to_type to_null_val, \
273 const to_type multiplier) { \
274 return operand == from_null_val ? to_null_val : multiplier * operand; \
277 #define DEF_CAST_NULLABLE_BIDIR(type1, type2) \
278 DEF_CAST_NULLABLE(type1, type2) \
279 DEF_CAST_NULLABLE(type2, type1)
281 #define DEF_ROUND_NULLABLE(from_type, to_type) \
282 extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type \
283 cast_##from_type##_to_##to_type##_nullable(const from_type operand, \
284 const from_type from_null_val, \
285 const to_type to_null_val) { \
286 return operand == from_null_val \
287 ? to_null_val \
288 : static_cast<to_type>(operand + (operand < from_type(0) \
289 ? from_type(-0.5) \
290 : from_type(0.5))); \
324 #undef DEF_ROUND_NULLABLE
325 #undef DEF_CAST_NULLABLE_BIDIR
326 #undef DEF_CAST_SCALED_NULLABLE
327 #undef DEF_CAST_NULLABLE
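// Illustration only: DEF_ROUND_NULLABLE generates floating-point-to-integer casts that round
// half away from zero. For a hypothetical instantiation DEF_ROUND_NULLABLE(double, int64_t),
// the generated cast_double_to_int64_t_nullable maps 2.6 to static_cast<int64_t>(2.6 + 0.5) == 3
// and -2.6 to static_cast<int64_t>(-2.6 + -0.5) == -3; the from_null_val sentinel maps to
// to_null_val.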
330 const int8_t null_val) {
331 return operand == null_val ? operand : (operand ? 0 : 1);
336 const int8_t null_val) {
337 if (lhs == null_val) {
338 return rhs == 0 ? rhs : null_val;
340 if (rhs == null_val) {
341 return lhs == 0 ? lhs : null_val;
343 return (lhs && rhs) ? 1 : 0;
348 const int8_t null_val) {
349 if (lhs == null_val) {
350 return rhs == 0 ? null_val : rhs;
352 if (rhs == null_val) {
353 return lhs == 0 ? null_val : lhs;
355 return (lhs || rhs) ? 1 : 0;
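// These helpers implement SQL three-valued logic over int8_t, with null_val standing in for
// NULL. For illustration:
//
//   logical_and(null_val, 0, null_val) == 0        // NULL AND FALSE -> FALSE
//   logical_and(null_val, 1, null_val) == null_val // NULL AND TRUE  -> NULL
//   logical_or(null_val, 1, null_val)  == 1        // NULL OR TRUE   -> TRUE
//   logical_or(null_val, 0, null_val)  == null_val // NULL OR FALSE  -> NULL
//   logical_not(null_val, null_val)    == null_val // NOT NULL       -> NULL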
366 const uint64_t bitmap_idx = val - min_val;
367 reinterpret_cast<int8_t*>(*agg)[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
371 #define GPU_RT_STUB NEVER_INLINE
373 #define GPU_RT_STUB NEVER_INLINE __attribute__((optnone))
387 const uint32_t index = hash >> (64 - b);
388 const uint8_t rank = get_rank(hash << b, 64 - b);
389 uint8_t* M = reinterpret_cast<uint8_t*>(*agg);
390 M[index] = std::max(M[index], rank);
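// This is a HyperLogLog-style register update: the top b bits of the 64-bit hash select a
// register and the rank of the remaining bits is max-merged into it. For example, with an
// illustrative b = 11, hash >> (64 - 11) picks one of 2^11 = 2048 registers and
// get_rank(hash << 11, 53) computes the rank of the remaining 53 hash bits.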
401 const int64_t min_val,
402 const int64_t max_val,
403 const int64_t null_val,
404 const int8_t null_bool_val) {
405 if (val == null_val) {
406 return null_bool_val;
408 if (val < min_val || val > max_val) {
414 const uint64_t bitmap_idx = val - min_val;
415 return (reinterpret_cast<const int8_t*>(bitset))[bitmap_idx >> 3] &
416 (1 << (bitmap_idx & 7))
417 ? 1
418 : 0;
423 const int64_t target_value,
424 const int64_t* col_buf) {
426 int64_t h = entry_cnt - 1;
428 int64_t mid = l + (h - l) / 2;
429 if (target_value < col_buf[mid]) {
440 return null_start_pos == 0 ? null_end_pos + 1 : 0;
445 const int64_t null_start_pos,
446 const int64_t null_end_pos) {
447 return null_end_pos == num_elems ? null_start_pos : num_elems;
450 template <typename T, typename Comparator>
452 const int64_t cur_row_idx,
454 const int32_t* partition_rowid_buf,
455 const int64_t* ordered_index_buf,
457 const bool nulls_first,
458 const int64_t null_start_pos,
459 const int64_t null_end_pos,
461 const auto target_value = col_buf[cur_row_idx];
462 if (target_value == null_val) {
463 for (int64_t target_offset = null_start_pos; target_offset < null_end_pos;
465 const auto candidate_offset = partition_rowid_buf[ordered_index_buf[target_offset]];
466 if (candidate_offset == cur_row_idx) {
467 return target_offset;
471 auto const modified_null_end_pos = nulls_first ? null_end_pos - 1 : null_end_pos;
475 int64_t mid = l + (h - l) / 2;
476 auto const target_row_idx = partition_rowid_buf[ordered_index_buf[mid]];
477 auto const cur_value = col_buf[target_row_idx];
478 if (cmp(target_value, cur_value)) {
484 int64_t target_offset = l;
485 int64_t candidate_row_idx = partition_rowid_buf[ordered_index_buf[target_offset]];
486 while (col_buf[candidate_row_idx] == target_value) {
487 if (candidate_row_idx == cur_row_idx) {
488 return target_offset;
490 candidate_row_idx = partition_rowid_buf[ordered_index_buf[++target_offset]];
495 #define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(value_type, oper_name) \
496 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
497 compute_##value_type##_##oper_name##_current_row_idx_in_frame( \
498 const int64_t num_elems, \
499 const int64_t cur_row_idx, \
500 const value_type* col_buf, \
501 const int32_t* partition_rowid_buf, \
502 const int64_t* ordered_index_buf, \
503 const value_type null_val, \
504 const bool nulls_first, \
505 const int64_t null_start_pos, \
506 const int64_t null_end_pos) { \
507 return compute_current_row_idx_in_frame<value_type>(num_elems, \
510 partition_rowid_buf, \
516 std::oper_name<value_type>{}); \
518 #define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES(oper_name) \
519 DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int8_t, oper_name) \
520 DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int16_t, oper_name) \
521 DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int32_t, oper_name) \
522 DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int64_t, oper_name) \
523 DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(float, oper_name) \
524 DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(double, oper_name)
529 #undef DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES
530 #undef DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME
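// Illustration only: with oper_name = less, DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(double, less)
// emits compute_double_less_current_row_idx_in_frame(...), which forwards to the
// compute_current_row_idx_in_frame<double> template above with std::less<double>{} as the
// comparator used for the binary search over the ordered partition index.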
532 template <typename TARGET_VAL_TYPE, typename COL_TYPE, typename NULL_TYPE>
534 const int64_t num_elems,
535 const TARGET_VAL_TYPE target_val,
536 const COL_TYPE* col_buf,
537 const int32_t* partition_rowid_buf,
538 const int64_t* ordered_index_buf,
539 const NULL_TYPE null_val,
540 const bool nulls_first,
541 const int64_t null_start_offset,
542 const int64_t null_end_offset) {
543 if (target_val == null_val) {
544 return null_start_offset;
546 auto const modified_null_end_pos = nulls_first ? null_end_offset - 1 : null_end_offset;
550 int64_t mid = l + (h - l) / 2;
551 if (target_val <= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) {
560 #define DEF_RANGE_MODE_FRAME_LOWER_BOUND( \
561 target_val_type, col_type, null_type, opname, opsym) \
562 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
563 range_mode_##target_val_type##_##col_type##_##null_type##_##opname##_frame_lower_bound( \
564 const int64_t num_elems, \
565 const target_val_type target_value, \
566 const col_type* col_buf, \
567 const int32_t* partition_rowid_buf, \
568 const int64_t* ordered_index_buf, \
569 const int64_t frame_bound_val, \
570 const null_type null_val, \
571 const bool nulls_first, \
572 const int64_t null_start_pos, \
573 const int64_t null_end_pos) { \
574 if (target_value == null_val) { \
575 return null_start_pos; \
577 target_val_type new_val = target_value opsym frame_bound_val; \
578 return compute_lower_bound_from_ordered_partition_index<target_val_type, \
584 partition_rowid_buf, \
611 #undef DEF_RANGE_MODE_FRAME_LOWER_BOUND
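// Illustration only: an instantiation such as
// DEF_RANGE_MODE_FRAME_LOWER_BOUND(int64_t, int64_t, int64_t, add, +) emits
// range_mode_int64_t_int64_t_int64_t_add_frame_lower_bound(...), which computes
// new_val = target_value + frame_bound_val and then binary-searches the ordered partition
// index for the first row whose column value is not less than new_val.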
613 template <typename TARGET_VAL_TYPE, typename COL_TYPE, typename NULL_TYPE>
615 const int64_t num_elems,
616 const TARGET_VAL_TYPE target_val,
617 const COL_TYPE* col_buf,
618 const int32_t* partition_rowid_buf,
619 const int64_t* ordered_index_buf,
620 const NULL_TYPE null_val,
621 const bool nulls_first,
622 const int64_t null_start_offset,
623 const int64_t null_end_offset) {
624 if (target_val == null_val) {
625 return null_end_offset;
627 auto const modified_null_end_pos = nulls_first ? null_end_offset - 1 : null_end_offset;
631 int64_t mid = l + (h - l) / 2;
632 if (target_val >= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) {
641 #define DEF_RANGE_MODE_FRAME_UPPER_BOUND( \
642 target_val_type, col_type, null_type, opname, opsym) \
643 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t \
644 range_mode_##target_val_type##_##col_type##_##null_type##_##opname##_frame_upper_bound( \
645 const int64_t num_elems, \
646 const target_val_type target_value, \
647 const col_type* col_buf, \
648 const int32_t* partition_rowid_buf, \
649 const int64_t* ordered_index_buf, \
650 const int64_t frame_bound_val, \
651 const null_type null_val, \
652 const bool nulls_first, \
653 const int64_t null_start_pos, \
654 const int64_t null_end_pos) { \
655 if (target_value == null_val) { \
656 return null_end_pos; \
658 target_val_type new_val = target_value opsym frame_bound_val; \
659 return compute_upper_bound_from_ordered_partition_index<target_val_type, \
665 partition_rowid_buf, \
692 #undef DEF_RANGE_MODE_FRAME_UPPER_BOUND
694 template <typename COL_TYPE, typename LOGICAL_TYPE>
696 const int64_t frame_start_offset,
697 const int64_t frame_end_offset,
698 const COL_TYPE* col_buf,
699 const int32_t* partition_rowid_buf,
700 const int64_t* ordered_index_buf,
701 const LOGICAL_TYPE logical_null_val,
702 const LOGICAL_TYPE col_null_val) {
703 if (target_row_idx_in_frame < frame_start_offset ||
704 target_row_idx_in_frame > frame_end_offset) {
705 return logical_null_val;
707 const auto target_offset =
708 partition_rowid_buf[ordered_index_buf[target_row_idx_in_frame]];
709 LOGICAL_TYPE target_val = col_buf[target_offset];
710 if (target_val == col_null_val) {
711 return logical_null_val;
716 #define DEF_GET_VALUE_IN_FRAME(col_type, logical_type) \
717 extern "C" RUNTIME_EXPORT ALWAYS_INLINE logical_type \
718 get_##col_type##_value_##logical_type##_type_in_frame( \
719 const int64_t target_row_idx_in_frame, \
720 const int64_t frame_start_offset, \
721 const int64_t frame_end_offset, \
722 const col_type* col_buf, \
723 const int32_t* partition_rowid_buf, \
724 const int64_t* ordered_index_buf, \
725 const logical_type logical_null_val, \
726 const logical_type col_null_val) { \
727 return get_value_in_window_frame<col_type, logical_type>(target_row_idx_in_frame, \
728 frame_start_offset, \
731 partition_rowid_buf, \
748 #undef DEF_GET_VALUE_IN_FRAME
752 int64_t multiplier) {
753 return decoded_val == null_val ? decoded_val : decoded_val * multiplier;
758 int64_t current_partition_start_offset,
759 int64_t frame_bound) {
760 int64_t index = candidate_index - current_partition_start_offset - frame_bound;
761 return index < 0 ? 0 : index;
766 int64_t current_partition_start_offset,
768 int64_t num_current_partition_elem) {
769 int64_t index = candidate_index - current_partition_start_offset + frame_bound;
770 return index >= num_current_partition_elem ? num_current_partition_elem : index;
775 int64_t current_partition_start_offset,
776 int64_t frame_bound) {
777 int64_t index = candidate_index - current_partition_start_offset - frame_bound;
778 return index < 0 ? 0 : index + 1;
783 int64_t current_partition_start_offset,
785 int64_t num_current_partition_elem) {
786 int64_t index = candidate_index - current_partition_start_offset + frame_bound;
787 return index >= num_current_partition_elem ? num_current_partition_elem : index + 1;
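// Worked example (illustration): for a ROWS frame of 2 PRECEDING AND 2 FOLLOWING around the
// row at candidate_index = 12 in a partition that starts at offset 10 and holds 10 rows,
// compute_row_mode_start_index_sub(12, 10, 2) == max(12 - 10 - 2, 0) == 0 and
// compute_row_mode_end_index_add(12, 10, 2, 10) == (12 - 10 + 2) + 1 == 5, i.e. the frame
// spans partition-relative positions [0, 5).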
791 int64_t** aggregation_trees,
792 size_t partition_idx) {
793 return aggregation_trees[partition_idx];
797 int64_t** aggregation_trees,
798 size_t partition_idx) {
799 double** casted_aggregation_trees = reinterpret_cast<double**>(aggregation_trees);
800 return casted_aggregation_trees[partition_idx];
807 return casted_aggregation_trees[partition_idx];
814 return casted_aggregation_trees[partition_idx];
820 for (size_t i = 0; i < level; i++) {
821 offset += pow(tree_fanout, i);
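// Worked example (illustration): getStartOffsetForSegmentTreeTraversal sums the sizes of the
// levels above the requested one, so with tree_fanout = 4 the nodes at level 3 start at
// offset 1 + 4 + 16 = 21 (level 0 is the single root node).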
828 template <AggFuncType AGG_FUNC_TYPE, typename AGG_TYPE>
829 inline AGG_TYPE agg_func(AGG_TYPE const lhs, AGG_TYPE const rhs) {
831 return std::min(lhs, rhs);
833 return std::max(lhs, rhs);
840 template <AggFuncType AGG_FUNC_TYPE, typename AGG_TYPE>
842 AGG_TYPE* aggregation_tree_for_partition,
843 size_t query_range_start_idx,
844 size_t query_range_end_idx,
848 AGG_TYPE invalid_val,
851 size_t begin = leaf_start_idx + query_range_start_idx;
852 size_t end = leaf_start_idx + query_range_end_idx;
853 AGG_TYPE res = init_val;
854 bool all_nulls = true;
855 for (int level = leaf_level; level >= 0; level--) {
856 size_t parentBegin = begin / tree_fanout;
857 size_t parentEnd = (end - 1) / tree_fanout;
858 if (parentBegin == parentEnd) {
859 for (size_t pos = begin; pos < end; pos++) {
860 if (aggregation_tree_for_partition[pos] != null_val) {
862 res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
865 return all_nulls ? null_val : res;
866 } else if (parentBegin > parentEnd) {
869 size_t group_begin = (parentBegin * tree_fanout) + 1;
870 if (begin != group_begin) {
871 size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1;
872 for (size_t pos = begin; pos < limit; pos++) {
873 if (aggregation_tree_for_partition[pos] != null_val) {
875 res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
880 size_t group_end = (parentEnd * tree_fanout) + 1;
881 if (end != group_end) {
882 for (size_t pos = group_end; pos < end; pos++) {
883 if (aggregation_tree_for_partition[pos] != null_val) {
885 res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
895 #define DEF_SEARCH_AGGREGATION_TREE(agg_value_type) \
896 extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_value_type \
897 search_##agg_value_type##_aggregation_tree( \
898 agg_value_type* aggregated_tree_for_partition, \
899 size_t query_range_start_idx, \
900 size_t query_range_end_idx, \
902 size_t tree_fanout, \
905 agg_value_type invalid_val, \
906 agg_value_type null_val, \
907 int32_t agg_type) { \
908 if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) { \
911 switch (agg_type) { \
913 return compute_window_func_via_aggregation_tree<AggFuncType::MIN>( \
914 aggregated_tree_for_partition, \
915 query_range_start_idx, \
916 query_range_end_idx, \
919 std::numeric_limits<agg_value_type>::max(), \
924 return compute_window_func_via_aggregation_tree<AggFuncType::MAX>( \
925 aggregated_tree_for_partition, \
926 query_range_start_idx, \
927 query_range_end_idx, \
930 std::numeric_limits<agg_value_type>::lowest(), \
935 return compute_window_func_via_aggregation_tree<AggFuncType::SUM>( \
936 aggregated_tree_for_partition, \
937 query_range_start_idx, \
938 query_range_end_idx, \
941 static_cast<agg_value_type>(0), \
950 #undef DEF_SEARCH_AGGREGATION_TREE
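// Illustration only: DEF_SEARCH_AGGREGATION_TREE(int64_t) emits
// search_int64_t_aggregation_tree(...), which dispatches on agg_type to the MIN / MAX / SUM
// instantiations of compute_window_func_via_aggregation_tree, seeding the accumulator with
// std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::lowest(), or 0
// respectively, and returning null_val when every leaf in the queried range is null.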
952 template <typename AGG_VALUE_TYPE>
956 size_t query_range_start_idx,
957 size_t query_range_end_idx,
960 AGG_VALUE_TYPE invalid_val,
961 AGG_VALUE_TYPE null_val) {
963 size_t begin = leaf_start_idx + query_range_start_idx;
964 size_t end = leaf_start_idx + query_range_end_idx;
967 bool all_nulls = true;
968 for (int level = leaf_level; level >= 0; level--) {
969 size_t parentBegin = begin / tree_fanout;
970 size_t parentEnd = (end - 1) / tree_fanout;
971 if (parentBegin == parentEnd) {
972 for (size_t pos = begin; pos < end; pos++) {
973 if (aggregation_tree_for_partition[pos].sum != null_val) {
975 res.sum += aggregation_tree_for_partition[pos].sum;
976 res.count += aggregation_tree_for_partition[pos].count;
983 } else if (parentBegin > parentEnd) {
987 size_t group_begin = (parentBegin * tree_fanout) + 1;
988 if (begin != group_begin) {
989 size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1;
990 for (size_t pos = begin; pos < limit; pos++) {
991 if (aggregation_tree_for_partition[pos].sum != null_val) {
993 res.sum += aggregation_tree_for_partition[pos].sum;
994 res.count += aggregation_tree_for_partition[pos].count;
999 size_t group_end = (parentEnd * tree_fanout) + 1;
1000 if (end != group_end) {
1001 for (size_t pos = group_end; pos < end; pos++) {
1002 if (aggregation_tree_for_partition[pos].sum != null_val) {
1004 res.sum += aggregation_tree_for_partition[pos].sum;
1005 res.count += aggregation_tree_for_partition[pos].count;
1009 begin = parentBegin;
1016 #define DEF_SEARCH_DERIVED_AGGREGATION_TREE(agg_value_type) \
1017 extern "C" RUNTIME_EXPORT ALWAYS_INLINE double \
1018 search_##agg_value_type##_derived_aggregation_tree( \
1019 SumAndCountPair<agg_value_type>* aggregated_tree_for_partition, \
1020 size_t query_range_start_idx, \
1021 size_t query_range_end_idx, \
1022 size_t leaf_level, \
1023 size_t tree_fanout, \
1024 bool decimal_type, \
1026 agg_value_type invalid_val, \
1027 agg_value_type null_val, \
1028 int32_t agg_type) { \
1029 if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) { \
1032 SumAndCountPair<agg_value_type> res{0, 0}; \
1033 compute_derived_aggregates<agg_value_type>(aggregated_tree_for_partition, \
1035 query_range_start_idx, \
1036 query_range_end_idx, \
1041 if (res.sum == null_val) { \
1043 } else if (res.count > 0) { \
1044 if (decimal_type) { \
1045 return (static_cast<double>(res.sum) / pow(10, scale)) / res.count; \
1047 return (static_cast<double>(res.sum)) / res.count; \
1049 return invalid_val; \
1055 #undef DEF_SEARCH_DERIVED_AGGREGATION_TREE
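// Illustration only: the derived (AVG) tree stores SumAndCountPair nodes, so the generated
// search_*_derived_aggregation_tree helpers return sum / count for the queried range. With
// res.sum == 12345, res.count == 2 and a decimal input of scale 2, the result is
// (12345 / pow(10, 2)) / 2 == 61.725; null_val propagates when the accumulated sum is null.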
1057 #define DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG(agg_type, null_type) \
1058 extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_type \
1059 handle_null_val_##agg_type##_##null_type##_window_framing_agg( \
1060 agg_type res, null_type agg_null_val, agg_type input_col_null_val) { \
1061 if (res == agg_null_val) { \
1062 return input_col_null_val; \
1069 #undef DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG
1072 const auto old = *agg;
1079 const int8_t cond) {
1080 return cond ? agg_sum(agg, val) : *agg;
1084 *agg = std::max(*agg, val);
1088 *agg = std::min(*agg, val);
1096 const int64_t offset,
1097 const int8_t* value,
1098 const int64_t size_bytes) {
1099 for (auto i = 0; i < size_bytes; i++) {
1100 varlen_buffer[offset + i] = value[i];
1102 return &varlen_buffer[offset];
1107 if (val == null_val) {
1113 } else if (*agg == null_val) {
1125 const int64_t min_val,
1126 const int64_t skip_val) {
1127 if (val != skip_val) {
1147 const int32_t cond) {
1148 return cond ? (*agg)++ : *agg;
1152 const int32_t val) {
1153 const auto old = *agg;
1160 const int8_t cond) {
1164 #define DEF_AGG_MAX_INT(n) \
1165 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_int##n(int##n##_t* agg, \
1166 const int##n##_t val) { \
1167 *agg = std::max(*agg, val); \
1173 #undef DEF_AGG_MAX_INT
1175 #define DEF_AGG_MIN_INT(n) \
1176 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_int##n(int##n##_t* agg, \
1177 const int##n##_t val) { \
1178 *agg = std::min(*agg, val); \
1184 #undef DEF_AGG_MIN_INT
1186 #define DEF_AGG_ID_INT(n) \
1187 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_int##n(int##n##_t* agg, \
1188 const int##n##_t val) { \
1192 #define DEF_CHECKED_SINGLE_AGG_ID_INT(n) \
1193 extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id_int##n( \
1194 int##n##_t* agg, const int##n##_t val, const int##n##_t null_val) { \
1195 if (val == null_val) { \
1198 if (*agg == val) { \
1200 } else if (*agg == null_val) { \
1217 #undef DEF_AGG_ID_INT
1218 #undef DEF_CHECKED_SINGLE_AGG_ID_INT
1220 #define DEF_WRITE_PROJECTION_INT(n) \
1221 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void write_projection_int##n( \
1222 int8_t* slot_ptr, const int##n##_t val, const int64_t init_val) { \
1223 if (val != init_val) { \
1224 *reinterpret_cast<int##n##_t*>(slot_ptr) = val; \
1230 #undef DEF_WRITE_PROJECTION_INT
1234 const int64_t skip_val) {
1235 const auto old = *agg;
1236 if (val != skip_val) {
1237 if (old != skip_val) {
1248 const auto old = *agg;
1249 if (val != skip_val) {
1250 if (old != skip_val) {
1262 const int64_t skip_val,
1263 const int8_t cond) {
1270 const int32_t skip_val,
1271 const int8_t cond) {
1276 const int64_t cond) {
1277 return cond ? (*agg)++ : *agg;
1282 if (val != skip_val) {
1290 if (cond != skip_val) {
1298 if (val != skip_val) {
1306 if (cond != skip_val) {
1312 #define DEF_SKIP_AGG_ADD(base_agg_func) \
1313 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1314 DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
1315 if (val != skip_val) { \
1316 base_agg_func(agg, val); \
1320 #define DEF_SKIP_AGG(base_agg_func) \
1321 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1322 DATA_T* agg, const DATA_T val, const DATA_T skip_val) { \
1323 if (val != skip_val) { \
1324 const DATA_T old_agg = *agg; \
1325 if (old_agg != skip_val) { \
1326 base_agg_func(agg, val); \
1333 #define DATA_T int64_t
1338 #define DATA_T int32_t
1343 #define DATA_T int16_t
1348 #define DATA_T int8_t
1353 #undef DEF_SKIP_AGG_ADD
1365 const auto r = *reinterpret_cast<const double*>(agg) + val;
1366 *agg = *reinterpret_cast<const int64_t*>(may_alias_ptr(&r));
1371 const int8_t cond) {
1379 const auto r = std::max(*reinterpret_cast<const double*>(agg), val);
1380 *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
1385 const auto r = std::min(*reinterpret_cast<const double*>(agg), val);
1386 *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
1391 *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
1396 if (val == null_val) {
1400 if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)))) {
1402 } else if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&null_val)))) {
1403 *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
1418 const auto r = *reinterpret_cast<const float*>(agg) + val;
1419 *agg = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
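// Note on the float aggregates in this block: the accumulator slot is an int32_t, so each
// helper reinterprets its bits as a float (via may_alias_ptr to sidestep strict aliasing),
// applies the update, and stores the resulting bit pattern back. The double variants above
// do the same with int64_t slots.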
1424 const int8_t cond) {
1432 const auto r = std::max(*reinterpret_cast<const float*>(agg), val);
1433 *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
1438 const auto r = std::min(*reinterpret_cast<const float*>(agg), val);
1439 *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
1443 *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
1448 if (val == null_val) {
1452 if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)))) {
1454 } else if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&null_val)))) {
1455 *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
1465 if (val != skip_val) {
1473 if (val != skip_val) {
1479 #define DEF_SKIP_AGG(base_agg_func) \
1480 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
1481 ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
1482 if (val != skip_val) { \
1483 const ADDR_T old_agg = *agg; \
1484 if (old_agg != *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&skip_val))) { \
1485 base_agg_func(agg, val); \
1487 *agg = *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&val)); \
1492 #define DEF_SKIP_IF_AGG(skip_agg_func, base_agg_func) \
1493 extern "C" RUNTIME_EXPORT ALWAYS_INLINE void skip_agg_func##_skip_val( \
1494 ADDR_T* agg, const DATA_T val, const DATA_T skip_val, const int8_t cond) { \
1496 base_agg_func##_skip_val(agg, val, skip_val); \
1500 #define DATA_T double
1501 #define ADDR_T int64_t
1509 #define DATA_T float
1510 #define ADDR_T int32_t
1519 #undef DEF_SKIP_IF_AGG
1522 const int64_t scale) {
1524 return x / scale * scale;
1529 return x / scale * scale - scale;
1533 const int64_t scale) {
1539 #define DEF_SHARED_AGG_RET_STUBS(base_agg_func) \
1540 extern "C" GPU_RT_STUB uint64_t base_agg_func##_shared(uint64_t* agg, \
1541 const int64_t val) { \
1545 extern "C" GPU_RT_STUB uint64_t base_agg_func##_skip_val_shared( \
1546 uint64_t* agg, const int64_t val, const int64_t skip_val) { \
1549 extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_shared(uint32_t* agg, \
1550 const int32_t val) { \
1554 extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_skip_val_shared( \
1555 uint32_t* agg, const int32_t val, const int32_t skip_val) { \
1559 extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_shared(uint64_t* agg, \
1560 const double val) { \
1564 extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_skip_val_shared( \
1565 uint64_t* agg, const double val, const double skip_val) { \
1568 extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_shared(uint32_t* agg, \
1569 const float val) { \
1573 extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_skip_val_shared( \
1574 uint32_t* agg, const float val, const float skip_val) { \
1578 #define DEF_SHARED_AGG_STUBS(base_agg_func) \
1579 extern "C" GPU_RT_STUB void base_agg_func##_shared(int64_t* agg, const int64_t val) {} \
1581 extern "C" GPU_RT_STUB void base_agg_func##_skip_val_shared( \
1582 int64_t* agg, const int64_t val, const int64_t skip_val) {} \
1583 extern "C" GPU_RT_STUB void base_agg_func##_int32_shared(int32_t* agg, \
1584 const int32_t val) {} \
1585 extern "C" GPU_RT_STUB void base_agg_func##_int16_shared(int16_t* agg, \
1586 const int16_t val) {} \
1587 extern "C" GPU_RT_STUB void base_agg_func##_int8_shared(int8_t* agg, \
1588 const int8_t val) {} \
1590 extern "C" GPU_RT_STUB void base_agg_func##_int32_skip_val_shared( \
1591 int32_t* agg, const int32_t val, const int32_t skip_val) {} \
1593 extern "C" GPU_RT_STUB void base_agg_func##_double_shared(int64_t* agg, \
1594 const double val) {} \
1596 extern "C" GPU_RT_STUB void base_agg_func##_double_skip_val_shared( \
1597 int64_t* agg, const double val, const double skip_val) {} \
1598 extern "C" GPU_RT_STUB void base_agg_func##_float_shared(int32_t* agg, \
1599 const float val) {} \
1601 extern "C" GPU_RT_STUB void base_agg_func##_float_skip_val_shared( \
1602 int32_t* agg, const float val, const float skip_val) {}
1611 const int64_t offset,
1612 const int8_t* value,
1613 const int64_t size_bytes) {
1619 const int64_t null_val) {
1626 const int32_t null_val) {
1632 const int16_t null_val) {
1637 const int8_t null_val) {
1644 const double null_val) {
1650 const float null_val) {
1656 const int16_t skip_val) {}
1660 const int8_t skip_val) {}
1664 const int16_t skip_val) {}
1668 const int8_t skip_val) {}
1678 const int8_t cond) {
1684 const int64_t skip_val) {
1690 const int64_t skip_val,
1691 const int8_t cond) {
1700 const int32_t skip_val) {
1708 const double skip_val) {}
1713 const float skip_val) {}
1717 const int8_t cond) {
1723 const int32_t skip_val,
1724 const int8_t cond) {
1730 const int8_t cond) {}
1734 const double skip_val,
1735 const int8_t cond) {}
1738 const int8_t cond) {}
1742 const float skip_val,
1743 const int8_t cond) {}
1752 int64_t* output_buffer,
1753 const int32_t num_agg_cols){};
1757 int32_t row_index_resume{0};
1759 row_index_resume = error_code[0];
1762 return row_index_resume;
1792 int32_t* error_codes) {
1811 const int64_t* groups_buffer,
1812 const int32_t groups_buffer_size) {
1813 return groups_buffer;
1826 const int32_t groups_buffer_size) {
1831 int64_t* groups_buffer,
1832 const int64_t* init_vals,
1833 const uint32_t groups_buffer_entry_count,
1834 const uint32_t key_qw_count,
1835 const uint32_t agg_col_count,
1837 const int8_t warp_size) {
1840 assert(groups_buffer);
1845 int64_t* groups_buffer,
1846 const int64_t* init_vals,
1847 const uint32_t groups_buffer_entry_count,
1848 const uint32_t key_qw_count,
1849 const uint32_t agg_col_count,
1851 const bool blocks_share_memory,
1852 const int32_t frag_idx) {
1855 assert(groups_buffer);
1860 int64_t* groups_buffer,
1861 const int64_t* init_vals,
1862 const uint32_t groups_buffer_entry_count,
1863 const uint32_t key_qw_count,
1864 const uint32_t agg_col_count,
1866 const int8_t warp_size) {
1869 assert(groups_buffer);
1873 template <typename T>
1877 const uint32_t key_count,
1878 const uint32_t row_size_quad) {
1879 auto off = h * row_size_quad;
1880 auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
1881 if (*row_ptr == get_empty_key<T>()) {
1882 memcpy(row_ptr, key, key_count * sizeof(T));
1883 auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1886 if (memcmp(row_ptr, key, key_count * sizeof(T)) == 0) {
1887 auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
1894 int64_t* groups_buffer,
1897 const uint32_t key_count,
1898 const uint32_t key_width,
1899 const uint32_t row_size_quad) {
1900 switch (key_width) {
1904 reinterpret_cast<const int32_t*>(key),
1914 template <typename T>
1916 const uint32_t entry_count,
1919 const uint32_t key_count) {
1921 auto key_buffer = reinterpret_cast<T*>(groups_buffer);
1922 if (key_buffer[off] == get_empty_key<T>()) {
1923 for (size_t i = 0; i < key_count; ++i) {
1924 key_buffer[off] = key[i];
1930 for (size_t i = 0; i < key_count; ++i) {
1931 if (key_buffer[off] != key[i]) {
1941 const uint32_t entry_count,
1944 const uint32_t key_count,
1945 const uint32_t key_width) {
1946 switch (key_width) {
1951 reinterpret_cast<const int32_t*>(key),
1955 groups_buffer, entry_count, h, key, key_count);
1963 int64_t* groups_buffer,
1966 const uint32_t key_qw_count,
1967 const size_t entry_count) {
1970 for (size_t i = 0; i < key_qw_count; ++i) {
1971 groups_buffer[off] = key[i];
1974 return &groups_buffer[off];
1977 for (size_t i = 0; i < key_qw_count; ++i) {
1978 if (groups_buffer[off] != key[i]) {
1983 return &groups_buffer[off];
1998 int64_t* groups_buffer,
1999 const uint32_t hashed_index,
2001 const uint32_t key_count,
2002 const uint32_t row_size_quad) {
2003 uint32_t off = hashed_index * row_size_quad;
2005 for (uint32_t i = 0; i < key_count; ++i) {
2006 groups_buffer[off + i] = key[i];
2009 return groups_buffer + off + key_count;
2020 const uint32_t hashed_index,
2021 const uint32_t row_size_quad) {
2022 return groups_buffer + row_size_quad * hashed_index;
2031 const uint32_t hashed_index,
2033 const uint32_t key_count,
2034 const uint32_t entry_count) {
2036 for (uint32_t i = 0; i < key_count; i++) {
2037 groups_buffer[i * entry_count + hashed_index] = key[i];
2047 int64_t* groups_buffer,
2049 const int64_t min_key,
2051 const uint32_t row_size_quad) {
2052 return groups_buffer + row_size_quad * (key - min_key);
2056 int64_t* groups_buffer,
2058 const int64_t min_key,
2060 const uint32_t row_size_quad,
2062 const uint8_t warp_size) {
2063 return groups_buffer + row_size_quad * (warp_size * (key - min_key) + thread_warp_idx);
2067 const int32_t len) {
2068 return {reinterpret_cast<char const*>(ptr), static_cast<uint64_t>(len)};
2072 #include "../Utils/StringLike.cpp"
2099 const int64_t translation_map_handle,
2100 const int32_t min_source_id) {
2101 const int32_t* translation_map =
2102 reinterpret_cast<const int32_t*>(translation_map_handle);
2103 return translation_map[string_id - min_source_id];
2107 const double proportion,
2108 const int64_t row_offset) {
2109 const int64_t threshold = 4294967296 * proportion;
2110 return (row_offset * 2654435761) % 4294967296 < threshold;
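// Worked example (illustration): for proportion = 0.25 the threshold is
// 4294967296 * 0.25 == 1073741824, so a row is kept when its Knuth-style hash
// (row_offset * 2654435761) modulo 2^32 falls below that value, i.e. roughly a quarter of
// the rows are sampled deterministically by offset.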
2117 const double scale_factor,
2118 const int32_t partition_count) {
2119 if (target_value < lower_bound) {
2121 } else if (target_value >= upper_bound) {
2122 return partition_count + 1;
2124 return ((target_value - lower_bound) * scale_factor) + 1;
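// Worked example (illustration): with lower_bound = 0, upper_bound = 100, partition_count = 10
// and scale_factor = partition_count / (upper_bound - lower_bound) = 0.1, a target of 35 maps
// to static_cast<int32_t>((35 - 0) * 0.1 + 1) == 4, values >= 100 map to partition_count + 1,
// and the elided branch handles targets below the lower bound (the underflow bucket).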
2131 const double scale_factor,
2132 const int32_t partition_count) {
2133 if (target_value > lower_bound) {
2135 } else if (target_value <= upper_bound) {
2136 return partition_count + 1;
2138 return ((lower_bound - target_value) * scale_factor) + 1;
2145 const double scale_factor,
2146 const int32_t partition_count,
2147 const double null_val) {
2148 if (target_value == null_val) {
2152 target_value, lower_bound, upper_bound, scale_factor, partition_count);
2159 const double scale_factor,
2160 const int32_t partition_count,
2161 const double null_val) {
2162 if (target_value == null_val) {
2166 target_value, lower_bound, upper_bound, scale_factor, partition_count);
2175 const double scale_factor) {
2176 return ((target_value - lower_bound) * scale_factor) + 1;
2182 const double scale_factor) {
2183 return ((lower_bound - target_value) * scale_factor) + 1;
2188 const bool reversed,
2191 const int32_t partition_count) {
2196 partition_count / (lower_bound - upper_bound),
2202 partition_count / (upper_bound - lower_bound),
2208 const bool reversed,
2211 const int32_t partition_count,
2212 const double null_val) {
2213 if (target_value == null_val) {
2217 target_value, reversed, lower_bound, upper_bound, partition_count);
2222 const bool reversed,
2225 const int32_t partition_count) {
2228 target_value, lower_bound, partition_count / (lower_bound - upper_bound));
2231 target_value, lower_bound, partition_count / (upper_bound - lower_bound));
2236 return reinterpret_cast<const int64_t*>(output_buff)[pos];
2240 const int64_t output_buff,
2241 const int64_t pos) {
2242 return reinterpret_cast<const double*>(output_buff)[pos];
2246 return *reinterpret_cast<const double*>(may_alias_ptr(agg));
2250 return *reinterpret_cast<const float*>(may_alias_ptr(agg));
2254 const int64_t* count,
2255 const double null_val) {
2256 return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
2260 const int64_t* count,
2261 const double null_val,
2262 const uint32_t scale) {
2263 return *count != 0 ? (static_cast<double>(*sum) / pow(10, scale)) / *count : null_val;
2267 const int64_t* count,
2268 const double null_val) {
2269 return *count != 0 ? *reinterpret_cast<const double*>(may_alias_ptr(agg)) / *count : null_val;
2274 const int32_t* count,
2275 const double null_val) {
2276 return *count != 0 ? *reinterpret_cast<const float*>(may_alias_ptr(agg)) / *count : null_val;
2282 const uint32_t bitmap_bytes,
2283 const uint8_t* key_bytes,
2284 const uint32_t key_len) {
2285 const uint32_t bit_pos = MurmurHash3(key_bytes, key_len, 0) % (bitmap_bytes * 8);
2286 const uint32_t word_idx = bit_pos / 32;
2287 const uint32_t bit_idx = bit_pos % 32;
2288 reinterpret_cast<uint32_t*>(bitmap)[word_idx] |= 1 << bit_idx;
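// Worked example (illustration): if MurmurHash3 of the key modulo bitmap_bytes * 8 yields
// bit_pos = 77, the update sets bit 77 % 32 == 13 of 32-bit word 77 / 32 == 2, so repeated
// keys touch the same bit and the number of populated bits drives the probabilistic count.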
2292 const int8_t** col_buffers,
2293 const int8_t* literals,
2294 const int64_t* num_rows,
2295 const uint64_t* frag_row_offsets,
2296 const int32_t* max_matched,
2297 const int64_t* init_agg_value,
2300 const int64_t* join_hash_tables,
2301 int32_t* error_code,
2302 int32_t* total_matched,
2303 const int8_t* row_func_mgr) {
2305 assert(col_buffers || literals || num_rows || frag_row_offsets || max_matched ||
2306 init_agg_value || out || frag_idx || error_code || join_hash_tables ||
2307 total_matched || row_func_mgr);
2312 const int8_t*** col_buffers,
2313 const uint64_t* num_fragments,
2314 const int8_t* literals,
2315 const int64_t* num_rows,
2316 const uint64_t* frag_row_offsets,
2317 const int32_t* max_matched,
2318 int32_t* total_matched,
2319 const int64_t* init_agg_value,
2321 int32_t* error_code,
2322 const uint32_t* num_tables_ptr,
2323 const int64_t* join_hash_tables,
2324 const int8_t* row_func_mgr) {
2325 for (uint32_t i = 0; i < *num_fragments; ++i) {
2328 &num_rows[i * (*num_tables_ptr)],
2329 &frag_row_offsets[i * (*num_tables_ptr)],
2342 const int64_t* num_rows,
2343 const uint64_t* frag_row_offsets,
2344 const int32_t* max_matched,
2345 const int64_t* init_agg_value,
2348 const int64_t* join_hash_tables,
2349 int32_t* error_code,
2350 int32_t* total_matched,
2351 const int8_t* row_func_mgr) {
2353 assert(col_buffers || num_rows || frag_row_offsets || max_matched || init_agg_value ||
2354 out || frag_idx || error_code || join_hash_tables || total_matched ||
2360 const uint64_t* num_fragments,
2361 const int64_t* num_rows,
2362 const uint64_t* frag_row_offsets,
2363 const int32_t* max_matched,
2364 int32_t* total_matched,
2365 const int64_t* init_agg_value,
2367 int32_t* error_code,
2368 const uint32_t* num_tables_ptr,
2369 const int64_t* join_hash_tables,
2370 const int8_t* row_func_mgr) {
2371 for (uint32_t i = 0; i < *num_fragments; ++i) {
2372 query_stub(col_buffers ? col_buffers[i] : nullptr,
2373 &num_rows[i * (*num_tables_ptr)],
2374 &frag_row_offsets[i * (*num_tables_ptr)],
2396 if (command == static_cast<unsigned>(INT_CHECK)) {
2402 if (command == static_cast<unsigned>(INT_ABORT)) {
2406 if (command == static_cast<unsigned>(INT_RESET)) {
__device__ void agg_min_int8_skip_val_shared(int8_t *agg, const int8_t val, const int8_t skip_val)
RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_int32_skip_val(int32_t *agg, const int32_t val, const int32_t skip_val)
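The _skip_val aggregates evidently treat one sentinel value as "ignore this row". For SUM that plausibly means: inputs equal to skip_val are skipped, and if the accumulator itself still holds skip_val (no row accepted yet), the first accepted value overwrites it rather than being added. A hedged sketch of that handling (assumed semantics, not the library's implementation):

#include <cstdint>

// Sketch of the presumed skip_val handling for SUM: rows equal to skip_val
// are ignored, and the first accepted row replaces the sentinel in *agg.
static int64_t agg_sum_skip_val_sketch(int64_t* agg,
                                       const int64_t val,
                                       const int64_t skip_val) {
  const int64_t old = *agg;
  if (val != skip_val) {
    *agg = (old != skip_val) ? old + val : val;
  }
  // Returning the prior accumulator value is an assumption suggested by the
  // int64_t return type of the runtime variant.
  return old;
}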
RUNTIME_EXPORT ALWAYS_INLINE double load_avg_double(const int64_t *agg, const int64_t *count, const double null_val)
RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count(uint64_t *agg, const int64_t)
RUNTIME_EXPORT void multifrag_query_hoisted_literals(const int8_t ***col_buffers, const uint64_t *num_fragments, const int8_t *literals, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, int32_t *total_matched, const int64_t *init_agg_value, int64_t **out, int32_t *error_code, const uint32_t *num_tables_ptr, const int64_t *join_hash_tables, const int8_t *row_func_mgr)
RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_float(int32_t *agg, const float val)
RUNTIME_EXPORT ALWAYS_INLINE double load_avg_decimal(const int64_t *sum, const int64_t *count, const double null_val, const uint32_t scale)
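The load_avg_* family presumably finalizes an AVG by dividing an accumulated sum by an accumulated count, returning null_val for an empty group; the decimal variant would additionally undo the fixed-point scale. A hedged sketch of that arithmetic (the names and the power-of-ten rescaling are assumptions, not the library's code):

#include <cmath>
#include <cstdint>

// Sketch of the presumed AVG finalization: empty groups (count == 0) yield
// the NULL sentinel, otherwise sum / count.
static double load_avg_sketch(const int64_t sum,
                              const int64_t count,
                              const double null_val) {
  return count != 0 ? static_cast<double>(sum) / static_cast<double>(count)
                    : null_val;
}

// Decimal variant: the stored sum is a fixed-point integer, so divide the
// average by 10^scale to recover the logical value (assumed behavior).
static double load_avg_decimal_sketch(const int64_t sum,
                                      const int64_t count,
                                      const double null_val,
                                      const uint32_t scale) {
  return count != 0
             ? (static_cast<double>(sum) / count) / std::pow(10.0, scale)
             : null_val;
}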
RUNTIME_EXPORT bool check_interrupt_init(unsigned command)
RUNTIME_EXPORT ALWAYS_INLINE int32_t get_error_code(int32_t *error_codes)
RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum(int64_t *agg, const int64_t val)
#define DEF_SHARED_AGG_STUBS(base_agg_func)
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
__device__ int32_t checked_single_agg_id_shared(int64_t *agg, const int64_t val, const int64_t null_val)
#define DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG(agg_type, null_type)
#define DEF_AGG_ID_INT(n)
RUNTIME_EXPORT ALWAYS_INLINE void record_error_code(const int32_t err_code, int32_t *error_codes)
__device__ void agg_sum_if_float_skip_val_shared(int32_t *agg, const float val, const float skip_val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE bool sample_ratio(const double proportion, const int64_t row_offset)
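sample_ratio evidently decides, per row offset, whether a row falls inside a given sampling proportion. One plausible, deterministic way to do that is to hash the row offset into a fixed-width space and compare against proportion; the sketch below only illustrates that idea and is not claimed to match the engine's actual hash.

#include <cstdint>

// Illustrative sketch: map the row offset through a multiplicative hash into
// a 32-bit space and keep the row when the hashed value falls below
// proportion * 2^32. Deterministic per row, approximately Bernoulli(p).
static bool sample_ratio_sketch(const double proportion, const int64_t row_offset) {
  const uint64_t hashed =
      (static_cast<uint64_t>(row_offset) * 2654435761ULL) & 0xFFFFFFFFULL;
  const uint64_t threshold =
      static_cast<uint64_t>(proportion * 4294967296.0);  // p * 2^32
  return hashed < threshold;
}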
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
RUNTIME_EXPORT ALWAYS_INLINE double percent_window_func(const int64_t output_buff, const int64_t pos)
RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_or(const int8_t lhs, const int8_t rhs, const int8_t null_val)
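logical_or takes int8 operands plus a NULL sentinel, which suggests SQL three-valued logic: TRUE if either side is TRUE, NULL if neither side is TRUE and at least one is NULL, otherwise FALSE. A sketch of those standard semantics (the 1/0 encoding of TRUE/FALSE is an assumption):

#include <cstdint>

// Sketch of SQL three-valued OR with an int8 NULL sentinel
// (assumed encoding: 1 = TRUE, 0 = FALSE, null_val = NULL).
static int8_t logical_or_sketch(const int8_t lhs,
                                const int8_t rhs,
                                const int8_t null_val) {
  const bool lhs_null = (lhs == null_val);
  const bool rhs_null = (rhs == null_val);
  if ((!lhs_null && lhs != 0) || (!rhs_null && rhs != 0)) {
    return 1;  // TRUE dominates OR
  }
  if (lhs_null || rhs_null) {
    return null_val;  // no TRUE seen, but at least one NULL
  }
  return 0;  // both FALSE
}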
__device__ void force_sync()
#define DEF_SKIP_IF_AGG(skip_agg_func, base_agg_func)
#define DEF_RANGE_MODE_FRAME_LOWER_BOUND(target_val_type, col_type, null_type, opname, opsym)
RUNTIME_EXPORT void agg_min_int32(int32_t *agg, const int32_t val)
RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_if_double(int64_t *agg, const double val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_float_skip_val(uint32_t *agg, const float val, const float skip_val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t char_length_nullable(const char *str, const int32_t str_len, const int32_t int_null)
__device__ int32_t agg_sum_if_int32_skip_val_shared(int32_t *agg, const int32_t val, const int32_t skip_val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE SumAndCountPair< int64_t > * get_integer_derived_aggregation_tree(int64_t **aggregation_trees, size_t partition_idx)
RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_float(uint32_t *agg, const float val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_reversed_no_oob_check(const double target_value, const double lower_bound, const double scale_factor)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_expr(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count)
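The width_bucket_* family appears to mirror SQL's WIDTH_BUCKET: values below the range map to bucket 0, values at or above the upper bound to partition_count + 1, and in-range values to floor((target - lower) * partition_count / (upper - lower)) + 1; the reversed flag presumably handles descending bounds and the _nullable variants short-circuit on the NULL sentinel. A standalone sketch of the ascending-range arithmetic (standard WIDTH_BUCKET semantics, not the library's code):

#include <cmath>
#include <cstdint>

// Sketch of ascending-range WIDTH_BUCKET arithmetic.
static int32_t width_bucket_sketch(const double target,
                                   const double lower,
                                   const double upper,
                                   const int32_t partition_count) {
  if (target < lower) {
    return 0;  // below range
  }
  if (target >= upper) {
    return partition_count + 1;  // at or above range
  }
  const double scale = partition_count / (upper - lower);
  return static_cast<int32_t>(std::floor((target - lower) * scale)) + 1;
}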
__device__ int32_t group_buff_idx_impl()