#error This code is not intended to be compiled with a CUDA C++ compiler

#include "../Shared/funcannotations.h"
#define DEF_ARITH_NULLABLE(type, null_type, opname, opsym)        \
  extern "C" ALWAYS_INLINE type opname##_##type##_nullable(       \
      const type lhs, const type rhs, const null_type null_val) { \
    if (lhs != null_val && rhs != null_val) {                     \
      return lhs opsym rhs;                                       \
    }                                                             \
    return null_val;                                              \
  }
#define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym)    \
  extern "C" ALWAYS_INLINE type opname##_##type##_nullable_lhs(   \
      const type lhs, const type rhs, const null_type null_val) { \
    if (lhs != null_val) {                                        \
      return lhs opsym rhs;                                       \
    }                                                             \
    return null_val;                                              \
  }
#define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym)    \
  extern "C" ALWAYS_INLINE type opname##_##type##_nullable_rhs(   \
      const type lhs, const type rhs, const null_type null_val) { \
    if (rhs != null_val) {                                        \
      return lhs opsym rhs;                                       \
    }                                                             \
    return null_val;                                              \
  }
#define DEF_CMP_NULLABLE(type, null_type, opname, opsym)      \
  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable( \
      const type lhs,                                         \
      const type rhs,                                         \
      const null_type null_val,                               \
      const int8_t null_bool_val) {                           \
    if (lhs != null_val && rhs != null_val) {                 \
      return lhs opsym rhs;                                   \
    }                                                         \
    return null_bool_val;                                     \
  }
#define DEF_CMP_NULLABLE_LHS(type, null_type, opname, opsym)      \
  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable_lhs( \
      const type lhs,                                             \
      const type rhs,                                             \
      const null_type null_val,                                   \
      const int8_t null_bool_val) {                               \
    if (lhs != null_val) {                                        \
      return lhs opsym rhs;                                       \
    }                                                             \
    return null_bool_val;                                         \
  }
#define DEF_CMP_NULLABLE_RHS(type, null_type, opname, opsym)      \
  extern "C" ALWAYS_INLINE int8_t opname##_##type##_nullable_rhs( \
      const type lhs,                                             \
      const type rhs,                                             \
      const null_type null_val,                                   \
      const int8_t null_bool_val) {                               \
    if (rhs != null_val) {                                        \
      return lhs opsym rhs;                                       \
    }                                                             \
    return null_bool_val;                                         \
  }
#define DEF_SAFE_DIV_NULLABLE(type, null_type, opname)            \
  extern "C" ALWAYS_INLINE type safe_div_##type(                  \
      const type lhs, const type rhs, const null_type null_val) { \
    if (lhs != null_val && rhs != null_val && rhs != 0) {         \
      return lhs / rhs;                                           \
    }                                                             \
    return null_val;                                              \
  }
#define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type) \
  DEF_ARITH_NULLABLE(type, null_type, add, +)        \
  DEF_ARITH_NULLABLE(type, null_type, sub, -)        \
  DEF_ARITH_NULLABLE(type, null_type, mul, *)        \
  DEF_ARITH_NULLABLE(type, null_type, div, /)        \
  DEF_SAFE_DIV_NULLABLE(type, null_type, safe_div)   \
  DEF_ARITH_NULLABLE_LHS(type, null_type, add, +)    \
  DEF_ARITH_NULLABLE_LHS(type, null_type, sub, -)    \
  DEF_ARITH_NULLABLE_LHS(type, null_type, mul, *)    \
  DEF_ARITH_NULLABLE_LHS(type, null_type, div, /)    \
  DEF_ARITH_NULLABLE_RHS(type, null_type, add, +)    \
  DEF_ARITH_NULLABLE_RHS(type, null_type, sub, -)    \
  DEF_ARITH_NULLABLE_RHS(type, null_type, mul, *)    \
  DEF_ARITH_NULLABLE_RHS(type, null_type, div, /)    \
  DEF_CMP_NULLABLE(type, null_type, eq, ==)          \
  DEF_CMP_NULLABLE(type, null_type, ne, !=)          \
  DEF_CMP_NULLABLE(type, null_type, lt, <)           \
  DEF_CMP_NULLABLE(type, null_type, gt, >)           \
  DEF_CMP_NULLABLE(type, null_type, le, <=)          \
  DEF_CMP_NULLABLE(type, null_type, ge, >=)          \
  DEF_CMP_NULLABLE_LHS(type, null_type, eq, ==)      \
  DEF_CMP_NULLABLE_LHS(type, null_type, ne, !=)      \
  DEF_CMP_NULLABLE_LHS(type, null_type, lt, <)       \
  DEF_CMP_NULLABLE_LHS(type, null_type, gt, >)       \
  DEF_CMP_NULLABLE_LHS(type, null_type, le, <=)      \
  DEF_CMP_NULLABLE_LHS(type, null_type, ge, >=)      \
  DEF_CMP_NULLABLE_RHS(type, null_type, eq, ==)      \
  DEF_CMP_NULLABLE_RHS(type, null_type, ne, !=)      \
  DEF_CMP_NULLABLE_RHS(type, null_type, lt, <)       \
  DEF_CMP_NULLABLE_RHS(type, null_type, gt, >)       \
  DEF_CMP_NULLABLE_RHS(type, null_type, le, <=)      \
  DEF_CMP_NULLABLE_RHS(type, null_type, ge, >=)
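// Illustrative sketch, not part of the original file: assuming an instantiation
// such as DEF_BINARY_NULLABLE_ALL_OPS(int32_t, int64_t) (the exact type pairs the
// engine instantiates are not shown in this listing), the expansion of
// DEF_ARITH_NULLABLE(int32_t, int64_t, add, +) is equivalent to the following,
// with ALWAYS_INLINE replaced by plain inline so the sketch is self-contained:
#include <cstdint>

extern "C" inline int32_t add_int32_t_nullable_example(const int32_t lhs,
                                                       const int32_t rhs,
                                                       const int64_t null_val) {
  // Any operand equal to the NULL sentinel makes the result NULL.
  if (lhs != null_val && rhs != null_val) {
    return lhs + rhs;
  }
  return null_val;
}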
#undef DEF_BINARY_NULLABLE_ALL_OPS
#undef DEF_SAFE_DIV_NULLABLE
#undef DEF_CMP_NULLABLE_RHS
#undef DEF_CMP_NULLABLE_LHS
#undef DEF_CMP_NULLABLE
#undef DEF_ARITH_NULLABLE_RHS
#undef DEF_ARITH_NULLABLE_LHS
#undef DEF_ARITH_NULLABLE
extern "C" ALWAYS_INLINE int64_t scale_decimal_up(const int64_t operand,
    const uint64_t scale, const int64_t operand_null_val, const int64_t result_null_val) {
  return operand != operand_null_val ? operand * scale : result_null_val;
}

extern "C" ALWAYS_INLINE int64_t scale_decimal_down_nullable(const int64_t operand,
    const int64_t scale, const int64_t null_val) {
  if (operand == null_val) {
    return null_val;
  }
  int64_t tmp = scale >> 1;
  tmp = operand >= 0 ? operand + tmp : operand - tmp;
  return tmp / scale;
}

extern "C" ALWAYS_INLINE int64_t scale_decimal_down_not_nullable(const int64_t operand,
    const int64_t scale, const int64_t null_val) {
  int64_t tmp = scale >> 1;
  tmp = operand >= 0 ? operand + tmp : operand - tmp;
  return tmp / scale;
}

extern "C" ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend,
                                               const int64_t divisor) {
  return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
}

extern "C" ALWAYS_INLINE int64_t floor_div_nullable_lhs(const int64_t dividend,
    const int64_t divisor, const int64_t null_val) {
  return dividend == null_val ? null_val : floor_div_lhs(dividend, divisor);
}
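// Illustrative sketch, not part of the original file: floor_div_lhs above
// implements floored division (for positive divisors), which differs from C++'s
// truncating '/' when the dividend is negative. A standalone check of the same
// identity:
#include <cassert>
#include <cstdint>

inline int64_t floor_div_sketch(const int64_t dividend, const int64_t divisor) {
  return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
}

int main() {
  assert(floor_div_sketch(7, 2) == 3);    // same as truncating division
  assert(floor_div_sketch(-7, 2) == -4);  // truncating division would give -3
  assert(-7 / 2 == -3);                   // built-in '/' truncates toward zero
  return 0;
}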
#define DEF_UMINUS_NULLABLE(type, null_type)                                  \
  extern "C" ALWAYS_INLINE type uminus_##type##_nullable(const type operand,  \
                                                         const null_type null_val) { \
    return operand == null_val ? null_val : -operand;                         \
  }

#undef DEF_UMINUS_NULLABLE
#define DEF_CAST_NULLABLE(from_type, to_type)                                  \
  extern "C" ALWAYS_INLINE to_type cast_##from_type##_to_##to_type##_nullable( \
      const from_type operand,                                                 \
      const from_type from_null_val,                                           \
      const to_type to_null_val) {                                             \
    return operand == from_null_val ? to_null_val : operand;                   \
  }

#define DEF_CAST_NULLABLE_BIDIR(type1, type2) \
  DEF_CAST_NULLABLE(type1, type2)             \
  DEF_CAST_NULLABLE(type2, type1)

#undef DEF_CAST_NULLABLE_BIDIR
#undef DEF_CAST_NULLABLE
extern "C" ALWAYS_INLINE int8_t logical_not(const int8_t operand, const int8_t null_val) {
  return operand == null_val ? operand : (operand ? 0 : 1);
}

extern "C" ALWAYS_INLINE int8_t logical_and(const int8_t lhs, const int8_t rhs, const int8_t null_val) {
  if (lhs == null_val) {
    return rhs == 0 ? rhs : null_val;
  }
  if (rhs == null_val) {
    return lhs == 0 ? lhs : null_val;
  }
  return (lhs && rhs) ? 1 : 0;
}

extern "C" ALWAYS_INLINE int8_t logical_or(const int8_t lhs, const int8_t rhs, const int8_t null_val) {
  if (lhs == null_val) {
    return rhs == 0 ? null_val : rhs;
  }
  if (rhs == null_val) {
    return lhs == 0 ? null_val : lhs;
  }
  return (lhs || rhs) ? 1 : 0;
}
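// Illustrative sketch, not part of the original file: logical_and/logical_or
// above implement SQL three-valued logic with an int8_t NULL sentinel. The
// sentinel value used below is an assumption for illustration only; the real
// sentinel is defined elsewhere in the engine. Reference copies of the two
// helpers (renamed to avoid clashing with the extern "C" symbols) verify the
// expected truth table:
#include <cassert>
#include <cstdint>

inline int8_t ref_logical_and(int8_t lhs, int8_t rhs, int8_t null_val) {
  if (lhs == null_val) return rhs == 0 ? rhs : null_val;
  if (rhs == null_val) return lhs == 0 ? lhs : null_val;
  return (lhs && rhs) ? 1 : 0;
}

inline int8_t ref_logical_or(int8_t lhs, int8_t rhs, int8_t null_val) {
  if (lhs == null_val) return rhs == 0 ? null_val : rhs;
  if (rhs == null_val) return lhs == 0 ? null_val : lhs;
  return (lhs || rhs) ? 1 : 0;
}

int main() {
  const int8_t kNull = -128;  // assumed sentinel, for illustration only
  assert(ref_logical_and(kNull, 0, kNull) == 0);      // NULL AND FALSE = FALSE
  assert(ref_logical_and(kNull, 1, kNull) == kNull);  // NULL AND TRUE  = NULL
  assert(ref_logical_or(kNull, 1, kNull) == 1);       // NULL OR TRUE   = TRUE
  assert(ref_logical_or(kNull, 0, kNull) == kNull);   // NULL OR FALSE  = NULL
  return 0;
}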
extern "C" ALWAYS_INLINE void agg_count_distinct_bitmap(int64_t* agg,
                                                        const int64_t val,
                                                        const int64_t min_val) {
  const uint64_t bitmap_idx = val - min_val;
  reinterpret_cast<int8_t*>(*agg)[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
}
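// Illustrative sketch, not part of the original file: agg_count_distinct_bitmap
// sets one bit per distinct (val - min_val). A hypothetical reader could recover
// the exact distinct count from such a bitmap with a popcount pass:
#include <bitset>
#include <cstddef>
#include <cstdint>

inline int64_t count_bits_set(const int8_t* bitmap, size_t num_bytes) {
  int64_t count = 0;
  for (size_t i = 0; i < num_bytes; ++i) {
    // Count the set bits of each byte; the total equals the number of
    // distinct values recorded so far.
    count += std::bitset<8>(static_cast<uint8_t>(bitmap[i])).count();
  }
  return count;
}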
// Two alternative definitions from the branches of a preprocessor conditional
// (the condition itself is elided in this listing):
#define GPU_RT_STUB NEVER_INLINE
#define GPU_RT_STUB NEVER_INLINE __attribute__((optnone))
extern "C" NEVER_INLINE void agg_approximate_count_distinct(int64_t* agg,
                                                            const int64_t key,
                                                            const uint32_t b) {
  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
  const uint32_t index = hash >> (64 - b);
  const uint8_t rank = get_rank(hash << b, 64 - b);
  uint8_t* M = reinterpret_cast<uint8_t*>(*agg);
  M[index] = std::max(M[index], rank);
}
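// Illustrative sketch, not part of the original file: the update above is the
// standard HyperLogLog register write (bucket index from the top b bits of the
// hash, rank of the remaining bits). A simplified estimator over the resulting
// register array M -- ignoring the small/large-range corrections the engine may
// apply elsewhere -- would look like:
#include <cmath>
#include <cstdint>
#include <vector>

inline double hll_estimate(const std::vector<uint8_t>& M) {
  const double m = static_cast<double>(M.size());   // number of registers (2^b)
  const double alpha = 0.7213 / (1.0 + 1.079 / m);  // standard HLL bias constant
  double inv_sum = 0.0;
  for (uint8_t rank : M) {
    inv_sum += std::ldexp(1.0, -static_cast<int>(rank));  // add 2^-rank
  }
  return alpha * m * m / inv_sum;  // harmonic-mean cardinality estimate
}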
extern "C" ALWAYS_INLINE int8_t bit_is_set(const int64_t bitset,
                                           const int64_t val,
                                           const int64_t min_val,
                                           const int64_t max_val,
                                           const int64_t null_val,
                                           const int8_t null_bool_val) {
  if (val == null_val) {
    return null_bool_val;
  }
  if (val < min_val || val > max_val) {
    return 0;
  }
  const uint64_t bitmap_idx = val - min_val;
  return (reinterpret_cast<const int8_t*>(bitset))[bitmap_idx >> 3] &
                 (1 << (bitmap_idx & 7))
             ? 1
             : 0;
}
extern "C" ALWAYS_INLINE int64_t agg_sum(int64_t* agg, const int64_t val) {
  const auto old = *agg;
  *agg += val;
  return old;
}

extern "C" ALWAYS_INLINE void agg_max(int64_t* agg, const int64_t val) {
  *agg = std::max(*agg, val);
}

extern "C" ALWAYS_INLINE void agg_min(int64_t* agg, const int64_t val) {
  *agg = std::min(*agg, val);
}

// Fragments of checked_single_agg_id and agg_count_distinct_bitmap_skip_val
// (their interiors are elided in this listing):
    const int64_t null_val) {
  if (val == null_val) {
  } else if (*agg == null_val) {
    const int64_t min_val,
    const int64_t skip_val) {
  if (val != skip_val) {
  const auto old = *agg;
#define DEF_AGG_MAX_INT(n)                                                              \
  extern "C" ALWAYS_INLINE void agg_max_int##n(int##n##_t* agg, const int##n##_t val) { \
    *agg = std::max(*agg, val);                                                         \
  }

#undef DEF_AGG_MAX_INT
#define DEF_AGG_MIN_INT(n)                                                              \
  extern "C" ALWAYS_INLINE void agg_min_int##n(int##n##_t* agg, const int##n##_t val) { \
    *agg = std::min(*agg, val);                                                         \
  }

#undef DEF_AGG_MIN_INT
#define DEF_AGG_ID_INT(n)                                                              \
  extern "C" ALWAYS_INLINE void agg_id_int##n(int##n##_t* agg, const int##n##_t val) { \
    *agg = val;                                                                        \
  }

#define DEF_CHECKED_SINGLE_AGG_ID_INT(n)                                  \
  extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_int##n(          \
      int##n##_t* agg, const int##n##_t val, const int##n##_t null_val) { \
    if (val == null_val) {                                                \
    } else if (*agg == null_val) {                                        \
    /* remaining branches elided in this listing */

#undef DEF_AGG_ID_INT
#undef DEF_CHECKED_SINGLE_AGG_ID_INT
#define DEF_WRITE_PROJECTION_INT(n)                                      \
  extern "C" ALWAYS_INLINE void write_projection_int##n(                 \
      int8_t* slot_ptr, const int##n##_t val, const int64_t init_val) {  \
    if (val != init_val) {                                               \
      *reinterpret_cast<int##n##_t*>(slot_ptr) = val;                    \
    }                                                                    \
  }

#undef DEF_WRITE_PROJECTION_INT
// Fragments of agg_sum_skip_val, agg_sum_int32_skip_val, agg_count_skip_val and
// agg_count_int32_skip_val (their interiors are elided in this listing):
const int64_t skip_val) {
  const auto old = *agg;
  if (val != skip_val) {
    if (old != skip_val) {

const int32_t skip_val) {
  const auto old = *agg;
  if (val != skip_val) {
    if (old != skip_val) {

const int64_t skip_val) {
  if (val != skip_val) {

const int32_t skip_val) {
  if (val != skip_val) {
#define DEF_SKIP_AGG_ADD(base_agg_func)                        \
  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val(      \
      DATA_T* agg, const DATA_T val, const DATA_T skip_val) {  \
    if (val != skip_val) {                                     \
      base_agg_func(agg, val);                                 \
    }                                                          \
  }

#define DEF_SKIP_AGG(base_agg_func)                            \
  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val(      \
      DATA_T* agg, const DATA_T val, const DATA_T skip_val) {  \
    if (val != skip_val) {                                     \
      const DATA_T old_agg = *agg;                             \
      if (old_agg != skip_val) {                               \
        base_agg_func(agg, val);                               \
      } else {                                                 \
        *agg = val;                                            \
      }                                                        \
    }                                                          \
  }

// In the full source each DATA_T definition below is followed by the skip-val
// instantiations for that width and a matching #undef DATA_T; those lines are
// elided in this listing.
#define DATA_T int64_t
#define DATA_T int32_t
#define DATA_T int16_t
#define DATA_T int8_t

#undef DEF_SKIP_AGG_ADD
extern "C" ALWAYS_INLINE void agg_sum_double(int64_t* agg, const double val) {
  const auto r = *reinterpret_cast<const double*>(agg) + val;
  *agg = *reinterpret_cast<const int64_t*>(may_alias_ptr(&r));
}

extern "C" ALWAYS_INLINE void agg_max_double(int64_t* agg, const double val) {
  const auto r = std::max(*reinterpret_cast<const double*>(agg), val);
  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
}

extern "C" ALWAYS_INLINE void agg_min_double(int64_t* agg, const double val) {
  const auto r = std::min(*reinterpret_cast<const double*>(agg), val);
  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
}

extern "C" ALWAYS_INLINE void agg_id_double(int64_t* agg, const double val) {
  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
}
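// Illustrative sketch, not part of the original file: double aggregates are kept
// bit-for-bit inside int64_t slots (see agg_id_double above), and a reader such
// as load_double later reinterprets the same bits back. The round trip, written
// with memcpy as the portable equivalent of the may_alias_ptr casts:
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  int64_t slot = 0;
  const double val = 3.5;
  std::memcpy(&slot, &val, sizeof(double));  // store: same effect as the bit cast above
  double out = 0.0;
  std::memcpy(&out, &slot, sizeof(double));  // load: what load_double recovers
  assert(out == val);
  return 0;
}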
extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_double(int64_t* agg,
                                                              const double val,
                                                              const double null_val) {
  if (val == null_val) {
    // ...
  }
  if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)))) {
    // ...
  } else if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&null_val)))) {
    *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
    // (remaining branches and return values elided in this listing)
extern "C" ALWAYS_INLINE void agg_sum_float(int32_t* agg, const float val) {
  const auto r = *reinterpret_cast<const float*>(agg) + val;
  *agg = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
}

extern "C" ALWAYS_INLINE void agg_max_float(int32_t* agg, const float val) {
  const auto r = std::max(*reinterpret_cast<const float*>(agg), val);
  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
}

extern "C" ALWAYS_INLINE void agg_min_float(int32_t* agg, const float val) {
  const auto r = std::min(*reinterpret_cast<const float*>(agg), val);
  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
}

extern "C" ALWAYS_INLINE void agg_id_float(int32_t* agg, const float val) {
  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
}
extern "C" ALWAYS_INLINE int32_t checked_single_agg_id_float(int32_t* agg,
                                                             const float val,
                                                             const float null_val) {
  if (val == null_val) {
    // ...
  }
  if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)))) {
    // ...
  } else if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&null_val)))) {
    *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
    // (remaining branches and return values elided in this listing)
// Fragments of agg_count_double_skip_val and agg_count_float_skip_val
// (their interiors are elided in this listing):
const double skip_val) {
  if (val != skip_val) {

const float skip_val) {
  if (val != skip_val) {
#define DEF_SKIP_AGG_ADD(base_agg_func)                        \
  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val(      \
      ADDR_T* agg, const DATA_T val, const DATA_T skip_val) {  \
    if (val != skip_val) {                                     \
      base_agg_func(agg, val);                                 \
    }                                                          \
  }

#define DEF_SKIP_AGG(base_agg_func)                                                \
  extern "C" ALWAYS_INLINE void base_agg_func##_skip_val(                          \
      ADDR_T* agg, const DATA_T val, const DATA_T skip_val) {                      \
    if (val != skip_val) {                                                         \
      const ADDR_T old_agg = *agg;                                                 \
      if (old_agg != *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&skip_val))) { \
        base_agg_func(agg, val);                                                   \
      } else {                                                                     \
        *agg = *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&val));              \
      }                                                                            \
    }                                                                              \
  }

// In the full source the DATA_T/ADDR_T definitions below are each followed by
// the floating-point skip-val instantiations and matching #undef lines; those
// are elided in this listing.
#define DATA_T double
#define ADDR_T int64_t

#define ADDR_T int32_t

#undef DEF_SKIP_AGG_ADD
// Fragments of decimal_floor / decimal_ceil (the branch structure is elided in
// this listing):
  return x / scale * scale;
  return x / scale * scale - scale;
// (the return statements of each stub body are elided in this listing)
#define DEF_SHARED_AGG_RET_STUBS(base_agg_func)                                        \
  extern "C" GPU_RT_STUB uint64_t base_agg_func##_shared(uint64_t* agg,                \
                                                         const int64_t val) {          \
  extern "C" GPU_RT_STUB uint64_t base_agg_func##_skip_val_shared(                     \
      uint64_t* agg, const int64_t val, const int64_t skip_val) {                      \
  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_shared(uint32_t* agg,          \
                                                               const int32_t val) {    \
  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_skip_val_shared(               \
      uint32_t* agg, const int32_t val, const int32_t skip_val) {                      \
  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_shared(uint64_t* agg,         \
                                                                const double val) {    \
  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_skip_val_shared(              \
      uint64_t* agg, const double val, const double skip_val) {                        \
  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_shared(uint32_t* agg,          \
                                                               const float val) {      \
  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_skip_val_shared(               \
      uint32_t* agg, const float val, const float skip_val) {                          \
#define DEF_SHARED_AGG_STUBS(base_agg_func)                                              \
  extern "C" GPU_RT_STUB void base_agg_func##_shared(int64_t* agg, const int64_t val) {} \
  extern "C" GPU_RT_STUB void base_agg_func##_skip_val_shared(                           \
      int64_t* agg, const int64_t val, const int64_t skip_val) {}                        \
  extern "C" GPU_RT_STUB void base_agg_func##_int32_shared(int32_t* agg,                 \
                                                           const int32_t val) {}         \
  extern "C" GPU_RT_STUB void base_agg_func##_int16_shared(int16_t* agg,                 \
                                                           const int16_t val) {}         \
  extern "C" GPU_RT_STUB void base_agg_func##_int8_shared(int8_t* agg,                   \
                                                          const int8_t val) {}           \
  extern "C" GPU_RT_STUB void base_agg_func##_int32_skip_val_shared(                     \
      int32_t* agg, const int32_t val, const int32_t skip_val) {}                        \
  extern "C" GPU_RT_STUB void base_agg_func##_double_shared(int64_t* agg,                \
                                                            const double val) {}         \
  extern "C" GPU_RT_STUB void base_agg_func##_double_skip_val_shared(                    \
      int64_t* agg, const double val, const double skip_val) {}                          \
  extern "C" GPU_RT_STUB void base_agg_func##_float_shared(int32_t* agg,                 \
                                                           const float val) {}           \
  extern "C" GPU_RT_STUB void base_agg_func##_float_skip_val_shared(                     \
      int32_t* agg, const float val, const float skip_val) {}
// Trailing parameter lines of further GPU stub definitions (the corresponding
// function names and bodies are elided in this listing):
    const int64_t null_val) {
    const int32_t null_val) {
    const int16_t null_val) {
    const int8_t null_val) {
    const double null_val) {
    const float null_val) {
    const int16_t skip_val) {}
    const int8_t skip_val) {}
    const int16_t skip_val) {}
    const int8_t skip_val) {}
    const int64_t skip_val) {
    const int32_t skip_val) {
    const double skip_val) {}
    const float skip_val) {}
// Fragments of several small helpers (write-back stub, pos_start_impl,
// error-code accessors, init_shared_mem_nop and related functions); their
// leading lines are elided in this listing:
    int64_t* output_buffer,
    const int32_t num_agg_cols){};

  int32_t row_index_resume{0};
    row_index_resume = error_code[0];
  return row_index_resume;

    int32_t* error_codes) {

    const int64_t* groups_buffer,
    const int32_t groups_buffer_size) {
  return groups_buffer;

    const int32_t groups_buffer_size) {
// Parameter lists of the group-by buffer initialization helpers
// (init_group_by_buffer_* and init_columnar_group_by_buffer_gpu); the function
// names, remaining parameters and loop bodies are elided in this listing.
    int64_t* groups_buffer,
    const int64_t* init_vals,
    const uint32_t groups_buffer_entry_count,
    const uint32_t key_qw_count,
    const uint32_t agg_col_count,
    const int8_t warp_size) {
  assert(groups_buffer);

    int64_t* groups_buffer,
    const int64_t* init_vals,
    const uint32_t groups_buffer_entry_count,
    const uint32_t key_qw_count,
    const uint32_t agg_col_count,
    const bool blocks_share_memory,
    const int32_t frag_idx) {
  assert(groups_buffer);

    int64_t* groups_buffer,
    const int64_t* init_vals,
    const uint32_t groups_buffer_entry_count,
    const uint32_t key_qw_count,
    const uint32_t agg_col_count,
    const int8_t warp_size) {
  assert(groups_buffer);
template <typename T>
ALWAYS_INLINE int64_t* get_matching_group_value(int64_t* groups_buffer,
                                                const uint32_t h,
                                                const T* key,
                                                const uint32_t key_count,
                                                const uint32_t row_size_quad) {
  auto off = h * row_size_quad;
  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
  if (*row_ptr == get_empty_key<T>()) {
    memcpy(row_ptr, key, key_count * sizeof(T));
    auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
    // (return elided in this listing)
  }
  if (memcmp(row_ptr, key, key_count * sizeof(T)) == 0) {
    auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
    // (return elided in this listing)
  }
  // (fall-through return elided in this listing)
}
// Fragment of the key_width dispatcher for get_matching_group_value (other
// lines elided in this listing):
    const uint32_t key_count,
    const uint32_t key_width,
    const uint32_t row_size_quad) {
  switch (key_width) {
        reinterpret_cast<const int32_t*>(key),
template <typename T>
ALWAYS_INLINE int32_t get_matching_group_value_columnar_slot(int64_t* groups_buffer,
                                                             const uint32_t entry_count,
                                                             const uint32_t h,
                                                             const T* key,
                                                             const uint32_t key_count) {
  // The column-major slot offset 'off' is derived from h; its updates and the
  // return statements are elided in this listing.
  auto key_buffer = reinterpret_cast<T*>(groups_buffer);
  if (key_buffer[off] == get_empty_key<T>()) {
    for (size_t i = 0; i < key_count; ++i) {
      key_buffer[off] = key[i];
    }
  }
  for (size_t i = 0; i < key_count; ++i) {
    if (key_buffer[off] != key[i]) {
    }
  }
}
// Fragment of the key_width dispatcher for get_matching_group_value_columnar_slot
// (other lines elided in this listing):
    const uint32_t entry_count,
    const uint32_t key_count,
    const uint32_t key_width) {
  switch (key_width) {
        reinterpret_cast<const int32_t*>(key),
      groups_buffer, entry_count, h, key, key_count);
extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_columnar(
    int64_t* groups_buffer,
    const uint32_t h,
    const int64_t* key,
    const uint32_t key_qw_count,
    const size_t entry_count) {
  // The column-major offset 'off' and parts of the probe logic are elided in
  // this listing.
  for (size_t i = 0; i < key_qw_count; ++i) {
    groups_buffer[off] = key[i];
  }
  return &groups_buffer[off];

  for (size_t i = 0; i < key_qw_count; ++i) {
    if (groups_buffer[off] != key[i]) {
    }
  }
  return &groups_buffer[off];
}
extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash(
    int64_t* groups_buffer,
    const uint32_t hashed_index,
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t row_size_quad) {
  uint32_t off = hashed_index * row_size_quad;
  // (a conditional guarding the key write is elided in this listing)
  for (uint32_t i = 0; i < key_count; ++i) {
    groups_buffer[off + i] = key[i];
  }
  return groups_buffer + off + key_count;
}
extern "C" ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash_keyless(
    int64_t* groups_buffer,
    const uint32_t hashed_index,
    const uint32_t row_size_quad) {
  return groups_buffer + row_size_quad * hashed_index;
}
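// Illustrative sketch, not part of the original file: with a keyless perfect
// hash, a group's slots start at a fixed offset computed purely from its hashed
// index. With an assumed row_size_quad of 4 (four 64-bit slots per group):
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t row_size_quad = 4;   // assumed row width in 64-bit quadwords
  int64_t groups_buffer[4 * 8] = {};  // 8 groups, zero-initialized, for illustration
  const uint32_t hashed_index = 3;
  int64_t* row = groups_buffer + row_size_quad * hashed_index;
  row[0] += 1;  // e.g. bump a COUNT slot for group 3
  assert(groups_buffer[12] == 1);  // group 3 starts at offset 3 * 4 = 12
  return 0;
}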
extern "C" ALWAYS_INLINE void set_matching_group_value_perfect_hash_columnar(
    int64_t* groups_buffer,
    const uint32_t hashed_index,
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t entry_count) {
  // (a conditional guarding the key write is elided in this listing)
  for (uint32_t i = 0; i < key_count; i++) {
    groups_buffer[i * entry_count + hashed_index] = key[i];
  }
}
extern "C" ALWAYS_INLINE int64_t* get_group_value_fast_keyless(
    int64_t* groups_buffer, const int64_t key, const int64_t min_key,
    const int64_t, const uint32_t row_size_quad) {
  return groups_buffer + row_size_quad * (key - min_key);
}

extern "C" ALWAYS_INLINE int64_t* get_group_value_fast_keyless_semiprivate(
    int64_t* groups_buffer, const int64_t key, const int64_t min_key,
    const int64_t, const uint32_t row_size_quad,
    const uint8_t thread_warp_idx, const uint8_t warp_size) {
  return groups_buffer + row_size_quad * (warp_size * (key - min_key) + thread_warp_idx);
}
extern "C" ALWAYS_INLINE int8_t* extract_str_ptr(const uint64_t str_and_len) {
  return reinterpret_cast<int8_t*>(str_and_len & 0xffffffffffff);
}

extern "C" ALWAYS_INLINE int32_t extract_str_len(const uint64_t str_and_len) {
  return static_cast<int64_t>(str_and_len) >> 48;
}

extern "C" ALWAYS_INLINE uint64_t string_pack(const int8_t* ptr, const int32_t len) {
  return (reinterpret_cast<const uint64_t>(ptr) & 0xffffffffffff) |
         (static_cast<const uint64_t>(len) << 48);
}
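// Illustrative sketch, not part of the original file: string_pack stores a
// pointer in the low 48 bits and the length in the high 16 bits of a single
// uint64_t; extract_str_ptr/extract_str_len undo it. A round trip, assuming the
// pointer fits in 48 bits (as typical x86-64 user-space addresses do):
#include <cassert>
#include <cstdint>

int main() {
  const char* text = "hello";
  const uint64_t packed =
      (reinterpret_cast<uint64_t>(text) & 0xffffffffffff) |
      (static_cast<uint64_t>(5) << 48);
  const char* ptr = reinterpret_cast<const char*>(packed & 0xffffffffffff);
  const int32_t len = static_cast<int64_t>(packed) >> 48;
  assert(ptr == text);  // holds only when the high 16 pointer bits were zero
  assert(len == 5);
  return 0;
}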
#include "../Utils/StringLike.cpp"
// Parameter tails of char_length and char_length_nullable (their bodies are
// elided in this listing):
    const int32_t str_len) {
    const int32_t str_len,
    const int32_t int_null) {
extern "C" ALWAYS_INLINE DEVICE bool sample_ratio(const double proportion,
                                                  const int64_t row_offset) {
  const int64_t threshold = 4294967296 * proportion;
  return (row_offset * 2654435761) % 4294967296 < threshold;
}
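// Illustrative sketch, not part of the original file: sample_ratio keeps a row
// when a multiplicative hash of its offset (Knuth's constant 2654435761) falls
// below proportion * 2^32. A standalone empirical check of the hit rate:
#include <cstdint>
#include <cstdio>

int main() {
  const double proportion = 0.1;  // keep roughly 10% of rows
  const int64_t threshold = 4294967296 * proportion;
  const int64_t n = 1000000;
  int64_t kept = 0;
  for (int64_t row = 0; row < n; ++row) {
    if ((row * 2654435761) % 4294967296 < threshold) {
      ++kept;
    }
  }
  // Prints a fraction close to 0.1 for this range of row offsets.
  std::printf("kept %lld of %lld rows (~%.3f)\n",
              static_cast<long long>(kept), static_cast<long long>(n),
              static_cast<double>(kept) / n);
  return 0;
}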
extern "C" ALWAYS_INLINE int64_t row_number_window_func(const int64_t output_buff,
                                                        const int64_t pos) {
  return reinterpret_cast<const int64_t*>(output_buff)[pos];
}

extern "C" ALWAYS_INLINE double percent_window_func(const int64_t output_buff,
                                                    const int64_t pos) {
  return reinterpret_cast<const double*>(output_buff)[pos];
}
extern "C" ALWAYS_INLINE double load_double(const int64_t* agg) {
  return *reinterpret_cast<const double*>(may_alias_ptr(agg));
}

extern "C" ALWAYS_INLINE float load_float(const int32_t* agg) {
  return *reinterpret_cast<const float*>(may_alias_ptr(agg));
}
extern "C" ALWAYS_INLINE double load_avg_int(const int64_t* sum,
                                             const int64_t* count,
                                             const double null_val) {
  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
}

extern "C" ALWAYS_INLINE double load_avg_decimal(const int64_t* sum,
                                                 const int64_t* count,
                                                 const double null_val,
                                                 const uint32_t scale) {
  return *count != 0 ? (static_cast<double>(*sum) / pow(10, scale)) / *count : null_val;
}

extern "C" ALWAYS_INLINE double load_avg_double(const int64_t* agg,
                                                const int64_t* count,
                                                const double null_val) {
  return *count != 0 ? *reinterpret_cast<const double*>(may_alias_ptr(agg)) / *count
                     : null_val;
}

extern "C" ALWAYS_INLINE double load_avg_float(const int32_t* agg,
                                               const int32_t* count,
                                               const double null_val) {
  return *count != 0 ? *reinterpret_cast<const float*>(may_alias_ptr(agg)) / *count
                     : null_val;
}
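// Illustrative sketch, not part of the original file: an integer AVG is
// finalized from two slots, a running sum and a running count, exactly as
// load_avg_int does above, with NULL produced when no rows contributed:
#include <cassert>
#include <cstdint>

inline double finalize_avg(const int64_t* sum, const int64_t* count,
                           const double null_val) {
  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
}

int main() {
  const int64_t sum = 10, count = 4, empty = 0;
  assert(finalize_avg(&sum, &count, -1.0) == 2.5);   // 10 / 4
  assert(finalize_avg(&sum, &empty, -1.0) == -1.0);  // empty group -> NULL sentinel
  return 0;
}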
extern "C" ALWAYS_INLINE void linear_probabilistic_count(uint8_t* bitmap,
                                                         const uint32_t bitmap_bytes,
                                                         const uint8_t* key_bytes,
                                                         const uint32_t key_len) {
  const uint32_t bit_pos = MurmurHash1(key_bytes, key_len, 0) % (bitmap_bytes * 8);
  const uint32_t word_idx = bit_pos / 32;
  const uint32_t bit_idx = bit_pos % 32;
  reinterpret_cast<uint32_t*>(bitmap)[word_idx] |= 1 << bit_idx;
}
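// Illustrative sketch, not part of the original file: linear_probabilistic_count
// sets one bit per hashed key. A standard linear-counting estimate (an
// assumption here; the engine's own estimator may differ) recovers an
// approximate distinct count from the fraction of bits still zero:
#include <cmath>
#include <cstddef>
#include <cstdint>

inline double linear_counting_estimate(const uint8_t* bitmap, size_t bitmap_bytes) {
  const double m = bitmap_bytes * 8.0;  // total bits in the bitmap
  double zero_bits = 0.0;
  for (size_t i = 0; i < bitmap_bytes; ++i) {
    for (int b = 0; b < 8; ++b) {
      if (!(bitmap[i] & (1u << b))) {
        zero_bits += 1.0;
      }
    }
  }
  // n_hat = m * ln(m / zero_bits); undefined when every bit is set.
  return m * std::log(m / zero_bits);
}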
extern "C" NEVER_INLINE void query_stub_hoisted_literals(const int8_t** col_buffers,
                                                         const int8_t* literals,
                                                         const int64_t* num_rows,
                                                         const uint64_t* frag_row_offsets,
                                                         const int32_t* max_matched,
                                                         const int64_t* init_agg_value,
                                                         int64_t** out,
                                                         uint32_t frag_idx,
                                                         const int64_t* join_hash_tables,
                                                         int32_t* error_code,
                                                         int32_t* total_matched) {
  assert(col_buffers || literals || num_rows || frag_row_offsets || max_matched ||
         init_agg_value || out || frag_idx || error_code || join_hash_tables ||
         total_matched);
}
extern "C" void multifrag_query_hoisted_literals(const int8_t*** col_buffers,
                                                 const uint64_t* num_fragments,
                                                 const int8_t* literals,
                                                 const int64_t* num_rows,
                                                 const uint64_t* frag_row_offsets,
                                                 const int32_t* max_matched,
                                                 int32_t* total_matched,
                                                 const int64_t* init_agg_value,
                                                 int64_t** out,
                                                 int32_t* error_code,
                                                 const uint32_t* num_tables_ptr,
                                                 const int64_t* join_hash_tables) {
  for (uint32_t i = 0; i < *num_fragments; ++i) {
    query_stub_hoisted_literals(col_buffers ? col_buffers[i] : nullptr,
                                literals,
                                &num_rows[i * (*num_tables_ptr)],
                                &frag_row_offsets[i * (*num_tables_ptr)],
                                max_matched,
                                init_agg_value,
                                out,
                                i,
                                join_hash_tables,
                                error_code,
                                total_matched);
  }
}
extern "C" NEVER_INLINE void query_stub(const int8_t** col_buffers,
                                        const int64_t* num_rows,
                                        const uint64_t* frag_row_offsets,
                                        const int32_t* max_matched,
                                        const int64_t* init_agg_value,
                                        int64_t** out,
                                        uint32_t frag_idx,
                                        const int64_t* join_hash_tables,
                                        int32_t* error_code,
                                        int32_t* total_matched) {
  assert(col_buffers || num_rows || frag_row_offsets || max_matched || init_agg_value ||
         out || frag_idx || error_code || join_hash_tables || total_matched);
}
extern "C" void multifrag_query(const int8_t*** col_buffers,
                                const uint64_t* num_fragments,
                                const int64_t* num_rows,
                                const uint64_t* frag_row_offsets,
                                const int32_t* max_matched,
                                int32_t* total_matched,
                                const int64_t* init_agg_value,
                                int64_t** out,
                                int32_t* error_code,
                                const uint32_t* num_tables_ptr,
                                const int64_t* join_hash_tables) {
  for (uint32_t i = 0; i < *num_fragments; ++i) {
    query_stub(col_buffers ? col_buffers[i] : nullptr,
               &num_rows[i * (*num_tables_ptr)],
               &frag_row_offsets[i * (*num_tables_ptr)],
               max_matched,
               init_agg_value,
               out,
               i,
               join_hash_tables,
               error_code,
               total_matched);
  }
}
// Fragments of check_interrupt_init (the branch bodies are elided in this
// listing):
  if (command == static_cast<unsigned>(INT_CHECK)) {
  if (command == static_cast<unsigned>(INT_ABORT)) {
  if (command == static_cast<unsigned>(INT_RESET)) {
__device__ void sync_warp_protected(int64_t thread_pos, int64_t row_count)
ALWAYS_INLINE void agg_sum_float(int32_t *agg, const float val)
ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t *agg, const int64_t val, const int64_t skip_val)
#define DEF_UMINUS_NULLABLE(type, null_type)
ALWAYS_INLINE void record_error_code(const int32_t err_code, int32_t *error_codes)
GPU_RT_STUB int32_t checked_single_agg_id_int32_shared(int32_t *agg, const int32_t val, const int32_t null_val)
__device__ void agg_count_distinct_bitmap_skip_val_gpu(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t skip_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
#define DEF_CHECKED_SINGLE_AGG_ID_INT(n)
__device__ int64_t * get_matching_group_value_columnar(int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
ALWAYS_INLINE uint32_t agg_count_float(uint32_t *agg, const float val)
__device__ void write_back_nop(int64_t *dest, int64_t *src, const int32_t sz)
__device__ void agg_sum_float_skip_val_shared(int32_t *agg, const float val, const float skip_val)
void agg_min_int32(int32_t *agg, const int32_t val)
ALWAYS_INLINE int64_t row_number_window_func(const int64_t output_buff, const int64_t pos)
#define DEF_CAST_NULLABLE_BIDIR(type1, type2)
ALWAYS_INLINE double load_avg_float(const int32_t *agg, const int32_t *count, const double null_val)
ALWAYS_INLINE void agg_max_float(int32_t *agg, const float val)
ALWAYS_INLINE int64_t * get_group_value_fast_keyless_semiprivate(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t, const uint32_t row_size_quad, const uint8_t thread_warp_idx, const uint8_t warp_size)
ALWAYS_INLINE int32_t agg_sum_int32_skip_val(int32_t *agg, const int32_t val, const int32_t skip_val)
ALWAYS_INLINE uint64_t agg_count(uint64_t *agg, const int64_t)
FORCE_INLINE uint8_t get_rank(uint64_t x, uint32_t b)
ALWAYS_INLINE int64_t scale_decimal_down_not_nullable(const int64_t operand, const int64_t scale, const int64_t null_val)
__device__ int8_t thread_warp_idx(const int8_t warp_sz)
__global__ void init_group_by_buffer_gpu(int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const bool keyless, const int8_t warp_size)
ALWAYS_INLINE double load_avg_double(const int64_t *agg, const int64_t *count, const double null_val)
#define DEF_CAST_NULLABLE(from_type, to_type)
ALWAYS_INLINE int32_t checked_single_agg_id_double(int64_t *agg, const double val, const double null_val)
__device__ int64_t get_thread_index()
RUNTIME_EXPORT NEVER_INLINE DEVICE uint64_t MurmurHash64A(const void *key, int len, uint64_t seed)
__device__ int32_t pos_step_impl()
__device__ void write_back_non_grouped_agg(int64_t *input_buffer, int64_t *output_buffer, const int32_t agg_idx)
ALWAYS_INLINE double load_double(const int64_t *agg)
__device__ int32_t checked_single_agg_id_double_shared(int64_t *agg, const double val, const double null_val)
__device__ const int64_t * init_shared_mem_nop(const int64_t *groups_buffer, const int32_t groups_buffer_size)
#define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym)
ALWAYS_INLINE void agg_count_distinct_bitmap_skip_val(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t skip_val)
ALWAYS_INLINE int64_t scale_decimal_down_nullable(const int64_t operand, const int64_t scale, const int64_t null_val)
#define DEF_AGG_MAX_INT(n)
void multifrag_query(const int8_t ***col_buffers, const uint64_t *num_fragments, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, int32_t *total_matched, const int64_t *init_agg_value, int64_t **out, int32_t *error_code, const uint32_t *num_tables_ptr, const int64_t *join_hash_tables)
ALWAYS_INLINE int32_t extract_str_len(const uint64_t str_and_len)
__device__ int32_t checked_single_agg_id_float_shared(int32_t *agg, const float val, const float null_val)
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
ALWAYS_INLINE int32_t checked_single_agg_id(int64_t *agg, const int64_t val, const int64_t null_val)
ALWAYS_INLINE void set_matching_group_value_perfect_hash_columnar(int64_t *groups_buffer, const uint32_t hashed_index, const int64_t *key, const uint32_t key_count, const uint32_t entry_count)
void agg_max_int16(int16_t *agg, const int16_t val)
ALWAYS_INLINE int64_t floor_div_nullable_lhs(const int64_t dividend, const int64_t divisor, const int64_t null_val)
void agg_min_int8(int8_t *agg, const int8_t val)
__device__ int64_t agg_sum_shared(int64_t *agg, const int64_t val)
ALWAYS_INLINE int64_t * get_group_value_fast_keyless(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t, const uint32_t row_size_quad)
__device__ void agg_id_double_shared_slow(int64_t *agg, const double *val)
ALWAYS_INLINE uint32_t agg_count_int32(uint32_t *agg, const int32_t)
ALWAYS_INLINE void agg_id_double(int64_t *agg, const double val)
ALWAYS_INLINE uint64_t string_pack(const int8_t *ptr, const int32_t len)
__device__ int64_t * declare_dynamic_shared_memory()
__device__ int32_t agg_sum_int32_shared(int32_t *agg, const int32_t val)
__device__ int64_t agg_sum_skip_val_shared(int64_t *agg, const int64_t val, const int64_t skip_val)
__device__ void agg_sum_float_shared(int32_t *agg, const float val)
ALWAYS_INLINE void agg_sum_double(int64_t *agg, const double val)
NEVER_INLINE int8_t * extract_str_ptr_noinline(const uint64_t str_and_len)
ALWAYS_INLINE int8_t * extract_str_ptr(const uint64_t str_and_len)
#define DEF_SKIP_AGG(base_agg_func)
__device__ int64_t get_block_index()
__device__ bool check_interrupt()
#define DEF_WRITE_PROJECTION_INT(n)
ALWAYS_INLINE void agg_id_float(int32_t *agg, const float val)
ALWAYS_INLINE uint32_t agg_count_float_skip_val(uint32_t *agg, const float val, const float skip_val)
GPU_RT_STUB int32_t checked_single_agg_id_int8_shared(int8_t *agg, const int8_t val, const int8_t null_val)
ALWAYS_INLINE uint32_t agg_count_int32_skip_val(uint32_t *agg, const int32_t val, const int32_t skip_val)
__device__ int32_t agg_sum_int32_skip_val_shared(int32_t *agg, const int32_t val, const int32_t skip_val)
ALWAYS_INLINE void agg_min_double(int64_t *agg, const double val)
ALWAYS_INLINE int32_t agg_sum_int32(int32_t *agg, const int32_t val)
__device__ void linear_probabilistic_count(uint8_t *bitmap, const uint32_t bitmap_bytes, const uint8_t *key_bytes, const uint32_t key_len)
ALWAYS_INLINE DEVICE int32_t char_length(const char *str, const int32_t str_len)
__device__ void agg_count_distinct_bitmap_gpu(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
#define DEF_SHARED_AGG_RET_STUBS(base_agg_func)
ALWAYS_INLINE int64_t * get_matching_group_value_perfect_hash_keyless(int64_t *groups_buffer, const uint32_t hashed_index, const uint32_t row_size_quad)
__device__ void agg_sum_double_shared(int64_t *agg, const double val)
void agg_min_int16(int16_t *agg, const int16_t val)
#define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym)
ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend, const int64_t divisor)
#define DEF_AGG_MIN_INT(n)
ALWAYS_INLINE void agg_max_double(int64_t *agg, const double val)
__device__ int32_t pos_start_impl(const int32_t *row_index_resume)
ALWAYS_INLINE uint64_t agg_count_double(uint64_t *agg, const double val)
void multifrag_query_hoisted_literals(const int8_t ***col_buffers, const uint64_t *num_fragments, const int8_t *literals, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, int32_t *total_matched, const int64_t *init_agg_value, int64_t **out, int32_t *error_code, const uint32_t *num_tables_ptr, const int64_t *join_hash_tables)
__device__ int32_t runtime_interrupt_flag
__device__ void agg_approximate_count_distinct_gpu(int64_t *agg, const int64_t key, const uint32_t b, const int64_t base_dev_addr, const int64_t base_host_addr)
__device__ void sync_warp()
ALWAYS_INLINE double load_avg_decimal(const int64_t *sum, const int64_t *count, const double null_val, const uint32_t scale)
NEVER_INLINE int32_t extract_str_len_noinline(const uint64_t str_and_len)
ALWAYS_INLINE int32_t checked_single_agg_id_float(int32_t *agg, const float val, const float null_val)
NEVER_INLINE void query_stub(const int8_t **col_buffers, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, const int64_t *init_agg_value, int64_t **out, uint32_t frag_idx, const int64_t *join_hash_tables, int32_t *error_code, int32_t *total_matched)
ALWAYS_INLINE int64_t scale_decimal_up(const int64_t operand, const uint64_t scale, const int64_t operand_null_val, const int64_t result_null_val)
ALWAYS_INLINE int64_t agg_sum(int64_t *agg, const int64_t val)
__device__ void agg_sum_double_skip_val_shared(int64_t *agg, const double val, const double skip_val)
__device__ void agg_max_int8_skip_val_shared(int8_t *agg, const int8_t val, const int8_t skip_val)
ALWAYS_INLINE void agg_min(int64_t *agg, const int64_t val)
RUNTIME_EXPORT NEVER_INLINE DEVICE uint32_t MurmurHash1(const void *key, int len, const uint32_t seed)
ALWAYS_INLINE int64_t decimal_floor(const int64_t x, const int64_t scale)
__device__ void agg_max_int16_skip_val_shared(int16_t *agg, const int16_t val, const int16_t skip_val)
ALWAYS_INLINE int64_t * get_matching_group_value_perfect_hash(int64_t *groups_buffer, const uint32_t hashed_index, const int64_t *key, const uint32_t key_count, const uint32_t row_size_quad)
ALWAYS_INLINE DEVICE int32_t key_for_string_encoded(const int32_t str_id)
NEVER_INLINE void init_group_by_buffer_impl(int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_qw_count, const uint32_t agg_col_count, const bool keyless, const int8_t warp_size)
__device__ const int64_t * init_shared_mem(const int64_t *global_groups_buffer, const int32_t groups_buffer_size)
void agg_max_int32(int32_t *agg, const int32_t val)
ALWAYS_INLINE int8_t logical_or(const int8_t lhs, const int8_t rhs, const int8_t null_val)
GPU_RT_STUB int32_t checked_single_agg_id_int16_shared(int16_t *agg, const int16_t val, const int16_t null_val)
#define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type)
ALWAYS_INLINE int32_t get_error_code(int32_t *error_codes)
bool check_interrupt_init(unsigned command)
ALWAYS_INLINE void agg_max(int64_t *agg, const int64_t val)
#define DEF_ARITH_NULLABLE(type, null_type, opname, opsym)
ALWAYS_INLINE float load_float(const int32_t *agg)
NEVER_INLINE void query_stub_hoisted_literals(const int8_t **col_buffers, const int8_t *literals, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, const int64_t *init_agg_value, int64_t **out, uint32_t frag_idx, const int64_t *join_hash_tables, int32_t *error_code, int32_t *total_matched)
__device__ void agg_min_int16_skip_val_shared(int16_t *agg, const int16_t val, const int16_t skip_val)
__device__ void sync_threadblock()
ALWAYS_INLINE uint64_t agg_count_skip_val(uint64_t *agg, const int64_t val, const int64_t skip_val)
__device__ void agg_min_int8_skip_val_shared(int8_t *agg, const int8_t val, const int8_t skip_val)
ALWAYS_INLINE double load_avg_int(const int64_t *sum, const int64_t *count, const double null_val)
ALWAYS_INLINE DEVICE int32_t char_length_nullable(const char *str, const int32_t str_len, const int32_t int_null)
ALWAYS_INLINE DEVICE bool sample_ratio(const double proportion, const int64_t row_offset)
void agg_max_int8(int8_t *agg, const int8_t val)
#define DEF_SHARED_AGG_STUBS(base_agg_func)
ALWAYS_INLINE int8_t bit_is_set(const int64_t bitset, const int64_t val, const int64_t min_val, const int64_t max_val, const int64_t null_val, const int8_t null_bool_val)
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
__device__ int32_t checked_single_agg_id_shared(int64_t *agg, const int64_t val, const int64_t null_val)
#define DEF_AGG_ID_INT(n)
NEVER_INLINE void agg_approximate_count_distinct(int64_t *agg, const int64_t key, const uint32_t b)
ALWAYS_INLINE uint64_t agg_count_double_skip_val(uint64_t *agg, const double val, const double skip_val)
ALWAYS_INLINE void agg_id(int64_t *agg, const int64_t val)
ALWAYS_INLINE int64_t decimal_ceil(const int64_t x, const int64_t scale)
ALWAYS_INLINE void agg_count_distinct_bitmap(int64_t *agg, const int64_t val, const int64_t min_val)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
ALWAYS_INLINE int8_t logical_not(const int8_t operand, const int8_t null_val)
ALWAYS_INLINE int8_t logical_and(const int8_t lhs, const int8_t rhs, const int8_t null_val)
ALWAYS_INLINE void agg_min_float(int32_t *agg, const float val)
__device__ void force_sync()
ALWAYS_INLINE double percent_window_func(const int64_t output_buff, const int64_t pos)
NEVER_INLINE void init_columnar_group_by_buffer_gpu(int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_qw_count, const uint32_t agg_col_count, const bool keyless, const bool blocks_share_memory, const int32_t frag_idx)
__device__ int32_t group_buff_idx_impl()