OmniSciDB  340b00dbf6
cuda_mapd_rt.cu File Reference
#include <cuda.h>
#include <float.h>
#include <stdint.h>
#include <stdio.h>
#include <limits>
#include "BufferCompaction.h"
#include "ExtensionFunctions.hpp"
#include "GpuRtConstants.h"
#include "HyperLogLogRank.h"
#include "TableFunctions/TableFunctions.hpp"
#include "GpuInitGroups.cu"
#include "GroupByRuntime.cpp"
#include "JoinHashTable/JoinHashTableQueryRuntime.cpp"
#include "MurmurHash.cpp"
#include "TopKRuntime.cpp"
#include "../Utils/ChunkIter.cpp"
#include "DateTruncate.cpp"
#include "ExtractFromTime.cpp"
#include "ArrayOps.cpp"
#include "DateAdd.cpp"
#include "StringFunctions.cpp"
#include "../Utils/Regexp.cpp"
#include "../Utils/StringLike.cpp"

Macros

#define init_group_by_buffer_gpu_impl   init_group_by_buffer_gpu
 
#define DEF_AGG_ID_INT_SHARED(n)
 
#define DEF_SKIP_AGG(base_agg_func)
 
#define DATA_T   int64_t
 
#define ADDR_T   uint64_t
 
#define DATA_T   int32_t
 
#define ADDR_T   uint32_t
 
#define DEF_SKIP_AGG(base_agg_func)
 
#define DATA_T   double
 
#define ADDR_T   uint64_t
 
#define DATA_T   float
 
#define ADDR_T   uint32_t
 
#define EXECUTE_INCLUDE
 

Functions

__device__ int64_t get_thread_index ()
 
__device__ int64_t get_block_index ()
 
__device__ int32_t pos_start_impl (const int32_t *row_index_resume)
 
__device__ int32_t group_buff_idx_impl ()
 
__device__ int32_t pos_step_impl ()
 
__device__ int8_t thread_warp_idx (const int8_t warp_sz)
 
__device__ const int64_t * init_shared_mem_nop (const int64_t *groups_buffer, const int32_t groups_buffer_size)
 
__device__ void write_back_nop (int64_t *dest, int64_t *src, const int32_t sz)
 
__device__ int64_t * declare_dynamic_shared_memory ()
 
__device__ const int64_t * init_shared_mem (const int64_t *global_groups_buffer, const int32_t groups_buffer_size)
 
__inline__ __device__ uint32_t get_smid (void)
 
__device__ bool dynamic_watchdog ()
 
__device__ bool check_interrupt ()
 
template<typename T = unsigned long long>
__device__ T get_empty_key ()
 
template<>
__device__ unsigned int get_empty_key ()
 
template<typename T >
__device__ int64_t * get_matching_group_value (int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
 
__device__ int64_t * get_matching_group_value (int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const int64_t *init_vals)
 
template<typename T >
__device__ int32_t get_matching_group_value_columnar_slot (int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
 
__device__ int32_t get_matching_group_value_columnar_slot (int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
 
__device__ int64_t * get_matching_group_value_columnar (int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
 
__device__ int64_t atomicMax64 (int64_t *address, int64_t val)
 
__device__ int64_t atomicMin64 (int64_t *address, int64_t val)
 
__device__ double atomicMax (double *address, double val)
 
__device__ float atomicMax (float *address, float val)
 
__device__ double atomicMin (double *address, double val)
 
__device__ double atomicMin (float *address, float val)
 
__device__ uint64_t agg_count_shared (uint64_t *agg, const int64_t val)
 
__device__ uint32_t agg_count_int32_shared (uint32_t *agg, const int32_t val)
 
__device__ uint64_t agg_count_double_shared (uint64_t *agg, const double val)
 
__device__ uint32_t agg_count_float_shared (uint32_t *agg, const float val)
 
__device__ int64_t agg_sum_shared (int64_t *agg, const int64_t val)
 
__device__ int32_t agg_sum_int32_shared (int32_t *agg, const int32_t val)
 
__device__ void agg_sum_float_shared (int32_t *agg, const float val)
 
__device__ void agg_sum_double_shared (int64_t *agg, const double val)
 
__device__ void agg_max_shared (int64_t *agg, const int64_t val)
 
__device__ void agg_max_int32_shared (int32_t *agg, const int32_t val)
 
__device__ void agg_max_double_shared (int64_t *agg, const double val)
 
__device__ void agg_max_float_shared (int32_t *agg, const float val)
 
__device__ void agg_min_shared (int64_t *agg, const int64_t val)
 
__device__ void agg_min_int32_shared (int32_t *agg, const int32_t val)
 
__device__ void atomicMax16 (int16_t *agg, const int16_t val)
 
__device__ void atomicMax8 (int8_t *agg, const int8_t val)
 
__device__ void atomicMin16 (int16_t *agg, const int16_t val)
 
__device__ void atomicMin16SkipVal (int16_t *agg, const int16_t val, const int16_t skip_val)
 
__device__ void atomicMin8 (int8_t *agg, const int8_t val)
 
__device__ void atomicMin8SkipVal (int8_t *agg, const int8_t val, const int8_t skip_val)
 
__device__ void agg_max_int16_shared (int16_t *agg, const int16_t val)
 
__device__ void agg_max_int8_shared (int8_t *agg, const int8_t val)
 
__device__ void agg_min_int16_shared (int16_t *agg, const int16_t val)
 
__device__ void agg_min_int8_shared (int8_t *agg, const int8_t val)
 
__device__ void agg_min_double_shared (int64_t *agg, const double val)
 
__device__ void agg_min_float_shared (int32_t *agg, const float val)
 
__device__ void agg_id_shared (int64_t *agg, const int64_t val)
 
__device__ int32_t checked_single_agg_id_shared (int64_t *agg, const int64_t val, const int64_t null_val)
 
__device__ void agg_id_double_shared (int64_t *agg, const double val)
 
__device__ int32_t checked_single_agg_id_double_shared (int64_t *agg, const double val, const double null_val)
 
__device__ void agg_id_double_shared_slow (int64_t *agg, const double *val)
 
__device__ int32_t checked_single_agg_id_double_shared_slow (int64_t *agg, const double *valp, const double null_val)
 
__device__ void agg_id_float_shared (int32_t *agg, const float val)
 
__device__ int32_t checked_single_agg_id_float_shared (int32_t *agg, const float val, const float null_val)
 
__device__ void agg_max_int32_skip_val_shared (int32_t *agg, const int32_t val, const int32_t skip_val)
 
__device__ void agg_max_int16_skip_val_shared (int16_t *agg, const int16_t val, const int16_t skip_val)
 
__device__ void agg_min_int16_skip_val_shared (int16_t *agg, const int16_t val, const int16_t skip_val)
 
__device__ void agg_max_int8_skip_val_shared (int8_t *agg, const int8_t val, const int8_t skip_val)
 
__device__ void agg_min_int8_skip_val_shared (int8_t *agg, const int8_t val, const int8_t skip_val)
 
__device__ int32_t atomicMin32SkipVal (int32_t *address, int32_t val, const int32_t skip_val)
 
__device__ void agg_min_int32_skip_val_shared (int32_t *agg, const int32_t val, const int32_t skip_val)
 
__device__ int32_t atomicSum32SkipVal (int32_t *address, const int32_t val, const int32_t skip_val)
 
__device__ int32_t agg_sum_int32_skip_val_shared (int32_t *agg, const int32_t val, const int32_t skip_val)
 
__device__ int64_t atomicSum64SkipVal (int64_t *address, const int64_t val, const int64_t skip_val)
 
__device__ int64_t agg_sum_skip_val_shared (int64_t *agg, const int64_t val, const int64_t skip_val)
 
__device__ int64_t atomicMin64SkipVal (int64_t *address, int64_t val, const int64_t skip_val)
 
__device__ void agg_min_skip_val_shared (int64_t *agg, const int64_t val, const int64_t skip_val)
 
__device__ int64_t atomicMax64SkipVal (int64_t *address, int64_t val, const int64_t skip_val)
 
__device__ void agg_max_skip_val_shared (int64_t *agg, const int64_t val, const int64_t skip_val)
 
__device__ void agg_max_float_skip_val_shared (int32_t *agg, const float val, const float skip_val)
 
__device__ float atomicMinFltSkipVal (int32_t *address, float val, const float skip_val)
 
__device__ void agg_min_float_skip_val_shared (int32_t *agg, const float val, const float skip_val)
 
__device__ void atomicSumFltSkipVal (float *address, const float val, const float skip_val)
 
__device__ void agg_sum_float_skip_val_shared (int32_t *agg, const float val, const float skip_val)
 
__device__ void atomicSumDblSkipVal (double *address, const double val, const double skip_val)
 
__device__ void agg_sum_double_skip_val_shared (int64_t *agg, const double val, const double skip_val)
 
__device__ double atomicMinDblSkipVal (double *address, double val, const double skip_val)
 
__device__ void agg_min_double_skip_val_shared (int64_t *agg, const double val, const double skip_val)
 
__device__ void agg_max_double_skip_val_shared (int64_t *agg, const double val, const double skip_val)
 
__device__ bool slotEmptyKeyCAS (int64_t *slot, int64_t new_val, int64_t init_val)
 
__device__ bool slotEmptyKeyCAS_int32 (int32_t *slot, int32_t new_val, int32_t init_val)
 
__device__ bool slotEmptyKeyCAS_int16 (int16_t *slot, int16_t new_val, int16_t init_val)
 
__device__ bool slotEmptyKeyCAS_int8 (int8_t *slot, int8_t new_val, int8_t init_val)
 
__device__ uint64_t string_decode (int8_t *chunk_iter_, int64_t pos)
 
__device__ void linear_probabilistic_count (uint8_t *bitmap, const uint32_t bitmap_bytes, const uint8_t *key_bytes, const uint32_t key_len)
 
__device__ void agg_count_distinct_bitmap_gpu (int64_t *agg, const int64_t val, const int64_t min_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
 
__device__ void agg_count_distinct_bitmap_skip_val_gpu (int64_t *agg, const int64_t val, const int64_t min_val, const int64_t skip_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
 
__device__ void agg_approximate_count_distinct_gpu (int64_t *agg, const int64_t key, const uint32_t b, const int64_t base_dev_addr, const int64_t base_host_addr)
 
__device__ void force_sync ()
 
__device__ void sync_warp ()
 
__device__ void sync_warp_protected (int64_t thread_pos, int64_t row_count)
 
__device__ void sync_threadblock ()
 
__device__ void write_back_non_grouped_agg (int64_t *input_buffer, int64_t *output_buffer, const int32_t agg_idx)
 

Variables

__device__ int64_t dw_sm_cycle_start [128]
 
__device__ int64_t dw_cycle_budget = 0
 
__device__ int32_t dw_abort = 0
 
__device__ int32_t runtime_interrupt_flag = 0
 

Macro Definition Documentation

#define ADDR_T   uint64_t

Definition at line 997 of file cuda_mapd_rt.cu.

#define ADDR_T   uint32_t

Definition at line 997 of file cuda_mapd_rt.cu.

#define ADDR_T   uint64_t

Definition at line 997 of file cuda_mapd_rt.cu.

#define ADDR_T   uint32_t

Definition at line 997 of file cuda_mapd_rt.cu.

#define DATA_T   int64_t

Definition at line 996 of file cuda_mapd_rt.cu.

#define DATA_T   int32_t

Definition at line 996 of file cuda_mapd_rt.cu.

#define DATA_T   double

Definition at line 996 of file cuda_mapd_rt.cu.

#define DATA_T   float

Definition at line 996 of file cuda_mapd_rt.cu.

#define DEF_AGG_ID_INT_SHARED (   n)
Value:
extern "C" __device__ void agg_id_int##n##_shared(int##n##_t* agg, \
const int##n##_t val) { \
*agg = val; \
}

Definition at line 709 of file cuda_mapd_rt.cu.
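For illustration, an instantiation such as DEF_AGG_ID_INT_SHARED(32) (assuming the file instantiates the macro for 32-bit values) would expand to:

extern "C" __device__ void agg_id_int32_shared(int32_t* agg, const int32_t val) {
  // SAMPLE/ID aggregate: the last writer wins, no atomicity required
  *agg = val;
}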

#define DEF_SKIP_AGG (   base_agg_func)
Value:
extern "C" __device__ ADDR_T base_agg_func##_skip_val_shared( \
ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
if (val != skip_val) { \
return base_agg_func##_shared(agg, val); \
} \
return 0; \
}
#define DATA_T
#define ADDR_T

Definition at line 981 of file cuda_mapd_rt.cu.

#define DEF_SKIP_AGG (   base_agg_func)
Value:
extern "C" __device__ ADDR_T base_agg_func##_skip_val_shared( \
ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
if (val != skip_val) { \
return base_agg_func##_shared(agg, val); \
} \
return *agg; \
}
#define DATA_T
#define ADDR_T

Definition at line 981 of file cuda_mapd_rt.cu.
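The two DEF_SKIP_AGG definitions differ only in what is returned when val equals skip_val (0 versus the current contents of *agg). As a hedged illustration, assuming the first form is instantiated with DATA_T = int64_t, ADDR_T = uint64_t and base_agg_func = agg_count, the expansion would be:

extern "C" __device__ uint64_t agg_count_skip_val_shared(uint64_t* agg,
                                                         const int64_t val,
                                                         const int64_t skip_val) {
  if (val != skip_val) {
    return agg_count_shared(agg, val);  // delegate to the non-skip-val variant
  }
  return 0;  // NULL input: leave the aggregate untouched
}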

#define EXECUTE_INCLUDE

Definition at line 1186 of file cuda_mapd_rt.cu.

#define init_group_by_buffer_gpu_impl   init_group_by_buffer_gpu

Definition at line 83 of file cuda_mapd_rt.cu.

Function Documentation

__device__ void agg_approximate_count_distinct_gpu ( int64_t *  agg,
const int64_t  key,
const uint32_t  b,
const int64_t  base_dev_addr,
const int64_t  base_host_addr 
)

Definition at line 1262 of file cuda_mapd_rt.cu.

References atomicMax(), get_rank(), and MurmurHash64A().

1267  {
1268  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
1269  const uint32_t index = hash >> (64 - b);
1270  const int32_t rank = get_rank(hash << b, 64 - b);
1271  const int64_t host_addr = *agg;
1272  int32_t* M = (int32_t*)(base_dev_addr + host_addr - base_host_addr);
1273  atomicMax(&M[index], rank);
1274 }
FORCE_INLINE uint8_t get_rank(uint64_t x, uint32_t b)
NEVER_INLINE DEVICE uint64_t MurmurHash64A(const void *key, int len, uint64_t seed)
Definition: MurmurHash.cpp:26
__device__ double atomicMax(double *address, double val)
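The function updates one HyperLogLog register: the top b bits of the 64-bit MurmurHash select the register, and the rank is derived from the leading zeros of the remaining bits. A minimal host-side sketch of that split, with rank_of as a simplified stand-in for get_rank() and an arbitrary hash value:

#include <cstdint>
#include <cstdio>

// Simplified stand-in for get_rank(): leading zeros among the top `bits` bits, plus one.
static uint8_t rank_of(uint64_t x, uint32_t bits) {
  uint8_t r = 1;
  while (r <= bits && !(x & (1ULL << 63))) {
    x <<= 1;
    ++r;
  }
  return r;
}

int main() {
  const uint32_t b = 11;                            // assumes 2^11 = 2048 registers
  const uint64_t hash = 0x0123456789abcdefULL;      // stand-in for MurmurHash64A(&key, sizeof(key), 0)
  const uint32_t index = hash >> (64 - b);          // top b bits: register index
  const uint8_t rank = rank_of(hash << b, 64 - b);  // rank of the remaining 64 - b bits
  printf("register=%u rank=%u\n", index, (unsigned)rank);
  return 0;
}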


__device__ void agg_count_distinct_bitmap_gpu ( int64_t *  agg,
const int64_t  val,
const int64_t  min_val,
const int64_t  base_dev_addr,
const int64_t  base_host_addr,
const uint64_t  sub_bitmap_count,
const uint64_t  bitmap_bytes 
)

Definition at line 1215 of file cuda_mapd_rt.cu.

Referenced by agg_count_distinct_bitmap_skip_val_gpu().

1221  {
1222  const uint64_t bitmap_idx = val - min_val;
1223  const uint32_t byte_idx = bitmap_idx >> 3;
1224  const uint32_t word_idx = byte_idx >> 2;
1225  const uint32_t byte_word_idx = byte_idx & 3;
1226  const int64_t host_addr = *agg;
1227  uint32_t* bitmap = (uint32_t*)(base_dev_addr + host_addr - base_host_addr +
1228  (threadIdx.x & (sub_bitmap_count - 1)) * bitmap_bytes);
1229  switch (byte_word_idx) {
1230  case 0:
1231  atomicOr(&bitmap[word_idx], 1 << (bitmap_idx & 7));
1232  break;
1233  case 1:
1234  atomicOr(&bitmap[word_idx], 1 << ((bitmap_idx & 7) + 8));
1235  break;
1236  case 2:
1237  atomicOr(&bitmap[word_idx], 1 << ((bitmap_idx & 7) + 16));
1238  break;
1239  case 3:
1240  atomicOr(&bitmap[word_idx], 1 << ((bitmap_idx & 7) + 24));
1241  break;
1242  default:
1243  break;
1244  }
1245 }
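The shifts and masks locate the target bit inside a 32-bit word so that a single atomicOr can set it: bitmap_idx picks the bit, byte_idx the byte, word_idx the 32-bit word, and byte_word_idx the byte's position within that word. A small runnable trace of the arithmetic, assuming val - min_val = 77 (the switch above is equivalent to shifting by (bitmap_idx & 7) + 8 * byte_word_idx):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t bitmap_idx = 77;               // assumed val - min_val
  const uint32_t byte_idx = bitmap_idx >> 3;    // 9: byte within the bitmap
  const uint32_t word_idx = byte_idx >> 2;      // 2: 32-bit word within the bitmap
  const uint32_t byte_word_idx = byte_idx & 3;  // 1: byte within that word
  const uint32_t mask = 1u << ((bitmap_idx & 7) + 8 * byte_word_idx);  // bit 13 of word 2
  printf("word=%u mask=0x%x\n", word_idx, mask);
  return 0;
}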


__device__ void agg_count_distinct_bitmap_skip_val_gpu ( int64_t *  agg,
const int64_t  val,
const int64_t  min_val,
const int64_t  skip_val,
const int64_t  base_dev_addr,
const int64_t  base_host_addr,
const uint64_t  sub_bitmap_count,
const uint64_t  bitmap_bytes 
)

Definition at line 1247 of file cuda_mapd_rt.cu.

References agg_count_distinct_bitmap_gpu().

1255  {
1256  if (val != skip_val) {
1257  agg_count_distinct_bitmap_gpu(
1258  agg, val, min_val, base_dev_addr, base_host_addr, sub_bitmap_count, bitmap_bytes);
1259  }
1260 }
__device__ void agg_count_distinct_bitmap_gpu(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)


__device__ uint64_t agg_count_double_shared ( uint64_t *  agg,
const double  val 
)

Definition at line 440 of file cuda_mapd_rt.cu.

References agg_count_shared().

440  {
441  return agg_count_shared(agg, val);
442 }
__device__ uint64_t agg_count_shared(uint64_t *agg, const int64_t val)


__device__ uint32_t agg_count_float_shared ( uint32_t *  agg,
const float  val 
)

Definition at line 444 of file cuda_mapd_rt.cu.

References agg_count_int32_shared().

444  {
445  return agg_count_int32_shared(agg, val);
446 }
__device__ uint32_t agg_count_int32_shared(uint32_t *agg, const int32_t val)


__device__ uint32_t agg_count_int32_shared ( uint32_t *  agg,
const int32_t  val 
)

Definition at line 436 of file cuda_mapd_rt.cu.

Referenced by agg_count_float_shared().

436  {
437  return atomicAdd(agg, 1UL);
438 }


__device__ uint64_t agg_count_shared ( uint64_t *  agg,
const int64_t  val 
)

Definition at line 432 of file cuda_mapd_rt.cu.

Referenced by agg_count_double_shared().

432  {
433  return static_cast<uint64_t>(atomicAdd(reinterpret_cast<uint32_t*>(agg), 1UL));
434 }


__device__ void agg_id_double_shared ( int64_t *  agg,
const double  val 
)

Definition at line 721 of file cuda_mapd_rt.cu.

721  {
722  *agg = *(reinterpret_cast<const int64_t*>(&val));
723 }
__device__ void agg_id_double_shared_slow ( int64_t *  agg,
const double *  val 
)

Definition at line 752 of file cuda_mapd_rt.cu.

752  {
753  *agg = *(reinterpret_cast<const int64_t*>(val));
754 }
__device__ void agg_id_float_shared ( int32_t *  agg,
const float  val 
)

Definition at line 785 of file cuda_mapd_rt.cu.

785  {
786  *agg = __float_as_int(val);
787 }
__device__ void agg_id_shared ( int64_t *  agg,
const int64_t  val 
)

Definition at line 678 of file cuda_mapd_rt.cu.

678  {
679  *agg = val;
680 }
__device__ void agg_max_double_shared ( int64_t *  agg,
const double  val 
)

Definition at line 472 of file cuda_mapd_rt.cu.

References atomicMax().

472  {
473  atomicMax(reinterpret_cast<double*>(agg), val);
474 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_double_skip_val_shared ( int64_t *  agg,
const double  val,
const double  skip_val 
)

Definition at line 1091 of file cuda_mapd_rt.cu.

References atomicMax().

1093  {
1094  if (__double_as_longlong(val) != __double_as_longlong(skip_val)) {
1095  double old = __longlong_as_double(atomicExch(
1096  reinterpret_cast<unsigned long long int*>(agg), __double_as_longlong(-DBL_MAX)));
1097  atomicMax(reinterpret_cast<double*>(agg),
1098  __double_as_longlong(old) == __double_as_longlong(skip_val)
1099  ? val
1100  : fmax(old, val));
1101  }
1102 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_float_shared ( int32_t *  agg,
const float  val 
)

Definition at line 476 of file cuda_mapd_rt.cu.

References atomicMax().

476  {
477  atomicMax(reinterpret_cast<float*>(agg), val);
478 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_float_skip_val_shared ( int32_t *  agg,
const float  val,
const float  skip_val 
)

Definition at line 1003 of file cuda_mapd_rt.cu.

References atomicMax().

1005  {
1006  if (__float_as_int(val) != __float_as_int(skip_val)) {
1007  float old = atomicExch(reinterpret_cast<float*>(agg), -FLT_MAX);
1008  atomicMax(reinterpret_cast<float*>(agg),
1009  __float_as_int(old) == __float_as_int(skip_val) ? val : fmaxf(old, val));
1010  }
1011 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_int16_shared ( int16_t *  agg,
const int16_t  val 
)

Definition at line 654 of file cuda_mapd_rt.cu.

References atomicMax16().

Referenced by agg_max_int16_skip_val_shared().

654  {
655  return atomicMax16(agg, val);
656 }
__device__ void atomicMax16(int16_t *agg, const int16_t val)


__device__ void agg_max_int16_skip_val_shared ( int16_t *  agg,
const int16_t  val,
const int16_t  skip_val 
)

Definition at line 846 of file cuda_mapd_rt.cu.

References agg_max_int16_shared().

848  {
849  if (val != skip_val) {
850  agg_max_int16_shared(agg, val);
851  }
852 }
__device__ void agg_max_int16_shared(int16_t *agg, const int16_t val)


__device__ void agg_max_int32_shared ( int32_t *  agg,
const int32_t  val 
)

Definition at line 468 of file cuda_mapd_rt.cu.

References atomicMax().

Referenced by agg_max_int32_skip_val_shared().

468  {
469  atomicMax(agg, val);
470 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_int32_skip_val_shared ( int32_t *  agg,
const int32_t  val,
const int32_t  skip_val 
)

Definition at line 838 of file cuda_mapd_rt.cu.

References agg_max_int32_shared().

840  {
841  if (val != skip_val) {
842  agg_max_int32_shared(agg, val);
843  }
844 }
__device__ void agg_max_int32_shared(int32_t *agg, const int32_t val)


__device__ void agg_max_int8_shared ( int8_t *  agg,
const int8_t  val 
)

Definition at line 658 of file cuda_mapd_rt.cu.

References atomicMax8().

Referenced by agg_max_int8_skip_val_shared().

658  {
659  return atomicMax8(agg, val);
660 }
__device__ void atomicMax8(int8_t *agg, const int8_t val)


__device__ void agg_max_int8_skip_val_shared ( int8_t *  agg,
const int8_t  val,
const int8_t  skip_val 
)

Definition at line 862 of file cuda_mapd_rt.cu.

References agg_max_int8_shared().

864  {
865  if (val != skip_val) {
866  agg_max_int8_shared(agg, val);
867  }
868 }
__device__ void agg_max_int8_shared(int8_t *agg, const int8_t val)


__device__ void agg_max_shared ( int64_t *  agg,
const int64_t  val 
)

Definition at line 464 of file cuda_mapd_rt.cu.

References atomicMax64().

464  {
465  atomicMax64(agg, val);
466 }
__device__ int64_t atomicMax64(int64_t *address, int64_t val)


__device__ void agg_max_skip_val_shared ( int64_t *  agg,
const int64_t  val,
const int64_t  skip_val 
)

Definition at line 972 of file cuda_mapd_rt.cu.

References atomicMax64SkipVal().

974  {
975  if (val != skip_val) {
976  atomicMax64SkipVal(agg, val, skip_val);
977  }
978 }
__device__ int64_t atomicMax64SkipVal(int64_t *address, int64_t val, const int64_t skip_val)


__device__ void agg_min_double_shared ( int64_t *  agg,
const double  val 
)

Definition at line 670 of file cuda_mapd_rt.cu.

References atomicMin().

670  {
671  atomicMin(reinterpret_cast<double*>(agg), val);
672 }
__device__ double atomicMin(double *address, double val)


__device__ void agg_min_double_skip_val_shared ( int64_t *  agg,
const double  val,
const double  skip_val 
)

Definition at line 1083 of file cuda_mapd_rt.cu.

References atomicMinDblSkipVal().

1085  {
1086  if (val != skip_val) {
1087  atomicMinDblSkipVal(reinterpret_cast<double*>(agg), val, skip_val);
1088  }
1089 }
__device__ double atomicMinDblSkipVal(double *address, double val, const double skip_val)


__device__ void agg_min_float_shared ( int32_t *  agg,
const float  val 
)

Definition at line 674 of file cuda_mapd_rt.cu.

References atomicMin().

674  {
675  atomicMin(reinterpret_cast<float*>(agg), val);
676 }
__device__ double atomicMin(double *address, double val)


__device__ void agg_min_float_skip_val_shared ( int32_t *  agg,
const float  val,
const float  skip_val 
)

Definition at line 1020 of file cuda_mapd_rt.cu.

References atomicMinFltSkipVal().

1022  {
1023  if (__float_as_int(val) != __float_as_int(skip_val)) {
1024  atomicMinFltSkipVal(agg, val, skip_val);
1025  }
1026 }
__device__ float atomicMinFltSkipVal(int32_t *address, float val, const float skip_val)


__device__ void agg_min_int16_shared ( int16_t *  agg,
const int16_t  val 
)

Definition at line 662 of file cuda_mapd_rt.cu.

References atomicMin16().

662  {
663  return atomicMin16(agg, val);
664 }
__device__ void atomicMin16(int16_t *agg, const int16_t val)


__device__ void agg_min_int16_skip_val_shared ( int16_t *  agg,
const int16_t  val,
const int16_t  skip_val 
)

Definition at line 854 of file cuda_mapd_rt.cu.

References atomicMin16SkipVal().

856  {
857  if (val != skip_val) {
858  atomicMin16SkipVal(agg, val, skip_val);
859  }
860 }
__device__ void atomicMin16SkipVal(int16_t *agg, const int16_t val, const int16_t skip_val)


__device__ void agg_min_int32_shared ( int32_t *  agg,
const int32_t  val 
)

Definition at line 484 of file cuda_mapd_rt.cu.

References atomicMin().

484  {
485  atomicMin(agg, val);
486 }
__device__ double atomicMin(double *address, double val)


__device__ void agg_min_int32_skip_val_shared ( int32_t *  agg,
const int32_t  val,
const int32_t  skip_val 
)

Definition at line 885 of file cuda_mapd_rt.cu.

References atomicMin32SkipVal().

887  {
888  if (val != skip_val) {
889  atomicMin32SkipVal(agg, val, skip_val);
890  }
891 }
__device__ int32_t atomicMin32SkipVal(int32_t *address, int32_t val, const int32_t skip_val)


__device__ void agg_min_int8_shared ( int8_t *  agg,
const int8_t  val 
)

Definition at line 666 of file cuda_mapd_rt.cu.

References atomicMin8().

666  {
667  return atomicMin8(agg, val);
668 }
__device__ void atomicMin8(int8_t *agg, const int8_t val)


__device__ void agg_min_int8_skip_val_shared ( int8_t *  agg,
const int8_t  val,
const int8_t  skip_val 
)

Definition at line 870 of file cuda_mapd_rt.cu.

References atomicMin8SkipVal().

872  {
873  if (val != skip_val) {
874  atomicMin8SkipVal(agg, val, skip_val);
875  }
876 }
__device__ void atomicMin8SkipVal(int8_t *agg, const int8_t val, const int8_t skip_val)


__device__ void agg_min_shared ( int64_t *  agg,
const int64_t  val 
)

Definition at line 480 of file cuda_mapd_rt.cu.

References atomicMin64().

480  {
481  atomicMin64(agg, val);
482 }
__device__ int64_t atomicMin64(int64_t *address, int64_t val)


__device__ void agg_min_skip_val_shared ( int64_t *  agg,
const int64_t  val,
const int64_t  skip_val 
)

Definition at line 947 of file cuda_mapd_rt.cu.

References atomicMin64SkipVal().

949  {
950  if (val != skip_val) {
951  atomicMin64SkipVal(agg, val, skip_val);
952  }
953 }
__device__ int64_t atomicMin64SkipVal(int64_t *address, int64_t val, const int64_t skip_val)


__device__ void agg_sum_double_shared ( int64_t *  agg,
const double  val 
)

Definition at line 460 of file cuda_mapd_rt.cu.

460  {
461  atomicAdd(reinterpret_cast<double*>(agg), val);
462 }
__device__ void agg_sum_double_skip_val_shared ( int64_t *  agg,
const double  val,
const double  skip_val 
)

Definition at line 1053 of file cuda_mapd_rt.cu.

References atomicSumDblSkipVal().

1055  {
1056  if (__double_as_longlong(val) != __double_as_longlong(skip_val)) {
1057  atomicSumDblSkipVal(reinterpret_cast<double*>(agg), val, skip_val);
1058  }
1059 }
__device__ void atomicSumDblSkipVal(double *address, const double val, const double skip_val)


__device__ void agg_sum_float_shared ( int32_t *  agg,
const float  val 
)

Definition at line 456 of file cuda_mapd_rt.cu.

456  {
457  atomicAdd(reinterpret_cast<float*>(agg), val);
458 }
__device__ void agg_sum_float_skip_val_shared ( int32_t *  agg,
const float  val,
const float  skip_val 
)

Definition at line 1035 of file cuda_mapd_rt.cu.

References atomicSumFltSkipVal().

1037  {
1038  if (__float_as_int(val) != __float_as_int(skip_val)) {
1039  atomicSumFltSkipVal(reinterpret_cast<float*>(agg), val, skip_val);
1040  }
1041 }
__device__ void atomicSumFltSkipVal(float *address, const float val, const float skip_val)


__device__ int32_t agg_sum_int32_shared ( int32_t *  agg,
const int32_t  val 
)

Definition at line 452 of file cuda_mapd_rt.cu.

452  {
453  return atomicAdd(agg, val);
454 }
__device__ int32_t agg_sum_int32_skip_val_shared ( int32_t *  agg,
const int32_t  val,
const int32_t  skip_val 
)

Definition at line 902 of file cuda_mapd_rt.cu.

References atomicSum32SkipVal().

904  {
905  if (val != skip_val) {
906  const int32_t old = atomicSum32SkipVal(agg, val, skip_val);
907  return old;
908  }
909  return 0;
910 }
__device__ int32_t atomicSum32SkipVal(int32_t *address, const int32_t val, const int32_t skip_val)


__device__ int64_t agg_sum_shared ( int64_t *  agg,
const int64_t  val 
)

Definition at line 448 of file cuda_mapd_rt.cu.

Referenced by write_back_non_grouped_agg().

448  {
449  return atomicAdd(reinterpret_cast<unsigned long long*>(agg), val);
450 }


__device__ int64_t agg_sum_skip_val_shared ( int64_t *  agg,
const int64_t  val,
const int64_t  skip_val 
)

Definition at line 921 of file cuda_mapd_rt.cu.

References atomicSum64SkipVal().

923  {
924  if (val != skip_val) {
925  return atomicSum64SkipVal(agg, val, skip_val);
926  }
927  return 0;
928 }
__device__ int64_t atomicSum64SkipVal(int64_t *address, const int64_t val, const int64_t skip_val)


__device__ double atomicMax ( double *  address,
double  val 
)

Definition at line 374 of file cuda_mapd_rt.cu.

Referenced by agg_approximate_count_distinct_gpu(), agg_max_double_shared(), agg_max_double_skip_val_shared(), agg_max_float_shared(), agg_max_float_skip_val_shared(), agg_max_int32_shared(), and approximate_distinct_tuples_impl().

374  {
375  unsigned long long int* address_as_ull = (unsigned long long int*)address;
376  unsigned long long int old = *address_as_ull, assumed;
377 
378  do {
379  assumed = old;
380  old = atomicCAS(address_as_ull,
381  assumed,
382  __double_as_longlong(max(val, __longlong_as_double(assumed))));
383 
384  // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
385  } while (assumed != old);
386 
387  return __longlong_as_double(old);
388 }
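This is the usual compare-and-swap retry loop for emulating a 64-bit floating-point atomic that the hardware does not provide directly. The same pattern generalizes to other combining functions; a hypothetical sketch (not part of this runtime) for an atomic double multiply:

__device__ double atomicMulDouble(double* address, double val) {
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;
  do {
    assumed = old;
    // combine with the last observed value and try to publish; retry if another thread won
    old = atomicCAS(address_as_ull,
                    assumed,
                    __double_as_longlong(val * __longlong_as_double(assumed)));
  } while (assumed != old);
  return __longlong_as_double(old);
}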


__device__ float atomicMax ( float *  address,
float  val 
)

Definition at line 390 of file cuda_mapd_rt.cu.

390  {
391  int* address_as_int = (int*)address;
392  int old = *address_as_int, assumed;
393 
394  do {
395  assumed = old;
396  old = atomicCAS(
397  address_as_int, assumed, __float_as_int(max(val, __int_as_float(assumed))));
398 
399  // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
400  } while (assumed != old);
401 
402  return __int_as_float(old);
403 }
__device__ void atomicMax16 ( int16_t *  agg,
const int16_t  val 
)

Definition at line 502 of file cuda_mapd_rt.cu.

Referenced by agg_max_int16_shared().

502  {
503  // properly align the input pointer:
504  unsigned int* base_address_u32 =
505  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
506 
507  unsigned int old_value = *base_address_u32;
508  unsigned int swap_value, compare_value;
509  do {
510  compare_value = old_value;
511  swap_value =
512  (reinterpret_cast<size_t>(agg) & 0x2)
513  ? static_cast<unsigned int>(max(static_cast<int16_t>(old_value >> 16), val))
514  << 16 |
515  (old_value & 0xFFFF)
516  : (old_value & 0xFFFF0000) |
517  static_cast<unsigned int>(
518  max(static_cast<int16_t>(old_value & 0xFFFF), val));
519  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
520  } while (old_value != compare_value);
521 }


__device__ int64_t atomicMax64 ( int64_t *  address,
int64_t  val 
)

Definition at line 332 of file cuda_mapd_rt.cu.

Referenced by agg_max_shared().

332  {
333  unsigned long long int* address_as_ull = (unsigned long long int*)address;
334  unsigned long long int old = *address_as_ull, assumed;
335 
336  do {
337  assumed = old;
338  old = atomicCAS(address_as_ull, assumed, max((long long)val, (long long)assumed));
339  } while (assumed != old);
340 
341  return old;
342 }


__device__ int64_t atomicMax64SkipVal ( int64_t *  address,
int64_t  val,
const int64_t  skip_val 
)

Definition at line 955 of file cuda_mapd_rt.cu.

Referenced by agg_max_skip_val_shared().

957  {
958  unsigned long long int* address_as_ull =
959  reinterpret_cast<unsigned long long int*>(address);
960  unsigned long long int old = *address_as_ull, assumed;
961 
962  do {
963  assumed = old;
964  old = atomicCAS(address_as_ull,
965  assumed,
966  assumed == skip_val ? val : max((long long)val, (long long)assumed));
967  } while (assumed != old);
968 
969  return old;
970 }


__device__ void atomicMax8 ( int8_t *  agg,
const int8_t  val 
)

Definition at line 524 of file cuda_mapd_rt.cu.

Referenced by agg_max_int8_shared().

524  {
525  // properly align the input pointer:
526  unsigned int* base_address_u32 =
527  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
528 
529  // __byte_perm(unsigned int A, unsigned int B, unsigned int s):
530  // if s == 0x3214 returns {A[31..24], A[23..16], A[15..8], B[7..0]}
531  // if s == 0x3240 returns {A[31..24], A[23..16], B[7...0], A[7..0]}
532  // if s == 0x3410 returns {A[31..24], B[7....0], A[15..8], A[7..0]}
533  // if s == 0x4210 returns {B[7....0], A[23..16], A[15..8], A[7..0]}
534  constexpr unsigned int byte_permutations[] = {0x3214, 0x3240, 0x3410, 0x4210};
535  unsigned int old_value = *base_address_u32;
536  unsigned int swap_value, compare_value;
537  do {
538  compare_value = old_value;
539  auto max_value = static_cast<unsigned int>(
540  // compare val with its corresponding bits in the compare_value
541  max(val,
542  static_cast<int8_t>(__byte_perm(
543  compare_value, 0, (reinterpret_cast<size_t>(agg) & 0x3) | 0x4440))));
544  swap_value = __byte_perm(
545  compare_value, max_value, byte_permutations[reinterpret_cast<size_t>(agg) & 0x3]);
546  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
547  } while (compare_value != old_value);
548 }


__device__ double atomicMin ( double *  address,
double  val 
)

Definition at line 405 of file cuda_mapd_rt.cu.

Referenced by agg_min_double_shared(), agg_min_float_shared(), agg_min_int32_shared(), atomicMin32SkipVal(), atomicMinFltSkipVal(), and compute_bucket_sizes_impl().

405  {
406  unsigned long long int* address_as_ull = (unsigned long long int*)address;
407  unsigned long long int old = *address_as_ull, assumed;
408 
409  do {
410  assumed = old;
411  old = atomicCAS(address_as_ull,
412  assumed,
413  __double_as_longlong(min(val, __longlong_as_double(assumed))));
414  } while (assumed != old);
415 
416  return __longlong_as_double(old);
417 }


__device__ double atomicMin ( float *  address,
float  val 
)

Definition at line 419 of file cuda_mapd_rt.cu.

419  {
420  int* address_as_ull = (int*)address;
421  int old = *address_as_ull, assumed;
422 
423  do {
424  assumed = old;
425  old = atomicCAS(
426  address_as_ull, assumed, __float_as_int(min(val, __int_as_float(assumed))));
427  } while (assumed != old);
428 
429  return __int_as_float(old);
430 }
__device__ void atomicMin16 ( int16_t *  agg,
const int16_t  val 
)

Definition at line 564 of file cuda_mapd_rt.cu.

Referenced by agg_min_int16_shared().

564  {
565  // properly align the input pointer:
566  unsigned int* base_address_u32 =
567  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
568 
569  unsigned int old_value = *base_address_u32;
570  unsigned int swap_value, compare_value;
571  do {
572  compare_value = old_value;
573  swap_value =
574  (reinterpret_cast<size_t>(agg) & 0x2)
575  ? static_cast<unsigned int>(min(static_cast<int16_t>(old_value >> 16), val))
576  << 16 |
577  (old_value & 0xFFFF)
578  : (old_value & 0xFFFF0000) |
579  static_cast<unsigned int>(
580  min(static_cast<int16_t>(old_value & 0xFFFF), val));
581  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
582  } while (old_value != compare_value);
583 }


__device__ void atomicMin16SkipVal ( int16_t *  agg,
const int16_t  val,
const int16_t  skip_val 
)

Definition at line 586 of file cuda_mapd_rt.cu.

Referenced by agg_min_int16_skip_val_shared().

588  {
589  // properly align the input pointer:
590  unsigned int* base_address_u32 =
591  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
592 
593  unsigned int old_value = *base_address_u32;
594  unsigned int swap_value, compare_value;
595  do {
596  compare_value = old_value;
597  int16_t selected_old_val = (reinterpret_cast<size_t>(agg) & 0x2)
598  ? static_cast<int16_t>(old_value >> 16)
599  : static_cast<int16_t>(old_value & 0xFFFF);
600 
601  swap_value =
602  (reinterpret_cast<size_t>(agg) & 0x2)
603  ? static_cast<unsigned int>(
604  selected_old_val == skip_val ? val : min(selected_old_val, val))
605  << 16 |
606  (old_value & 0xFFFF)
607  : (old_value & 0xFFFF0000) |
608  static_cast<unsigned int>(
609  selected_old_val == skip_val ? val : min(selected_old_val, val));
610  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
611  } while (old_value != compare_value);
612 }


__device__ int32_t atomicMin32SkipVal ( int32_t *  address,
int32_t  val,
const int32_t  skip_val 
)

Definition at line 878 of file cuda_mapd_rt.cu.

References atomicMin().

Referenced by agg_min_int32_skip_val_shared().

880  {
881  int32_t old = atomicExch(address, INT_MAX);
882  return atomicMin(address, old == skip_val ? val : min(old, val));
883 }
__device__ double atomicMin(double *address, double val)


__device__ int64_t atomicMin64 ( int64_t *  address,
int64_t  val 
)

Definition at line 344 of file cuda_mapd_rt.cu.

Referenced by agg_min_shared().

344  {
345  unsigned long long int* address_as_ull = (unsigned long long int*)address;
346  unsigned long long int old = *address_as_ull, assumed;
347 
348  do {
349  assumed = old;
350  old = atomicCAS(address_as_ull, assumed, min((long long)val, (long long)assumed));
351  } while (assumed != old);
352 
353  return old;
354 }


__device__ int64_t atomicMin64SkipVal ( int64_t *  address,
int64_t  val,
const int64_t  skip_val 
)

Definition at line 930 of file cuda_mapd_rt.cu.

Referenced by agg_min_skip_val_shared().

932  {
933  unsigned long long int* address_as_ull =
934  reinterpret_cast<unsigned long long int*>(address);
935  unsigned long long int old = *address_as_ull, assumed;
936 
937  do {
938  assumed = old;
939  old = atomicCAS(address_as_ull,
940  assumed,
941  assumed == skip_val ? val : min((long long)val, (long long)assumed));
942  } while (assumed != old);
943 
944  return old;
945 }


__device__ void atomicMin8 ( int8_t *  agg,
const int8_t  val 
)

Definition at line 614 of file cuda_mapd_rt.cu.

Referenced by agg_min_int8_shared().

614  {
615  // properly align the input pointer:
616  unsigned int* base_address_u32 =
617  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
618 
619  constexpr unsigned int byte_permutations[] = {0x3214, 0x3240, 0x3410, 0x4210};
620  unsigned int old_value = *base_address_u32;
621  unsigned int swap_value, compare_value;
622  do {
623  compare_value = old_value;
624  auto min_value = static_cast<unsigned int>(
625  min(val,
626  static_cast<int8_t>(__byte_perm(
627  compare_value, 0, (reinterpret_cast<size_t>(agg) & 0x3) | 0x4440))));
628  swap_value = __byte_perm(
629  compare_value, min_value, byte_permutations[reinterpret_cast<size_t>(agg) & 0x3]);
630  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
631  } while (compare_value != old_value);
632 }


__device__ void atomicMin8SkipVal ( int8_t *  agg,
const int8_t  val,
const int8_t  skip_val 
)

Definition at line 634 of file cuda_mapd_rt.cu.

Referenced by agg_min_int8_skip_val_shared().

634  {
635  // properly align the input pointer:
636  unsigned int* base_address_u32 =
637  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
638 
639  constexpr unsigned int byte_permutations[] = {0x3214, 0x3240, 0x3410, 0x4210};
640  unsigned int old_value = *base_address_u32;
641  unsigned int swap_value, compare_value;
642  do {
643  compare_value = old_value;
644  int8_t selected_old_val = static_cast<int8_t>(
645  __byte_perm(compare_value, 0, (reinterpret_cast<size_t>(agg) & 0x3) | 0x4440));
646  auto min_value = static_cast<unsigned int>(
647  selected_old_val == skip_val ? val : min(val, selected_old_val));
648  swap_value = __byte_perm(
649  compare_value, min_value, byte_permutations[reinterpret_cast<size_t>(agg) & 0x3]);
650  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
651  } while (compare_value != old_value);
652 }


__device__ double atomicMinDblSkipVal ( double *  address,
double  val,
const double  skip_val 
)

Definition at line 1061 of file cuda_mapd_rt.cu.

Referenced by agg_min_double_skip_val_shared().

1063  {
1064  unsigned long long int* address_as_ull =
1065  reinterpret_cast<unsigned long long int*>(address);
1066  unsigned long long int old = *address_as_ull;
1067  unsigned long long int skip_val_as_ull =
1068  *reinterpret_cast<const unsigned long long*>(&skip_val);
1069  unsigned long long int assumed;
1070 
1071  do {
1072  assumed = old;
1073  old = atomicCAS(address_as_ull,
1074  assumed,
1075  assumed == skip_val_as_ull
1076  ? *reinterpret_cast<unsigned long long*>(&val)
1077  : __double_as_longlong(min(val, __longlong_as_double(assumed))));
1078  } while (assumed != old);
1079 
1080  return __longlong_as_double(old);
1081 }


__device__ float atomicMinFltSkipVal ( int32_t *  address,
float  val,
const float  skip_val 
)

Definition at line 1013 of file cuda_mapd_rt.cu.

References atomicMin().

Referenced by agg_min_float_skip_val_shared().

1013  {
1014  float old = atomicExch(reinterpret_cast<float*>(address), FLT_MAX);
1015  return atomicMin(
1016  reinterpret_cast<float*>(address),
1017  __float_as_int(old) == __float_as_int(skip_val) ? val : fminf(old, val));
1018 }
__device__ double atomicMin(double *address, double val)


__device__ int32_t atomicSum32SkipVal ( int32_t *  address,
const int32_t  val,
const int32_t  skip_val 
)

Definition at line 893 of file cuda_mapd_rt.cu.

Referenced by agg_sum_int32_skip_val_shared().

895  {
896  unsigned int* address_as_int = (unsigned int*)address;
897  int32_t old = atomicExch(address_as_int, 0);
898  int32_t old2 = atomicAdd(address_as_int, old == skip_val ? val : (val + old));
899  return old == skip_val ? old2 : (old2 + old);
900 }


__device__ int64_t atomicSum64SkipVal ( int64_t *  address,
const int64_t  val,
const int64_t  skip_val 
)

Definition at line 912 of file cuda_mapd_rt.cu.

Referenced by agg_sum_skip_val_shared().

914  {
915  unsigned long long int* address_as_ull = (unsigned long long int*)address;
916  int64_t old = atomicExch(address_as_ull, 0);
917  int64_t old2 = atomicAdd(address_as_ull, old == skip_val ? val : (val + old));
918  return old == skip_val ? old2 : (old2 + old);
919 }
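The exchange-then-add sequence folds the skip_val test into two atomics: the running total is swapped out, combined locally with val, and added back, while the return value reconstructs the pre-update total. A worked trace, assuming *address = 10, val = 5, skip_val = -1, and no interleaving from other threads:

// old  = atomicExch(address, 0)      -> old  = 10, *address becomes 0
// old2 = atomicAdd(address, 5 + 10)  -> old2 = 0,  *address becomes 15
// return old2 + old                  -> 10, the total observed before this update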


__device__ void atomicSumDblSkipVal ( double *  address,
const double  val,
const double  skip_val 
)

Definition at line 1043 of file cuda_mapd_rt.cu.

Referenced by agg_sum_double_skip_val_shared().

1045  {
1046  unsigned long long int* address_as_ull = (unsigned long long int*)address;
1047  double old = __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(0.)));
1048  atomicAdd(
1049  address,
1050  __double_as_longlong(old) == __double_as_longlong(skip_val) ? val : (val + old));
1051 }


__device__ void atomicSumFltSkipVal ( float *  address,
const float  val,
const float  skip_val 
)

Definition at line 1028 of file cuda_mapd_rt.cu.

Referenced by agg_sum_float_skip_val_shared().

1030  {
1031  float old = atomicExch(address, 0.f);
1032  atomicAdd(address, __float_as_int(old) == __float_as_int(skip_val) ? val : (val + old));
1033 }


__device__ bool check_interrupt ( )

Definition at line 160 of file cuda_mapd_rt.cu.

160  {
161  return (runtime_interrupt_flag == 1) ? true : false;
162 }
__device__ int32_t runtime_interrupt_flag
Definition: cuda_mapd_rt.cu:96
__device__ int32_t checked_single_agg_id_double_shared ( int64_t *  agg,
const double  val,
const double  null_val 
)

Definition at line 725 of file cuda_mapd_rt.cu.

727  {
728  unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(agg);
729  unsigned long long int old = *address_as_ull, assumed;
730 
731  if (val == null_val) {
732  return 0;
733  }
734 
735  do {
736  if (static_cast<int64_t>(old) != __double_as_longlong(null_val)) {
737  if (static_cast<int64_t>(old) != __double_as_longlong(val)) {
738  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
739  return 15;
740  } else {
741  break;
742  }
743  }
744 
745  assumed = old;
746  old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val));
747  } while (assumed != old);
748 
749  return 0;
750 }
__device__ int32_t checked_single_agg_id_double_shared_slow ( int64_t *  agg,
const double *  valp,
const double  null_val 
)

Definition at line 757 of file cuda_mapd_rt.cu.

759  {
760  unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(agg);
761  unsigned long long int old = *address_as_ull, assumed;
762  double val = *valp;
763 
764  if (val == null_val) {
765  return 0;
766  }
767 
768  do {
769  if (static_cast<int64_t>(old) != __double_as_longlong(null_val)) {
770  if (static_cast<int64_t>(old) != __double_as_longlong(val)) {
771  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
772  return 15;
773  } else {
774  break;
775  }
776  }
777 
778  assumed = old;
779  old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val));
780  } while (assumed != old);
781 
782  return 0;
783 }
__device__ int32_t checked_single_agg_id_float_shared ( int32_t *  agg,
const float  val,
const float  null_val 
)

Definition at line 789 of file cuda_mapd_rt.cu.

791  {
792  int* address_as_ull = reinterpret_cast<int*>(agg);
793  int old = *address_as_ull, assumed;
794 
795  if (val == null_val) {
796  return 0;
797  }
798 
799  do {
800  if (old != __float_as_int(null_val)) {
801  if (old != __float_as_int(val)) {
802  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
803  return 15;
804  } else {
805  break;
806  }
807  }
808 
809  assumed = old;
810  old = atomicCAS(address_as_ull, assumed, __float_as_int(val));
811  } while (assumed != old);
812 
813  return 0;
814 }
__device__ int32_t checked_single_agg_id_shared ( int64_t *  agg,
const int64_t  val,
const int64_t  null_val 
)

Definition at line 682 of file cuda_mapd_rt.cu.

684  {
685  unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(agg);
686  unsigned long long int old = *address_as_ull, assumed;
687 
688  if (val == null_val) {
689  return 0;
690  }
691 
692  do {
693  if (static_cast<int64_t>(old) != null_val) {
694  if (static_cast<int64_t>(old) != val) {
695  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
696  return 15;
697  } else {
698  break;
699  }
700  }
701 
702  assumed = old;
703  old = atomicCAS(address_as_ull, assumed, val);
704  } while (assumed != old);
705 
706  return 0;
707 }
__device__ int64_t* declare_dynamic_shared_memory ( )

Definition at line 57 of file cuda_mapd_rt.cu.

57  {
58  extern __shared__ int64_t shared_mem_buffer[];
59  return shared_mem_buffer;
60 }
__device__ bool dynamic_watchdog ( )

Definition at line 116 of file cuda_mapd_rt.cu.

Referenced by anonymous_namespace{ResultSetReduction.cpp}::check_watchdog(), check_watchdog_rt(), get_group_value_columnar_slot_with_watchdog(), get_group_value_columnar_with_watchdog(), and get_group_value_with_watchdog().

116  {
117  // check for dynamic watchdog, if triggered all threads return true
118  if (dw_cycle_budget == 0LL) {
119  return false; // Uninitialized watchdog can't check time
120  }
121  if (dw_abort == 1) {
122  return true; // Received host request to abort
123  }
124  uint32_t smid = get_smid();
125  if (smid >= 128) {
126  return false;
127  }
128  __shared__ volatile int64_t dw_block_cycle_start; // Thread block shared cycle start
129  __shared__ volatile bool
130  dw_should_terminate; // all threads within a block should return together if
131  // watchdog criteria is met
132 
133  // thread 0 either initializes or reads the initial clock cycle; the result is stored
134  // into shared memory. Since all threads within a block share the same SM, there's no
135  // point in using more threads here.
136  if (threadIdx.x == 0) {
137  dw_block_cycle_start = 0LL;
138  int64_t cycle_count = static_cast<int64_t>(clock64());
139  // Make sure the block hasn't switched SMs
140  if (smid == get_smid()) {
141  dw_block_cycle_start = static_cast<int64_t>(
142  atomicCAS(reinterpret_cast<unsigned long long*>(&dw_sm_cycle_start[smid]),
143  0ULL,
144  static_cast<unsigned long long>(cycle_count)));
145  }
146 
147  int64_t cycles = cycle_count - dw_block_cycle_start;
148  if ((smid == get_smid()) && (dw_block_cycle_start > 0LL) &&
149  (cycles > dw_cycle_budget)) {
150  // Check if we're out of time on this particular SM
151  dw_should_terminate = true;
152  } else {
153  dw_should_terminate = false;
154  }
155  }
156  __syncthreads();
157  return dw_should_terminate;
158 }
__device__ int64_t dw_sm_cycle_start[128]
Definition: cuda_mapd_rt.cu:92
__device__ int64_t dw_cycle_budget
Definition: cuda_mapd_rt.cu:94
__inline__ __device__ uint32_t get_smid(void)
Definition: cuda_mapd_rt.cu:98
__device__ int32_t dw_abort
Definition: cuda_mapd_rt.cu:95
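Long-running kernels are expected to call dynamic_watchdog() periodically and exit cooperatively once it returns true. A hypothetical sketch of that polling pattern (the kernel and its buffers are illustrative, not part of this file):

__global__ void example_scan_kernel(const int64_t* in, int64_t* out, int32_t n) {
  for (int32_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += blockDim.x * gridDim.x) {
    if (dynamic_watchdog()) {
      return;  // every thread in the block sees the same verdict (shared flag + __syncthreads)
    }
    out[i] = in[i] + 1;
  }
}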


__device__ void force_sync ( )

Definition at line 1276 of file cuda_mapd_rt.cu.

1276  {
1277  __threadfence_block();
1278 }
__device__ int64_t get_block_index ( )

Definition at line 24 of file cuda_mapd_rt.cu.

24  {
25  return blockIdx.x;
26 }
template<typename T = unsigned long long>
__device__ T get_empty_key ( )
inline

Definition at line 165 of file cuda_mapd_rt.cu.

References EMPTY_KEY_64.

165  {
166  return EMPTY_KEY_64;
167 }
#define EMPTY_KEY_64
template<>
__device__ unsigned int get_empty_key ( )
inline

Definition at line 170 of file cuda_mapd_rt.cu.

References EMPTY_KEY_32.

170  {
171  return EMPTY_KEY_32;
172 }
#define EMPTY_KEY_32
template<typename T >
__device__ int64_t* get_matching_group_value ( int64_t *  groups_buffer,
const uint32_t  h,
const T *  key,
const uint32_t  key_count,
const uint32_t  row_size_quad 
)
inline

Definition at line 175 of file cuda_mapd_rt.cu.

References align_to_int64().

Referenced by get_group_value(), get_group_value_with_watchdog(), and get_matching_group_value().

179  {
180  const T empty_key = get_empty_key<T>();
181  uint32_t off = h * row_size_quad;
182  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
183  {
184  const T old = atomicCAS(row_ptr, empty_key, *key);
185  if (empty_key == old && key_count > 1) {
186  for (size_t i = 1; i <= key_count - 1; ++i) {
187  atomicExch(row_ptr + i, key[i]);
188  }
189  }
190  }
191  if (key_count > 1) {
192  while (atomicAdd(row_ptr + key_count - 1, 0) == empty_key) {
193  // spin until the winning thread has finished writing the entire key and the init
194  // value
195  }
196  }
197  bool match = true;
198  for (uint32_t i = 0; i < key_count; ++i) {
199  if (row_ptr[i] != key[i]) {
200  match = false;
201  break;
202  }
203  }
204 
205  if (match) {
206  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
207  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
208  }
209  return NULL;
210 }
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)


__device__ int64_t* get_matching_group_value ( int64_t *  groups_buffer,
const uint32_t  h,
const int64_t *  key,
const uint32_t  key_count,
const uint32_t  key_width,
const uint32_t  row_size_quad,
const int64_t *  init_vals 
)

Definition at line 212 of file cuda_mapd_rt.cu.

References get_matching_group_value().

218  {
219  switch (key_width) {
220  case 4:
221  return get_matching_group_value(groups_buffer,
222  h,
223  reinterpret_cast<const unsigned int*>(key),
224  key_count,
225  row_size_quad);
226  case 8:
227  return get_matching_group_value(groups_buffer,
228  h,
229  reinterpret_cast<const unsigned long long*>(key),
230  key_count,
231  row_size_quad);
232  default:
233  return NULL;
234  }
235 }
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
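Callers are expected to hash the key to a starting bucket and probe until get_matching_group_value() returns a row pointer. A hedged sketch of that usage (the helper below is illustrative; the real probing logic lives in GroupByRuntime.cpp), using the MurmurHash64A signature shown above and the key_width-dispatching overload:

__device__ int64_t* probe_group_slot_example(int64_t* groups_buffer,
                                             const uint32_t entry_count,
                                             const int64_t* key,
                                             const uint32_t key_count,
                                             const uint32_t row_size_quad) {
  const uint32_t start =
      MurmurHash64A(key, static_cast<int>(key_count * sizeof(int64_t)), 0) % entry_count;
  for (uint32_t probe = 0; probe < entry_count; ++probe) {
    const uint32_t h = (start + probe) % entry_count;
    int64_t* row = get_matching_group_value(
        groups_buffer, h, key, key_count, /*key_width=*/8, row_size_quad, /*init_vals=*/NULL);
    if (row) {
      return row;  // claimed an empty slot or matched an existing key
    }
  }
  return NULL;  // buffer exhausted
}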


__device__ int64_t* get_matching_group_value_columnar ( int64_t *  groups_buffer,
const uint32_t  h,
const int64_t *  key,
const uint32_t  key_qw_count,
const size_t  entry_count 
)

Definition at line 298 of file cuda_mapd_rt.cu.

References EMPTY_KEY_64.

Referenced by get_group_value_columnar(), and get_group_value_columnar_with_watchdog().

303  {
304  uint32_t off = h;
305  {
306  const uint64_t old = atomicCAS(
307  reinterpret_cast<unsigned long long*>(groups_buffer + off), EMPTY_KEY_64, *key);
308  if (EMPTY_KEY_64 == old) {
309  for (size_t i = 0; i < key_qw_count; ++i) {
310  groups_buffer[off] = key[i];
311  off += entry_count;
312  }
313  return &groups_buffer[off];
314  }
315  }
316  __syncthreads();
317  off = h;
318  for (size_t i = 0; i < key_qw_count; ++i) {
319  if (groups_buffer[off] != key[i]) {
320  return NULL;
321  }
322  off += entry_count;
323  }
324  return &groups_buffer[off];
325 }
#define EMPTY_KEY_64


template<typename T >
__device__ int32_t get_matching_group_value_columnar_slot ( int64_t *  groups_buffer,
const uint32_t  entry_count,
const uint32_t  h,
const T *  key,
const uint32_t  key_count 
)

Definition at line 238 of file cuda_mapd_rt.cu.


Referenced by get_group_value_columnar_slot(), get_group_value_columnar_slot_with_watchdog(), and get_matching_group_value_columnar_slot().

242  {
243  const T empty_key = get_empty_key<T>();
244  const uint64_t old =
245  atomicCAS(reinterpret_cast<T*>(groups_buffer + h), empty_key, *key);
246  // the winner thread proceeds with writing the rest of the keys
247  if (old == empty_key) {
248  uint32_t offset = h + entry_count;
249  for (size_t i = 1; i < key_count; ++i) {
250  *reinterpret_cast<T*>(groups_buffer + offset) = key[i];
251  offset += entry_count;
252  }
253  }
254 
255  __threadfence();
256  // for all threads except the winning thread, memory content of the keys
257  // related to the hash offset are checked again. In case of a complete match
258  // the hash offset is returned, otherwise -1 is returned
259  if (old != empty_key) {
260  uint32_t offset = h;
261  for (uint32_t i = 0; i < key_count; ++i) {
262  if (*reinterpret_cast<T*>(groups_buffer + offset) != key[i]) {
263  return -1;
264  }
265  offset += entry_count;
266  }
267  }
268  return h;
269 }


__device__ int32_t get_matching_group_value_columnar_slot ( int64_t *  groups_buffer,
const uint32_t  entry_count,
const uint32_t  h,
const int64_t *  key,
const uint32_t  key_count,
const uint32_t  key_width 
)

Definition at line 272 of file cuda_mapd_rt.cu.

References get_matching_group_value_columnar_slot().

277  {
278  switch (key_width) {
279  case 4:
280  return get_matching_group_value_columnar_slot(
281  groups_buffer,
282  entry_count,
283  h,
284  reinterpret_cast<const unsigned int*>(key),
285  key_count);
286  case 8:
287  return get_matching_group_value_columnar_slot(
288  groups_buffer,
289  entry_count,
290  h,
291  reinterpret_cast<const unsigned long long*>(key),
292  key_count);
293  default:
294  return -1;
295  }
296 }
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)


__inline__ __device__ uint32_t get_smid ( void  )

Definition at line 98 of file cuda_mapd_rt.cu.

Referenced by dynamic_watchdog().

98  {
99  uint32_t ret;
100  asm("mov.u32 %0, %%smid;" : "=r"(ret));
101  return ret;
102 }
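get_smid() enables per-SM bookkeeping; dynamic_watchdog(), for instance, uses it to index dw_sm_cycle_start. A hypothetical sketch of the same pattern with an illustrative counter array:

// Hypothetical per-SM counter keyed by get_smid(); dw_sm_cycle_start is used
// in a similar spirit by dynamic_watchdog().
__device__ unsigned long long per_sm_visits[128];

__global__ void count_sm_visits() {
  if (threadIdx.x == 0) {
    atomicAdd(&per_sm_visits[get_smid()], 1ULL);
  }
}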


__device__ int64_t get_thread_index ( )

Definition at line 20 of file cuda_mapd_rt.cu.

20  {
21  return threadIdx.x;
22 }
__device__ int32_t group_buff_idx_impl ( )

Definition at line 32 of file cuda_mapd_rt.cu.

References pos_start_impl().

32  {
33  return pos_start_impl(NULL);
34 }
__device__ int32_t pos_start_impl(const int32_t *row_index_resume)
Definition: cuda_mapd_rt.cu:28


__device__ const int64_t* init_shared_mem ( const int64_t *  global_groups_buffer,
const int32_t  groups_buffer_size 
)

Initializes the shared memory buffer for perfect hash group by. In this function, we simply copy the global group by buffer (already initialized on the host and transferred) to all shared memory group by buffers.

Definition at line 67 of file cuda_mapd_rt.cu.

68  {
69  // dynamic shared memory declaration
70  extern __shared__ int64_t shared_groups_buffer[];
71 
72  // it is assumed that buffer size is aligned with 64-bit units
73  // so it is safe to assign 64-bit to each thread
74  const int32_t buffer_units = groups_buffer_size >> 3;
75 
76  for (int32_t pos = threadIdx.x; pos < buffer_units; pos += blockDim.x) {
77  shared_groups_buffer[pos] = global_groups_buffer[pos];
78  }
79  __syncthreads();
80  return shared_groups_buffer;
81 }
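Since the buffer is declared as dynamic shared memory, the launching code has to pass its size (in bytes) as the third kernel launch parameter. A hypothetical launch sketch (the kernel here is illustrative; the actual query kernels are generated at runtime):

// Hypothetical kernel using init_shared_mem(); names and structure are illustrative only.
__global__ void group_by_kernel(const int64_t* global_groups_buffer,
                                const int32_t groups_buffer_size) {
  const int64_t* shared_buffer =
      init_shared_mem(global_groups_buffer, groups_buffer_size);
  // ... per-block aggregation against shared_buffer would follow here ...
  (void)shared_buffer;
}

// Host side: the dynamic shared memory size (third launch parameter) must match
// groups_buffer_size, which is expressed in bytes:
//   group_by_kernel<<<grid_dim, block_dim, groups_buffer_size>>>(dev_buffer,
//                                                                groups_buffer_size);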
__device__ const int64_t* init_shared_mem_nop ( const int64_t *  groups_buffer,
const int32_t  groups_buffer_size 
)

Definition at line 44 of file cuda_mapd_rt.cu.

46  {
47  return groups_buffer;
48 }
__device__ void linear_probabilistic_count ( uint8_t *  bitmap,
const uint32_t  bitmap_bytes,
const uint8_t *  key_bytes,
const uint32_t  key_len 
)

Definition at line 1205 of file cuda_mapd_rt.cu.

1208  {
1209  const uint32_t bit_pos = MurmurHash1(key_bytes, key_len, 0) % (bitmap_bytes * 8);
1210  const uint32_t word_idx = bit_pos / 32;
1211  const uint32_t bit_idx = bit_pos % 32;
1212  atomicOr(((uint32_t*)bitmap) + word_idx, 1 << bit_idx);
1213 }
NEVER_INLINE DEVICE uint32_t MurmurHash1(const void *key, int len, const uint32_t seed)
Definition: MurmurHash.cpp:20
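The bit addressing is plain modular arithmetic over the bitmap. A host-side sketch of the same index computation, with a placeholder hash value standing in for MurmurHash1(key_bytes, key_len, 0):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t bitmap_bytes = 1024;      // bitmap holds 8192 addressable bits
  const uint32_t hash = 0x9e3779b9u;       // placeholder for MurmurHash1(key_bytes, key_len, 0)
  const uint32_t bit_pos = hash % (bitmap_bytes * 8);
  const uint32_t word_idx = bit_pos / 32;  // 32-bit word the atomicOr targets
  const uint32_t bit_idx = bit_pos % 32;   // bit within that word
  printf("bit %u -> word %u, bit %u\n", bit_pos, word_idx, bit_idx);
  return 0;
}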
__device__ int32_t pos_start_impl ( const int32_t *  row_index_resume)

Definition at line 28 of file cuda_mapd_rt.cu.

Referenced by get_bin_from_k_heap_impl(), get_error_code(), group_buff_idx_impl(), and record_error_code().

28  {
29  return blockIdx.x * blockDim.x + threadIdx.x;
30 }


__device__ int32_t pos_step_impl ( )

Definition at line 36 of file cuda_mapd_rt.cu.

Referenced by get_bin_from_k_heap_impl().

36  {
37  return blockDim.x * gridDim.x;
38 }
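pos_start_impl() and pos_step_impl() are the building blocks of a grid-stride loop over the input rows. A hypothetical sketch of that pattern (row_process is an illustrative placeholder, not a function from this file):

// Hypothetical grid-stride loop built from pos_start_impl() / pos_step_impl().
__device__ void row_process(int64_t row_idx) {
  (void)row_idx;  // placeholder for per-row work
}

__global__ void scan_rows(const int64_t row_count) {
  const int32_t start = pos_start_impl(NULL);  // blockIdx.x * blockDim.x + threadIdx.x
  const int32_t step = pos_step_impl();        // blockDim.x * gridDim.x
  for (int64_t pos = start; pos < row_count; pos += step) {
    row_process(pos);
  }
}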


__device__ bool slotEmptyKeyCAS ( int64_t *  slot,
int64_t  new_val,
int64_t  init_val 
)

Definition at line 1106 of file cuda_mapd_rt.cu.

1108  {
1109  auto slot_address = reinterpret_cast<unsigned long long int*>(slot);
1110  const auto empty_key =
1111  static_cast<unsigned long long int*>(static_cast<void*>(&init_val));
1112  const auto new_val_cast =
1113  static_cast<unsigned long long int*>(static_cast<void*>(&new_val));
1114 
1115  const auto old_val = atomicCAS(slot_address, *empty_key, *new_val_cast);
1116  if (old_val == *empty_key) {
1117  return true;
1118  } else {
1119  return false;
1120  }
1121 }
__device__ bool slotEmptyKeyCAS_int16 ( int16_t *  slot,
int16_t  new_val,
int16_t  init_val 
)

Definition at line 1134 of file cuda_mapd_rt.cu.

1136  {
1137  unsigned int* base_slot_address =
1138  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(slot) & ~0x3);
1139  unsigned int old_value = *base_slot_address;
1140  unsigned int swap_value, compare_value;
1141  do {
1142  compare_value = old_value;
1143  // exit criteria: if init_val does not exist in the slot (some other thread has
1144  // succeeded)
1145  if (static_cast<unsigned int>(init_val) !=
1146  __byte_perm(
1147  compare_value, 0, (reinterpret_cast<size_t>(slot) & 0x2 ? 0x3244 : 0x4410))) {
1148  return false;
1149  }
1150  swap_value = __byte_perm(compare_value,
1151  static_cast<unsigned int>(new_val),
1152  (reinterpret_cast<size_t>(slot) & 0x2) ? 0x5410 : 0x3254);
1153  old_value = atomicCAS(base_slot_address, compare_value, swap_value);
1154  } while (compare_value != old_value);
1155  return true;
1156 }
__device__ bool slotEmptyKeyCAS_int32 ( int32_t *  slot,
int32_t  new_val,
int32_t  init_val 
)

Definition at line 1123 of file cuda_mapd_rt.cu.

1125  {
1126  unsigned int* slot_address = reinterpret_cast<unsigned int*>(slot);
1127  unsigned int compare_value = static_cast<unsigned int>(init_val);
1128  unsigned int swap_value = static_cast<unsigned int>(new_val);
1129 
1130  const unsigned int old_value = atomicCAS(slot_address, compare_value, swap_value);
1131  return old_value == compare_value;
1132 }
__device__ bool slotEmptyKeyCAS_int8 ( int8_t *  slot,
int8_t  new_val,
int8_t  init_val 
)

Definition at line 1158 of file cuda_mapd_rt.cu.

1160  {
1161  // properly align the slot address:
1162  unsigned int* base_slot_address =
1163  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(slot) & ~0x3);
1164  constexpr unsigned int byte_permutations[] = {0x3214, 0x3240, 0x3410, 0x4210};
1165  unsigned int old_value = *base_slot_address;
1166  unsigned int swap_value, compare_value;
1167  do {
1168  compare_value = old_value;
1169  // exit criteria: if init_val does not exist in the slot (some other thread has
1170  // succeeded)
1171  if (static_cast<unsigned int>(init_val) !=
1172  __byte_perm(compare_value, 0, (reinterpret_cast<size_t>(slot) & 0x3) | 0x4440)) {
1173  return false;
1174  }
1175  swap_value = __byte_perm(compare_value,
1176  static_cast<unsigned int>(new_val),
1177  byte_permutations[reinterpret_cast<size_t>(slot) & 0x3]);
1178  old_value = atomicCAS(base_slot_address, compare_value, swap_value);
1179  } while (compare_value != old_value);
1180  return true;
1181 }
__device__ uint64_t string_decode ( int8_t *  chunk_iter_,
int64_t  pos 
)

Definition at line 1194 of file cuda_mapd_rt.cu.

References ChunkIter_get_nth(), VarlenDatum::is_null, VarlenDatum::length, and VarlenDatum::pointer.

1194  {
1195  // TODO(alex): de-dup, the x64 version is basically identical
1196  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
1197  VarlenDatum vd;
1198  bool is_end;
1199  ChunkIter_get_nth(chunk_iter, pos, false, &vd, &is_end);
1200  return vd.is_null ? 0
1201  : (reinterpret_cast<uint64_t>(vd.pointer) & 0xffffffffffff) |
1202  (static_cast<uint64_t>(vd.length) << 48);
1203 }
bool is_null
Definition: sqltypes.h:147
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:181
int8_t * pointer
Definition: sqltypes.h:146
size_t length
Definition: sqltypes.h:145
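The packed value keeps the varlen pointer in the low 48 bits and the length in the high 16 bits. A small sketch of unpacking that encoding (unpack_string is an illustrative helper, not part of the runtime):

#include <cstdint>

// Illustrative helper that reverses the 48-bit pointer / 16-bit length packing
// produced by string_decode.
inline void unpack_string(const uint64_t packed, const char*& ptr, uint32_t& len) {
  ptr = reinterpret_cast<const char*>(packed & 0xffffffffffffULL);  // low 48 bits: pointer
  len = static_cast<uint32_t>(packed >> 48);                        // high 16 bits: length
}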


__device__ void sync_threadblock ( )

Definition at line 1299 of file cuda_mapd_rt.cu.

Referenced by GpuSharedMemCodeBuilder::codegenInitialization(), and GpuSharedMemCodeBuilder::codegenReduction().

1299  {
1300  __syncthreads();
1301 }


__device__ void sync_warp ( )

Definition at line 1280 of file cuda_mapd_rt.cu.

1280  {
1281  __syncwarp();
1282 }
__device__ void sync_warp_protected ( int64_t  thread_pos,
int64_t  row_count 
)

Protected warp synchronization to make sure all (or none of the) threads within a warp go through a synchronization barrier. thread_pos: the current thread position to be used for a memory access. row_count: maximum number of rows to be processed. The function performs a warp sync only if all 32 threads within that warp will process valid data. NOTE: it currently assumes that warp size is 32.

Definition at line 1291 of file cuda_mapd_rt.cu.

1291  {
1292  // only syncing if NOT within the same warp as those threads experiencing the critical
1293  // edge
1294  if ((((row_count - 1) | 0x1F) - thread_pos) >= 32) {
1295  __syncwarp();
1296  }
1297 }
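A quick worked check of the guard for row_count = 100: (99 | 0x1F) = 127, so threads 0-95 (whose warps only touch valid rows) synchronize, while the warp covering threads 96-127 straddles row_count and skips the __syncwarp(). A host-side sketch of that evaluation:

#include <cstdint>
#include <cstdio>

// Host-side evaluation of the sync_warp_protected guard for row_count = 100.
int main() {
  const int64_t row_count = 100;
  const int64_t positions[] = {0, 95, 96, 127};
  for (const int64_t thread_pos : positions) {
    const bool syncs = (((row_count - 1) | 0x1F) - thread_pos) >= 32;
    printf("thread %3lld: %s\n",
           static_cast<long long>(thread_pos),
           syncs ? "syncs (whole warp has valid rows)" : "skips __syncwarp()");
  }
  return 0;
}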
__device__ int8_t thread_warp_idx ( const int8_t  warp_sz)

Definition at line 40 of file cuda_mapd_rt.cu.

40  {
41  return threadIdx.x % warp_sz;
42 }
__device__ void write_back_non_grouped_agg ( int64_t *  input_buffer,
int64_t *  output_buffer,
const int32_t  agg_idx 
)

Definition at line 1311 of file cuda_mapd_rt.cu.

References agg_sum_shared().

1313  {
1314  if (threadIdx.x == agg_idx) {
1315  agg_sum_shared(output_buffer, input_buffer[agg_idx]);
1316  }
1317 }
__device__ int64_t agg_sum_shared(int64_t *agg, const int64_t val)


__device__ void write_back_nop ( int64_t *  dest,
int64_t *  src,
const int32_t  sz 
)

Definition at line 50 of file cuda_mapd_rt.cu.

50  {
51 }

Variable Documentation

__device__ int32_t dw_abort = 0

Definition at line 95 of file cuda_mapd_rt.cu.

Referenced by dynamic_watchdog(), dynamic_watchdog_init(), and Executor::interrupt().

__device__ int64_t dw_cycle_budget = 0

Definition at line 94 of file cuda_mapd_rt.cu.

Referenced by dynamic_watchdog(), and dynamic_watchdog_init().

__device__ int64_t dw_sm_cycle_start[128]

Definition at line 92 of file cuda_mapd_rt.cu.

Referenced by dynamic_watchdog().

__device__ int32_t runtime_interrupt_flag = 0

Definition at line 96 of file cuda_mapd_rt.cu.

Referenced by check_interrupt(), check_interrupt_init(), and Executor::interrupt().