OmniSciDB  8fa3bf436f
cuda_mapd_rt.cu File Reference
#include <cuda.h>
#include <float.h>
#include <stdint.h>
#include <stdio.h>
#include <limits>
#include "BufferCompaction.h"
#include "ExtensionFunctions.hpp"
#include "GpuRtConstants.h"
#include "HyperLogLogRank.h"
#include "TableFunctions/TableFunctions.hpp"
#include "GpuInitGroups.cu"
#include "GroupByRuntime.cpp"
#include "JoinHashTable/Runtime/JoinHashTableQueryRuntime.cpp"
#include "MurmurHash.cpp"
#include "TopKRuntime.cpp"
#include "../Utils/ChunkIter.cpp"
#include "DateTruncate.cpp"
#include "ExtractFromTime.cpp"
#include "ArrayOps.cpp"
#include "DateAdd.cpp"
#include "StringFunctions.cpp"
#include "../Utils/Regexp.cpp"
#include "../Utils/StringLike.cpp"


Macros

#define init_group_by_buffer_gpu_impl   init_group_by_buffer_gpu
 
#define DEF_AGG_ID_INT_SHARED(n)
 
#define DEF_SKIP_AGG(base_agg_func)
 
#define DATA_T   int64_t
 
#define ADDR_T   uint64_t
 
#define DATA_T   int32_t
 
#define ADDR_T   uint32_t
 
#define DEF_SKIP_AGG(base_agg_func)
 
#define DATA_T   double
 
#define ADDR_T   uint64_t
 
#define DATA_T   float
 
#define ADDR_T   uint32_t
 
#define EXECUTE_INCLUDE
 

Functions

__device__ int64_t get_thread_index ()
 
__device__ int64_t get_block_index ()
 
__device__ int32_t pos_start_impl (const int32_t *row_index_resume)
 
__device__ int32_t group_buff_idx_impl ()
 
__device__ int32_t pos_step_impl ()
 
__device__ int8_t thread_warp_idx (const int8_t warp_sz)
 
__device__ const int64_t * init_shared_mem_nop (const int64_t *groups_buffer, const int32_t groups_buffer_size)
 
__device__ void write_back_nop (int64_t *dest, int64_t *src, const int32_t sz)
 
__device__ int64_t * declare_dynamic_shared_memory ()
 
__device__ const int64_t * init_shared_mem (const int64_t *global_groups_buffer, const int32_t groups_buffer_size)
 
__inline__ __device__ uint32_t get_smid (void)
 
__device__ bool dynamic_watchdog ()
 
__device__ bool check_interrupt ()
 
template<typename T = unsigned long long>
__device__ T get_empty_key ()
 
template<>
__device__ unsigned int get_empty_key ()
 
template<typename T >
__device__ int64_t * get_matching_group_value (int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
 
__device__ int64_t * get_matching_group_value (int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad)
 
template<typename T >
__device__ int32_t get_matching_group_value_columnar_slot (int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
 
__device__ int32_t get_matching_group_value_columnar_slot (int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
 
__device__ int64_t * get_matching_group_value_columnar (int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
 
__device__ int64_t atomicMax64 (int64_t *address, int64_t val)
 
__device__ int64_t atomicMin64 (int64_t *address, int64_t val)
 
__device__ double atomicMax (double *address, double val)
 
__device__ float atomicMax (float *address, float val)
 
__device__ double atomicMin (double *address, double val)
 
__device__ double atomicMin (float *address, float val)
 
__device__ uint64_t agg_count_shared (uint64_t *agg, const int64_t val)
 
__device__ uint32_t agg_count_int32_shared (uint32_t *agg, const int32_t val)
 
__device__ uint64_t agg_count_double_shared (uint64_t *agg, const double val)
 
__device__ uint32_t agg_count_float_shared (uint32_t *agg, const float val)
 
__device__ int64_t agg_sum_shared (int64_t *agg, const int64_t val)
 
__device__ int32_t agg_sum_int32_shared (int32_t *agg, const int32_t val)
 
__device__ void agg_sum_float_shared (int32_t *agg, const float val)
 
__device__ void agg_sum_double_shared (int64_t *agg, const double val)
 
__device__ void agg_max_shared (int64_t *agg, const int64_t val)
 
__device__ void agg_max_int32_shared (int32_t *agg, const int32_t val)
 
__device__ void agg_max_double_shared (int64_t *agg, const double val)
 
__device__ void agg_max_float_shared (int32_t *agg, const float val)
 
__device__ void agg_min_shared (int64_t *agg, const int64_t val)
 
__device__ void agg_min_int32_shared (int32_t *agg, const int32_t val)
 
__device__ void atomicMax16 (int16_t *agg, const int16_t val)
 
__device__ void atomicMax8 (int8_t *agg, const int8_t val)
 
__device__ void atomicMin16 (int16_t *agg, const int16_t val)
 
__device__ void atomicMin16SkipVal (int16_t *agg, const int16_t val, const int16_t skip_val)
 
__device__ void atomicMin8 (int8_t *agg, const int8_t val)
 
__device__ void atomicMin8SkipVal (int8_t *agg, const int8_t val, const int8_t skip_val)
 
__device__ void agg_max_int16_shared (int16_t *agg, const int16_t val)
 
__device__ void agg_max_int8_shared (int8_t *agg, const int8_t val)
 
__device__ void agg_min_int16_shared (int16_t *agg, const int16_t val)
 
__device__ void agg_min_int8_shared (int8_t *agg, const int8_t val)
 
__device__ void agg_min_double_shared (int64_t *agg, const double val)
 
__device__ void agg_min_float_shared (int32_t *agg, const float val)
 
__device__ void agg_id_shared (int64_t *agg, const int64_t val)
 
__device__ int32_t checked_single_agg_id_shared (int64_t *agg, const int64_t val, const int64_t null_val)
 
__device__ void agg_id_double_shared (int64_t *agg, const double val)
 
__device__ int32_t checked_single_agg_id_double_shared (int64_t *agg, const double val, const double null_val)
 
__device__ void agg_id_double_shared_slow (int64_t *agg, const double *val)
 
__device__ int32_t checked_single_agg_id_double_shared_slow (int64_t *agg, const double *valp, const double null_val)
 
__device__ void agg_id_float_shared (int32_t *agg, const float val)
 
__device__ int32_t checked_single_agg_id_float_shared (int32_t *agg, const float val, const float null_val)
 
__device__ void agg_max_int32_skip_val_shared (int32_t *agg, const int32_t val, const int32_t skip_val)
 
__device__ void agg_max_int16_skip_val_shared (int16_t *agg, const int16_t val, const int16_t skip_val)
 
__device__ void agg_min_int16_skip_val_shared (int16_t *agg, const int16_t val, const int16_t skip_val)
 
__device__ void agg_max_int8_skip_val_shared (int8_t *agg, const int8_t val, const int8_t skip_val)
 
__device__ void agg_min_int8_skip_val_shared (int8_t *agg, const int8_t val, const int8_t skip_val)
 
__device__ int32_t atomicMin32SkipVal (int32_t *address, int32_t val, const int32_t skip_val)
 
__device__ void agg_min_int32_skip_val_shared (int32_t *agg, const int32_t val, const int32_t skip_val)
 
__device__ int32_t atomicSum32SkipVal (int32_t *address, const int32_t val, const int32_t skip_val)
 
__device__ int32_t agg_sum_int32_skip_val_shared (int32_t *agg, const int32_t val, const int32_t skip_val)
 
__device__ int64_t atomicSum64SkipVal (int64_t *address, const int64_t val, const int64_t skip_val)
 
__device__ int64_t agg_sum_skip_val_shared (int64_t *agg, const int64_t val, const int64_t skip_val)
 
__device__ int64_t atomicMin64SkipVal (int64_t *address, int64_t val, const int64_t skip_val)
 
__device__ void agg_min_skip_val_shared (int64_t *agg, const int64_t val, const int64_t skip_val)
 
__device__ int64_t atomicMax64SkipVal (int64_t *address, int64_t val, const int64_t skip_val)
 
__device__ void agg_max_skip_val_shared (int64_t *agg, const int64_t val, const int64_t skip_val)
 
__device__ void agg_max_float_skip_val_shared (int32_t *agg, const float val, const float skip_val)
 
__device__ float atomicMinFltSkipVal (int32_t *address, float val, const float skip_val)
 
__device__ void agg_min_float_skip_val_shared (int32_t *agg, const float val, const float skip_val)
 
__device__ void atomicSumFltSkipVal (float *address, const float val, const float skip_val)
 
__device__ void agg_sum_float_skip_val_shared (int32_t *agg, const float val, const float skip_val)
 
__device__ void atomicSumDblSkipVal (double *address, const double val, const double skip_val)
 
__device__ void agg_sum_double_skip_val_shared (int64_t *agg, const double val, const double skip_val)
 
__device__ double atomicMinDblSkipVal (double *address, double val, const double skip_val)
 
__device__ void agg_min_double_skip_val_shared (int64_t *agg, const double val, const double skip_val)
 
__device__ void agg_max_double_skip_val_shared (int64_t *agg, const double val, const double skip_val)
 
__device__ bool slotEmptyKeyCAS (int64_t *slot, int64_t new_val, int64_t init_val)
 
__device__ bool slotEmptyKeyCAS_int32 (int32_t *slot, int32_t new_val, int32_t init_val)
 
__device__ bool slotEmptyKeyCAS_int16 (int16_t *slot, int16_t new_val, int16_t init_val)
 
__device__ bool slotEmptyKeyCAS_int8 (int8_t *slot, int8_t new_val, int8_t init_val)
 
__device__ uint64_t string_decode (int8_t *chunk_iter_, int64_t pos)
 
__device__ void linear_probabilistic_count (uint8_t *bitmap, const uint32_t bitmap_bytes, const uint8_t *key_bytes, const uint32_t key_len)
 
__device__ void agg_count_distinct_bitmap_gpu (int64_t *agg, const int64_t val, const int64_t min_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
 
__device__ void agg_count_distinct_bitmap_skip_val_gpu (int64_t *agg, const int64_t val, const int64_t min_val, const int64_t skip_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
 
__device__ void agg_approximate_count_distinct_gpu (int64_t *agg, const int64_t key, const uint32_t b, const int64_t base_dev_addr, const int64_t base_host_addr)
 
__device__ void force_sync ()
 
__device__ void sync_warp ()
 
__device__ void sync_warp_protected (int64_t thread_pos, int64_t row_count)
 
__device__ void sync_threadblock ()
 
__device__ void write_back_non_grouped_agg (int64_t *input_buffer, int64_t *output_buffer, const int32_t agg_idx)
 

Variables

__device__ int64_t dw_sm_cycle_start [128]
 
__device__ int64_t dw_cycle_budget = 0
 
__device__ int32_t dw_abort = 0
 
__device__ int32_t runtime_interrupt_flag = 0
 

Macro Definition Documentation

#define ADDR_T   uint64_t

Definition at line 996 of file cuda_mapd_rt.cu.

#define ADDR_T   uint32_t

Definition at line 996 of file cuda_mapd_rt.cu.

#define ADDR_T   uint64_t

Definition at line 996 of file cuda_mapd_rt.cu.

#define ADDR_T   uint32_t

Definition at line 996 of file cuda_mapd_rt.cu.

#define DATA_T   int64_t

Definition at line 995 of file cuda_mapd_rt.cu.

#define DATA_T   int32_t

Definition at line 995 of file cuda_mapd_rt.cu.

#define DATA_T   double

Definition at line 995 of file cuda_mapd_rt.cu.

#define DATA_T   float

Definition at line 995 of file cuda_mapd_rt.cu.

#define DEF_AGG_ID_INT_SHARED (   n)
Value:
extern "C" __device__ void agg_id_int##n##_shared(int##n##_t* agg, \
const int##n##_t val) { \
*agg = val; \
}

Definition at line 708 of file cuda_mapd_rt.cu.
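
For illustration only (not text from the source): instantiating the macro with n = 32 would expand to roughly the following device function.

extern "C" __device__ void agg_id_int32_shared(int32_t* agg, const int32_t val) {
  *agg = val;  // plain store; ID aggregates keep the last value written
}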

#define DEF_SKIP_AGG (   base_agg_func)
Value:
extern "C" __device__ ADDR_T base_agg_func##_skip_val_shared( \
ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
if (val != skip_val) { \
return base_agg_func##_shared(agg, val); \
} \
return 0; \
}
#define DATA_T
#define ADDR_T

Definition at line 980 of file cuda_mapd_rt.cu.
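
As a hedged illustration, with DATA_T and ADDR_T as in the surrounding block (int64_t and uint64_t) and a hypothetical base aggregate named agg_foo, DEF_SKIP_AGG(agg_foo) would expand to approximately:

extern "C" __device__ uint64_t agg_foo_skip_val_shared(uint64_t* agg,
                                                       const int64_t val,
                                                       const int64_t skip_val) {
  if (val != skip_val) {
    return agg_foo_shared(agg, val);  // forward to the non-skip-val variant
  }
  return 0;
}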

#define DEF_SKIP_AGG (   base_agg_func)
Value:
extern "C" __device__ ADDR_T base_agg_func##_skip_val_shared( \
ADDR_T* agg, const DATA_T val, const DATA_T skip_val) { \
if (val != skip_val) { \
return base_agg_func##_shared(agg, val); \
} \
return *agg; \
}
#define DATA_T
#define ADDR_T

Definition at line 980 of file cuda_mapd_rt.cu.

#define EXECUTE_INCLUDE

Definition at line 1185 of file cuda_mapd_rt.cu.

#define init_group_by_buffer_gpu_impl   init_group_by_buffer_gpu

Definition at line 83 of file cuda_mapd_rt.cu.

Function Documentation

__device__ void agg_approximate_count_distinct_gpu ( int64_t *  agg,
const int64_t  key,
const uint32_t  b,
const int64_t  base_dev_addr,
const int64_t  base_host_addr 
)

Definition at line 1261 of file cuda_mapd_rt.cu.

References atomicMax(), get_rank(), and MurmurHash64A().

1266  {
1267  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
1268  const uint32_t index = hash >> (64 - b);
1269  const int32_t rank = get_rank(hash << b, 64 - b);
1270  const int64_t host_addr = *agg;
1271  int32_t* M = (int32_t*)(base_dev_addr + host_addr - base_host_addr);
1272  atomicMax(&M[index], rank);
1273 }
FORCE_INLINE uint8_t get_rank(uint64_t x, uint32_t b)
RUNTIME_EXPORT NEVER_INLINE DEVICE uint64_t MurmurHash64A(const void *key, int len, uint64_t seed)
Definition: MurmurHash.cpp:26
__device__ double atomicMax(double *address, double val)

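A minimal host-side sketch of the same index/rank split, with a stand-in hash value and a simplified rank helper (the real code uses MurmurHash64A from MurmurHash.cpp and get_rank from HyperLogLogRank.h): the top b bits of the 64-bit hash select a HyperLogLog register, and that register is raised to the leading-zero rank of the remaining bits via atomicMax.

#include <cstdint>
#include <cstdio>

// Simplified stand-in for get_rank(): 1-based position of the first set bit,
// capped at width + 1 when no bit is set.
static uint8_t rank_of(uint64_t bits, uint32_t width) {
  uint8_t r = 1;
  while (r <= width && !(bits & (1ULL << 63))) {
    bits <<= 1;
    ++r;
  }
  return r;
}

int main() {
  const uint32_t b = 11;                        // log2 of the register count (illustrative)
  const uint64_t hash = 0x9e3779b97f4a7c15ULL;  // stand-in for MurmurHash64A(&key, sizeof(key), 0)
  const uint32_t index = hash >> (64 - b);      // top b bits pick the register
  const uint8_t rank = rank_of(hash << b, 64 - b);
  printf("M[%u] = max(M[%u], %u)\n", index, index, rank);
  return 0;
}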

__device__ void agg_count_distinct_bitmap_gpu ( int64_t *  agg,
const int64_t  val,
const int64_t  min_val,
const int64_t  base_dev_addr,
const int64_t  base_host_addr,
const uint64_t  sub_bitmap_count,
const uint64_t  bitmap_bytes 
)

Definition at line 1214 of file cuda_mapd_rt.cu.

Referenced by agg_count_distinct_bitmap_skip_val_gpu().

1220  {
1221  const uint64_t bitmap_idx = val - min_val;
1222  const uint32_t byte_idx = bitmap_idx >> 3;
1223  const uint32_t word_idx = byte_idx >> 2;
1224  const uint32_t byte_word_idx = byte_idx & 3;
1225  const int64_t host_addr = *agg;
1226  uint32_t* bitmap = (uint32_t*)(base_dev_addr + host_addr - base_host_addr +
1227  (threadIdx.x & (sub_bitmap_count - 1)) * bitmap_bytes);
1228  switch (byte_word_idx) {
1229  case 0:
1230  atomicOr(&bitmap[word_idx], 1 << (bitmap_idx & 7));
1231  break;
1232  case 1:
1233  atomicOr(&bitmap[word_idx], 1 << ((bitmap_idx & 7) + 8));
1234  break;
1235  case 2:
1236  atomicOr(&bitmap[word_idx], 1 << ((bitmap_idx & 7) + 16));
1237  break;
1238  case 3:
1239  atomicOr(&bitmap[word_idx], 1 << ((bitmap_idx & 7) + 24));
1240  break;
1241  default:
1242  break;
1243  }
1244 }

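A minimal host-side sketch of the addressing arithmetic above (illustrative values only): each distinct value maps to one bit of the bitmap, and the bit is set through a 32-bit atomicOr on the containing word, so the byte index is split into a word index plus a byte-within-word offset.

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t min_val = 100;
  const int64_t val = 173;                      // example value to record
  const uint64_t bitmap_idx = val - min_val;    // bit position within the bitmap
  const uint32_t byte_idx = bitmap_idx >> 3;    // which byte holds the bit
  const uint32_t word_idx = byte_idx >> 2;      // which 32-bit word holds that byte
  const uint32_t byte_word_idx = byte_idx & 3;  // byte position inside the word
  const uint32_t bit_in_word = (bitmap_idx & 7) + 8 * byte_word_idx;
  printf("bit %llu -> word %u, mask 0x%08x\n",
         (unsigned long long)bitmap_idx, word_idx, 1u << bit_in_word);
  return 0;
}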

__device__ void agg_count_distinct_bitmap_skip_val_gpu ( int64_t *  agg,
const int64_t  val,
const int64_t  min_val,
const int64_t  skip_val,
const int64_t  base_dev_addr,
const int64_t  base_host_addr,
const uint64_t  sub_bitmap_count,
const uint64_t  bitmap_bytes 
)

Definition at line 1246 of file cuda_mapd_rt.cu.

References agg_count_distinct_bitmap_gpu().

1254  {
1255  if (val != skip_val) {
1256  agg_count_distinct_bitmap_gpu(
1257  agg, val, min_val, base_dev_addr, base_host_addr, sub_bitmap_count, bitmap_bytes);
1258  }
1259 }
__device__ void agg_count_distinct_bitmap_gpu(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)


__device__ uint64_t agg_count_double_shared ( uint64_t *  agg,
const double  val 
)

Definition at line 439 of file cuda_mapd_rt.cu.

References agg_count_shared().

439  {
440  return agg_count_shared(agg, val);
441 }
__device__ uint64_t agg_count_shared(uint64_t *agg, const int64_t val)


__device__ uint32_t agg_count_float_shared ( uint32_t *  agg,
const float  val 
)

Definition at line 443 of file cuda_mapd_rt.cu.

References agg_count_int32_shared().

443  {
444  return agg_count_int32_shared(agg, val);
445 }
__device__ uint32_t agg_count_int32_shared(uint32_t *agg, const int32_t val)


__device__ uint32_t agg_count_int32_shared ( uint32_t *  agg,
const int32_t  val 
)

Definition at line 435 of file cuda_mapd_rt.cu.

Referenced by agg_count_float_shared().

435  {
436  return atomicAdd(agg, 1UL);
437 }


__device__ uint64_t agg_count_shared ( uint64_t *  agg,
const int64_t  val 
)

Definition at line 431 of file cuda_mapd_rt.cu.

Referenced by agg_count_double_shared().

431  {
432  return static_cast<uint64_t>(atomicAdd(reinterpret_cast<uint32_t*>(agg), 1UL));
433 }


__device__ void agg_id_double_shared ( int64_t *  agg,
const double  val 
)

Definition at line 720 of file cuda_mapd_rt.cu.

720  {
721  *agg = *(reinterpret_cast<const int64_t*>(&val));
722 }
__device__ void agg_id_double_shared_slow ( int64_t *  agg,
const double *  val 
)

Definition at line 751 of file cuda_mapd_rt.cu.

751  {
752  *agg = *(reinterpret_cast<const int64_t*>(val));
753 }
__device__ void agg_id_float_shared ( int32_t *  agg,
const float  val 
)

Definition at line 784 of file cuda_mapd_rt.cu.

784  {
785  *agg = __float_as_int(val);
786 }
__device__ void agg_id_shared ( int64_t *  agg,
const int64_t  val 
)

Definition at line 677 of file cuda_mapd_rt.cu.

677  {
678  *agg = val;
679 }
__device__ void agg_max_double_shared ( int64_t *  agg,
const double  val 
)

Definition at line 471 of file cuda_mapd_rt.cu.

References atomicMax().

471  {
472  atomicMax(reinterpret_cast<double*>(agg), val);
473 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_double_skip_val_shared ( int64_t *  agg,
const double  val,
const double  skip_val 
)

Definition at line 1090 of file cuda_mapd_rt.cu.

References atomicMax().

1092  {
1093  if (__double_as_longlong(val) != __double_as_longlong(skip_val)) {
1094  double old = __longlong_as_double(atomicExch(
1095  reinterpret_cast<unsigned long long int*>(agg), __double_as_longlong(-DBL_MAX)));
1096  atomicMax(reinterpret_cast<double*>(agg),
1097  __double_as_longlong(old) == __double_as_longlong(skip_val)
1098  ? val
1099  : fmax(old, val));
1100  }
1101 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_float_shared ( int32_t *  agg,
const float  val 
)

Definition at line 475 of file cuda_mapd_rt.cu.

References atomicMax().

475  {
476  atomicMax(reinterpret_cast<float*>(agg), val);
477 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_float_skip_val_shared ( int32_t *  agg,
const float  val,
const float  skip_val 
)

Definition at line 1002 of file cuda_mapd_rt.cu.

References atomicMax().

1004  {
1005  if (__float_as_int(val) != __float_as_int(skip_val)) {
1006  float old = atomicExch(reinterpret_cast<float*>(agg), -FLT_MAX);
1007  atomicMax(reinterpret_cast<float*>(agg),
1008  __float_as_int(old) == __float_as_int(skip_val) ? val : fmaxf(old, val));
1009  }
1010 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_int16_shared ( int16_t *  agg,
const int16_t  val 
)

Definition at line 653 of file cuda_mapd_rt.cu.

References atomicMax16().

Referenced by agg_max_int16_skip_val_shared().

653  {
654  return atomicMax16(agg, val);
655 }
__device__ void atomicMax16(int16_t *agg, const int16_t val)


__device__ void agg_max_int16_skip_val_shared ( int16_t *  agg,
const int16_t  val,
const int16_t  skip_val 
)

Definition at line 845 of file cuda_mapd_rt.cu.

References agg_max_int16_shared().

847  {
848  if (val != skip_val) {
849  agg_max_int16_shared(agg, val);
850  }
851 }
__device__ void agg_max_int16_shared(int16_t *agg, const int16_t val)


__device__ void agg_max_int32_shared ( int32_t *  agg,
const int32_t  val 
)

Definition at line 467 of file cuda_mapd_rt.cu.

References atomicMax().

Referenced by agg_max_int32_skip_val_shared().

467  {
468  atomicMax(agg, val);
469 }
__device__ double atomicMax(double *address, double val)


__device__ void agg_max_int32_skip_val_shared ( int32_t *  agg,
const int32_t  val,
const int32_t  skip_val 
)

Definition at line 837 of file cuda_mapd_rt.cu.

References agg_max_int32_shared().

839  {
840  if (val != skip_val) {
841  agg_max_int32_shared(agg, val);
842  }
843 }
__device__ void agg_max_int32_shared(int32_t *agg, const int32_t val)


__device__ void agg_max_int8_shared ( int8_t *  agg,
const int8_t  val 
)

Definition at line 657 of file cuda_mapd_rt.cu.

References atomicMax8().

Referenced by agg_max_int8_skip_val_shared().

657  {
658  return atomicMax8(agg, val);
659 }
__device__ void atomicMax8(int8_t *agg, const int8_t val)


__device__ void agg_max_int8_skip_val_shared ( int8_t *  agg,
const int8_t  val,
const int8_t  skip_val 
)

Definition at line 861 of file cuda_mapd_rt.cu.

References agg_max_int8_shared().

863  {
864  if (val != skip_val) {
865  agg_max_int8_shared(agg, val);
866  }
867 }
__device__ void agg_max_int8_shared(int8_t *agg, const int8_t val)


__device__ void agg_max_shared ( int64_t *  agg,
const int64_t  val 
)

Definition at line 463 of file cuda_mapd_rt.cu.

References atomicMax64().

463  {
464  atomicMax64(agg, val);
465 }
__device__ int64_t atomicMax64(int64_t *address, int64_t val)


__device__ void agg_max_skip_val_shared ( int64_t *  agg,
const int64_t  val,
const int64_t  skip_val 
)

Definition at line 971 of file cuda_mapd_rt.cu.

References atomicMax64SkipVal().

973  {
974  if (val != skip_val) {
975  atomicMax64SkipVal(agg, val, skip_val);
976  }
977 }
__device__ int64_t atomicMax64SkipVal(int64_t *address, int64_t val, const int64_t skip_val)


__device__ void agg_min_double_shared ( int64_t *  agg,
const double  val 
)

Definition at line 669 of file cuda_mapd_rt.cu.

References atomicMin().

669  {
670  atomicMin(reinterpret_cast<double*>(agg), val);
671 }
__device__ double atomicMin(double *address, double val)


__device__ void agg_min_double_skip_val_shared ( int64_t *  agg,
const double  val,
const double  skip_val 
)

Definition at line 1082 of file cuda_mapd_rt.cu.

References atomicMinDblSkipVal().

1084  {
1085  if (val != skip_val) {
1086  atomicMinDblSkipVal(reinterpret_cast<double*>(agg), val, skip_val);
1087  }
1088 }
__device__ double atomicMinDblSkipVal(double *address, double val, const double skip_val)


__device__ void agg_min_float_shared ( int32_t *  agg,
const float  val 
)

Definition at line 673 of file cuda_mapd_rt.cu.

References atomicMin().

673  {
674  atomicMin(reinterpret_cast<float*>(agg), val);
675 }
__device__ double atomicMin(double *address, double val)


__device__ void agg_min_float_skip_val_shared ( int32_t *  agg,
const float  val,
const float  skip_val 
)

Definition at line 1019 of file cuda_mapd_rt.cu.

References atomicMinFltSkipVal().

1021  {
1022  if (__float_as_int(val) != __float_as_int(skip_val)) {
1023  atomicMinFltSkipVal(agg, val, skip_val);
1024  }
1025 }
__device__ float atomicMinFltSkipVal(int32_t *address, float val, const float skip_val)


__device__ void agg_min_int16_shared ( int16_t *  agg,
const int16_t  val 
)

Definition at line 661 of file cuda_mapd_rt.cu.

References atomicMin16().

661  {
662  return atomicMin16(agg, val);
663 }
__device__ void atomicMin16(int16_t *agg, const int16_t val)


__device__ void agg_min_int16_skip_val_shared ( int16_t *  agg,
const int16_t  val,
const int16_t  skip_val 
)

Definition at line 853 of file cuda_mapd_rt.cu.

References atomicMin16SkipVal().

855  {
856  if (val != skip_val) {
857  atomicMin16SkipVal(agg, val, skip_val);
858  }
859 }
__device__ void atomicMin16SkipVal(int16_t *agg, const int16_t val, const int16_t skip_val)


__device__ void agg_min_int32_shared ( int32_t *  agg,
const int32_t  val 
)

Definition at line 483 of file cuda_mapd_rt.cu.

References atomicMin().

483  {
484  atomicMin(agg, val);
485 }
__device__ double atomicMin(double *address, double val)


__device__ void agg_min_int32_skip_val_shared ( int32_t *  agg,
const int32_t  val,
const int32_t  skip_val 
)

Definition at line 884 of file cuda_mapd_rt.cu.

References atomicMin32SkipVal().

886  {
887  if (val != skip_val) {
888  atomicMin32SkipVal(agg, val, skip_val);
889  }
890 }
__device__ int32_t atomicMin32SkipVal(int32_t *address, int32_t val, const int32_t skip_val)


__device__ void agg_min_int8_shared ( int8_t *  agg,
const int8_t  val 
)

Definition at line 665 of file cuda_mapd_rt.cu.

References atomicMin8().

665  {
666  return atomicMin8(agg, val);
667 }
__device__ void atomicMin8(int8_t *agg, const int8_t val)


__device__ void agg_min_int8_skip_val_shared ( int8_t *  agg,
const int8_t  val,
const int8_t  skip_val 
)

Definition at line 869 of file cuda_mapd_rt.cu.

References atomicMin8SkipVal().

871  {
872  if (val != skip_val) {
873  atomicMin8SkipVal(agg, val, skip_val);
874  }
875 }
__device__ void atomicMin8SkipVal(int8_t *agg, const int8_t val, const int8_t skip_val)


__device__ void agg_min_shared ( int64_t *  agg,
const int64_t  val 
)

Definition at line 479 of file cuda_mapd_rt.cu.

References atomicMin64().

479  {
480  atomicMin64(agg, val);
481 }
__device__ int64_t atomicMin64(int64_t *address, int64_t val)


__device__ void agg_min_skip_val_shared ( int64_t *  agg,
const int64_t  val,
const int64_t  skip_val 
)

Definition at line 946 of file cuda_mapd_rt.cu.

References atomicMin64SkipVal().

948  {
949  if (val != skip_val) {
950  atomicMin64SkipVal(agg, val, skip_val);
951  }
952 }
__device__ int64_t atomicMin64SkipVal(int64_t *address, int64_t val, const int64_t skip_val)


__device__ void agg_sum_double_shared ( int64_t *  agg,
const double  val 
)

Definition at line 459 of file cuda_mapd_rt.cu.

459  {
460  atomicAdd(reinterpret_cast<double*>(agg), val);
461 }
__device__ void agg_sum_double_skip_val_shared ( int64_t *  agg,
const double  val,
const double  skip_val 
)

Definition at line 1052 of file cuda_mapd_rt.cu.

References atomicSumDblSkipVal().

1054  {
1055  if (__double_as_longlong(val) != __double_as_longlong(skip_val)) {
1056  atomicSumDblSkipVal(reinterpret_cast<double*>(agg), val, skip_val);
1057  }
1058 }
__device__ void atomicSumDblSkipVal(double *address, const double val, const double skip_val)


__device__ void agg_sum_float_shared ( int32_t *  agg,
const float  val 
)

Definition at line 455 of file cuda_mapd_rt.cu.

455  {
456  atomicAdd(reinterpret_cast<float*>(agg), val);
457 }
__device__ void agg_sum_float_skip_val_shared ( int32_t *  agg,
const float  val,
const float  skip_val 
)

Definition at line 1034 of file cuda_mapd_rt.cu.

References atomicSumFltSkipVal().

1036  {
1037  if (__float_as_int(val) != __float_as_int(skip_val)) {
1038  atomicSumFltSkipVal(reinterpret_cast<float*>(agg), val, skip_val);
1039  }
1040 }
__device__ void atomicSumFltSkipVal(float *address, const float val, const float skip_val)


__device__ int32_t agg_sum_int32_shared ( int32_t *  agg,
const int32_t  val 
)

Definition at line 451 of file cuda_mapd_rt.cu.

451  {
452  return atomicAdd(agg, val);
453 }
__device__ int32_t agg_sum_int32_skip_val_shared ( int32_t *  agg,
const int32_t  val,
const int32_t  skip_val 
)

Definition at line 901 of file cuda_mapd_rt.cu.

References atomicSum32SkipVal().

903  {
904  if (val != skip_val) {
905  const int32_t old = atomicSum32SkipVal(agg, val, skip_val);
906  return old;
907  }
908  return 0;
909 }
__device__ int32_t atomicSum32SkipVal(int32_t *address, const int32_t val, const int32_t skip_val)


__device__ int64_t agg_sum_shared ( int64_t *  agg,
const int64_t  val 
)

Definition at line 447 of file cuda_mapd_rt.cu.

Referenced by write_back_non_grouped_agg().

447  {
448  return atomicAdd(reinterpret_cast<unsigned long long*>(agg), val);
449 }


__device__ int64_t agg_sum_skip_val_shared ( int64_t *  agg,
const int64_t  val,
const int64_t  skip_val 
)

Definition at line 920 of file cuda_mapd_rt.cu.

References atomicSum64SkipVal().

922  {
923  if (val != skip_val) {
924  return atomicSum64SkipVal(agg, val, skip_val);
925  }
926  return 0;
927 }
__device__ int64_t atomicSum64SkipVal(int64_t *address, const int64_t val, const int64_t skip_val)


__device__ double atomicMax ( double *  address,
double  val 
)

Definition at line 373 of file cuda_mapd_rt.cu.

Referenced by agg_approximate_count_distinct_gpu(), agg_max_double_shared(), agg_max_double_skip_val_shared(), agg_max_float_shared(), agg_max_float_skip_val_shared(), agg_max_int32_shared(), and approximate_distinct_tuples_impl().

373  {
374  unsigned long long int* address_as_ull = (unsigned long long int*)address;
375  unsigned long long int old = *address_as_ull, assumed;
376 
377  do {
378  assumed = old;
379  old = atomicCAS(address_as_ull,
380  assumed,
381  __double_as_longlong(max(val, __longlong_as_double(assumed))));
382 
383  // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
384  } while (assumed != old);
385 
386  return __longlong_as_double(old);
387 }

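CUDA has no native atomicMax for double, so the function above emulates it with an atomicCAS loop on the 64-bit bit pattern. A minimal, self-contained sketch of the same pattern used to reduce an array to its maximum (kernel and names are illustrative, not part of the runtime; compile with nvcc):

#include <cfloat>
#include <cstdio>

__device__ double atomic_max_double(double* address, double val) {
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_ull,
                    assumed,
                    __double_as_longlong(max(val, __longlong_as_double(assumed))));
  } while (assumed != old);  // retry until no other thread changed the slot
  return __longlong_as_double(old);
}

__global__ void max_reduce(const double* in, int n, double* out) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    atomic_max_double(out, in[i]);
  }
}

int main() {
  double h_in[4] = {1.5, -2.0, 7.25, 3.0};
  double h_out = -DBL_MAX;
  double *d_in, *d_out;
  cudaMalloc(&d_in, sizeof(h_in));
  cudaMalloc(&d_out, sizeof(double));
  cudaMemcpy(d_in, h_in, sizeof(h_in), cudaMemcpyHostToDevice);
  cudaMemcpy(d_out, &h_out, sizeof(double), cudaMemcpyHostToDevice);
  max_reduce<<<1, 4>>>(d_in, 4, d_out);
  cudaMemcpy(&h_out, d_out, sizeof(double), cudaMemcpyDeviceToHost);
  printf("max = %f\n", h_out);  // expected 7.25
  cudaFree(d_in);
  cudaFree(d_out);
  return 0;
}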

__device__ float atomicMax ( float *  address,
float  val 
)

Definition at line 389 of file cuda_mapd_rt.cu.

389  {
390  int* address_as_int = (int*)address;
391  int old = *address_as_int, assumed;
392 
393  do {
394  assumed = old;
395  old = atomicCAS(
396  address_as_int, assumed, __float_as_int(max(val, __int_as_float(assumed))));
397 
398  // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
399  } while (assumed != old);
400 
401  return __int_as_float(old);
402 }
__device__ void atomicMax16 ( int16_t *  agg,
const int16_t  val 
)

Definition at line 501 of file cuda_mapd_rt.cu.

Referenced by agg_max_int16_shared().

501  {
502  // properly align the input pointer:
503  unsigned int* base_address_u32 =
504  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
505 
506  unsigned int old_value = *base_address_u32;
507  unsigned int swap_value, compare_value;
508  do {
509  compare_value = old_value;
510  swap_value =
511  (reinterpret_cast<size_t>(agg) & 0x2)
512  ? static_cast<unsigned int>(max(static_cast<int16_t>(old_value >> 16), val))
513  << 16 |
514  (old_value & 0xFFFF)
515  : (old_value & 0xFFFF0000) |
516  static_cast<unsigned int>(
517  max(static_cast<int16_t>(old_value & 0xFFFF), val));
518  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
519  } while (old_value != compare_value);
520 }

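16-bit atomics are emulated by doing a CAS on the aligned 32-bit word that contains the target half-word. A host-side sketch of the splice performed on each loop iteration (function name and values are illustrative):

#include <cstdint>
#include <cstdio>

// Build the new 32-bit word with max(current, val) written into the high or low
// 16-bit lane, leaving the other lane untouched.
static uint32_t splice_max16(uint32_t word, int16_t val, bool high_half) {
  const int16_t cur = high_half ? static_cast<int16_t>(word >> 16)
                                : static_cast<int16_t>(word & 0xFFFF);
  const uint16_t new_lane = static_cast<uint16_t>(cur > val ? cur : val);
  return high_half ? (static_cast<uint32_t>(new_lane) << 16) | (word & 0xFFFFu)
                   : (word & 0xFFFF0000u) | new_lane;
}

int main() {
  const uint32_t word = 0x0005FFF0u;                       // high lane = 5, low lane = -16
  printf("high: 0x%08x\n", splice_max16(word, 7, true));   // 0x0007FFF0
  printf("low:  0x%08x\n", splice_max16(word, 7, false));  // 0x00050007
  return 0;
}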

__device__ int64_t atomicMax64 ( int64_t *  address,
int64_t  val 
)

Definition at line 331 of file cuda_mapd_rt.cu.

Referenced by agg_max_shared().

331  {
332  unsigned long long int* address_as_ull = (unsigned long long int*)address;
333  unsigned long long int old = *address_as_ull, assumed;
334 
335  do {
336  assumed = old;
337  old = atomicCAS(address_as_ull, assumed, max((long long)val, (long long)assumed));
338  } while (assumed != old);
339 
340  return old;
341 }


__device__ int64_t atomicMax64SkipVal ( int64_t *  address,
int64_t  val,
const int64_t  skip_val 
)

Definition at line 954 of file cuda_mapd_rt.cu.

Referenced by agg_max_skip_val_shared().

956  {
957  unsigned long long int* address_as_ull =
958  reinterpret_cast<unsigned long long int*>(address);
959  unsigned long long int old = *address_as_ull, assumed;
960 
961  do {
962  assumed = old;
963  old = atomicCAS(address_as_ull,
964  assumed,
965  assumed == skip_val ? val : max((long long)val, (long long)assumed));
966  } while (assumed != old);
967 
968  return old;
969 }


__device__ void atomicMax8 ( int8_t *  agg,
const int8_t  val 
)

Definition at line 523 of file cuda_mapd_rt.cu.

Referenced by agg_max_int8_shared().

523  {
524  // properly align the input pointer:
525  unsigned int* base_address_u32 =
526  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
527 
528  // __byte_perm(unsigned int A, unsigned int B, unsigned int s):
529  // if s == 0x3214 returns {A[31..24], A[23..16], A[15..8], B[7..0]}
530  // if s == 0x3240 returns {A[31..24], A[23..16], B[7...0], A[7..0]}
531  // if s == 0x3410 returns {A[31..24], B[7....0], A[15..8], A[7..0]}
532  // if s == 0x4210 returns {B[7....0], A[23..16], A[15..8], A[7..0]}
533  constexpr unsigned int byte_permutations[] = {0x3214, 0x3240, 0x3410, 0x4210};
534  unsigned int old_value = *base_address_u32;
535  unsigned int swap_value, compare_value;
536  do {
537  compare_value = old_value;
538  auto max_value = static_cast<unsigned int>(
539  // compare val with its corresponding bits in the compare_value
540  max(val,
541  static_cast<int8_t>(__byte_perm(
542  compare_value, 0, (reinterpret_cast<size_t>(agg) & 0x3) | 0x4440))));
543  swap_value = __byte_perm(
544  compare_value, max_value, byte_permutations[reinterpret_cast<size_t>(agg) & 0x3]);
545  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
546  } while (compare_value != old_value);
547 }


__device__ double atomicMin ( double *  address,
double  val 
)

Definition at line 404 of file cuda_mapd_rt.cu.

Referenced by agg_min_double_shared(), agg_min_float_shared(), agg_min_int32_shared(), atomicMin32SkipVal(), atomicMinFltSkipVal(), and compute_bucket_sizes_impl().

404  {
405  unsigned long long int* address_as_ull = (unsigned long long int*)address;
406  unsigned long long int old = *address_as_ull, assumed;
407 
408  do {
409  assumed = old;
410  old = atomicCAS(address_as_ull,
411  assumed,
412  __double_as_longlong(min(val, __longlong_as_double(assumed))));
413  } while (assumed != old);
414 
415  return __longlong_as_double(old);
416 }


__device__ double atomicMin ( float *  address,
float  val 
)

Definition at line 418 of file cuda_mapd_rt.cu.

418  {
419  int* address_as_ull = (int*)address;
420  int old = *address_as_ull, assumed;
421 
422  do {
423  assumed = old;
424  old = atomicCAS(
425  address_as_ull, assumed, __float_as_int(min(val, __int_as_float(assumed))));
426  } while (assumed != old);
427 
428  return __int_as_float(old);
429 }
__device__ void atomicMin16 ( int16_t *  agg,
const int16_t  val 
)

Definition at line 563 of file cuda_mapd_rt.cu.

Referenced by agg_min_int16_shared().

563  {
564  // properly align the input pointer:
565  unsigned int* base_address_u32 =
566  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
567 
568  unsigned int old_value = *base_address_u32;
569  unsigned int swap_value, compare_value;
570  do {
571  compare_value = old_value;
572  swap_value =
573  (reinterpret_cast<size_t>(agg) & 0x2)
574  ? static_cast<unsigned int>(min(static_cast<int16_t>(old_value >> 16), val))
575  << 16 |
576  (old_value & 0xFFFF)
577  : (old_value & 0xFFFF0000) |
578  static_cast<unsigned int>(
579  min(static_cast<int16_t>(old_value & 0xFFFF), val));
580  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
581  } while (old_value != compare_value);
582 }


__device__ void atomicMin16SkipVal ( int16_t *  agg,
const int16_t  val,
const int16_t  skip_val 
)

Definition at line 585 of file cuda_mapd_rt.cu.

Referenced by agg_min_int16_skip_val_shared().

587  {
588  // properly align the input pointer:
589  unsigned int* base_address_u32 =
590  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
591 
592  unsigned int old_value = *base_address_u32;
593  unsigned int swap_value, compare_value;
594  do {
595  compare_value = old_value;
596  int16_t selected_old_val = (reinterpret_cast<size_t>(agg) & 0x2)
597  ? static_cast<int16_t>(old_value >> 16)
598  : static_cast<int16_t>(old_value & 0xFFFF);
599 
600  swap_value =
601  (reinterpret_cast<size_t>(agg) & 0x2)
602  ? static_cast<unsigned int>(
603  selected_old_val == skip_val ? val : min(selected_old_val, val))
604  << 16 |
605  (old_value & 0xFFFF)
606  : (old_value & 0xFFFF0000) |
607  static_cast<unsigned int>(
608  selected_old_val == skip_val ? val : min(selected_old_val, val));
609  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
610  } while (old_value != compare_value);
611 }


__device__ int32_t atomicMin32SkipVal ( int32_t *  address,
int32_t  val,
const int32_t  skip_val 
)

Definition at line 877 of file cuda_mapd_rt.cu.

References atomicMin().

Referenced by agg_min_int32_skip_val_shared().

879  {
880  int32_t old = atomicExch(address, INT_MAX);
881  return atomicMin(address, old == skip_val ? val : min(old, val));
882 }
__device__ double atomicMin(double *address, double val)


__device__ int64_t atomicMin64 ( int64_t *  address,
int64_t  val 
)

Definition at line 343 of file cuda_mapd_rt.cu.

Referenced by agg_min_shared().

343  {
344  unsigned long long int* address_as_ull = (unsigned long long int*)address;
345  unsigned long long int old = *address_as_ull, assumed;
346 
347  do {
348  assumed = old;
349  old = atomicCAS(address_as_ull, assumed, min((long long)val, (long long)assumed));
350  } while (assumed != old);
351 
352  return old;
353 }


__device__ int64_t atomicMin64SkipVal ( int64_t *  address,
int64_t  val,
const int64_t  skip_val 
)

Definition at line 929 of file cuda_mapd_rt.cu.

Referenced by agg_min_skip_val_shared().

931  {
932  unsigned long long int* address_as_ull =
933  reinterpret_cast<unsigned long long int*>(address);
934  unsigned long long int old = *address_as_ull, assumed;
935 
936  do {
937  assumed = old;
938  old = atomicCAS(address_as_ull,
939  assumed,
940  assumed == skip_val ? val : min((long long)val, (long long)assumed));
941  } while (assumed != old);
942 
943  return old;
944 }


__device__ void atomicMin8 ( int8_t *  agg,
const int8_t  val 
)

Definition at line 613 of file cuda_mapd_rt.cu.

Referenced by agg_min_int8_shared().

613  {
614  // properly align the input pointer:
615  unsigned int* base_address_u32 =
616  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
617 
618  constexpr unsigned int byte_permutations[] = {0x3214, 0x3240, 0x3410, 0x4210};
619  unsigned int old_value = *base_address_u32;
620  unsigned int swap_value, compare_value;
621  do {
622  compare_value = old_value;
623  auto min_value = static_cast<unsigned int>(
624  min(val,
625  static_cast<int8_t>(__byte_perm(
626  compare_value, 0, (reinterpret_cast<size_t>(agg) & 0x3) | 0x4440))));
627  swap_value = __byte_perm(
628  compare_value, min_value, byte_permutations[reinterpret_cast<size_t>(agg) & 0x3]);
629  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
630  } while (compare_value != old_value);
631 }


__device__ void atomicMin8SkipVal ( int8_t *  agg,
const int8_t  val,
const int8_t  skip_val 
)

Definition at line 633 of file cuda_mapd_rt.cu.

Referenced by agg_min_int8_skip_val_shared().

633  {
634  // properly align the input pointer:
635  unsigned int* base_address_u32 =
636  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(agg) & ~0x3);
637 
638  constexpr unsigned int byte_permutations[] = {0x3214, 0x3240, 0x3410, 0x4210};
639  unsigned int old_value = *base_address_u32;
640  unsigned int swap_value, compare_value;
641  do {
642  compare_value = old_value;
643  int8_t selected_old_val = static_cast<int8_t>(
644  __byte_perm(compare_value, 0, (reinterpret_cast<size_t>(agg) & 0x3) | 0x4440));
645  auto min_value = static_cast<unsigned int>(
646  selected_old_val == skip_val ? val : min(val, selected_old_val));
647  swap_value = __byte_perm(
648  compare_value, min_value, byte_permutations[reinterpret_cast<size_t>(agg) & 0x3]);
649  old_value = atomicCAS(base_address_u32, compare_value, swap_value);
650  } while (compare_value != old_value);
651 }


__device__ double atomicMinDblSkipVal ( double *  address,
double  val,
const double  skip_val 
)

Definition at line 1060 of file cuda_mapd_rt.cu.

Referenced by agg_min_double_skip_val_shared().

1062  {
1063  unsigned long long int* address_as_ull =
1064  reinterpret_cast<unsigned long long int*>(address);
1065  unsigned long long int old = *address_as_ull;
1066  unsigned long long int skip_val_as_ull =
1067  *reinterpret_cast<const unsigned long long*>(&skip_val);
1068  unsigned long long int assumed;
1069 
1070  do {
1071  assumed = old;
1072  old = atomicCAS(address_as_ull,
1073  assumed,
1074  assumed == skip_val_as_ull
1075  ? *reinterpret_cast<unsigned long long*>(&val)
1076  : __double_as_longlong(min(val, __longlong_as_double(assumed))));
1077  } while (assumed != old);
1078 
1079  return __longlong_as_double(old);
1080 }


__device__ float atomicMinFltSkipVal ( int32_t *  address,
float  val,
const float  skip_val 
)

Definition at line 1012 of file cuda_mapd_rt.cu.

References atomicMin().

Referenced by agg_min_float_skip_val_shared().

1012  {
1013  float old = atomicExch(reinterpret_cast<float*>(address), FLT_MAX);
1014  return atomicMin(
1015  reinterpret_cast<float*>(address),
1016  __float_as_int(old) == __float_as_int(skip_val) ? val : fminf(old, val));
1017 }
__device__ double atomicMin(double *address, double val)


__device__ int32_t atomicSum32SkipVal ( int32_t *  address,
const int32_t  val,
const int32_t  skip_val 
)

Definition at line 892 of file cuda_mapd_rt.cu.

Referenced by agg_sum_int32_skip_val_shared().

894  {
895  unsigned int* address_as_int = (unsigned int*)address;
896  int32_t old = atomicExch(address_as_int, 0);
897  int32_t old2 = atomicAdd(address_as_int, old == skip_val ? val : (val + old));
898  return old == skip_val ? old2 : (old2 + old);
899 }

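The skip-value sum works by exchanging the slot with 0 and then adding back either val (if the old content was the skip value, i.e. the slot was still untouched) or val + old; the device version performs the two steps with atomicExch and atomicAdd. A sequential sketch of the intended net effect (illustrative only):

#include <cstdint>
#include <cstdio>

// Sequential model of atomicSum32SkipVal: updates the accumulator in place and
// returns the previous accumulated value, with skip_val treated as "empty".
static int32_t sum32_skip_val(int32_t* address, int32_t val, int32_t skip_val) {
  const int32_t old = *address;
  *address = (old == skip_val) ? val : old + val;
  return (old == skip_val) ? 0 : old;
}

int main() {
  int32_t acc = INT32_MIN;  // suppose skip_val marks an untouched slot
  sum32_skip_val(&acc, 5, INT32_MIN);
  sum32_skip_val(&acc, 7, INT32_MIN);
  printf("acc = %d\n", acc);  // 12
  return 0;
}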

__device__ int64_t atomicSum64SkipVal ( int64_t *  address,
const int64_t  val,
const int64_t  skip_val 
)

Definition at line 911 of file cuda_mapd_rt.cu.

Referenced by agg_sum_skip_val_shared().

913  {
914  unsigned long long int* address_as_ull = (unsigned long long int*)address;
915  int64_t old = atomicExch(address_as_ull, 0);
916  int64_t old2 = atomicAdd(address_as_ull, old == skip_val ? val : (val + old));
917  return old == skip_val ? old2 : (old2 + old);
918 }


__device__ void atomicSumDblSkipVal ( double *  address,
const double  val,
const double  skip_val 
)

Definition at line 1042 of file cuda_mapd_rt.cu.

Referenced by agg_sum_double_skip_val_shared().

1044  {
1045  unsigned long long int* address_as_ull = (unsigned long long int*)address;
1046  double old = __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(0.)));
1047  atomicAdd(
1048  address,
1049  __double_as_longlong(old) == __double_as_longlong(skip_val) ? val : (val + old));
1050 }


__device__ void atomicSumFltSkipVal ( float *  address,
const float  val,
const float  skip_val 
)

Definition at line 1027 of file cuda_mapd_rt.cu.


Referenced by agg_sum_float_skip_val_shared().

1029  {
1030  float old = atomicExch(address, 0.f);
1031  atomicAdd(address, __float_as_int(old) == __float_as_int(skip_val) ? val : (val + old));
1032 }


__device__ int32_t checked_single_agg_id_double_shared ( int64_t *  agg,
const double  val,
const double  null_val 
)

Definition at line 724 of file cuda_mapd_rt.cu.

726  {
727  unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(agg);
728  unsigned long long int old = *address_as_ull, assumed;
729 
730  if (val == null_val) {
731  return 0;
732  }
733 
734  do {
735  if (static_cast<int64_t>(old) != __double_as_longlong(null_val)) {
736  if (static_cast<int64_t>(old) != __double_as_longlong(val)) {
737  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
738  return 15;
739  } else {
740  break;
741  }
742  }
743 
744  assumed = old;
745  old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val));
746  } while (assumed != old);
747 
748  return 0;
749 }
__device__ int32_t checked_single_agg_id_double_shared_slow ( int64_t *  agg,
const double *  valp,
const double  null_val 
)

Definition at line 756 of file cuda_mapd_rt.cu.

758  {
759  unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(agg);
760  unsigned long long int old = *address_as_ull, assumed;
761  double val = *valp;
762 
763  if (val == null_val) {
764  return 0;
765  }
766 
767  do {
768  if (static_cast<int64_t>(old) != __double_as_longlong(null_val)) {
769  if (static_cast<int64_t>(old) != __double_as_longlong(val)) {
770  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
771  return 15;
772  } else {
773  break;
774  }
775  }
776 
777  assumed = old;
778  old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val));
779  } while (assumed != old);
780 
781  return 0;
782 }
__device__ int32_t checked_single_agg_id_float_shared ( int32_t *  agg,
const float  val,
const float  null_val 
)

Definition at line 788 of file cuda_mapd_rt.cu.

790  {
791  int* address_as_ull = reinterpret_cast<int*>(agg);
792  int old = *address_as_ull, assumed;
793 
794  if (val == null_val) {
795  return 0;
796  }
797 
798  do {
799  if (old != __float_as_int(null_val)) {
800  if (old != __float_as_int(val)) {
801  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
802  return 15;
803  } else {
804  break;
805  }
806  }
807 
808  assumed = old;
809  old = atomicCAS(address_as_ull, assumed, __float_as_int(val));
810  } while (assumed != old);
811 
812  return 0;
813 }
__device__ int32_t checked_single_agg_id_shared ( int64_t *  agg,
const int64_t  val,
const int64_t  null_val 
)

Definition at line 681 of file cuda_mapd_rt.cu.

683  {
684  unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(agg);
685  unsigned long long int old = *address_as_ull, assumed;
686 
687  if (val == null_val) {
688  return 0;
689  }
690 
691  do {
692  if (static_cast<int64_t>(old) != null_val) {
693  if (static_cast<int64_t>(old) != val) {
694  // see Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
695  return 15;
696  } else {
697  break;
698  }
699  }
700 
701  assumed = old;
702  old = atomicCAS(address_as_ull, assumed, val);
703  } while (assumed != old);
704 
705  return 0;
706 }
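
checked_single_agg_id_shared enforces SQL single-value semantics: the slot starts out holding null_val, the first non-null value claims it via atomicCAS, and any later, different value reports error code 15 (Execute::ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES). A sequential sketch of that state machine (illustrative names; the device version loops on atomicCAS instead):

#include <cstdint>
#include <cstdio>

// 0 = ok, 15 = more than one distinct non-null value was seen.
static int32_t checked_single_agg_id(int64_t* agg, int64_t val, int64_t null_val) {
  if (val == null_val) {
    return 0;              // nulls never conflict
  }
  if (*agg == null_val) {
    *agg = val;            // first value claims the slot
    return 0;
  }
  return (*agg == val) ? 0 : 15;
}

int main() {
  int64_t slot = -1;                                     // -1 plays the role of null_val here
  printf("%d\n", checked_single_agg_id(&slot, 42, -1));  // 0
  printf("%d\n", checked_single_agg_id(&slot, 42, -1));  // 0 (same value again)
  printf("%d\n", checked_single_agg_id(&slot, 43, -1));  // 15 (conflict)
  return 0;
}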
__device__ int64_t* declare_dynamic_shared_memory ( )

Definition at line 57 of file cuda_mapd_rt.cu.

57  {
58  extern __shared__ int64_t shared_mem_buffer[];
59  return shared_mem_buffer;
60 }
__device__ bool dynamic_watchdog ( )

Definition at line 116 of file cuda_mapd_rt.cu.

Referenced by anonymous_namespace{ResultSetReduction.cpp}::check_watchdog(), check_watchdog_rt(), anonymous_namespace{ResultSetReduction.cpp}::check_watchdog_with_seed(), get_group_value_columnar_slot_with_watchdog(), get_group_value_columnar_with_watchdog(), and get_group_value_with_watchdog().

116  {
117  // check for dynamic watchdog, if triggered all threads return true
118  if (dw_cycle_budget == 0LL) {
119  return false; // Uninitialized watchdog can't check time
120  }
121  if (dw_abort == 1) {
122  return true; // Received host request to abort
123  }
124  uint32_t smid = get_smid();
125  if (smid >= 128) {
126  return false;
127  }
128  __shared__ volatile int64_t dw_block_cycle_start; // Thread block shared cycle start
129  __shared__ volatile bool
130  dw_should_terminate; // all threads within a block should return together if
131  // watchdog criteria is met
132 
133  // thread 0 either initializes or reads the initial clock cycle; the result is stored
134  // into shared memory. Since all threads within a block share the same SM, there's no
135  // point in using more threads here.
136  if (threadIdx.x == 0) {
137  dw_block_cycle_start = 0LL;
138  int64_t cycle_count = static_cast<int64_t>(clock64());
139  // Make sure the block hasn't switched SMs
140  if (smid == get_smid()) {
141  dw_block_cycle_start = static_cast<int64_t>(
142  atomicCAS(reinterpret_cast<unsigned long long*>(&dw_sm_cycle_start[smid]),
143  0ULL,
144  static_cast<unsigned long long>(cycle_count)));
145  }
146 
147  int64_t cycles = cycle_count - dw_block_cycle_start;
148  if ((smid == get_smid()) && (dw_block_cycle_start > 0LL) &&
149  (cycles > dw_cycle_budget)) {
150  // Check if we're out of time on this particular SM
151  dw_should_terminate = true;
152  } else {
153  dw_should_terminate = false;
154  }
155  }
156  __syncthreads();
157  return dw_should_terminate;
158 }
__device__ int64_t dw_sm_cycle_start[128]
Definition: cuda_mapd_rt.cu:92
__device__ int64_t dw_cycle_budget
Definition: cuda_mapd_rt.cu:94
__inline__ __device__ uint32_t get_smid(void)
Definition: cuda_mapd_rt.cu:98
__device__ int32_t dw_abort
Definition: cuda_mapd_rt.cu:95

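The watchdog reads two device globals: dw_cycle_budget (clock64() cycles allowed per SM) and dw_abort (host-requested abort). A minimal host-side sketch of how such symbols could be armed with cudaMemcpyToSymbol before a kernel launch; this is an assumption for illustration only, the actual engine wires these up elsewhere in the query runtime:

#include <cstdint>
#include <cuda_runtime.h>

// Stand-ins for the device globals defined in cuda_mapd_rt.cu (names reused for clarity).
__device__ int64_t dw_cycle_budget = 0;
__device__ int32_t dw_abort = 0;

// Hypothetical helper: arm the watchdog with a cycle budget and clear any abort request.
void arm_watchdog(int64_t budget_cycles) {
  const int32_t no_abort = 0;
  cudaMemcpyToSymbol(dw_cycle_budget, &budget_cycles, sizeof(budget_cycles));
  cudaMemcpyToSymbol(dw_abort, &no_abort, sizeof(no_abort));
}

// Hypothetical helper: ask all running kernels to bail out at the next check.
void request_abort() {
  const int32_t abort_flag = 1;
  cudaMemcpyToSymbol(dw_abort, &abort_flag, sizeof(abort_flag));
}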

__device__ void force_sync ( )

Definition at line 1275 of file cuda_mapd_rt.cu.

1275  {
1276  __threadfence_block();
1277 }
__device__ int64_t get_block_index ( )

Definition at line 24 of file cuda_mapd_rt.cu.

24  {
25  return blockIdx.x;
26 }
template<typename T = unsigned long long>
__device__ T get_empty_key ( )
inline

Definition at line 165 of file cuda_mapd_rt.cu.

References EMPTY_KEY_64.

165  {
166  return EMPTY_KEY_64;
167 }
#define EMPTY_KEY_64
template<>
__device__ unsigned int get_empty_key ( )
inline

Definition at line 170 of file cuda_mapd_rt.cu.

References EMPTY_KEY_32.

170  {
171  return EMPTY_KEY_32;
172 }
#define EMPTY_KEY_32
template<typename T >
__device__ int64_t* get_matching_group_value ( int64_t *  groups_buffer,
const uint32_t  h,
const T *  key,
const uint32_t  key_count,
const uint32_t  row_size_quad 
)
inline

Definition at line 175 of file cuda_mapd_rt.cu.

References align_to_int64().

Referenced by get_group_value(), get_group_value_with_watchdog(), and get_matching_group_value().

179  {
180  const T empty_key = get_empty_key<T>();
181  uint32_t off = h * row_size_quad;
182  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
183  {
184  const T old = atomicCAS(row_ptr, empty_key, *key);
185  if (empty_key == old && key_count > 1) {
186  for (size_t i = 1; i <= key_count - 1; ++i) {
187  atomicExch(row_ptr + i, key[i]);
188  }
189  }
190  }
191  if (key_count > 1) {
192  while (atomicAdd(row_ptr + key_count - 1, 0) == empty_key) {
193  // spin until the winning thread has finished writing the entire key and the init
194  // value
195  }
196  }
197  bool match = true;
198  for (uint32_t i = 0; i < key_count; ++i) {
199  if (row_ptr[i] != key[i]) {
200  match = false;
201  break;
202  }
203  }
204 
205  if (match) {
206  auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
207  return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
208  }
209  return NULL;
210 }
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

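get_matching_group_value() inspects a single bucket: it either claims an empty slot for the key (publishing the full key with atomicCAS/atomicExch), matches an existing key, or reports a collision by returning NULL. A hypothetical caller-side sketch of the linear probing built on top of it (the real probing loops live in GroupByRuntime.cpp, included above; this assumes compilation alongside cuda_mapd_rt.cu so the runtime function is visible):

__device__ int64_t* probe_group_slot(int64_t* groups_buffer,
                                     const uint32_t entry_count,
                                     const uint32_t start_bucket,
                                     const int64_t* key,
                                     const uint32_t key_count,
                                     const uint32_t key_width,
                                     const uint32_t row_size_quad) {
  uint32_t h = start_bucket;  // e.g. key hash modulo entry_count
  do {
    int64_t* row = get_matching_group_value(
        groups_buffer, h, key, key_count, key_width, row_size_quad);
    if (row) {
      return row;                  // matched an existing key or claimed an empty bucket
    }
    h = (h + 1) % entry_count;     // collision: probe the next bucket
  } while (h != start_bucket);
  return NULL;                     // table is full
}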

__device__ int64_t* get_matching_group_value ( int64_t *  groups_buffer,
const uint32_t  h,
const int64_t *  key,
const uint32_t  key_count,
const uint32_t  key_width,
const uint32_t  row_size_quad 
)

Definition at line 212 of file cuda_mapd_rt.cu.

References get_matching_group_value().

217  {
218  switch (key_width) {
219  case 4:
220  return get_matching_group_value(groups_buffer,
221  h,
222  reinterpret_cast<const unsigned int*>(key),
223  key_count,
224  row_size_quad);
225  case 8:
226  return get_matching_group_value(groups_buffer,
227  h,
228  reinterpret_cast<const unsigned long long*>(key),
229  key_count,
230  row_size_quad);
231  default:
232  return NULL;
233  }
234 }
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)


__device__ int64_t* get_matching_group_value_columnar ( int64_t *  groups_buffer,
const uint32_t  h,
const int64_t *  key,
const uint32_t  key_qw_count,
const size_t  entry_count 
)

Definition at line 297 of file cuda_mapd_rt.cu.

References EMPTY_KEY_64.

Referenced by get_group_value_columnar(), and get_group_value_columnar_with_watchdog().

302  {
303  uint32_t off = h;
304  {
305  const uint64_t old = atomicCAS(
306  reinterpret_cast<unsigned long long*>(groups_buffer + off), EMPTY_KEY_64, *key);
307  if (EMPTY_KEY_64 == old) {
308  for (size_t i = 0; i < key_qw_count; ++i) {
309  groups_buffer[off] = key[i];
310  off += entry_count;
311  }
312  return &groups_buffer[off];
313  }
314  }
315  __syncthreads();
316  off = h;
317  for (size_t i = 0; i < key_qw_count; ++i) {
318  if (groups_buffer[off] != key[i]) {
319  return NULL;
320  }
321  off += entry_count;
322  }
323  return &groups_buffer[off];
324 }
#define EMPTY_KEY_64


template<typename T >
__device__ int32_t get_matching_group_value_columnar_slot ( int64_t *  groups_buffer,
const uint32_t  entry_count,
const uint32_t  h,
const T *  key,
const uint32_t  key_count 
)

Definition at line 237 of file cuda_mapd_rt.cu.


Referenced by get_group_value_columnar_slot(), get_group_value_columnar_slot_with_watchdog(), and get_matching_group_value_columnar_slot().

241  {
242  const T empty_key = get_empty_key<T>();
243  const uint64_t old =
244  atomicCAS(reinterpret_cast<T*>(groups_buffer + h), empty_key, *key);
245  // the winner thread proceeds with writing the rest of the keys
246  if (old == empty_key) {
247  uint32_t offset = h + entry_count;
248  for (size_t i = 1; i < key_count; ++i) {
249  *reinterpret_cast<T*>(groups_buffer + offset) = key[i];
250  offset += entry_count;
251  }
252  }
253 
254  __threadfence();
255  // for all threads except the winning thread, memory content of the keys
256  // related to the hash offset are checked again. In case of a complete match
257  // the hash offset is returned, otherwise -1 is returned
258  if (old != empty_key) {
259  uint32_t offset = h;
260  for (uint32_t i = 0; i < key_count; ++i) {
261  if (*reinterpret_cast<T*>(groups_buffer + offset) != key[i]) {
262  return -1;
263  }
264  offset += entry_count;
265  }
266  }
267  return h;
268 }


__device__ int32_t get_matching_group_value_columnar_slot ( int64_t *  groups_buffer,
const uint32_t  entry_count,
const uint32_t  h,
const int64_t *  key,
const uint32_t  key_count,
const uint32_t  key_width 
)

Definition at line 271 of file cuda_mapd_rt.cu.

References get_matching_group_value_columnar_slot().

276  {
277  switch (key_width) {
278  case 4:
279  return get_matching_group_value_columnar_slot(
280  groups_buffer,
281  entry_count,
282  h,
283  reinterpret_cast<const unsigned int*>(key),
284  key_count);
285  case 8:
286  return get_matching_group_value_columnar_slot(
287  groups_buffer,
288  entry_count,
289  h,
290  reinterpret_cast<const unsigned long long*>(key),
291  key_count);
292  default:
293  return -1;
294  }
295 }
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)


__inline__ __device__ uint32_t get_smid ( void  )

Definition at line 98 of file cuda_mapd_rt.cu.

Referenced by dynamic_watchdog().

98  {
99  uint32_t ret;
100  asm("mov.u32 %0, %%smid;" : "=r"(ret));
101  return ret;
102 }


__device__ int64_t get_thread_index ( )

Definition at line 20 of file cuda_mapd_rt.cu.

20  {
21  return threadIdx.x;
22 }
__device__ int32_t group_buff_idx_impl ( )

Definition at line 32 of file cuda_mapd_rt.cu.

References pos_start_impl().

32  {
33  return pos_start_impl(NULL);
34 }

__device__ const int64_t* init_shared_mem ( const int64_t *  global_groups_buffer,
const int32_t  groups_buffer_size 
)

Initializes the shared memory buffer for perfect hash group by. In this function, we simply copy the global group-by buffer (already initialized on the host and transferred to the device) into each block's shared-memory group-by buffer.

Definition at line 67 of file cuda_mapd_rt.cu.

68  {
69  // dynamic shared memory declaration
70  extern __shared__ int64_t shared_groups_buffer[];
71 
72  // it is assumed that buffer size is aligned with 64-bit units
73  // so it is safe to assign 64-bit to each thread
74  const int32_t buffer_units = groups_buffer_size >> 3;
75 
76  for (int32_t pos = threadIdx.x; pos < buffer_units; pos += blockDim.x) {
77  shared_groups_buffer[pos] = global_groups_buffer[pos];
78  }
79  __syncthreads();
80  return shared_groups_buffer;
81 }
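
Because the buffer is declared as dynamic shared memory, any kernel calling init_shared_mem() must be launched with its third launch-configuration argument set to groups_buffer_size bytes. A hypothetical kernel and launch (sketch only; group_by_kernel_sketch is not part of this file):

  // Each block copies the global group-by buffer into its own shared-memory
  // copy before aggregating into it.
  __global__ void group_by_kernel_sketch(const int64_t* global_groups_buffer,
                                         const int32_t groups_buffer_size) {
    const int64_t* shared_buffer =
        init_shared_mem(global_groups_buffer, groups_buffer_size);
    // ... aggregate using the shared copy, then reduce back to global memory ...
  }

  // Host side: the dynamic shared-memory size must match groups_buffer_size.
  // group_by_kernel_sketch<<<grid_dim, block_dim, groups_buffer_size>>>(
  //     dev_groups_buffer, groups_buffer_size);
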
__device__ const int64_t* init_shared_mem_nop ( const int64_t *  groups_buffer,
const int32_t  groups_buffer_size 
)

Definition at line 44 of file cuda_mapd_rt.cu.

46  {
47  return groups_buffer;
48 }
__device__ void linear_probabilistic_count ( uint8_t *  bitmap,
const uint32_t  bitmap_bytes,
const uint8_t *  key_bytes,
const uint32_t  key_len 
)

Definition at line 1204 of file cuda_mapd_rt.cu.

1207  {
1208  const uint32_t bit_pos = MurmurHash1(key_bytes, key_len, 0) % (bitmap_bytes * 8);
1209  const uint32_t word_idx = bit_pos / 32;
1210  const uint32_t bit_idx = bit_pos % 32;
1211  atomicOr(((uint32_t*)bitmap) + word_idx, 1 << bit_idx);
1212 }
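
The bitmap filled here supports an approximate COUNT(DISTINCT) via linear counting. With m total bits and z bits still zero, the standard estimator is n ≈ -m * ln(z / m). A host-side sketch of that estimator (illustration only, not necessarily the exact estimator used elsewhere in the codebase; __builtin_popcount is a GCC/Clang builtin):

  #include <cmath>
  #include <cstdint>

  // Host-side sketch: apply the standard linear-counting estimate to a bitmap
  // of `bitmap_bytes` bytes copied back from the device.
  double estimate_distinct_count_sketch(const uint8_t* bitmap, uint32_t bitmap_bytes) {
    const double m = bitmap_bytes * 8.0;  // total number of bits
    double zero_bits = 0.0;
    for (uint32_t i = 0; i < bitmap_bytes; ++i) {
      zero_bits += 8 - __builtin_popcount(bitmap[i]);
    }
    if (zero_bits == 0.0) {
      return m;  // bitmap saturated; the estimate degenerates
    }
    return -m * std::log(zero_bits / m);
  }
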
__device__ int32_t pos_start_impl ( const int32_t *  row_index_resume)

Definition at line 28 of file cuda_mapd_rt.cu.

Referenced by get_bin_from_k_heap_impl(), get_error_code(), group_buff_idx_impl(), and record_error_code().

28  {
29  return blockIdx.x * blockDim.x + threadIdx.x;
30 }


__device__ int32_t pos_step_impl ( )

Definition at line 36 of file cuda_mapd_rt.cu.

Referenced by get_bin_from_k_heap_impl().

36  {
37  return blockDim.x * gridDim.x;
38 }
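
pos_start_impl() and pos_step_impl() together implement the usual grid-stride iteration pattern: each thread starts at its global thread index and advances by the total number of threads in the grid. A minimal, hypothetical row loop built on them (row_loop_sketch is an illustration, not generated query code):

  __global__ void row_loop_sketch(const int64_t* input,
                                  int64_t* output,
                                  const int32_t row_count) {
    const int32_t start = pos_start_impl(NULL);  // global thread index
    const int32_t step = pos_step_impl();        // total number of threads in the grid
    for (int32_t row = start; row < row_count; row += step) {
      output[row] = input[row];  // per-row work goes here
    }
  }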


__device__ bool slotEmptyKeyCAS ( int64_t *  slot,
int64_t  new_val,
int64_t  init_val 
)

Definition at line 1105 of file cuda_mapd_rt.cu.

1107  {
1108  auto slot_address = reinterpret_cast<unsigned long long int*>(slot);
1109  const auto empty_key =
1110  static_cast<unsigned long long int*>(static_cast<void*>(&init_val));
1111  const auto new_val_cast =
1112  static_cast<unsigned long long int*>(static_cast<void*>(&new_val));
1113 
1114  const auto old_val = atomicCAS(slot_address, *empty_key, *new_val_cast);
1115  if (old_val == *empty_key) {
1116  return true;
1117  } else {
1118  return false;
1119  }
1120 }
__device__ bool slotEmptyKeyCAS_int16 ( int16_t *  slot,
int16_t  new_val,
int16_t  init_val 
)

Definition at line 1133 of file cuda_mapd_rt.cu.

1135  {
1136  unsigned int* base_slot_address =
1137  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(slot) & ~0x3);
1138  unsigned int old_value = *base_slot_address;
1139  unsigned int swap_value, compare_value;
1140  do {
1141  compare_value = old_value;
1142  // exit criteria: if init_val does not exist in the slot (some other thread has
1143  // succeeded)
1144  if (static_cast<unsigned int>(init_val) !=
1145  __byte_perm(
1146  compare_value, 0, (reinterpret_cast<size_t>(slot) & 0x2 ? 0x3244 : 0x4410))) {
1147  return false;
1148  }
1149  swap_value = __byte_perm(compare_value,
1150  static_cast<unsigned int>(new_val),
1151  (reinterpret_cast<size_t>(slot) & 0x2) ? 0x5410 : 0x3254);
1152  old_value = atomicCAS(base_slot_address, compare_value, swap_value);
1153  } while (compare_value != old_value);
1154  return true;
1155 }
__device__ bool slotEmptyKeyCAS_int32 ( int32_t *  slot,
int32_t  new_val,
int32_t  init_val 
)

Definition at line 1122 of file cuda_mapd_rt.cu.

1124  {
1125  unsigned int* slot_address = reinterpret_cast<unsigned int*>(slot);
1126  unsigned int compare_value = static_cast<unsigned int>(init_val);
1127  unsigned int swap_value = static_cast<unsigned int>(new_val);
1128 
1129  const unsigned int old_value = atomicCAS(slot_address, compare_value, swap_value);
1130  return old_value == compare_value;
1131 }
__device__ bool slotEmptyKeyCAS_int8 ( int8_t *  slot,
int8_t  new_val,
int8_t  init_val 
)

Definition at line 1157 of file cuda_mapd_rt.cu.

1159  {
1160  // properly align the slot address:
1161  unsigned int* base_slot_address =
1162  reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(slot) & ~0x3);
1163  constexpr unsigned int byte_permutations[] = {0x3214, 0x3240, 0x3410, 0x4210};
1164  unsigned int old_value = *base_slot_address;
1165  unsigned int swap_value, compare_value;
1166  do {
1167  compare_value = old_value;
1168  // exit criteria: if init_val does not exist in the slot (some other thread has
1169  // succeeded)
1170  if (static_cast<unsigned int>(init_val) !=
1171  __byte_perm(compare_value, 0, (reinterpret_cast<size_t>(slot) & 0x3) | 0x4440)) {
1172  return false;
1173  }
1174  swap_value = __byte_perm(compare_value,
1175  static_cast<unsigned int>(new_val),
1176  byte_permutations[reinterpret_cast<size_t>(slot) & 0x3]);
1177  old_value = atomicCAS(base_slot_address, compare_value, swap_value);
1178  } while (compare_value != old_value);
1179  return true;
1180 }
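
Both the int8 and int16 variants emulate a sub-word compare-and-swap on top of the hardware's 32-bit atomicCAS: load the aligned 32-bit word containing the slot, splice the new value in with __byte_perm, and retry until the CAS succeeds or the expected init_val is no longer present. The same technique written with plain shifts and masks (a hypothetical byte_cas_sketch that only illustrates the pattern, not the code above):

  __device__ bool byte_cas_sketch(int8_t* slot, int8_t new_val, int8_t init_val) {
    // 32-bit word that contains the target byte, and the byte's bit offset in it.
    unsigned int* word =
        reinterpret_cast<unsigned int*>(reinterpret_cast<size_t>(slot) & ~0x3);
    const unsigned int shift = (reinterpret_cast<size_t>(slot) & 0x3) * 8;
    unsigned int old_word = *word;
    unsigned int assumed;
    do {
      assumed = old_word;
      if (((assumed >> shift) & 0xff) != (static_cast<unsigned int>(init_val) & 0xff)) {
        return false;  // another thread already claimed the slot
      }
      const unsigned int new_word = (assumed & ~(0xffu << shift)) |
                                    ((static_cast<unsigned int>(new_val) & 0xff) << shift);
      old_word = atomicCAS(word, assumed, new_word);
    } while (assumed != old_word);
    return true;
  }
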
__device__ uint64_t string_decode ( int8_t *  chunk_iter_,
int64_t  pos 
)

Definition at line 1193 of file cuda_mapd_rt.cu.

References ChunkIter_get_nth(), VarlenDatum::is_null, VarlenDatum::length, and VarlenDatum::pointer.

1193  {
1194  // TODO(alex): de-dup, the x64 version is basically identical
1195  ChunkIter* chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
1196  VarlenDatum vd;
1197  bool is_end;
1198  ChunkIter_get_nth(chunk_iter, pos, false, &vd, &is_end);
1199  return vd.is_null ? 0
1200  : (reinterpret_cast<uint64_t>(vd.pointer) & 0xffffffffffff) |
1201  (static_cast<uint64_t>(vd.length) << 48);
1202 }
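
The returned 64-bit value packs the varlen payload: the pointer occupies the low 48 bits and the length the high 16 bits. A hypothetical unpacking helper (sketch only, not part of this file) recovers both fields:

  // Sketch: undo the 48-bit pointer / 16-bit length packing produced above.
  __device__ inline void unpack_string_sketch(const uint64_t packed,
                                              const int8_t** ptr,
                                              uint32_t* len) {
    *ptr = reinterpret_cast<const int8_t*>(packed & 0xffffffffffffULL);  // low 48 bits
    *len = static_cast<uint32_t>(packed >> 48);                          // high 16 bits
  }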

__device__ void sync_threadblock ( )

Definition at line 1298 of file cuda_mapd_rt.cu.

Referenced by GpuSharedMemCodeBuilder::codegenInitialization(), and GpuSharedMemCodeBuilder::codegenReduction().

1298  {
1299  __syncthreads();
1300 }


__device__ void sync_warp ( )

Definition at line 1279 of file cuda_mapd_rt.cu.

1279  {
1280  __syncwarp();
1281 }
__device__ void sync_warp_protected ( int64_t  thread_pos,
int64_t  row_count 
)

Protected warp synchronization to make sure that either all or none of the threads within a warp go through a synchronization barrier. thread_pos: the current thread position to be used for a memory access. row_count: maximum number of rows to be processed. The function performs a warp sync iff all 32 threads within that warp will process valid data. NOTE: it currently assumes that the warp size is 32.

Definition at line 1290 of file cuda_mapd_rt.cu.

1290  {
1291  // only syncing if NOT within the same warp as those threads experiencing the critical
1292  // edge
1293  if ((((row_count - 1) | 0x1F) - thread_pos) >= 32) {
1294  __syncwarp();
1295  }
1296 }
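
A hedged usage sketch (assuming thread_pos walks row indices in a grid-stride loop): calling sync_warp_protected() instead of a bare __syncwarp() means only warps whose 32 lanes all process valid rows hit the barrier, so the trailing, partially populated warp never waits on lanes that have already exited the loop:

  // Hypothetical per-row loop; thread_start/thread_step follow the usual
  // grid-stride pattern. Only full warps synchronize.
  __device__ void row_loop_with_warp_sync_sketch(const int64_t thread_start,
                                                 const int64_t thread_step,
                                                 const int64_t row_count) {
    for (int64_t pos = thread_start; pos < row_count; pos += thread_step) {
      // ... stage this row's partial result in shared memory ...
      sync_warp_protected(pos, row_count);
      // ... consume values staged by the other lanes of the same warp ...
    }
  }
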
__device__ int8_t thread_warp_idx ( const int8_t  warp_sz)

Definition at line 40 of file cuda_mapd_rt.cu.

40  {
41  return threadIdx.x % warp_sz;
42 }
__device__ void write_back_non_grouped_agg ( int64_t *  input_buffer,
int64_t *  output_buffer,
const int32_t  agg_idx 
)

Definition at line 1310 of file cuda_mapd_rt.cu.

References agg_sum_shared().

1312  {
1313  if (threadIdx.x == agg_idx) {
1314  agg_sum_shared(output_buffer, input_buffer[agg_idx]);
1315  }
1316 }

__device__ void write_back_nop ( int64_t *  dest,
int64_t *  src,
const int32_t  sz 
)

Definition at line 50 of file cuda_mapd_rt.cu.

50  {
51 }

Variable Documentation

__device__ int32_t dw_abort = 0

Definition at line 95 of file cuda_mapd_rt.cu.

Referenced by dynamic_watchdog(), dynamic_watchdog_init(), and Executor::interrupt().

__device__ int64_t dw_cycle_budget = 0

Definition at line 94 of file cuda_mapd_rt.cu.

Referenced by dynamic_watchdog(), and dynamic_watchdog_init().

__device__ int64_t dw_sm_cycle_start[128]

Definition at line 92 of file cuda_mapd_rt.cu.

Referenced by dynamic_watchdog().

__device__ int32_t runtime_interrupt_flag = 0

Definition at line 96 of file cuda_mapd_rt.cu.

Referenced by check_interrupt(), check_interrupt_init(), and Executor::interrupt().