OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DynamicWatchdog.h File Reference
#include <cstdint>
+ Include dependency graph for DynamicWatchdog.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Enumerations

enum  DynamicWatchdogFlags { DW_DEADLINE = 0, DW_ABORT = -1, DW_RESET = -2 }
 

Functions

uint64_t dynamic_watchdog_init (unsigned ms_budget)
 
bool dynamic_watchdog ()
 

Enumeration Type Documentation

Enumerator
DW_DEADLINE 
DW_ABORT 
DW_RESET 

Definition at line 22 of file DynamicWatchdog.h.

Function Documentation

bool dynamic_watchdog ( )

Definition at line 116 of file cuda_mapd_rt.cu.

116  {
117  // check the dynamic watchdog; if it has triggered, all threads return true
118  if (dw_cycle_budget == 0LL) {
119  return false; // Uninitialized watchdog can't check time
120  }
121  if (dw_abort == 1) {
122  return true; // Received host request to abort
123  }
124  uint32_t smid = get_smid();
125  if (smid >= 128) {
126  return false;
127  }
128  __shared__ volatile int64_t dw_block_cycle_start; // Thread block shared cycle start
129  __shared__ volatile bool
130  dw_should_terminate; // all threads within a block should return together if
131  // watchdog criteria is met
132 
133  // thread 0 either initializes or reads the initial clock cycle; the result is stored
134  // into shared memory. Since all threads within a block share the same SM, there's no
135  // point in using more threads here.
136  if (threadIdx.x == 0) {
137  dw_block_cycle_start = 0LL;
138  int64_t cycle_count = static_cast<int64_t>(clock64());
139  // Make sure the block hasn't switched SMs
140  if (smid == get_smid()) {
141  dw_block_cycle_start = static_cast<int64_t>(
142  atomicCAS(reinterpret_cast<unsigned long long*>(&dw_sm_cycle_start[smid]),
143  0ULL,
144  static_cast<unsigned long long>(cycle_count)));
145  }
146 
147  int64_t cycles = cycle_count - dw_block_cycle_start;
148  if ((smid == get_smid()) && (dw_block_cycle_start > 0LL) &&
149  (cycles > dw_cycle_budget)) {
150  // Check if we're out of time on this particular SM
151  dw_should_terminate = true;
152  } else {
153  dw_should_terminate = false;
154  }
155  }
156  __syncthreads();
157  return dw_should_terminate;
158 }
__device__ int64_t dw_sm_cycle_start[128]
Definition: cuda_mapd_rt.cu:92
__device__ int64_t dw_cycle_budget
Definition: cuda_mapd_rt.cu:94
__inline__ __device__ uint32_t get_smid(void)
Definition: cuda_mapd_rt.cu:98
__device__ int32_t dw_abort
Definition: cuda_mapd_rt.cu:95
uint64_t dynamic_watchdog_init ( unsigned  ms_budget)

Definition at line 36 of file DynamicWatchdog.cpp.

References DW_ABORT, dw_abort, dw_cycle_budget, DW_DEADLINE, DW_RESET, read_cycle_counter(), and VLOG.

Referenced by dynamic_watchdog(), Executor::interrupt(), Executor::resetInterrupt(), and ExecutionKernel::runImpl().

36  {
37  static uint64_t dw_cycle_start = 0ULL;
38  static uint64_t dw_cycle_budget = 0ULL;
39  static std::atomic_bool dw_abort{false};
40 
41  if (ms_budget == static_cast<unsigned>(DW_DEADLINE)) {
42  if (dw_abort.load()) {
43  {
44  return 0LL;
45  }
46  }
47  return dw_cycle_start + dw_cycle_budget;
48  }
49  if (ms_budget == static_cast<unsigned>(DW_ABORT)) {
50  dw_abort = true;
51  return 0LL;
52  }
53  if (ms_budget == static_cast<unsigned>(DW_RESET)) {
54  dw_abort = false;
55  return 0LL;
56  }
57 
58  // Init cycle start, measure freq, set and return cycle budget
59  dw_cycle_start = read_cycle_counter();
60  std::this_thread::sleep_for(std::chrono::milliseconds(1));
61  auto freq_kHz = read_cycle_counter() - dw_cycle_start;
62  dw_cycle_budget = freq_kHz * static_cast<uint64_t>(ms_budget);
63  VLOG(1) << "INIT: thread " << std::this_thread::get_id() << ": ms_budget " << ms_budget
64  << ", cycle_start " << dw_cycle_start << ", cycle_budget " << dw_cycle_budget
65  << ", dw_deadline " << dw_cycle_start + dw_cycle_budget;
66  return dw_cycle_budget;
67 }
__device__ int64_t dw_cycle_budget
Definition: cuda_mapd_rt.cu:94
static FORCE_INLINE uint64_t read_cycle_counter(void)
__device__ int32_t dw_abort
Definition: cuda_mapd_rt.cu:95
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function: