OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DynamicWatchdog.h File Reference
#include <cstdint>
+ Include dependency graph for DynamicWatchdog.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Enumerations

enum  DynamicWatchdogFlags { DW_DEADLINE = 0, DW_ABORT = -1, DW_RESET = -2 }
 

Functions

uint64_t dynamic_watchdog_init (unsigned ms_budget)
 
bool dynamic_watchdog ()
 

Enumeration Type Documentation

Enumerator
DW_DEADLINE 
DW_ABORT 
DW_RESET 

Definition at line 22 of file DynamicWatchdog.h.

Function Documentation

bool dynamic_watchdog ( )

Definition at line 181 of file cuda_mapd_rt.cu.

181  {
182  // Check the dynamic watchdog; if triggered, all threads return true
183  if (dw_cycle_budget == 0LL) {
184  return false; // Uninitialized watchdog can't check time
185  }
186  if (dw_abort == 1) {
187  return true; // Received host request to abort
188  }
189  uint32_t smid = get_smid();
190  if (smid >= 128) {
191  return false;
192  }
193  __shared__ volatile int64_t dw_block_cycle_start; // Thread block shared cycle start
194  __shared__ volatile bool
195  dw_should_terminate; // all threads within a block should return together if
196  // watchdog criterion is met
197 
198  // thread 0 either initializes or reads the initial clock cycle; the result is stored
199  // into shared memory. Since all threads within a block share the same SM, there's no
200  // point in using more threads here.
201  if (threadIdx.x == 0) {
202  dw_block_cycle_start = 0LL;
203  int64_t cycle_count = static_cast<int64_t>(clock64());
204  // Make sure the block hasn't switched SMs
205  if (smid == get_smid()) {
206  dw_block_cycle_start = static_cast<int64_t>(
207  atomicCAS(reinterpret_cast<unsigned long long*>(&dw_sm_cycle_start[smid]),
208  0ULL,
209  static_cast<unsigned long long>(cycle_count)));
210  }
211 
212  int64_t cycles = cycle_count - dw_block_cycle_start;
213  if ((smid == get_smid()) && (dw_block_cycle_start > 0LL) &&
214  (cycles > dw_cycle_budget)) {
215  // Check if we're out of time on this particular SM
216  dw_should_terminate = true;
217  } else {
218  dw_should_terminate = false;
219  }
220  }
221  __syncthreads();
222  return dw_should_terminate;
223 }
__device__ int64_t dw_sm_cycle_start[128]
__device__ int64_t dw_cycle_budget
__inline__ __device__ uint32_t get_smid(void)
__device__ int32_t dw_abort
uint64_t dynamic_watchdog_init ( unsigned  ms_budget)

Definition at line 35 of file DynamicWatchdog.cpp.

References DW_ABORT, dw_abort, dw_cycle_budget, DW_DEADLINE, DW_RESET, read_cycle_counter(), and VLOG.

Referenced by dynamic_watchdog(), Executor::interrupt(), Executor::resetInterrupt(), and Executor::ExecutionDispatch::runImpl().

35  {
36  static uint64_t dw_cycle_start = 0ULL;
37  static uint64_t dw_cycle_budget = 0ULL;
38  static std::atomic_bool dw_abort{false};
39 
40  if (ms_budget == static_cast<unsigned>(DW_DEADLINE)) {
41  if (dw_abort.load()) {
42  {
43  return 0LL;
44  }
45  }
46  return dw_cycle_start + dw_cycle_budget;
47  }
48  if (ms_budget == static_cast<unsigned>(DW_ABORT)) {
49  dw_abort = true;
50  return 0LL;
51  }
52  if (ms_budget == static_cast<unsigned>(DW_RESET)) {
53  dw_abort = false;
54  return 0LL;
55  }
56 
57  // Init cycle start, measure freq, set and return cycle budget
58  dw_cycle_start = read_cycle_counter();
59  std::this_thread::sleep_for(std::chrono::milliseconds(1));
60  auto freq_kHz = read_cycle_counter() - dw_cycle_start;
61  dw_cycle_budget = freq_kHz * static_cast<uint64_t>(ms_budget);
62  VLOG(1) << "INIT: thread " << std::this_thread::get_id() << ": ms_budget " << ms_budget
63  << ", cycle_start " << dw_cycle_start << ", cycle_budget " << dw_cycle_budget
64  << ", dw_deadline " << dw_cycle_start + dw_cycle_budget;
65  return dw_cycle_budget;
66 }
static __inline__ uint64_t read_cycle_counter(void)
__device__ int64_t dw_cycle_budget
__device__ int32_t dw_abort
#define VLOG(n)
Definition: Logger.h:280

+ Here is the call graph for this function:

+ Here is the caller graph for this function: