OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DynamicWatchdog.h File Reference
#include "Shared/funcannotations.h"
#include <cstdint>
+ Include dependency graph for DynamicWatchdog.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Enumerations

enum  DynamicWatchdogFlags { DW_DEADLINE = 0, DW_ABORT = -1, DW_RESET = -2 }
 

Functions

RUNTIME_EXPORT uint64_t dynamic_watchdog_init (unsigned ms_budget)
 
RUNTIME_EXPORT bool dynamic_watchdog ()
 

Enumeration Type Documentation

Enumerator
DW_DEADLINE 
DW_ABORT 
DW_RESET 

Definition at line 24 of file DynamicWatchdog.h.

Function Documentation

RUNTIME_EXPORT bool dynamic_watchdog ( )

Definition at line 115 of file cuda_mapd_rt.cu.

115  {
116  // check the dynamic watchdog; if triggered, all threads return true
117  if (dw_cycle_budget == 0LL) {
118  return false; // Uninitialized watchdog can't check time
119  }
120  if (dw_abort == 1) {
121  return true; // Received host request to abort
122  }
123  uint32_t smid = get_smid();
124  if (smid >= 128) {
125  return false;
126  }
127  __shared__ volatile int64_t dw_block_cycle_start; // Thread block shared cycle start
128  __shared__ volatile bool
129  dw_should_terminate; // all threads within a block should return together if
130  // watchdog criteria is met
131 
132  // thread 0 either initializes or reads the initial clock cycle; the result is stored
133  // into shared memory. Since all threads within a block share the same SM, there's no
134  // point in using more threads here.
135  if (threadIdx.x == 0) {
136  dw_block_cycle_start = 0LL;
137  int64_t cycle_count = static_cast<int64_t>(clock64());
138  // Make sure the block hasn't switched SMs
139  if (smid == get_smid()) {
140  dw_block_cycle_start = static_cast<int64_t>(
141  atomicCAS(reinterpret_cast<unsigned long long*>(&dw_sm_cycle_start[smid]),
142  0ULL,
143  static_cast<unsigned long long>(cycle_count)));
144  }
145 
146  int64_t cycles = cycle_count - dw_block_cycle_start;
147  if ((smid == get_smid()) && (dw_block_cycle_start > 0LL) &&
148  (cycles > dw_cycle_budget)) {
149  // Check if we're out of time on this particular SM
150  dw_should_terminate = true;
151  } else {
152  dw_should_terminate = false;
153  }
154  }
155  __syncthreads();
156  return dw_should_terminate;
157 }
__device__ int64_t dw_sm_cycle_start[128]
Definition: cuda_mapd_rt.cu:91
__device__ int64_t dw_cycle_budget
Definition: cuda_mapd_rt.cu:93
__inline__ __device__ uint32_t get_smid(void)
Definition: cuda_mapd_rt.cu:97
__device__ int32_t dw_abort
Definition: cuda_mapd_rt.cu:94
RUNTIME_EXPORT uint64_t dynamic_watchdog_init ( unsigned  ms_budget)

Definition at line 36 of file DynamicWatchdog.cpp.

References DW_ABORT, dw_abort, dw_cycle_budget, DW_DEADLINE, DW_RESET, read_cycle_counter(), and VLOG.

Referenced by dynamic_watchdog(), Executor::interrupt(), Executor::resetInterrupt(), and ExecutionKernel::runImpl().

36  {
37  static uint64_t dw_cycle_start = 0ULL;
38  static uint64_t dw_cycle_budget = 0ULL;
39  static std::atomic_bool dw_abort{false};
40 
41  if (ms_budget == static_cast<unsigned>(DW_DEADLINE)) {
42  if (dw_abort.load()) {
43  { return 0LL; }
44  }
45  return dw_cycle_start + dw_cycle_budget;
46  }
47  if (ms_budget == static_cast<unsigned>(DW_ABORT)) {
48  dw_abort = true;
49  return 0LL;
50  }
51  if (ms_budget == static_cast<unsigned>(DW_RESET)) {
52  dw_abort = false;
53  return 0LL;
54  }
55 
56  // Init cycle start, measure freq, set and return cycle budget
57  dw_cycle_start = read_cycle_counter();
58  std::this_thread::sleep_for(std::chrono::milliseconds(1));
59  auto freq_kHz = read_cycle_counter() - dw_cycle_start;
60  dw_cycle_budget = freq_kHz * static_cast<uint64_t>(ms_budget);
61  VLOG(1) << "INIT: thread " << std::this_thread::get_id() << ": ms_budget " << ms_budget
62  << ", cycle_start " << dw_cycle_start << ", cycle_budget " << dw_cycle_budget
63  << ", dw_deadline " << dw_cycle_start + dw_cycle_budget;
64  return dw_cycle_budget;
65 }
__device__ int64_t dw_cycle_budget
Definition: cuda_mapd_rt.cu:93
static FORCE_INLINE uint64_t read_cycle_counter(void)
__device__ int32_t dw_abort
Definition: cuda_mapd_rt.cu:94
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function: