OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DeviceKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "DeviceKernel.h"
18 
19 #include "CompilationContext.h"
20 #include "NvidiaKernel.h"
21 
22 #ifdef HAVE_CUDA
23 class CudaEventClock : public DeviceClock {
24  public:
25  CudaEventClock() {
26  cuEventCreate(&start_, 0);
27  cuEventCreate(&stop_, 0);
28  }
29  virtual void start() override { cuEventRecord(start_, 0); }
30  virtual int stop() override {
31  cuEventRecord(stop_, 0);
32  cuEventSynchronize(stop_);
33  float ms = 0;
34  cuEventElapsedTime(&ms, start_, stop_);
35  return ms;
36  }
37 
38  private:
39  CUevent start_, stop_; // preparation
40 };
41 
42 class NvidiaKernel : public DeviceKernel {
43  public:
44  NvidiaKernel(const CompilationContext* ctx, int device_id) : device_id(device_id) {
45  auto cuda_ctx = dynamic_cast<const GpuCompilationContext*>(ctx);
46  CHECK(cuda_ctx);
47  const auto native_code = cuda_ctx->getNativeCode(device_id);
48  function_ptr = static_cast<CUfunction>(native_code.first);
49  module_ptr = static_cast<CUmodule>(native_code.second);
50  }
51 
52  void launch(unsigned int gridDimX,
53  unsigned int gridDimY,
54  unsigned int gridDimZ,
55  unsigned int blockDimX,
56  unsigned int blockDimY,
57  unsigned int blockDimZ,
58  unsigned int sharedMemBytes,
59  void** kernelParams) override {
60  checkCudaErrors(cuLaunchKernel(function_ptr,
61  gridDimX,
62  gridDimY,
63  gridDimZ,
64  blockDimX,
65  blockDimY,
66  blockDimZ,
67  sharedMemBytes,
68  nullptr,
69  kernelParams,
70  nullptr));
71  }
72 
73  void initializeDynamicWatchdog(bool could_interrupt, uint64_t cycle_budget) override {
74  CHECK(module_ptr);
75  CUevent start, stop;
76  cuEventCreate(&start, 0);
77  cuEventCreate(&stop, 0);
78  cuEventRecord(start, 0);
79 
81  size_t dw_cycle_budget_size;
82  // Translate milliseconds to device cycles
83  if (device_id == 0) {
84  LOG(INFO) << "Dynamic Watchdog budget: GPU: "
86  << std::to_string(cycle_budget) << " cycles";
87  }
88  checkCudaErrors(cuModuleGetGlobal(
89  &dw_cycle_budget, &dw_cycle_budget_size, module_ptr, "dw_cycle_budget"));
90  CHECK_EQ(dw_cycle_budget_size, sizeof(uint64_t));
91  checkCudaErrors(cuMemcpyHtoD(
92  dw_cycle_budget, reinterpret_cast<void*>(&cycle_budget), sizeof(uint64_t)));
93 
95  size_t dw_sm_cycle_start_size;
96  checkCudaErrors(cuModuleGetGlobal(
97  &dw_sm_cycle_start, &dw_sm_cycle_start_size, module_ptr, "dw_sm_cycle_start"));
98  CHECK_EQ(dw_sm_cycle_start_size, 128 * sizeof(uint64_t));
99  checkCudaErrors(cuMemsetD32(dw_sm_cycle_start, 0, 128 * 2));
100 
101  if (!could_interrupt) {
102  // Executor is not marked as interrupted, make sure dynamic watchdog doesn't block
103  // execution
105  size_t dw_abort_size;
107  cuModuleGetGlobal(&dw_abort, &dw_abort_size, module_ptr, "dw_abort"));
108  CHECK_EQ(dw_abort_size, sizeof(uint32_t));
109  checkCudaErrors(cuMemsetD32(dw_abort, 0, 1));
110  }
111 
112  cuEventRecord(stop, 0);
113  cuEventSynchronize(stop);
114  float milliseconds = 0;
115  cuEventElapsedTime(&milliseconds, start, stop);
116  VLOG(1) << "Device " << std::to_string(device_id)
117  << ": launchGpuCode: dynamic watchdog init: " << std::to_string(milliseconds)
118  << " ms\n";
119  }
120 
121  void initializeRuntimeInterrupter() override {
122  CHECK(module_ptr);
123  CUevent start, stop;
124  cuEventCreate(&start, 0);
125  cuEventCreate(&stop, 0);
126  cuEventRecord(start, 0);
127 
129  size_t runtime_interrupt_flag_size;
130  checkCudaErrors(cuModuleGetGlobal(&runtime_interrupt_flag,
131  &runtime_interrupt_flag_size,
132  module_ptr,
133  "runtime_interrupt_flag"));
134  CHECK_EQ(runtime_interrupt_flag_size, sizeof(uint32_t));
135  checkCudaErrors(cuMemsetD32(runtime_interrupt_flag, 0, 1));
136 
137  cuEventRecord(stop, 0);
138  cuEventSynchronize(stop);
139  float milliseconds = 0;
140  cuEventElapsedTime(&milliseconds, start, stop);
141  VLOG(1) << "Device " << std::to_string(device_id)
142  << ": launchGpuCode: runtime query interrupter init: "
143  << std::to_string(milliseconds) << " ms";
144  }
145 
146  std::unique_ptr<DeviceClock> make_clock() override {
147  return std::make_unique<CudaEventClock>();
148  }
149 
150  private:
151  CUfunction function_ptr;
152  CUmodule module_ptr;
153  int device_id;
154 };
155 #endif
156 
157 std::unique_ptr<DeviceKernel> create_device_kernel(const CompilationContext* ctx,
158  int device_id) {
159 #ifdef HAVE_CUDA
160  return std::make_unique<NvidiaKernel>(ctx, device_id);
161 #else
162  return nullptr;
163 #endif
164 }
virtual int stop()=0
#define CHECK_EQ(x, y)
Definition: Logger.h:217
__device__ int64_t dw_sm_cycle_start[128]
Definition: cuda_mapd_rt.cu:92
#define LOG(tag)
Definition: Logger.h:203
void checkCudaErrors(CUresult err)
Definition: sample.cpp:38
unsigned long long CUdeviceptr
Definition: nocuda.h:27
__device__ int64_t dw_cycle_budget
Definition: cuda_mapd_rt.cu:94
virtual std::unique_ptr< DeviceClock > make_clock()=0
std::string to_string(char const *&&v)
virtual void launch(unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, void **kernelParams)=0
void * CUfunction
Definition: nocuda.h:24
__device__ int32_t runtime_interrupt_flag
Definition: cuda_mapd_rt.cu:96
virtual void initializeDynamicWatchdog(bool could_interrupt, uint64_t cycle_budget)=0
virtual void initializeRuntimeInterrupter()=0
virtual void start()=0
std::unique_ptr< DeviceKernel > create_device_kernel(const CompilationContext *ctx, int device_id)
#define CHECK(condition)
Definition: Logger.h:209
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:81
__device__ int32_t dw_abort
Definition: cuda_mapd_rt.cu:95
#define VLOG(n)
Definition: Logger.h:303
void * CUmodule
Definition: nocuda.h:23