OmniSciDB  2e3a973ef4
CudaMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cstdlib>
19 #include <mutex>
20 #include <string>
21 #include <vector>
22 
23 #include "Logger/Logger.h"
24 #include "Shared/DeviceGroup.h"
25 
26 #ifdef HAVE_CUDA
27 #include <cuda.h>
28 #else
29 #include "Shared/nocuda.h"
30 #endif // HAVE_CUDA
31 
32 namespace CudaMgr_Namespace {
33 
// Names the NVIDIA GPU architecture generations this header distinguishes.
// The trailing comment on each enumerator gives the CUDA compute capability
// (major, plus minor where two generations share a major) it stands for.
34 enum class NvidiaDeviceArch {
35  Kepler, // compute major = 3
36  Maxwell, // compute major = 5
37  Pascal, // compute major = 6
38  Volta, // compute major = 7, compute minor = 0
39  Turing, // compute major = 7, compute minor = 5
40  Ampere // compute major = 8
41 };
42 
43 #ifdef HAVE_CUDA
// Declaration only (the definition lives out of line). Takes a CUresult and
// returns a std::string; presumably a human-readable description of that
// status -- TODO confirm against the definition.
44 std::string errorMessage(CUresult const);
45 
// Exception type that derives from std::runtime_error and additionally keeps
// the CUresult it was created for, so handlers can inspect the raw status.
// The constructor is only declared here; how the what() text is built from
// `status` is not visible in this header.
46 class CudaErrorException : public std::runtime_error {
47  public:
48  CudaErrorException(CUresult status);
49 
// Returns the stored status_ by value.
50  CUresult getStatus() const { return status_; }
51 
52  private:
// Declared const, so it cannot change after construction.
53  CUresult const status_;
54 };
55 #endif
56 
62  size_t globalMem;
66  int numMPs;
67  int warpSize;
71  int pciBusId;
74  int memoryBusWidth; // in bits
76  int clockKhz;
77  int numCore;
78 };
79 
80 class CudaMgr {
81  public:
// Constructor and destructor are declared here and defined out of line.
// NOTE(review): `num_gpus` / `start_gpu` presumably give the number of GPUs to
// manage and the index of the first one (start_gpu defaults to 0) -- confirm
// against the definition.
82  CudaMgr(const int num_gpus, const int start_gpu = 0);
83  ~CudaMgr();
84 
// Declared only; the name suggests it blocks until the managed devices are
// idle -- TODO confirm against the definition.
85  void synchronizeDevices() const;
// Returns the device_count_ member by value.
86  int getDeviceCount() const { return device_count_; }
// Returns the start_gpu_ member by value.
87  int getStartGpu() const { return start_gpu_; }
// Returns a const reference to the device_group_ member; the reference is
// only valid while this CudaMgr object is alive.
88  const omnisci::DeviceGroup& getDeviceGroup() const { return device_group_; }
89 
// Three copy entry points, declared here and defined out of line. All take raw
// int8_t pointers plus a byte count (`num_bytes`); the destination pointer is
// always the first parameter.
// NOTE(review): whether the device numbers are absolute or relative to
// start_gpu_, and whether the copies are synchronous, cannot be told from the
// declarations -- confirm against the definitions.
90  void copyHostToDevice(int8_t* device_ptr,
91  const int8_t* host_ptr,
92  const size_t num_bytes,
93  const int device_num);
94  void copyDeviceToHost(int8_t* host_ptr,
95  const int8_t* device_ptr,
96  const size_t num_bytes,
97  const int device_num);
// Unlike the two overloads above, the source pointer here is not const.
98  void copyDeviceToDevice(int8_t* dest_ptr,
99  int8_t* src_ptr,
100  const size_t num_bytes,
101  const int dest_device_num,
102  const int src_device_num);
103 
// Allocation, release and fill entry points; all are declarations only.
// The allocate* functions return a raw int8_t*; the matching free* functions
// take such a pointer back.
// NOTE(review): ownership rules and failure behaviour (exception vs. nullptr)
// are not visible here -- confirm against the definitions.
104  int8_t* allocatePinnedHostMem(const size_t num_bytes);
105  int8_t* allocateDeviceMem(const size_t num_bytes, const int device_num);
106  void freePinnedHostMem(int8_t* host_ptr);
107  void freeDeviceMem(int8_t* device_ptr);
108  void zeroDeviceMem(int8_t* device_ptr, const size_t num_bytes, const int device_num);
// `uc` is presumably the byte value written to each of the num_bytes bytes --
// TODO confirm.
109  void setDeviceMem(int8_t* device_ptr,
110  const unsigned char uc,
111  const size_t num_bytes,
112  const int device_num);
113 
115  return min_shared_memory_per_block_for_all_devices;
116  }
117 
// Returns the min_num_mps_for_all_devices member by value. Where that member
// is computed is not visible in this block.
118  size_t getMinNumMPsForAllDevices() const { return min_num_mps_for_all_devices; }
119 
// Returns a const reference to the whole device_properties_ vector (no copy).
// The reference is only valid while this CudaMgr object is alive.
120  const std::vector<DeviceProperties>& getAllDeviceProperties() const {
121  return device_properties_;
122  }
// Looks up the properties entry for one device.
// Returns a pointer into device_properties_ (never null on the visible paths);
// throws std::runtime_error when device_num is not a valid index into that
// vector. The pointer refers to vector storage owned by this object.
123  const DeviceProperties* getDeviceProperties(const size_t device_num) const {
124  // device_num is the device number relative to start_gpu_ (real_device_num -
125  // start_gpu_)
// device_num is unsigned, so a single upper-bound comparison is a full range check.
126  if (device_num < device_properties_.size()) {
127  return &device_properties_[device_num];
128  }
// Out of range: the message reports both the requested index and the vector size.
129  throw std::runtime_error("Specified device number " + std::to_string(device_num) +
130  " is out of range of number of devices (" +
131  std::to_string(device_properties_.size()) + ")");
132  }
// True when at least one device is reported and the FIRST device's
// computeMajor equals 5; other devices are not examined.
// NOTE(review): the guard uses getDeviceCount() rather than
// device_properties_.size() before indexing element 0 -- confirm the two
// always agree.
133  inline bool isArchMaxwell() const {
134  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 5);
135  }
// True when at least one device is reported and the FIRST device's
// computeMajor is 5 or greater; other devices are not examined.
136  inline bool isArchMaxwellOrLater() const {
137  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 5);
138  }
// True when at least one device is reported and the FIRST device's
// computeMajor equals 6; other devices are not examined.
139  inline bool isArchPascal() const {
140  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 6);
141  }
// True when at least one device is reported and the FIRST device's
// computeMajor is 6 or greater; other devices are not examined.
142  inline bool isArchPascalOrLater() const {
143  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 6);
144  }
// Declarations only. Judging by the "ForAll" suffix these presumably apply the
// architecture test to every managed device rather than just the first one --
// TODO confirm against the definitions.
145  bool isArchMaxwellOrLaterForAll() const;
146  bool isArchVoltaOrGreaterForAll() const;
147 
// Maps an NvidiaDeviceArch value to an "sm_NN" target string.
// NOTE(review): this listing omits the `case` label lines of the switch (the
// embedded line numbers skip 151, 153, 155, ...), so which enumerator selects
// which return is not visible here. If the cases follow the enum's declaration
// order, the last two (Turing and Ampere) would both yield "sm_75" -- confirm
// that this is intentional.
148  static std::string deviceArchToSM(const NvidiaDeviceArch arch) {
149  // Must match ${CUDA_COMPILATION_ARCH} CMAKE flag
150  switch (arch) {
152  return "sm_35";
154  return "sm_50";
156  return "sm_60";
158  return "sm_70";
160  return "sm_75";
162  return "sm_75";
// Any value without its own case: log a warning and use the same string as
// the first return above.
163  default:
164  LOG(WARNING) << "Unrecognized Nvidia device architecture, falling back to "
165  "Kepler-compatibility.";
166  return "sm_35";
167  }
// Every visible path through the switch returns, so the two statements below
// are not expected to execute.
168  UNREACHABLE();
169  return "";
170  }
171 
173  if (device_properties_.size() > 0) {
174  const auto& device_properties = device_properties_.front();
175  switch (device_properties.computeMajor) {
176  case 3:
178  case 5:
180  case 6:
182  case 7:
183  if (device_properties.computeMinor == 0) {
185  } else {
187  }
188  case 8:
190  default:
192  }
193  } else {
194  // always fallback to Kepler if an architecture cannot be detected
196  }
197  }
198 
// Declaration only. Presumably makes the context of device `device_num`
// current for the calling thread -- TODO confirm against the definition.
199  void setContext(const int device_num) const;
200 
201 #ifdef HAVE_CUDA
202 
// Declaration only; defined out of line.
203  void printDeviceProperties() const;
204 
// Returns a const reference to the device_contexts_ vector (no copy). The
// reference is only valid while this CudaMgr object is alive.
205  const std::vector<CUcontext>& getDeviceContexts() const { return device_contexts_; }
// Returns the gpu_driver_version_ member by value.
// The return type is plain `int`: a top-level `const` on a by-value scalar
// return has no effect on callers and only triggers -Wignored-qualifiers.
206  int getGpuDriverVersion() const { return gpu_driver_version_; }
207 
// Declarations only. loadGpuModuleData takes an output CUmodule pointer, an
// image pointer, a JIT option count with parallel option / option-value
// arrays, and a device id.
// NOTE(review): the parameter list resembles the CUDA driver call
// cuModuleLoadDataEx with a device id added; presumably it forwards to that
// call on the given device -- confirm against the definition.
208  void loadGpuModuleData(CUmodule* module,
209  const void* image,
210  unsigned int num_options,
211  CUjit_option* options,
212  void** option_values,
213  const int device_id) const;
// Counterpart of loadGpuModuleData for the same (module, device_id) pair.
214  void unloadGpuModuleData(CUmodule* module, const int device_id) const;
215 
// Plain value pair describing GPU memory on the active card, both in bytes
// (see the per-field comments).
216  struct CudaMemoryUsage {
217  size_t free; // available GPU RAM memory on active card in bytes
218  size_t total; // total GPU RAM memory on active card in bytes
219  };
220 
// Static, declaration only: returns a CudaMemoryUsage by value without needing
// a CudaMgr instance. Which device counts as the "active card" is decided in
// the out-of-line definition -- TODO confirm.
221  static CudaMemoryUsage getCudaMemoryUsage();
222 #endif
223 
224  private:
225 #ifdef HAVE_CUDA
// Private helpers, declared only (definitions are out of line).
// NOTE(review): judging by the names, the first three presumably populate
// device_properties_, device_group_ and device_contexts_ respectively --
// confirm against the definitions.
226  void fillDeviceProperties();
227  void initDeviceGroup();
228  void createDeviceContexts();
// Presumably compute the values behind the getMin*ForAllDevices() getters --
// TODO confirm.
229  size_t computeMinSharedMemoryPerBlockForAllDevices() const;
230  size_t computeMinNumMPsForAllDevices() const;
// Takes a CUresult; presumably raises on a non-success status -- TODO confirm.
231  void checkError(CUresult cu_result) const;
232 
// Value handed back by getGpuDriverVersion().
233  int gpu_driver_version_;
234 #endif
235 
// Per-device property records; getDeviceProperties() indexes this vector with
// a device number relative to start_gpu_.
240  std::vector<DeviceProperties> device_properties_;
// Contexts exposed read-only through getDeviceContexts().
242  std::vector<CUcontext> device_contexts_;
243 
// `mutable` allows const member functions to lock this mutex; which functions
// take it is not visible in this header.
244  mutable std::mutex device_cleanup_mutex_;
245 };
246 
247 } // Namespace CudaMgr_Namespace
bool isArchMaxwellOrLater() const
Definition: CudaMgr.h:136
const DeviceProperties * getDeviceProperties(const size_t device_num) const
Definition: CudaMgr.h:123
size_t min_num_mps_for_all_devices
Definition: CudaMgr.h:239
int CUjit_option
Definition: nocuda.h:25
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:244
#define LOG(tag)
Definition: Logger.h:188
#define UNREACHABLE()
Definition: Logger.h:241
size_t min_shared_memory_per_block_for_all_devices
Definition: CudaMgr.h:238
NvidiaDeviceArch getDeviceArch() const
Definition: CudaMgr.h:172
int getDeviceCount() const
Definition: CudaMgr.h:86
std::string to_string(char const *&&v)
const omnisci::DeviceGroup & getDeviceGroup() const
Definition: CudaMgr.h:88
bool isArchMaxwell() const
Definition: CudaMgr.h:133
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:242
omnisci::DeviceGroup device_group_
Definition: CudaMgr.h:241
std::string errorMessage(CUresult const status)
Definition: CudaMgr.cpp:40
bool isArchPascalOrLater() const
Definition: CudaMgr.h:142
static std::string deviceArchToSM(const NvidiaDeviceArch arch)
Definition: CudaMgr.h:148
int getStartGpu() const
Definition: CudaMgr.h:87
int CUresult
Definition: nocuda.h:21
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:240
int CUdevice
Definition: nocuda.h:20
size_t getMinSharedMemoryPerBlockForAllDevices() const
Definition: CudaMgr.h:114
size_t getMinNumMPsForAllDevices() const
Definition: CudaMgr.h:118
std::vector< DeviceIdentifier > DeviceGroup
Definition: DeviceGroup.h:15
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:120
void * CUmodule
Definition: nocuda.h:23
bool isArchPascal() const
Definition: CudaMgr.h:139