OmniSciDB  c07336695a
CudaMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef CUDAMGR_H
18 #define CUDAMGR_H
19 
20 #include <cstdlib>
21 #include <mutex>
22 #include <string>
23 #include <vector>
24 #ifdef HAVE_CUDA
25 #include <cuda.h>
26 #else
27 #include "../Shared/nocuda.h"
28 #endif // HAVE_CUDA
29 
30 namespace CudaMgr_Namespace {
31 
32 #ifdef HAVE_CUDA
33 class CudaErrorException : public std::runtime_error {
34  public:
35  CudaErrorException(CUresult status)
36  : std::runtime_error(processStatus(status)), status_(status) {}
37 
38  CUresult getStatus() const { return status_; }
39 
40  private:
41  CUresult status_;
42  std::string processStatus(CUresult status) {
43  const char* errorString{nullptr};
44  cuGetErrorString(status, &errorString);
45  return errorString
46  ? "CUDA Error: " + std::string(errorString)
47  : std::string("CUDA Driver API error code ") + std::to_string(status);
48  }
49 };
50 #endif
51 
56  size_t globalMem;
60  int numMPs;
61  int warpSize;
65  int pciBusId;
68  int memoryBusWidth; // in bits
70  int clockKhz;
71  int numCore;
72  std::string arch;
73 };
74 
75 class CudaMgr {
76  public:
77  CudaMgr(const int num_gpus, const int start_gpu = 0);
78  ~CudaMgr();
79 
80  void synchronizeDevices() const;
81  int getDeviceCount() const { return device_count_; }
82 
// Copy declarations (bodies are not in this header). Each takes the
// destination pointer first, then the source pointer, a byte count and the
// device number(s) involved.
// NOTE(review): whether device numbers are absolute or relative to
// start_gpu_ is not visible here - confirm against the definitions.
void copyHostToDevice(int8_t* device_ptr, const int8_t* host_ptr,
                      const size_t num_bytes, const int device_num);

void copyDeviceToHost(int8_t* host_ptr, const int8_t* device_ptr,
                      const size_t num_bytes, const int device_num);

// NOTE(review): src_ptr is non-const here, unlike the source pointers of the
// two declarations above.
void copyDeviceToDevice(int8_t* dest_ptr, int8_t* src_ptr, const size_t num_bytes,
                        const int dest_device_num, const int src_device_num);
96 
// Allocation / release declarations (bodies are not in this header).
// NOTE(review): presumably each free* call releases memory obtained from the
// matching allocate* call - confirm against the definitions.
int8_t* allocatePinnedHostMem(const size_t num_bytes);
void freePinnedHostMem(int8_t* host_ptr);

int8_t* allocateDeviceMem(const size_t num_bytes, const int device_num);
void freeDeviceMem(int8_t* device_ptr);

// Fill declarations.
// NOTE(review): presumably setDeviceMem writes the byte `uc` to num_bytes
// bytes at device_ptr and zeroDeviceMem does the same with 0 - confirm.
void zeroDeviceMem(int8_t* device_ptr, const size_t num_bytes, const int device_num);
void setDeviceMem(int8_t* device_ptr, const unsigned char uc, const size_t num_bytes,
                  const int device_num);
106 
107  int getStartGpu() const { return start_gpu_; }
108  size_t getMaxSharedMemoryForAll() const { return max_shared_memory_for_all_; }
109 
110  const std::vector<DeviceProperties>& getAllDeviceProperties() const {
111  return device_properties_;
112  }
113  const DeviceProperties* getDeviceProperties(const size_t device_num) const {
114  // device_num is the device number relative to start_gpu_ (real_device_num -
115  // start_gpu_)
116  if (device_num < device_properties_.size()) {
117  return &device_properties_[device_num];
118  }
119  throw std::runtime_error("Specified device number " + std::to_string(device_num) +
120  " is out of range of number of devices (" +
121  std::to_string(device_properties_.size()) + ")");
122  }
123  inline bool isArchMaxwell() const {
124  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 5);
125  }
126  inline bool isArchMaxwellOrLater() const {
127  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 5);
128  }
129  inline bool isArchPascal() const {
130  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 6);
131  }
132  inline bool isArchPascalOrLater() const {
133  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 6);
134  }
135  bool isArchMaxwellOrLaterForAll() const;
136  bool isArchVoltaForAll() const;
137 
138  void setContext(const int device_num) const;
139 
140 #ifdef HAVE_CUDA
141  void printDeviceProperties() const;
142 
143  const std::vector<CUcontext>& getDeviceContexts() const { return device_contexts_; }
144  const int getGpuDriverVersion() const { return gpu_driver_version_; }
145 
146  void loadGpuModuleData(CUmodule* module,
147  const void* image,
148  unsigned int num_options,
149  CUjit_option* options,
150  void** option_values,
151  const int device_id) const;
152  void unloadGpuModuleData(CUmodule* module, const int device_id) const;
153 #endif
154 
155  private:
156 #ifdef HAVE_CUDA
157  void fillDeviceProperties();
158  void createDeviceContexts();
159  size_t computeMaxSharedMemoryForAll() const;
160  void checkError(CUresult cu_result) const;
161 #endif
162 
167  std::vector<DeviceProperties> device_properties_;
168  std::vector<CUcontext> device_contexts_;
169 
170  mutable std::mutex device_cleanup_mutex_;
171 };
172 
173 } // Namespace CudaMgr_Namespace
174 
175 #endif // CUDAMGR_H
bool isArchMaxwellOrLater() const
Definition: CudaMgr.h:126
const DeviceProperties * getDeviceProperties(const size_t device_num) const
Definition: CudaMgr.h:113
int CUjit_option
Definition: nocuda.h:25
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:170
int getDeviceCount() const
Definition: CudaMgr.h:81
std::string to_string(char const *&&v)
bool isArchMaxwell() const
Definition: CudaMgr.h:123
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:168
bool isArchPascalOrLater() const
Definition: CudaMgr.h:132
size_t getMaxSharedMemoryForAll() const
Definition: CudaMgr.h:108
int getStartGpu() const
Definition: CudaMgr.h:107
int CUresult
Definition: nocuda.h:21
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:167
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:166
int CUdevice
Definition: nocuda.h:20
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:110
void * CUmodule
Definition: nocuda.h:23
bool isArchPascal() const
Definition: CudaMgr.h:129