OmniSciDB  b24e664e58
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CudaMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cstdlib>
19 #include <mutex>
20 #include <string>
21 #include <vector>
22 
23 #include "Shared/uuid.h"
24 
25 #ifdef HAVE_CUDA
26 #include <cuda.h>
27 #else
28 #include "Shared/nocuda.h"
29 #endif // HAVE_CUDA
30 
31 namespace omnisci {
33  const int index;
34  const int cuda_id;
35  const UUID uuid;
36 };
37 
38 using DeviceGroup = std::vector<DeviceIdentifier>;
39 } // namespace omnisci
40 
41 namespace CudaMgr_Namespace {
42 
43 #ifdef HAVE_CUDA
44 class CudaErrorException : public std::runtime_error {
45  public:
46  CudaErrorException(CUresult status)
47  : std::runtime_error(processStatus(status)), status_(status) {}
48 
49  CUresult getStatus() const { return status_; }
50 
51  private:
52  CUresult status_;
53  std::string processStatus(CUresult status) {
54  const char* errorString{nullptr};
55  cuGetErrorString(status, &errorString);
56  return errorString
57  ? "CUDA Error: " + std::string(errorString)
58  : std::string("CUDA Driver API error code ") + std::to_string(status);
59  }
60 };
61 #endif
62 
// NOTE(review): this is only a fragment of a struct. The generated listing dropped the
// opening declaration line and several fields (the embedded line numbers jump, e.g.
// 68 -> 72, 73 -> 77, 77 -> 80). It is presumably `struct DeviceProperties`: CudaMgr
// below stores a std::vector<DeviceProperties> and reads a `computeMajor` field that is
// not shown here. Confirm against the original header before relying on this fragment.
68  size_t globalMem;
72  int numMPs;
73  int warpSize;
77  int pciBusId;
80  int memoryBusWidth; // in bits
82  int clockKhz;
83  int numCore;
84  std::string arch;
85 };
86 
87 class CudaMgr {
88  public:
89  CudaMgr(const int num_gpus, const int start_gpu = 0);
90  ~CudaMgr();
91 
92  void synchronizeDevices() const;
93  int getDeviceCount() const { return device_count_; }
94  int getStartGpu() const { return start_gpu_; }
96 
97  void copyHostToDevice(int8_t* device_ptr,
98  const int8_t* host_ptr,
99  const size_t num_bytes,
100  const int device_num);
101  void copyDeviceToHost(int8_t* host_ptr,
102  const int8_t* device_ptr,
103  const size_t num_bytes,
104  const int device_num);
105  void copyDeviceToDevice(int8_t* dest_ptr,
106  int8_t* src_ptr,
107  const size_t num_bytes,
108  const int dest_device_num,
109  const int src_device_num);
110 
111  int8_t* allocatePinnedHostMem(const size_t num_bytes);
112  int8_t* allocateDeviceMem(const size_t num_bytes, const int device_num);
113  void freePinnedHostMem(int8_t* host_ptr);
114  void freeDeviceMem(int8_t* device_ptr);
115  void zeroDeviceMem(int8_t* device_ptr, const size_t num_bytes, const int device_num);
116  void setDeviceMem(int8_t* device_ptr,
117  const unsigned char uc,
118  const size_t num_bytes,
119  const int device_num);
120 
122 
123  const std::vector<DeviceProperties>& getAllDeviceProperties() const {
124  return device_properties_;
125  }
126  const DeviceProperties* getDeviceProperties(const size_t device_num) const {
127  // device_num is the device number relative to start_gpu_ (real_device_num -
128  // start_gpu_)
129  if (device_num < device_properties_.size()) {
130  return &device_properties_[device_num];
131  }
132  throw std::runtime_error("Specified device number " + std::to_string(device_num) +
133  " is out of range of number of devices (" +
134  std::to_string(device_properties_.size()) + ")");
135  }
136  inline bool isArchMaxwell() const {
137  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 5);
138  }
139  inline bool isArchMaxwellOrLater() const {
140  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 5);
141  }
142  inline bool isArchPascal() const {
143  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 6);
144  }
145  inline bool isArchPascalOrLater() const {
146  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 6);
147  }
148  bool isArchMaxwellOrLaterForAll() const;
149  bool isArchVoltaForAll() const;
150 
151  void setContext(const int device_num) const;
152 
153 #ifdef HAVE_CUDA
154  void printDeviceProperties() const;
155 
156  const std::vector<CUcontext>& getDeviceContexts() const { return device_contexts_; }
157  const int getGpuDriverVersion() const { return gpu_driver_version_; }
158 
159  void loadGpuModuleData(CUmodule* module,
160  const void* image,
161  unsigned int num_options,
162  CUjit_option* options,
163  void** option_values,
164  const int device_id) const;
165  void unloadGpuModuleData(CUmodule* module, const int device_id) const;
166 #endif
167 
168  private:
169 #ifdef HAVE_CUDA
170  void fillDeviceProperties();
171  void initDeviceGroup();
172  void createDeviceContexts();
173  size_t computeMaxSharedMemoryForAll() const;
174  void checkError(CUresult cu_result) const;
175 #endif
176 
181  std::vector<DeviceProperties> device_properties_;
183  std::vector<CUcontext> device_contexts_;
184 
185  mutable std::mutex device_cleanup_mutex_;
186 };
187 
188 } // Namespace CudaMgr_Namespace
std::unique_ptr< llvm::Module > module(runtime_module_shallow_copy(cgen_state))
int CUjit_option
Definition: nocuda.h:25
const omnisci::DeviceGroup & getDeviceGroup() const
Definition: CudaMgr.h:95
void copyDeviceToHost(int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:90
size_t getMaxSharedMemoryForAll() const
Definition: CudaMgr.h:121
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:185
int8_t * allocatePinnedHostMem(const size_t num_bytes)
Definition: CudaMgr.cpp:210
void setContext(const int device_num) const
Definition: CudaMgr.cpp:311
No-frills UUID type class to allow easy containerization and comparison of device UUIDs from differen...
bool isArchPascalOrLater() const
Definition: CudaMgr.h:145
void copyDeviceToDevice(int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num)
Definition: CudaMgr.cpp:99
int getStartGpu() const
Definition: CudaMgr.h:94
std::string to_string(char const *&&v)
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:183
void freeDeviceMem(int8_t *device_ptr)
Definition: CudaMgr.cpp:228
omnisci::DeviceGroup device_group_
Definition: CudaMgr.h:182
int getDeviceCount() const
Definition: CudaMgr.h:93
const UUID uuid
UUID for device (hardware invariant)
Definition: CudaMgr.h:35
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:240
void copyHostToDevice(int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:81
bool isArchMaxwellOrLaterForAll() const
Definition: CudaMgr.cpp:252
int CUresult
Definition: nocuda.h:21
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:181
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:180
int CUdevice
Definition: nocuda.h:20
bool isArchVoltaForAll() const
Definition: CudaMgr.cpp:265
void freePinnedHostMem(int8_t *host_ptr)
Definition: CudaMgr.cpp:224
void synchronizeDevices() const
Definition: CudaMgr.cpp:74
const DeviceProperties * getDeviceProperties(const size_t device_num) const
Definition: CudaMgr.h:126
const int index
index into device group (currently num_gpus - start_gpu)
Definition: CudaMgr.h:33
bool isArchMaxwell() const
Definition: CudaMgr.h:136
bool isArchPascal() const
Definition: CudaMgr.h:142
CudaMgr(const int num_gpus, const int start_gpu=0)
Definition: CudaMgr.cpp:28
std::vector< DeviceIdentifier > DeviceGroup
Definition: CudaMgr.h:38
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:123
void zeroDeviceMem(int8_t *device_ptr, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:234
bool isArchMaxwellOrLater() const
Definition: CudaMgr.h:139
void * CUmodule
Definition: nocuda.h:23
int8_t * allocateDeviceMem(const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:217
const int cuda_id
Cuda ID for device (ignores start_gpu)
Definition: CudaMgr.h:34