OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CudaMgr_Namespace::CudaMgr Class Reference

#include <CudaMgr.h>

Public Member Functions

 CudaMgr (const int num_gpus, const int start_gpu=0)
 
 ~CudaMgr ()
 
void synchronizeDevices () const
 
int getDeviceCount () const
 
int getStartGpu () const
 
const omnisci::DeviceGroup & getDeviceGroup () const
 
void copyHostToDevice (int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num)
 
void copyDeviceToHost (int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num)
 
void copyDeviceToDevice (int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num)
 
int8_t * allocatePinnedHostMem (const size_t num_bytes)
 
int8_t * allocateDeviceMem (const size_t num_bytes, const int device_num)
 
void freePinnedHostMem (int8_t *host_ptr)
 
void freeDeviceMem (int8_t *device_ptr)
 
void zeroDeviceMem (int8_t *device_ptr, const size_t num_bytes, const int device_num)
 
void setDeviceMem (int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
 
size_t getMaxSharedMemoryForAll () const
 
const std::vector< DeviceProperties > & getAllDeviceProperties () const
 
const DeviceProperties * getDeviceProperties (const size_t device_num) const
 
bool isArchMaxwell () const
 
bool isArchMaxwellOrLater () const
 
bool isArchPascal () const
 
bool isArchPascalOrLater () const
 
bool isArchMaxwellOrLaterForAll () const
 
bool isArchVoltaForAll () const
 
void setContext (const int device_num) const
 

Private Attributes

int device_count_
 
int gpu_driver_version_
 
int start_gpu_
 
size_t max_shared_memory_for_all_
 
std::vector< DeviceProperties > device_properties_
 
omnisci::DeviceGroup device_group_
 
std::vector< CUcontext > device_contexts_
 
std::mutex device_cleanup_mutex_
 

Detailed Description

Definition at line 87 of file CudaMgr.h.

Constructor & Destructor Documentation

CudaMgr_Namespace::CudaMgr::CudaMgr ( const int  num_gpus,
const int  start_gpu = 0 
)

Definition at line 28 of file CudaMgr.cpp.

References CHECK_EQ, CHECK_LE, device_count_, and start_gpu_.

29  : start_gpu_(start_gpu), max_shared_memory_for_all_(0) {
30  checkError(cuInit(0));
31  checkError(cuDeviceGetCount(&device_count_));
32 
33  if (num_gpus > 0) { // numGpus <= 0 will just use number of gpus found
34  CHECK_LE(num_gpus + start_gpu_, device_count_);
35  device_count_ = std::min(device_count_, num_gpus);
36  } else {
37  // if we are using all gpus we cannot start on a gpu other than 0
38  CHECK_EQ(start_gpu_, 0);
39  }
40  fillDeviceProperties();
41  initDeviceGroup();
42  createDeviceContexts();
43  printDeviceProperties();
44 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
#define CHECK_LE(x, y)
Definition: Logger.h:201
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:180
CudaMgr_Namespace::CudaMgr::~CudaMgr ( )

Definition at line 53 of file CudaMgr.cpp.

References device_cleanup_mutex_, device_contexts_, device_count_, logger::ERROR, LOG, and synchronizeDevices().

53  {
54  try {
55  // We don't want to remove the cudaMgr before all other processes have cleaned up.
56  // This should be enforced by the lifetime policies, but take this lock to be safe.
57  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
58 
59  synchronizeDevices();
60  for (int d = 0; d < device_count_; ++d) {
61  checkError(cuCtxDestroy(device_contexts_[d]));
62  }
63  } catch (const CudaErrorException& e) {
64  if (e.getStatus() == CUDA_ERROR_DEINITIALIZED) {
65  // TODO(adb / asuhan): Verify cuModuleUnload removes the context
66  return;
67  }
68  LOG(ERROR) << "CUDA Error: " << e.what();
69  } catch (const std::runtime_error& e) {
70  LOG(ERROR) << "CUDA Error: " << e.what();
71  }
72 }
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:185
#define LOG(tag)
Definition: Logger.h:185
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:183
void synchronizeDevices() const
Definition: CudaMgr.cpp:74

+ Here is the call graph for this function:

Member Function Documentation

int8_t * CudaMgr_Namespace::CudaMgr::allocateDeviceMem ( const size_t  num_bytes,
const int  device_num 
)

Definition at line 217 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBufferMgr::addSlab().

217  {
218  setContext(device_num);
219  CUdeviceptr device_ptr;
220  checkError(cuMemAlloc(&device_ptr, num_bytes));
221  return reinterpret_cast<int8_t*>(device_ptr);
222 }
unsigned long long CUdeviceptr
Definition: nocuda.h:27
void setContext(const int device_num) const
Definition: CudaMgr.cpp:311

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int8_t * CudaMgr_Namespace::CudaMgr::allocatePinnedHostMem ( const size_t  num_bytes)

Definition at line 210 of file CudaMgr.cpp.

References setContext().

210  {
211  setContext(0);
212  void* host_ptr;
213  checkError(cuMemHostAlloc(&host_ptr, num_bytes, CU_MEMHOSTALLOC_PORTABLE));
214  return reinterpret_cast<int8_t*>(host_ptr);
215 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:311

+ Here is the call graph for this function:

void CudaMgr_Namespace::CudaMgr::copyDeviceToDevice ( int8_t *  dest_ptr,
int8_t *  src_ptr,
const size_t  num_bytes,
const int  dest_device_num,
const int  src_device_num 
)

Definition at line 99 of file CudaMgr.cpp.

References device_contexts_, and setContext().

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().

103  {
104  // dest_device_num and src_device_num are the device numbers relative to start_gpu_
105  // (real_device_num - start_gpu_)
106  if (src_device_num == dest_device_num) {
107  setContext(src_device_num);
108  checkError(cuMemcpy(reinterpret_cast<CUdeviceptr>(dest_ptr),
109  reinterpret_cast<CUdeviceptr>(src_ptr),
110  num_bytes));
111  } else {
112  checkError(cuMemcpyPeer(reinterpret_cast<CUdeviceptr>(dest_ptr),
113  device_contexts_[dest_device_num],
114  reinterpret_cast<CUdeviceptr>(src_ptr),
115  device_contexts_[src_device_num],
116  num_bytes)); // will we always have peer?
117  }
118 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:311
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:183

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::copyDeviceToHost ( int8_t *  host_ptr,
const int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 90 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::CpuBuffer::writeData().

93  {
94  setContext(device_num);
95  checkError(
96  cuMemcpyDtoH(host_ptr, reinterpret_cast<const CUdeviceptr>(device_ptr), num_bytes));
97 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:311

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::copyHostToDevice ( int8_t *  device_ptr,
const int8_t *  host_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 81 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::CpuBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().

84  {
85  setContext(device_num);
86  checkError(
87  cuMemcpyHtoD(reinterpret_cast<CUdeviceptr>(device_ptr), host_ptr, num_bytes));
88 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:311

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::freeDeviceMem ( int8_t *  device_ptr)

Definition at line 228 of file CudaMgr.cpp.

References device_cleanup_mutex_.

Referenced by Buffer_Namespace::GpuCudaBufferMgr::freeAllMem().

228  {
229  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
230 
231  checkError(cuMemFree(reinterpret_cast<CUdeviceptr>(device_ptr)));
232 }
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:185

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::freePinnedHostMem ( int8_t *  host_ptr)

Definition at line 224 of file CudaMgr.cpp.

224  {
225  checkError(cuMemFreeHost(reinterpret_cast<void*>(host_ptr)));
226 }
const std::vector<DeviceProperties>& CudaMgr_Namespace::CudaMgr::getAllDeviceProperties ( ) const
inline

Definition at line 123 of file CudaMgr.h.

References device_properties_.

123  {
124  return device_properties_;
125  }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:181
int CudaMgr_Namespace::CudaMgr::getDeviceCount ( ) const
inline

Definition at line 93 of file CudaMgr.h.

References device_count_.

Referenced by get_available_gpus(), isArchMaxwell(), isArchMaxwellOrLater(), isArchPascal(), and isArchPascalOrLater().

93 { return device_count_; }

+ Here is the caller graph for this function:

const omnisci::DeviceGroup& CudaMgr_Namespace::CudaMgr::getDeviceGroup ( ) const
inline

Definition at line 95 of file CudaMgr.h.

References device_group_.

95 { return device_group_; }
omnisci::DeviceGroup device_group_
Definition: CudaMgr.h:182
const DeviceProperties* CudaMgr_Namespace::CudaMgr::getDeviceProperties ( const size_t  device_num) const
inline

Definition at line 126 of file CudaMgr.h.

References device_properties_, and to_string().

126  {
127  // device_num is the device number relative to start_gpu_ (real_device_num -
128  // start_gpu_)
129  if (device_num < device_properties_.size()) {
130  return &device_properties_[device_num];
131  }
132  throw std::runtime_error("Specified device number " + std::to_string(device_num) +
133  " is out of range of number of devices (" +
134  std::to_string(device_properties_.size()) + ")");
135  }
std::string to_string(char const *&&v)
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:181

+ Here is the call graph for this function:

size_t CudaMgr_Namespace::CudaMgr::getMaxSharedMemoryForAll ( ) const
inline

Definition at line 121 of file CudaMgr.h.

References max_shared_memory_for_all_.

121 { return max_shared_memory_for_all_; }
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:180
int CudaMgr_Namespace::CudaMgr::getStartGpu ( ) const
inline

Definition at line 94 of file CudaMgr.h.

References start_gpu_.

94 { return start_gpu_; }
bool CudaMgr_Namespace::CudaMgr::isArchMaxwell ( ) const
inline

Definition at line 136 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

136  {
137  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 5);
138  }
int getDeviceCount() const
Definition: CudaMgr.h:93
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:181

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLater ( ) const
inline

Definition at line 139 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

139  {
140  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 5);
141  }
int getDeviceCount() const
Definition: CudaMgr.h:93
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:181

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll ( ) const

Returns true if all devices have the Maxwell micro-architecture or later. Returns false if any device has a compute capability below 5.0.

Definition at line 252 of file CudaMgr.cpp.

References device_count_, and device_properties_.

252  {
253  for (int i = 0; i < device_count_; i++) {
254  if (device_properties_[i].computeMajor < 5) {
255  return false;
256  }
257  }
258  return true;
259 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:181
bool CudaMgr_Namespace::CudaMgr::isArchPascal ( ) const
inline

Definition at line 142 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

142  {
143  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 6);
144  }
int getDeviceCount() const
Definition: CudaMgr.h:93
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:181

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchPascalOrLater ( ) const
inline

Definition at line 145 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

145  {
146  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 6);
147  }
int getDeviceCount() const
Definition: CudaMgr.h:93
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:181

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchVoltaForAll ( ) const

Returns true if all devices have the Volta micro-architecture. Returns false if any non-Volta device is present.

Definition at line 265 of file CudaMgr.cpp.

References device_count_, and device_properties_.

265  {
266  for (int i = 0; i < device_count_; i++) {
267  if (device_properties_[i].computeMajor != 7) {
268  return false;
269  }
270  }
271  return true;
272 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:181
void CudaMgr_Namespace::CudaMgr::setContext ( const int  device_num) const

Definition at line 311 of file CudaMgr.cpp.

References CHECK_LT, and device_contexts_.

Referenced by allocateDeviceMem(), allocatePinnedHostMem(), copyDeviceToDevice(), copyDeviceToHost(), copyHostToDevice(), setDeviceMem(), and synchronizeDevices().

311  {
312  // deviceNum is the device number relative to startGpu (realDeviceNum - startGpu_)
313  CHECK_LT(device_num, device_count_);
314  cuCtxSetCurrent(device_contexts_[device_num]);
315 }
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:183
#define CHECK_LT(x, y)
Definition: Logger.h:200

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::setDeviceMem ( int8_t *  device_ptr,
const unsigned char  uc,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 240 of file CudaMgr.cpp.

References setContext().

Referenced by zeroDeviceMem().

243  {
244  setContext(device_num);
245  checkError(cuMemsetD8(reinterpret_cast<CUdeviceptr>(device_ptr), uc, num_bytes));
246 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:311

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::synchronizeDevices ( ) const

Definition at line 74 of file CudaMgr.cpp.

References device_count_, and setContext().

Referenced by ~CudaMgr(), and Buffer_Namespace::GpuCudaBufferMgr::~GpuCudaBufferMgr().

74  {
75  for (int d = 0; d < device_count_; ++d) {
76  setContext(d);
77  checkError(cuCtxSynchronize());
78  }
79 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:311

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::zeroDeviceMem ( int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 234 of file CudaMgr.cpp.

References setDeviceMem().

236  {
237  setDeviceMem(device_ptr, 0, num_bytes, device_num);
238 }
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:240

+ Here is the call graph for this function:

Member Data Documentation

std::mutex CudaMgr_Namespace::CudaMgr::device_cleanup_mutex_
mutableprivate

Definition at line 185 of file CudaMgr.h.

Referenced by freeDeviceMem(), and ~CudaMgr().

std::vector<CUcontext> CudaMgr_Namespace::CudaMgr::device_contexts_
private

Definition at line 183 of file CudaMgr.h.

Referenced by copyDeviceToDevice(), setContext(), and ~CudaMgr().

int CudaMgr_Namespace::CudaMgr::device_count_
private
omnisci::DeviceGroup CudaMgr_Namespace::CudaMgr::device_group_
private

Definition at line 182 of file CudaMgr.h.

Referenced by getDeviceGroup().

std::vector<DeviceProperties> CudaMgr_Namespace::CudaMgr::device_properties_
private
int CudaMgr_Namespace::CudaMgr::gpu_driver_version_
private

Definition at line 178 of file CudaMgr.h.

size_t CudaMgr_Namespace::CudaMgr::max_shared_memory_for_all_
private

Definition at line 180 of file CudaMgr.h.

Referenced by getMaxSharedMemoryForAll().

int CudaMgr_Namespace::CudaMgr::start_gpu_
private

Definition at line 179 of file CudaMgr.h.

Referenced by CudaMgr(), and getStartGpu().


The documentation for this class was generated from the following files: