OmniSciDB  c07336695a
CudaMgr_Namespace::CudaMgr Class Reference

#include <CudaMgr.h>

Public Member Functions

 CudaMgr (const int num_gpus, const int start_gpu=0)
 
 ~CudaMgr ()
 
void synchronizeDevices () const
 
int getDeviceCount () const
 
void copyHostToDevice (int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num)
 
void copyDeviceToHost (int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num)
 
void copyDeviceToDevice (int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num)
 
int8_t * allocatePinnedHostMem (const size_t num_bytes)
 
int8_t * allocateDeviceMem (const size_t num_bytes, const int device_num)
 
void freePinnedHostMem (int8_t *host_ptr)
 
void freeDeviceMem (int8_t *device_ptr)
 
void zeroDeviceMem (int8_t *device_ptr, const size_t num_bytes, const int device_num)
 
void setDeviceMem (int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
 
int getStartGpu () const
 
size_t getMaxSharedMemoryForAll () const
 
const std::vector< DeviceProperties > & getAllDeviceProperties () const
 
const DeviceProperties * getDeviceProperties (const size_t device_num) const
 
bool isArchMaxwell () const
 
bool isArchMaxwellOrLater () const
 
bool isArchPascal () const
 
bool isArchPascalOrLater () const
 
bool isArchMaxwellOrLaterForAll () const
 
bool isArchVoltaForAll () const
 
void setContext (const int device_num) const
 

Private Attributes

int device_count_
 
int gpu_driver_version_
 
int start_gpu_
 
size_t max_shared_memory_for_all_
 
std::vector< DeviceProperties > device_properties_
 
std::vector< CUcontext > device_contexts_
 
std::mutex device_cleanup_mutex_
 

Detailed Description

Definition at line 75 of file CudaMgr.h.

Constructor & Destructor Documentation

◆ CudaMgr()

CudaMgr_Namespace::CudaMgr::CudaMgr ( const int  num_gpus,
const int  start_gpu = 0 
)

Definition at line 26 of file CudaMgr.cpp.

References CHECK_EQ, CHECK_LE, device_count_, and start_gpu_.

27  : start_gpu_(start_gpu), max_shared_memory_for_all_(0) {
28  checkError(cuInit(0));
29  checkError(cuDeviceGetCount(&device_count_));
30 
31  if (num_gpus > 0) { // numGpus <= 0 will just use number of gpus found
32  CHECK_LE(num_gpus + start_gpu_, device_count_);
33  device_count_ = std::min(device_count_, num_gpus);
34  } else {
35  // if we are using all gpus we cannot start on a gpu other than 0
36  CHECK_EQ(start_gpu_, 0);
37  }
38  fillDeviceProperties();
39  createDeviceContexts();
40  printDeviceProperties();
41 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
#define CHECK_LE(x, y)
Definition: Logger.h:198
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:166
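
A minimal construction sketch (not part of the generated reference; the GPU counts below are illustrative assumptions). Passing num_gpus <= 0 manages every device found, in which case start_gpu must be 0; otherwise num_gpus devices starting at start_gpu are managed and device numbers used elsewhere in this class are relative to start_gpu.

#include <CudaMgr.h>

int main() {
  // Manage every GPU found on the machine (num_gpus <= 0 means "use all found").
  CudaMgr_Namespace::CudaMgr cuda_mgr(0);

  // Alternative: manage two GPUs starting at physical device 1; device numbers
  // passed to the other member functions are then relative to start_gpu = 1.
  //   CudaMgr_Namespace::CudaMgr cuda_mgr(2, /*start_gpu=*/1);
  return 0;
}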

◆ ~CudaMgr()

CudaMgr_Namespace::CudaMgr::~CudaMgr ( )

Definition at line 43 of file CudaMgr.cpp.

References anonymous_namespace{ImportTest.cpp}::d(), device_cleanup_mutex_, device_contexts_, device_count_, logger::ERROR, LOG, and synchronizeDevices().

43  {
44  try {
45  // We don't want to remove the cudaMgr before all other processes have cleaned up.
46  // This should be enforced by the lifetime policies, but take this lock to be safe.
47  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
48 
49  synchronizeDevices();
50  for (int d = 0; d < device_count_; ++d) {
51  checkError(cuCtxDestroy(device_contexts_[d]));
52  }
53  } catch (const CudaErrorException& e) {
54  if (e.getStatus() == CUDA_ERROR_DEINITIALIZED) {
55  // TODO(adb / asuhan): Verify cuModuleUnload removes the context
56  return;
57  }
58  LOG(ERROR) << "CUDA Error: " << e.what();
59  } catch (const std::runtime_error& e) {
60  LOG(ERROR) << "CUDA Error: " << e.what();
61  }
62 }
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:170
#define LOG(tag)
Definition: Logger.h:182
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:168
void synchronizeDevices() const
Definition: CudaMgr.cpp:64

Member Function Documentation

◆ allocateDeviceMem()

int8_t * CudaMgr_Namespace::CudaMgr::allocateDeviceMem ( const size_t  num_bytes,
const int  device_num 
)

Definition at line 204 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBufferMgr::addSlab(), and TEST().

204  {
205  setContext(device_num);
206  CUdeviceptr device_ptr;
207  checkError(cuMemAlloc(&device_ptr, num_bytes));
208  return reinterpret_cast<int8_t*>(device_ptr);
209 }
unsigned long long CUdeviceptr
Definition: nocuda.h:27
void setContext(const int device_num) const
Definition: CudaMgr.cpp:298
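
A hedged usage sketch of the allocate/free pairing (the helper function, buffer size, and device number are illustrative assumptions). The returned pointer refers to memory on the GPU given by device_num, which is relative to start_gpu_, and should eventually be released with freeDeviceMem().

#include <CudaMgr.h>

void device_buffer_example(CudaMgr_Namespace::CudaMgr& cuda_mgr) {
  const size_t num_bytes = size_t(1) << 20;  // 1 MiB, illustrative
  const int device_num = 0;                  // relative to start_gpu_

  int8_t* dev_buf = cuda_mgr.allocateDeviceMem(num_bytes, device_num);
  cuda_mgr.zeroDeviceMem(dev_buf, num_bytes, device_num);  // optional: clear the slab

  // ... hand dev_buf to kernels or to the copy routines documented below ...

  cuda_mgr.freeDeviceMem(dev_buf);
}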

◆ allocatePinnedHostMem()

int8_t * CudaMgr_Namespace::CudaMgr::allocatePinnedHostMem ( const size_t  num_bytes)

Definition at line 197 of file CudaMgr.cpp.

References setContext().

197  {
198  setContext(0);
199  void* host_ptr;
200  checkError(cuMemHostAlloc(&host_ptr, num_bytes, CU_MEMHOSTALLOC_PORTABLE));
201  return reinterpret_cast<int8_t*>(host_ptr);
202 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:298
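
A hedged sketch of using pinned host memory as a staging buffer (the helper function and size are assumptions). The allocation is page-locked and portable (CU_MEMHOSTALLOC_PORTABLE), which typically speeds up host/device transfers; release it with freePinnedHostMem().

#include <CudaMgr.h>

void pinned_staging_example(CudaMgr_Namespace::CudaMgr& cuda_mgr) {
  const size_t num_bytes = 4096;

  int8_t* staging = cuda_mgr.allocatePinnedHostMem(num_bytes);

  // ... fill `staging` on the host, then move it with copyHostToDevice(...) ...

  cuda_mgr.freePinnedHostMem(staging);
}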

◆ copyDeviceToDevice()

void CudaMgr_Namespace::CudaMgr::copyDeviceToDevice ( int8_t *  dest_ptr,
int8_t *  src_ptr,
const size_t  num_bytes,
const int  dest_device_num,
const int  src_device_num 
)

Definition at line 89 of file CudaMgr.cpp.

References CHECK, device_cleanup_mutex_, device_contexts_, device_count_, device_properties_, logger::ERROR, gpu_driver_version_, LOG, max_shared_memory_for_all_, setContext(), and start_gpu_.

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().

93  {
94  // dest_device_num and src_device_num are the device numbers relative to start_gpu_
95  // (real_device_num - start_gpu_)
96  if (src_device_num == dest_device_num) {
97  setContext(src_device_num);
98  checkError(cuMemcpy(reinterpret_cast<CUdeviceptr>(dest_ptr),
99  reinterpret_cast<CUdeviceptr>(src_ptr),
100  num_bytes));
101  } else {
102  checkError(cuMemcpyPeer(reinterpret_cast<CUdeviceptr>(dest_ptr),
103  device_contexts_[dest_device_num],
104  reinterpret_cast<CUdeviceptr>(src_ptr),
105  device_contexts_[src_device_num],
106  num_bytes)); // will we always have peer?
107  }
108 }
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:168
void setContext(const int device_num) const
Definition: CudaMgr.cpp:298
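
A hedged two-GPU sketch (the helper function, buffer size, and device numbers are assumptions). When the source and destination device numbers match, the copy is a plain cuMemcpy; otherwise it goes through cuMemcpyPeer using the two device contexts.

#include <CudaMgr.h>

void peer_copy_example(CudaMgr_Namespace::CudaMgr& cuda_mgr) {
  const size_t num_bytes = size_t(1) << 20;

  // Device numbers are relative to start_gpu_ (real_device_num - start_gpu_).
  int8_t* src_buf = cuda_mgr.allocateDeviceMem(num_bytes, /*device_num=*/0);
  int8_t* dest_buf = cuda_mgr.allocateDeviceMem(num_bytes, /*device_num=*/1);

  cuda_mgr.copyDeviceToDevice(
      dest_buf, src_buf, num_bytes, /*dest_device_num=*/1, /*src_device_num=*/0);

  cuda_mgr.freeDeviceMem(src_buf);
  cuda_mgr.freeDeviceMem(dest_buf);
}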

◆ copyDeviceToHost()

void CudaMgr_Namespace::CudaMgr::copyDeviceToHost ( int8_t *  host_ptr,
const int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 80 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), TEST(), and Buffer_Namespace::CpuBuffer::writeData().

83  {
84  setContext(device_num);
85  checkError(
86  cuMemcpyDtoH(host_ptr, reinterpret_cast<const CUdeviceptr>(device_ptr), num_bytes));
87 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:298

◆ copyHostToDevice()

void CudaMgr_Namespace::CudaMgr::copyHostToDevice ( int8_t *  device_ptr,
const int8_t *  host_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 71 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::CpuBuffer::readData(), TEST(), and Buffer_Namespace::GpuCudaBuffer::writeData().

74  {
75  setContext(device_num);
76  checkError(
77  cuMemcpyHtoD(reinterpret_cast<CUdeviceptr>(device_ptr), host_ptr, num_bytes));
78 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:298
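
A hedged round-trip sketch exercising copyHostToDevice() together with copyDeviceToHost() from the previous section (the helper function, buffer size, and fill value are assumptions):

#include <CudaMgr.h>

#include <vector>

void round_trip_example(CudaMgr_Namespace::CudaMgr& cuda_mgr) {
  const size_t num_bytes = 256;
  const int device_num = 0;  // relative to start_gpu_

  std::vector<int8_t> host_in(num_bytes, 42);
  std::vector<int8_t> host_out(num_bytes, 0);

  int8_t* dev_buf = cuda_mgr.allocateDeviceMem(num_bytes, device_num);
  cuda_mgr.copyHostToDevice(dev_buf, host_in.data(), num_bytes, device_num);
  cuda_mgr.copyDeviceToHost(host_out.data(), dev_buf, num_bytes, device_num);
  cuda_mgr.freeDeviceMem(dev_buf);
  // host_out now holds the same 256 bytes that were staged in host_in.
}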

◆ freeDeviceMem()

void CudaMgr_Namespace::CudaMgr::freeDeviceMem ( int8_t *  device_ptr)

Definition at line 215 of file CudaMgr.cpp.

References device_cleanup_mutex_.

Referenced by Buffer_Namespace::GpuCudaBufferMgr::freeAllMem(), and TEST().

215  {
216  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
217 
218  checkError(cuMemFree(reinterpret_cast<CUdeviceptr>(device_ptr)));
219 }
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:170

◆ freePinnedHostMem()

void CudaMgr_Namespace::CudaMgr::freePinnedHostMem ( int8_t *  host_ptr)

Definition at line 211 of file CudaMgr.cpp.

211  {
212  checkError(cuMemFreeHost(reinterpret_cast<void*>(host_ptr)));
213 }

◆ getAllDeviceProperties()

const std::vector<DeviceProperties>& CudaMgr_Namespace::CudaMgr::getAllDeviceProperties ( ) const
inline

Definition at line 110 of file CudaMgr.h.

110  {
111  return device_properties_;
112  }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:167

◆ getDeviceCount()

int CudaMgr_Namespace::CudaMgr::getDeviceCount ( ) const
inline

Definition at line 81 of file CudaMgr.h.

Referenced by CodeGenerator::generateNativeGPUCode(), and get_available_gpus().

81 { return device_count_; }

◆ getDeviceProperties()

const DeviceProperties* CudaMgr_Namespace::CudaMgr::getDeviceProperties ( const size_t  device_num) const
inline

Definition at line 113 of file CudaMgr.h.

References to_string().

113  {
114  // device_num is the device number relative to start_gpu_ (real_device_num -
115  // start_gpu_)
116  if (device_num < device_properties_.size()) {
117  return &device_properties_[device_num];
118  }
119  throw std::runtime_error("Specified device number " + std::to_string(device_num) +
120  " is out of range of number of devices (" +
121  std::to_string(device_properties_.size()) + ")");
122  }
std::string to_string(char const *&&v)
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:167
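
A hedged inspection sketch (the helper function is an assumption; only the computeMajor field is used because it is the one DeviceProperties member visible on this page). An out-of-range device number throws std::runtime_error rather than returning nullptr.

#include <CudaMgr.h>

#include <iostream>
#include <stdexcept>

void list_devices_example(CudaMgr_Namespace::CudaMgr& cuda_mgr) {
  for (int d = 0; d < cuda_mgr.getDeviceCount(); ++d) {
    const auto* props = cuda_mgr.getDeviceProperties(static_cast<size_t>(d));
    std::cout << "device " << d << " compute major: " << props->computeMajor << '\n';
  }

  try {
    cuda_mgr.getDeviceProperties(
        static_cast<size_t>(cuda_mgr.getDeviceCount()));  // one past the end
  } catch (const std::runtime_error& e) {
    std::cerr << e.what() << '\n';
  }
}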

◆ getMaxSharedMemoryForAll()

size_t CudaMgr_Namespace::CudaMgr::getMaxSharedMemoryForAll ( ) const
inline

Definition at line 108 of file CudaMgr.h.

108 { return max_shared_memory_for_all_; }
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:166

◆ getStartGpu()

int CudaMgr_Namespace::CudaMgr::getStartGpu ( ) const
inline

Definition at line 107 of file CudaMgr.h.

107 { return start_gpu_; }

◆ isArchMaxwell()

bool CudaMgr_Namespace::CudaMgr::isArchMaxwell ( ) const
inline

Definition at line 123 of file CudaMgr.h.

123  {
124  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 5);
125  }
int getDeviceCount() const
Definition: CudaMgr.h:81
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:167

◆ isArchMaxwellOrLater()

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLater ( ) const
inline

Definition at line 126 of file CudaMgr.h.

126  {
127  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 5);
128  }
int getDeviceCount() const
Definition: CudaMgr.h:81
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:167

◆ isArchMaxwellOrLaterForAll()

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll ( ) const

Returns true if all devices have the Maxwell micro-architecture or later. Returns false if any device has a compute capability below 5.0.

Definition at line 239 of file CudaMgr.cpp.

References device_count_, and device_properties_.

239  {
240  for (int i = 0; i < device_count_; i++) {
241  if (device_properties_[i].computeMajor < 5) {
242  return false;
243  }
244  }
245  return true;
246 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:167

◆ isArchPascal()

bool CudaMgr_Namespace::CudaMgr::isArchPascal ( ) const
inline

Definition at line 129 of file CudaMgr.h.

129  {
130  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 6);
131  }
int getDeviceCount() const
Definition: CudaMgr.h:81
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:167

◆ isArchPascalOrLater()

bool CudaMgr_Namespace::CudaMgr::isArchPascalOrLater ( ) const
inline

Definition at line 132 of file CudaMgr.h.

132  {
133  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 6);
134  }
int getDeviceCount() const
Definition: CudaMgr.h:81
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:167

◆ isArchVoltaForAll()

bool CudaMgr_Namespace::CudaMgr::isArchVoltaForAll ( ) const

Returns true if all devices have the Volta micro-architecture. Returns false if any non-Volta device is present.

Definition at line 252 of file CudaMgr.cpp.

References CHECK_EQ, anonymous_namespace{ImportTest.cpp}::d(), device_contexts_, device_count_, device_properties_, logger::ERROR, and LOG.

252  {
253  for (int i = 0; i < device_count_; i++) {
254  if (device_properties_[i].computeMajor != 7) {
255  return false;
256  }
257  }
258  return true;
259 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:167
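
A hedged sketch of gating a GPU code path on device architecture (the branch contents are assumptions). The ...ForAll() variants are the conservative checks to use when a single generated code path must run on every managed device.

#include <CudaMgr.h>

void pick_code_path_example(CudaMgr_Namespace::CudaMgr& cuda_mgr) {
  if (cuda_mgr.isArchMaxwellOrLaterForAll()) {
    // Every device has compute major >= 5: a shared-memory based path is safe
    // everywhere; size it against the shared-memory budget common to all devices.
    const size_t shared_mem_budget = cuda_mgr.getMaxSharedMemoryForAll();
    (void)shared_mem_budget;  // placeholder: real code would size kernels with this
  } else {
    // ... fall back to a path that also works on pre-Maxwell hardware ...
  }

  if (cuda_mgr.isArchVoltaForAll()) {
    // ... enable specializations that require compute major == 7 on all devices ...
  }
}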

◆ setContext()

void CudaMgr_Namespace::CudaMgr::setContext ( const int  device_num) const

Definition at line 298 of file CudaMgr.cpp.

References CHECK_LT, anonymous_namespace{ImportTest.cpp}::d(), device_contexts_, device_count_, device_properties_, logger::INFO, LOG, and VLOG.

Referenced by allocateDeviceMem(), allocatePinnedHostMem(), copyDeviceToDevice(), copyDeviceToHost(), copyHostToDevice(), setDeviceMem(), and synchronizeDevices().

298  {
299  // deviceNum is the device number relative to startGpu (realDeviceNum - startGpu_)
300  CHECK_LT(device_num, device_count_);
301  cuCtxSetCurrent(device_contexts_[device_num]);
302 }
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:168
#define CHECK_LT(x, y)
Definition: Logger.h:197
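
A hedged sketch of mixing CudaMgr with raw CUDA driver API calls (the helper function and the cuMemGetInfo query are assumptions about typical use): make the per-device context current first, then issue driver calls against that device.

#include <CudaMgr.h>

#include <cuda.h>

void query_free_memory_example(CudaMgr_Namespace::CudaMgr& cuda_mgr) {
  const int device_num = 0;         // relative to start_gpu_
  cuda_mgr.setContext(device_num);  // cuCtxSetCurrent on that device's context

  size_t free_bytes = 0, total_bytes = 0;
  cuMemGetInfo(&free_bytes, &total_bytes);  // now reports the selected device
}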

◆ setDeviceMem()

void CudaMgr_Namespace::CudaMgr::setDeviceMem ( int8_t *  device_ptr,
const unsigned char  uc,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 227 of file CudaMgr.cpp.

References setContext().

Referenced by zeroDeviceMem().

230  {
231  setContext(device_num);
232  checkError(cuMemsetD8(reinterpret_cast<CUdeviceptr>(device_ptr), uc, num_bytes));
233 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:298

◆ synchronizeDevices()

void CudaMgr_Namespace::CudaMgr::synchronizeDevices ( ) const

Definition at line 64 of file CudaMgr.cpp.

References anonymous_namespace{ImportTest.cpp}::d(), device_count_, and setContext().

Referenced by ~CudaMgr(), and Buffer_Namespace::GpuCudaBufferMgr::~GpuCudaBufferMgr().

64  {
65  for (int d = 0; d < device_count_; ++d) {
66  setContext(d);
67  checkError(cuCtxSynchronize());
68  }
69 }
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
void setContext(const int device_num) const
Definition: CudaMgr.cpp:298
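
A brief hedged fragment (cuda_mgr is an assumed, already-constructed manager): drain outstanding work on every managed device, e.g. before freeing buffers or taking timings, as the destructor and GpuCudaBufferMgr teardown do.

// Blocks until every managed device has finished its pending work
// (cuCtxSynchronize is issued once per device context).
cuda_mgr.synchronizeDevices();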

◆ zeroDeviceMem()

void CudaMgr_Namespace::CudaMgr::zeroDeviceMem ( int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 221 of file CudaMgr.cpp.

References setDeviceMem().

Referenced by ResultSet::ResultSet().

223  {
224  setDeviceMem(device_ptr, 0, num_bytes, device_num);
225 }
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:227
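
A hedged sketch pairing zeroDeviceMem() with setDeviceMem() (the helper function, size, and sentinel byte are assumptions); zeroDeviceMem() is simply setDeviceMem() with a fill byte of 0.

#include <CudaMgr.h>

void fill_and_zero_example(CudaMgr_Namespace::CudaMgr& cuda_mgr) {
  const size_t num_bytes = 4096;
  const int device_num = 0;  // relative to start_gpu_

  int8_t* dev_buf = cuda_mgr.allocateDeviceMem(num_bytes, device_num);
  cuda_mgr.setDeviceMem(dev_buf, 0xFF, num_bytes, device_num);  // every byte -> 0xFF
  cuda_mgr.zeroDeviceMem(dev_buf, num_bytes, device_num);       // back to all zeros
  cuda_mgr.freeDeviceMem(dev_buf);
}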

Member Data Documentation

◆ device_cleanup_mutex_

std::mutex CudaMgr_Namespace::CudaMgr::device_cleanup_mutex_
mutable private

Definition at line 170 of file CudaMgr.h.

Referenced by copyDeviceToDevice(), freeDeviceMem(), and ~CudaMgr().

◆ device_contexts_

std::vector<CUcontext> CudaMgr_Namespace::CudaMgr::device_contexts_
private

Definition at line 168 of file CudaMgr.h.

Referenced by copyDeviceToDevice(), isArchVoltaForAll(), setContext(), and ~CudaMgr().

◆ device_count_

int CudaMgr_Namespace::CudaMgr::device_count_
private

◆ device_properties_

std::vector<DeviceProperties> CudaMgr_Namespace::CudaMgr::device_properties_
private

◆ gpu_driver_version_

int CudaMgr_Namespace::CudaMgr::gpu_driver_version_
private

Definition at line 164 of file CudaMgr.h.

Referenced by copyDeviceToDevice().

◆ max_shared_memory_for_all_

size_t CudaMgr_Namespace::CudaMgr::max_shared_memory_for_all_
private

Definition at line 166 of file CudaMgr.h.

Referenced by copyDeviceToDevice().

◆ start_gpu_

int CudaMgr_Namespace::CudaMgr::start_gpu_
private

Definition at line 165 of file CudaMgr.h.

Referenced by copyDeviceToDevice(), and CudaMgr().


The documentation for this class was generated from the following files:

CudaMgr.h
CudaMgr.cpp