OmniSciDB  29e35f4d58
CudaMgr_Namespace::CudaMgr Class Reference

#include <CudaMgr.h>

Public Member Functions

 CudaMgr (const int num_gpus, const int start_gpu=0)
 
 ~CudaMgr ()
 
void synchronizeDevices () const
 
int getDeviceCount () const
 
int getStartGpu () const
 
const omnisci::DeviceGroup & getDeviceGroup () const
 
void copyHostToDevice (int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num)
 
void copyDeviceToHost (int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num)
 
void copyDeviceToDevice (int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num)
 
int8_t * allocatePinnedHostMem (const size_t num_bytes)
 
int8_t * allocateDeviceMem (const size_t num_bytes, const int device_num)
 
void freePinnedHostMem (int8_t *host_ptr)
 
void freeDeviceMem (int8_t *device_ptr)
 
void zeroDeviceMem (int8_t *device_ptr, const size_t num_bytes, const int device_num)
 
void setDeviceMem (int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
 
size_t getMaxSharedMemoryForAll () const
 
const std::vector< DeviceProperties > & getAllDeviceProperties () const
 
const DeviceProperties * getDeviceProperties (const size_t device_num) const
 
bool isArchMaxwell () const
 
bool isArchMaxwellOrLater () const
 
bool isArchPascal () const
 
bool isArchPascalOrLater () const
 
bool isArchMaxwellOrLaterForAll () const
 
bool isArchVoltaForAll () const
 
void setContext (const int device_num) const
 

Private Attributes

int device_count_
 
int gpu_driver_version_
 
int start_gpu_
 
size_t max_shared_memory_for_all_
 
std::vector< DeviceProperties > device_properties_
 
omnisci::DeviceGroup device_group_
 
std::vector< CUcontext > device_contexts_
 
std::mutex device_cleanup_mutex_
 

Detailed Description

Definition at line 82 of file CudaMgr.h.

Constructor & Destructor Documentation

◆ CudaMgr()

CudaMgr_Namespace::CudaMgr::CudaMgr ( const int  num_gpus,
const int  start_gpu = 0 
)

Definition at line 35 of file CudaMgr.cpp.

References CHECK_EQ, CHECK_LE, device_count_, device_group_, device_properties_, and start_gpu_.

36  : start_gpu_(start_gpu), max_shared_memory_for_all_(0) {
37  checkError(cuInit(0));
38  checkError(cuDeviceGetCount(&device_count_));
39 
40  if (num_gpus > 0) { // numGpus <= 0 will just use number of gpus found
41  CHECK_LE(num_gpus + start_gpu_, device_count_);
42  device_count_ = std::min(device_count_, num_gpus);
43  } else {
44  // if we are using all gpus we cannot start on a gpu other than 0
45  CHECK_EQ(start_gpu_, 0);
46  }
47  fillDeviceProperties();
48  initDeviceGroup();
49  createDeviceContexts();
50  printDeviceProperties();
51 }
#define CHECK_EQ(x, y)
Definition: Logger.h:201
#define CHECK_LE(x, y)
Definition: Logger.h:204
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:175

◆ ~CudaMgr()

CudaMgr_Namespace::CudaMgr::~CudaMgr ( )

Definition at line 60 of file CudaMgr.cpp.

References device_cleanup_mutex_, device_contexts_, device_count_, logger::ERROR, LOG, and synchronizeDevices().

60  {
61  try {
62  // We don't want to remove the cudaMgr before all other processes have cleaned up.
63  // This should be enforced by the lifetime policies, but take this lock to be safe.
64  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
65 
66  synchronizeDevices();
67  for (int d = 0; d < device_count_; ++d) {
68  checkError(cuCtxDestroy(device_contexts_[d]));
69  }
70  } catch (const CudaErrorException& e) {
71  if (e.getStatus() == CUDA_ERROR_DEINITIALIZED) {
72  // TODO(adb / asuhan): Verify cuModuleUnload removes the context
73  return;
74  }
75  LOG(ERROR) << "CUDA Error: " << e.what();
76  } catch (const std::runtime_error& e) {
77  LOG(ERROR) << "CUDA Error: " << e.what();
78  }
79 }
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:180
#define LOG(tag)
Definition: Logger.h:188
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:178
void synchronizeDevices() const
Definition: CudaMgr.cpp:81
+ Here is the call graph for this function:

Member Function Documentation

◆ allocateDeviceMem()

int8_t * CudaMgr_Namespace::CudaMgr::allocateDeviceMem ( const size_t  num_bytes,
const int  device_num 
)

Definition at line 224 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBufferMgr::addSlab().

224  {
225  setContext(device_num);
226  CUdeviceptr device_ptr;
227  checkError(cuMemAlloc(&device_ptr, num_bytes));
228  return reinterpret_cast<int8_t*>(device_ptr);
229 }
unsigned long long CUdeviceptr
Definition: nocuda.h:27
void setContext(const int device_num) const
Definition: CudaMgr.cpp:322
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ allocatePinnedHostMem()

int8_t * CudaMgr_Namespace::CudaMgr::allocatePinnedHostMem ( const size_t  num_bytes)

Definition at line 217 of file CudaMgr.cpp.

References setContext().

217  {
218  setContext(0);
219  void* host_ptr;
220  checkError(cuMemHostAlloc(&host_ptr, num_bytes, CU_MEMHOSTALLOC_PORTABLE));
221  return reinterpret_cast<int8_t*>(host_ptr);
222 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:322
+ Here is the call graph for this function:

◆ copyDeviceToDevice()

void CudaMgr_Namespace::CudaMgr::copyDeviceToDevice ( int8_t *  dest_ptr,
int8_t *  src_ptr,
const size_t  num_bytes,
const int  dest_device_num,
const int  src_device_num 
)

Definition at line 106 of file CudaMgr.cpp.

References CHECK, device_cleanup_mutex_, device_contexts_, device_count_, device_properties_, logger::ERROR, gpu_driver_version_, LOG, max_shared_memory_for_all_, setContext(), and start_gpu_.

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().

110  {
111  // dest_device_num and src_device_num are the device numbers relative to start_gpu_
112  // (real_device_num - start_gpu_)
113  if (src_device_num == dest_device_num) {
114  setContext(src_device_num);
115  checkError(cuMemcpy(reinterpret_cast<CUdeviceptr>(dest_ptr),
116  reinterpret_cast<CUdeviceptr>(src_ptr),
117  num_bytes));
118  } else {
119  checkError(cuMemcpyPeer(reinterpret_cast<CUdeviceptr>(dest_ptr),
120  device_contexts_[dest_device_num],
121  reinterpret_cast<CUdeviceptr>(src_ptr),
122  device_contexts_[src_device_num],
123  num_bytes)); // will we always have peer?
124  }
125 }
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:178
void setContext(const int device_num) const
Definition: CudaMgr.cpp:322
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ copyDeviceToHost()

void CudaMgr_Namespace::CudaMgr::copyDeviceToHost ( int8_t *  host_ptr,
const int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 97 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::CpuBuffer::writeData().

100  {
101  setContext(device_num);
102  checkError(
103  cuMemcpyDtoH(host_ptr, reinterpret_cast<const CUdeviceptr>(device_ptr), num_bytes));
104 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:322
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ copyHostToDevice()

void CudaMgr_Namespace::CudaMgr::copyHostToDevice ( int8_t *  device_ptr,
const int8_t *  host_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 88 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::CpuBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().

91  {
92  setContext(device_num);
93  checkError(
94  cuMemcpyHtoD(reinterpret_cast<CUdeviceptr>(device_ptr), host_ptr, num_bytes));
95 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:322
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ freeDeviceMem()

void CudaMgr_Namespace::CudaMgr::freeDeviceMem ( int8_t *  device_ptr)

Definition at line 235 of file CudaMgr.cpp.

References device_cleanup_mutex_.

Referenced by Buffer_Namespace::GpuCudaBufferMgr::freeAllMem().

235  {
236  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
237 
238  checkError(cuMemFree(reinterpret_cast<CUdeviceptr>(device_ptr)));
239 }
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:180
+ Here is the caller graph for this function:

◆ freePinnedHostMem()

void CudaMgr_Namespace::CudaMgr::freePinnedHostMem ( int8_t *  host_ptr)

Definition at line 231 of file CudaMgr.cpp.

231  {
232  checkError(cuMemFreeHost(reinterpret_cast<void*>(host_ptr)));
233 }

◆ getAllDeviceProperties()

const std::vector<DeviceProperties>& CudaMgr_Namespace::CudaMgr::getAllDeviceProperties ( ) const
inline

Definition at line 118 of file CudaMgr.h.

118  {
119  return device_properties_;
120  }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:176

◆ getDeviceCount()

int CudaMgr_Namespace::CudaMgr::getDeviceCount ( ) const
inline

Definition at line 88 of file CudaMgr.h.

Referenced by CodeGenerator::generateNativeGPUCode(), and get_available_gpus().

88 { return device_count_; }
+ Here is the caller graph for this function:

◆ getDeviceGroup()

const omnisci::DeviceGroup& CudaMgr_Namespace::CudaMgr::getDeviceGroup ( ) const
inline

Definition at line 90 of file CudaMgr.h.

90 { return device_group_; }
omnisci::DeviceGroup device_group_
Definition: CudaMgr.h:177

◆ getDeviceProperties()

const DeviceProperties* CudaMgr_Namespace::CudaMgr::getDeviceProperties ( const size_t  device_num) const
inline

Definition at line 121 of file CudaMgr.h.

References to_string().

121  {
122  // device_num is the device number relative to start_gpu_ (real_device_num -
123  // start_gpu_)
124  if (device_num < device_properties_.size()) {
125  return &device_properties_[device_num];
126  }
127  throw std::runtime_error("Specified device number " + std::to_string(device_num) +
128  " is out of range of number of devices (" +
129  std::to_string(device_properties_.size()) + ")");
130  }
std::string to_string(char const *&&v)
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:176
+ Here is the call graph for this function:

◆ getMaxSharedMemoryForAll()

size_t CudaMgr_Namespace::CudaMgr::getMaxSharedMemoryForAll ( ) const
inline

Definition at line 116 of file CudaMgr.h.

116 { return max_shared_memory_for_all_; }
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:175

◆ getStartGpu()

int CudaMgr_Namespace::CudaMgr::getStartGpu ( ) const
inline

Definition at line 89 of file CudaMgr.h.

89 { return start_gpu_; }

◆ isArchMaxwell()

bool CudaMgr_Namespace::CudaMgr::isArchMaxwell ( ) const
inline

Definition at line 131 of file CudaMgr.h.

131  {
132  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 5);
133  }
int getDeviceCount() const
Definition: CudaMgr.h:88
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:176

◆ isArchMaxwellOrLater()

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLater ( ) const
inline

Definition at line 134 of file CudaMgr.h.

134  {
135  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 5);
136  }
int getDeviceCount() const
Definition: CudaMgr.h:88
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:176

◆ isArchMaxwellOrLaterForAll()

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll ( ) const

Returns true if all devices have the Maxwell micro-architecture or later. Returns false if there is any device with a compute capability of < 5.0.

Definition at line 259 of file CudaMgr.cpp.

References device_count_, and device_properties_.

259  {
260  for (int i = 0; i < device_count_; i++) {
261  if (device_properties_[i].computeMajor < 5) {
262  return false;
263  }
264  }
265  return true;
266 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:176

◆ isArchPascal()

bool CudaMgr_Namespace::CudaMgr::isArchPascal ( ) const
inline

Definition at line 137 of file CudaMgr.h.

137  {
138  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 6);
139  }
int getDeviceCount() const
Definition: CudaMgr.h:88
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:176

◆ isArchPascalOrLater()

bool CudaMgr_Namespace::CudaMgr::isArchPascalOrLater ( ) const
inline

Definition at line 140 of file CudaMgr.h.

140  {
141  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 6);
142  }
int getDeviceCount() const
Definition: CudaMgr.h:88
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:176

◆ isArchVoltaForAll()

bool CudaMgr_Namespace::CudaMgr::isArchVoltaForAll ( ) const

Returns true if all devices have the Volta micro-architecture. Returns false if there is any non-Volta device available.

Definition at line 272 of file CudaMgr.cpp.

References CHECK_EQ, device_contexts_, device_count_, device_properties_, logger::ERROR, CudaMgr_Namespace::errorMessage(), and LOG.

272  {
273  for (int i = 0; i < device_count_; i++) {
274  if (device_properties_[i].computeMajor != 7) {
275  return false;
276  }
277  }
278  return true;
279 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:176
+ Here is the call graph for this function:

◆ setContext()

void CudaMgr_Namespace::CudaMgr::setContext ( const int  device_num) const

Definition at line 322 of file CudaMgr.cpp.

References CHECK_LT, device_contexts_, device_count_, device_properties_, logger::INFO, LOG, and VLOG.

Referenced by allocateDeviceMem(), allocatePinnedHostMem(), copyDeviceToDevice(), copyDeviceToHost(), copyHostToDevice(), setDeviceMem(), and synchronizeDevices().

322  {
323  // deviceNum is the device number relative to startGpu (realDeviceNum - startGpu_)
324  CHECK_LT(device_num, device_count_);
325  cuCtxSetCurrent(device_contexts_[device_num]);
326 }
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:178
#define CHECK_LT(x, y)
Definition: Logger.h:203
+ Here is the caller graph for this function:

◆ setDeviceMem()

void CudaMgr_Namespace::CudaMgr::setDeviceMem ( int8_t *  device_ptr,
const unsigned char  uc,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 247 of file CudaMgr.cpp.

References setContext().

Referenced by zeroDeviceMem().

250  {
251  setContext(device_num);
252  checkError(cuMemsetD8(reinterpret_cast<CUdeviceptr>(device_ptr), uc, num_bytes));
253 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:322
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ synchronizeDevices()

void CudaMgr_Namespace::CudaMgr::synchronizeDevices ( ) const

Definition at line 81 of file CudaMgr.cpp.

References device_count_, and setContext().

Referenced by ~CudaMgr(), and Buffer_Namespace::GpuCudaBufferMgr::~GpuCudaBufferMgr().

81  {
82  for (int d = 0; d < device_count_; ++d) {
83  setContext(d);
84  checkError(cuCtxSynchronize());
85  }
86 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:322
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ zeroDeviceMem()

void CudaMgr_Namespace::CudaMgr::zeroDeviceMem ( int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 241 of file CudaMgr.cpp.

References setDeviceMem().

Referenced by ResultSet::ResultSet().

243  {
244  setDeviceMem(device_ptr, 0, num_bytes, device_num);
245 }
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:247
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Member Data Documentation

◆ device_cleanup_mutex_

std::mutex CudaMgr_Namespace::CudaMgr::device_cleanup_mutex_
mutableprivate

Definition at line 180 of file CudaMgr.h.

Referenced by copyDeviceToDevice(), freeDeviceMem(), and ~CudaMgr().

◆ device_contexts_

std::vector<CUcontext> CudaMgr_Namespace::CudaMgr::device_contexts_
private

Definition at line 178 of file CudaMgr.h.

Referenced by copyDeviceToDevice(), isArchVoltaForAll(), setContext(), and ~CudaMgr().

◆ device_count_

int CudaMgr_Namespace::CudaMgr::device_count_
private

◆ device_group_

omnisci::DeviceGroup CudaMgr_Namespace::CudaMgr::device_group_
private

Definition at line 177 of file CudaMgr.h.

Referenced by CudaMgr().

◆ device_properties_

std::vector<DeviceProperties> CudaMgr_Namespace::CudaMgr::device_properties_
private

◆ gpu_driver_version_

int CudaMgr_Namespace::CudaMgr::gpu_driver_version_
private

Definition at line 173 of file CudaMgr.h.

Referenced by copyDeviceToDevice().

◆ max_shared_memory_for_all_

size_t CudaMgr_Namespace::CudaMgr::max_shared_memory_for_all_
private

Definition at line 175 of file CudaMgr.h.

Referenced by copyDeviceToDevice().

◆ start_gpu_

int CudaMgr_Namespace::CudaMgr::start_gpu_
private

Definition at line 174 of file CudaMgr.h.

Referenced by copyDeviceToDevice(), and CudaMgr().


The documentation for this class was generated from the following files: