OmniSciDB  0fdbebe030
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CudaMgr_Namespace::CudaMgr Class Reference

#include <CudaMgr.h>

Public Member Functions

 CudaMgr (const int num_gpus, const int start_gpu=0)
 
 ~CudaMgr ()
 
void synchronizeDevices () const
 
int getDeviceCount () const
 
int getStartGpu () const
 
const omnisci::DeviceGroup & getDeviceGroup () const
 
void copyHostToDevice (int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num)
 
void copyDeviceToHost (int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num)
 
void copyDeviceToDevice (int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num)
 
int8_t * allocatePinnedHostMem (const size_t num_bytes)
 
int8_t * allocateDeviceMem (const size_t num_bytes, const int device_num)
 
void freePinnedHostMem (int8_t *host_ptr)
 
void freeDeviceMem (int8_t *device_ptr)
 
void zeroDeviceMem (int8_t *device_ptr, const size_t num_bytes, const int device_num)
 
void setDeviceMem (int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
 
size_t getMaxSharedMemoryForAll () const
 
const std::vector< DeviceProperties > & getAllDeviceProperties () const
 
const DeviceProperties * getDeviceProperties (const size_t device_num) const
 
bool isArchMaxwell () const
 
bool isArchMaxwellOrLater () const
 
bool isArchPascal () const
 
bool isArchPascalOrLater () const
 
bool isArchMaxwellOrLaterForAll () const
 
bool isArchVoltaForAll () const
 
void setContext (const int device_num) const
 

Private Attributes

int device_count_
 
int start_gpu_
 
size_t max_shared_memory_for_all_
 
std::vector< DeviceProperties > device_properties_
 
omnisci::DeviceGroup device_group_
 
std::vector< CUcontext > device_contexts_
 
std::mutex device_cleanup_mutex_
 

Detailed Description

Definition at line 82 of file CudaMgr.h.

Constructor & Destructor Documentation

CudaMgr_Namespace::CudaMgr::CudaMgr ( const int  num_gpus,
const int  start_gpu = 0 
)

Definition at line 36 of file CudaMgr.cpp.

References CHECK_EQ, CHECK_LE, device_count_, and start_gpu_.

37  : start_gpu_(start_gpu), max_shared_memory_for_all_(0) {
38  checkError(cuInit(0));
39  checkError(cuDeviceGetCount(&device_count_));
40 
41  if (num_gpus > 0) { // numGpus <= 0 will just use number of gpus found
42  CHECK_LE(num_gpus + start_gpu_, device_count_);
43  device_count_ = std::min(device_count_, num_gpus);
44  } else {
45  // if we are using all gpus we cannot start on a gpu other than 0
46  CHECK_EQ(start_gpu_, 0);
47  }
48  fillDeviceProperties();
49  initDeviceGroup();
50  createDeviceContexts();
51  printDeviceProperties();
52 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define CHECK_LE(x, y)
Definition: Logger.h:208
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:183
CudaMgr_Namespace::CudaMgr::~CudaMgr ( )

Definition at line 61 of file CudaMgr.cpp.

References device_cleanup_mutex_, device_contexts_, device_count_, logger::ERROR, LOG, and synchronizeDevices().

61  {
62  try {
63  // We don't want to remove the cudaMgr before all other processes have cleaned up.
64  // This should be enforced by the lifetime policies, but take this lock to be safe.
65  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
66 
67  synchronizeDevices();
68  for (int d = 0; d < device_count_; ++d) {
69  checkError(cuCtxDestroy(device_contexts_[d]));
70  }
71  } catch (const CudaErrorException& e) {
72  if (e.getStatus() == CUDA_ERROR_DEINITIALIZED) {
73  // TODO(adb / asuhan): Verify cuModuleUnload removes the context
74  return;
75  }
76  LOG(ERROR) << "CUDA Error: " << e.what();
77  } catch (const std::runtime_error& e) {
78  LOG(ERROR) << "CUDA Error: " << e.what();
79  }
80 }
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:188
#define LOG(tag)
Definition: Logger.h:188
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:186
void synchronizeDevices() const
Definition: CudaMgr.cpp:82

+ Here is the call graph for this function:

Member Function Documentation

int8_t * CudaMgr_Namespace::CudaMgr::allocateDeviceMem ( const size_t  num_bytes,
const int  device_num 
)

Definition at line 231 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBufferMgr::addSlab().

231  {
232  setContext(device_num);
233  CUdeviceptr device_ptr;
234  checkError(cuMemAlloc(&device_ptr, num_bytes));
235  return reinterpret_cast<int8_t*>(device_ptr);
236 }
unsigned long long CUdeviceptr
Definition: nocuda.h:27
void setContext(const int device_num) const
Definition: CudaMgr.cpp:329

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int8_t * CudaMgr_Namespace::CudaMgr::allocatePinnedHostMem ( const size_t  num_bytes)

Definition at line 224 of file CudaMgr.cpp.

References setContext().

224  {
225  setContext(0);
226  void* host_ptr;
227  checkError(cuMemHostAlloc(&host_ptr, num_bytes, CU_MEMHOSTALLOC_PORTABLE));
228  return reinterpret_cast<int8_t*>(host_ptr);
229 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:329

+ Here is the call graph for this function:

void CudaMgr_Namespace::CudaMgr::copyDeviceToDevice ( int8_t *  dest_ptr,
int8_t *  src_ptr,
const size_t  num_bytes,
const int  dest_device_num,
const int  src_device_num 
)

Definition at line 107 of file CudaMgr.cpp.

References device_contexts_, and setContext().

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().

111  {
112  // dest_device_num and src_device_num are the device numbers relative to start_gpu_
113  // (real_device_num - start_gpu_)
114  if (src_device_num == dest_device_num) {
115  setContext(src_device_num);
116  checkError(cuMemcpy(reinterpret_cast<CUdeviceptr>(dest_ptr),
117  reinterpret_cast<CUdeviceptr>(src_ptr),
118  num_bytes));
119  } else {
120  checkError(cuMemcpyPeer(reinterpret_cast<CUdeviceptr>(dest_ptr),
121  device_contexts_[dest_device_num],
122  reinterpret_cast<CUdeviceptr>(src_ptr),
123  device_contexts_[src_device_num],
124  num_bytes)); // will we always have peer?
125  }
126 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:329
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:186

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::copyDeviceToHost ( int8_t *  host_ptr,
const int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 98 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::CpuBuffer::writeData().

101  {
102  setContext(device_num);
103  checkError(
104  cuMemcpyDtoH(host_ptr, reinterpret_cast<const CUdeviceptr>(device_ptr), num_bytes));
105 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:329

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::copyHostToDevice ( int8_t *  device_ptr,
const int8_t *  host_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 89 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::CpuBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().

92  {
93  setContext(device_num);
94  checkError(
95  cuMemcpyHtoD(reinterpret_cast<CUdeviceptr>(device_ptr), host_ptr, num_bytes));
96 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:329

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::freeDeviceMem ( int8_t *  device_ptr)

Definition at line 242 of file CudaMgr.cpp.

References device_cleanup_mutex_.

Referenced by Buffer_Namespace::GpuCudaBufferMgr::freeAllMem().

242  {
243  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
244 
245  checkError(cuMemFree(reinterpret_cast<CUdeviceptr>(device_ptr)));
246 }
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:188

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::freePinnedHostMem ( int8_t *  host_ptr)

Definition at line 238 of file CudaMgr.cpp.

238  {
239  checkError(cuMemFreeHost(reinterpret_cast<void*>(host_ptr)));
240 }
const std::vector<DeviceProperties>& CudaMgr_Namespace::CudaMgr::getAllDeviceProperties ( ) const
inline

Definition at line 118 of file CudaMgr.h.

References device_properties_.

118  {
119  return device_properties_;
120  }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:184
int CudaMgr_Namespace::CudaMgr::getDeviceCount ( ) const
inline

Definition at line 88 of file CudaMgr.h.

References device_count_.

Referenced by get_available_gpus(), isArchMaxwell(), isArchMaxwellOrLater(), isArchPascal(), and isArchPascalOrLater().

88 { return device_count_; }

+ Here is the caller graph for this function:

const omnisci::DeviceGroup& CudaMgr_Namespace::CudaMgr::getDeviceGroup ( ) const
inline

Definition at line 90 of file CudaMgr.h.

References device_group_.

90 { return device_group_; }
omnisci::DeviceGroup device_group_
Definition: CudaMgr.h:185
const DeviceProperties* CudaMgr_Namespace::CudaMgr::getDeviceProperties ( const size_t  device_num) const
inline

Definition at line 121 of file CudaMgr.h.

References device_properties_, and to_string().

121  {
122  // device_num is the device number relative to start_gpu_ (real_device_num -
123  // start_gpu_)
124  if (device_num < device_properties_.size()) {
125  return &device_properties_[device_num];
126  }
127  throw std::runtime_error("Specified device number " + std::to_string(device_num) +
128  " is out of range of number of devices (" +
129  std::to_string(device_properties_.size()) + ")");
130  }
std::string to_string(char const *&&v)
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:184

+ Here is the call graph for this function:

size_t CudaMgr_Namespace::CudaMgr::getMaxSharedMemoryForAll ( ) const
inline

Definition at line 116 of file CudaMgr.h.

References max_shared_memory_for_all_.

116 { return max_shared_memory_for_all_; }
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:183
int CudaMgr_Namespace::CudaMgr::getStartGpu ( ) const
inline

Definition at line 89 of file CudaMgr.h.

References start_gpu_.

89 { return start_gpu_; }
bool CudaMgr_Namespace::CudaMgr::isArchMaxwell ( ) const
inline

Definition at line 131 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

131  {
132  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 5);
133  }
int getDeviceCount() const
Definition: CudaMgr.h:88
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:184

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLater ( ) const
inline

Definition at line 134 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

134  {
135  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 5);
136  }
int getDeviceCount() const
Definition: CudaMgr.h:88
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:184

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll ( ) const

Returns true if all devices have the Maxwell micro-architecture or later. Returns false if any device has a compute capability below 5.0.

Definition at line 266 of file CudaMgr.cpp.

References device_count_, and device_properties_.

266  {
267  for (int i = 0; i < device_count_; i++) {
268  if (device_properties_[i].computeMajor < 5) {
269  return false;
270  }
271  }
272  return true;
273 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:184
bool CudaMgr_Namespace::CudaMgr::isArchPascal ( ) const
inline

Definition at line 137 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

137  {
138  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 6);
139  }
int getDeviceCount() const
Definition: CudaMgr.h:88
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:184

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchPascalOrLater ( ) const
inline

Definition at line 140 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

140  {
141  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 6);
142  }
int getDeviceCount() const
Definition: CudaMgr.h:88
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:184

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchVoltaForAll ( ) const

Returns true if all devices have the Volta micro-architecture. Returns false if any non-Volta device is available.

Definition at line 279 of file CudaMgr.cpp.

References device_count_, and device_properties_.

279  {
280  for (int i = 0; i < device_count_; i++) {
281  if (device_properties_[i].computeMajor != 7) {
282  return false;
283  }
284  }
285  return true;
286 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:184
void CudaMgr_Namespace::CudaMgr::setContext ( const int  device_num) const

Definition at line 329 of file CudaMgr.cpp.

References CHECK_LT, and device_contexts_.

Referenced by allocateDeviceMem(), allocatePinnedHostMem(), copyDeviceToDevice(), copyDeviceToHost(), copyHostToDevice(), setDeviceMem(), and synchronizeDevices().

329  {
330  // deviceNum is the device number relative to startGpu (realDeviceNum - startGpu_)
331  CHECK_LT(device_num, device_count_);
332  cuCtxSetCurrent(device_contexts_[device_num]);
333 }
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:186
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::setDeviceMem ( int8_t *  device_ptr,
const unsigned char  uc,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 254 of file CudaMgr.cpp.

References setContext().

Referenced by zeroDeviceMem().

257  {
258  setContext(device_num);
259  checkError(cuMemsetD8(reinterpret_cast<CUdeviceptr>(device_ptr), uc, num_bytes));
260 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:329

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::synchronizeDevices ( ) const

Definition at line 82 of file CudaMgr.cpp.

References device_count_, and setContext().

Referenced by ~CudaMgr(), and Buffer_Namespace::GpuCudaBufferMgr::~GpuCudaBufferMgr().

82  {
83  for (int d = 0; d < device_count_; ++d) {
84  setContext(d);
85  checkError(cuCtxSynchronize());
86  }
87 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:329

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::zeroDeviceMem ( int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num 
)

Definition at line 248 of file CudaMgr.cpp.

References setDeviceMem().

250  {
251  setDeviceMem(device_ptr, 0, num_bytes, device_num);
252 }
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:254

+ Here is the call graph for this function:

Member Data Documentation

std::mutex CudaMgr_Namespace::CudaMgr::device_cleanup_mutex_
mutable private

Definition at line 188 of file CudaMgr.h.

Referenced by freeDeviceMem(), and ~CudaMgr().

std::vector<CUcontext> CudaMgr_Namespace::CudaMgr::device_contexts_
private

Definition at line 186 of file CudaMgr.h.

Referenced by copyDeviceToDevice(), setContext(), and ~CudaMgr().

int CudaMgr_Namespace::CudaMgr::device_count_
private
omnisci::DeviceGroup CudaMgr_Namespace::CudaMgr::device_group_
private

Definition at line 185 of file CudaMgr.h.

Referenced by getDeviceGroup().

std::vector<DeviceProperties> CudaMgr_Namespace::CudaMgr::device_properties_
private
size_t CudaMgr_Namespace::CudaMgr::max_shared_memory_for_all_
private

Definition at line 183 of file CudaMgr.h.

Referenced by getMaxSharedMemoryForAll().

int CudaMgr_Namespace::CudaMgr::start_gpu_
private

Definition at line 182 of file CudaMgr.h.

Referenced by CudaMgr(), and getStartGpu().


The documentation for this class was generated from the following files: